cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
ipc.hpp
Go to the documentation of this file.
1 
23 #pragma once
24 #ifndef CUDA_API_WRAPPERS_IPC_HPP_
25 #define CUDA_API_WRAPPERS_IPC_HPP_
26 
27 #include "context.hpp"
28 #include "types.hpp"
29 #include "error.hpp"
30 
31 #include <string>
32 
33 namespace cuda {
34 
36 class device_t;
37 class event_t;
39 
40 namespace memory {
41 
42 class pool_t;
43 
44 namespace ipc {
45 
50 using ptr_handle_t = CUipcMemHandle;
51 
52 class imported_ptr_t;
53 imported_ptr_t wrap(void * ptr, bool owning) noexcept;
54 
55 namespace detail_ {
56 
67 inline void* import(const ptr_handle_t& handle)
68 {
69  CUdeviceptr device_ptr;
70  auto status = cuIpcOpenMemHandle(&device_ptr, handle, CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS);
71  throw_if_error_lazy(status, "Failed obtaining a device pointer from an IPC memory handle");
72  return memory::as_pointer(device_ptr);
73 }
74 
80 inline void unmap(void* ipc_mapped_ptr)
81 {
82  auto status = cuIpcCloseMemHandle(device::address(ipc_mapped_ptr));
83  throw_if_error_lazy(status, "Failed unmapping IPC memory mapped to " + cuda::detail_::ptr_as_hex(ipc_mapped_ptr));
84 }
85 
86 } // namespace detail_
87 
100 inline ptr_handle_t export_(void* device_ptr)
101 {
102  ptr_handle_t handle;
103  auto status = cuIpcGetMemHandle(&handle, device::address(device_ptr));
104  throw_if_error_lazy(status, "Failed producing an IPC memory handle for device pointer "
105  + cuda::detail_::ptr_as_hex(device_ptr));
106  return handle;
107 }
108 
118 protected: // constructors & destructor
119  imported_ptr_t(void* ptr, bool owning) : ptr_(ptr), owning_(owning)
120  {
121  if (ptr_ == nullptr) {
122  throw ::std::logic_error("IPC memory handle yielded a null pointer");
123  }
124  }
125 
126 public: // constructors & destructors
127  friend imported_ptr_t wrap(void * ptr, bool owning) noexcept;
128 
129  ~imported_ptr_t() noexcept(false)
130  {
131  if (owning_) { detail_::unmap(ptr_); }
132  }
133 
134 public: // operators
135 
136  imported_ptr_t(const imported_ptr_t& other) = delete;
137  imported_ptr_t& operator=(const imported_ptr_t& other) = delete;
138  imported_ptr_t& operator=(imported_ptr_t&& other) noexcept
139  {
140  ::std::swap(ptr_, other.ptr_);
141  ::std::swap(owning_, other.owning_);
142  return *this;
143  }
144  imported_ptr_t(imported_ptr_t&& other) noexcept = default;
145 
146 public: // getters
147 
149  template <typename T = void>
150  T* get() const noexcept
151  {
152  // If you're wondering why this cast is necessary - some IDEs/compilers
153  // have the notion that if the method is const, `ptr_` is a const void* within it
154  return static_cast<T*>(const_cast<void*>(ptr_));
155  }
156 
158  bool is_owning() const noexcept { return owning_; }
159 
160 protected: // data members
161  void* ptr_;
162  bool owning_;
163 }; // class imported_ptr_t
164 
166 inline imported_ptr_t wrap(void * ptr, bool owning) noexcept
167 {
168  return imported_ptr_t(ptr, owning);
169 }
170 
172 inline imported_ptr_t import(const ptr_handle_t& ptr_handle)
173 {
174  auto raw_ptr = detail_::import(ptr_handle);
175  return wrap(raw_ptr, do_take_ownership);
176 }
177 
178 } // namespace ipc
179 
180 #if CUDA_VERSION >= 11020
181 namespace pool {
182 
183 namespace ipc {
184 
185 using handle_t = void *;
186 
187 template <shared_handle_kind_t Kind>
188 shared_handle_t<Kind> export_(const pool_t& pool);
189 
190 namespace detail_ {
191 
192 template <shared_handle_kind_t Kind>
193 pool::handle_t import(const shared_handle_t<Kind>& shared_pool_handle)
194 {
195  memory::pool::handle_t result;
196  static constexpr const unsigned long long flags { 0 };
197  void * ptr_to_handle = static_cast<void*>(const_cast<shared_handle_t<Kind>*>(&shared_pool_handle));
198  auto status = cuMemPoolImportFromShareableHandle(
199  &result, ptr_to_handle, static_cast<CUmemAllocationHandleType>(Kind), flags);
200  throw_if_error_lazy(status, "Importing an IPC-shared memory pool handle");
201  return result;
202 }
203 
204 } // namespace detail_
205 
206 template <shared_handle_kind_t Kind>
207 pool_t import(const device_t& device, const shared_handle_t<Kind>& shared_pool_handle);
208 
209 inline ptr_handle_t export_ptr(void* pool_allocated) {
210  ptr_handle_t handle;
211  auto status = cuMemPoolExportPointer(&handle, device::address(pool_allocated));
212  throw_if_error_lazy(status,
213  "Failed producing an IPC handle for memory-pool-allocated pointer "
214  + cuda::detail_::ptr_as_hex(pool_allocated));
215  return handle;
216 }
217 
218 namespace detail_ {
219 
220 inline void* import_ptr(const pool::handle_t pool_handle, const ptr_handle_t& handle)
221 {
222  CUdeviceptr imported;
223  auto status = cuMemPoolImportPointer(&imported, pool_handle, const_cast<ptr_handle_t*>(&handle));
224  throw_if_error_lazy(status, "Failed importing an IPC-exported a pool-allocated pointer");
225  return as_pointer(imported);
226 }
227 
228 } // namespace detail_
229 
241 class imported_ptr_t;
242 
243 imported_ptr_t import_ptr(const pool_t& shared_pool, const ptr_handle_t& ptr_handle);
244 imported_ptr_t import_ptr(const pool_t& shared_pool, const ptr_handle_t& ptr_handle, const stream_t& freeing_stream);
245 
246 } // namespace ipc
247 
248 } // namespace pool
249 #endif // CUDA_VERSION >= 11020
250 
251 } // namespace memory
252 
253 namespace event {
254 namespace ipc {
255 
260 using handle_t = CUipcEventHandle;
261 
262 namespace detail_ {
263 
264 inline handle_t export_(event::handle_t event_handle)
265 {
266  handle_t ipc_handle;
267  auto status = cuIpcGetEventHandle(&ipc_handle, event_handle);
268  throw_if_error_lazy(status, "Failed obtaining an IPC event handle for " +
269  event::detail_::identify(event_handle));
270  return ipc_handle;
271 }
272 
273 inline event::handle_t import(const handle_t& handle)
274 {
275  event::handle_t event_handle;
276  auto status = cuIpcOpenEventHandle(&event_handle, handle);
277  throw_if_error_lazy(status, "Failed obtaining an event handle from an IPC event handle");
278  return event_handle;
279 }
280 
281 } // namespace detail_
282 
291 inline handle_t export_(const event_t& event);
292 
303 
308 inline event_t import(const device_t& device, const handle_t& event_ipc_handle);
309 
315 inline event_t import(const context_t& context, const handle_t& event_ipc_handle);
317 
318 } // namespace ipc
319 } // namespace event
320 } // namespace cuda
321 
322 #endif // CUDA_API_WRAPPERS_IPC_HPP_
Proxy class for a CUDA stream.
Definition: stream.hpp:246
Wrapper class for a CUDA context.
Definition: context.hpp:244
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
handle_t export_(const event_t &event)
Enable use of an event which this process created by other processes.
Definition: event.hpp:71
event_t import(const context_t &context, const handle_t &event_ipc_handle)
Definition: event.hpp:76
Wrapper class for a CUDA event.
Definition: event.hpp:133
Contains a proxy class for CUDA execution contexts.
CUevent handle_t
The CUDA driver's raw handle for events.
Definition: types.hpp:217
CUipcEventHandle handle_t
The concrete value passed between processes, used to tell the CUDA Runtime API which event is desired...
Definition: ipc.hpp:260
CUipcMemHandle ptr_handle_t
The concrete value passed between processes, used to tell the CUDA Runtime API which memory area is d...
Definition: ipc.hpp:50
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we've failed - which also ensures no string is constructed unle...
Definition: error.hpp:316
bool is_owning() const noexcept
Definition: ipc.hpp:158
A smart-pointer-like class for memory obtained via inter-process communication.
Definition: ipc.hpp:117
Facilities for exception-based handling of Runtime and Driver API errors, including a basic exception...
address_t address(const void *device_ptr) noexcept
Definition: types.hpp:682
void * as_pointer(device::address_t address) noexcept
Definition: types.hpp:700
Wrapper class for a CUDA device.
Definition: device.hpp:135
Fundamental CUDA-related type definitions.