cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
memory_pool.hpp
Go to the documentation of this file.
1 
7 #pragma once
8 #ifndef CUDA_API_WRAPPERS_MEMORY_POOL_HPP_
9 #define CUDA_API_WRAPPERS_MEMORY_POOL_HPP_
10 
11 #if CUDA_VERSION >= 11020
12 
13 #include "memory.hpp"
14 
15 namespace cuda {
16 
17 namespace memory {
18 
20 class pool_t;
22 
23 namespace pool {
24 
25 using handle_t = cudaMemPool_t;
26 
27 namespace detail_ {
28 
35 inline CUmemLocation create_mem_location(cuda::device::id_t device_id) noexcept
36 {
37  CUmemLocation result;
38  result.id = device_id;
39  result.type = CU_MEM_LOCATION_TYPE_DEVICE;
40  return result;
41 }
42 
43 #if CUDA_VERSION >= 11020
44 template<pool::shared_handle_kind_t SharedHandleKind = pool::shared_handle_kind_t::no_export>
45 #else
46 template<pool::shared_handle_kind_t SharedHandleKind>
47 #endif
48 CUmemPoolProps create_raw_properties(cuda::device::id_t device_id) noexcept
49 {
50  CUmemPoolProps result;
51 
52  // We set the pool properties structure to 0, since it seems the CUDA driver
53  // isn't too fond of arbitrary values, e.g. in the reserved fields
54  ::std::memset(&result, 0, sizeof(CUmemPoolProps));
55 
56  result.location = create_mem_location(device_id);
57  result.allocType = CU_MEM_ALLOCATION_TYPE_PINNED;
58  result.handleTypes = static_cast<CUmemAllocationHandleType>(SharedHandleKind);
59  result.win32SecurityAttributes = nullptr; // TODO: What about the case of win32_handle ?
60  return result;
61 }
62 
63 inline ::std::string identify(pool::handle_t handle)
64 {
65  return "memory pool at " + cuda::detail_::ptr_as_hex(handle);
66 }
67 
68 inline ::std::string identify(pool::handle_t handle, cuda::device::id_t device_id)
69 {
70  return identify(handle) + " on " + cuda::device::detail_::identify(device_id);
71 }
72 
73 ::std::string identify(const pool_t &pool);
74 
75 inline status_t destroy_nothrow(handle_t handle) noexcept
76 {
77  return cuMemPoolDestroy(handle);
78 }
79 
80 inline void destroy(handle_t handle)
81 {
82  auto status = destroy_nothrow(handle);
83  throw_if_error_lazy(status, "Failed destroying " + identify(handle));
84 }
85 
86 
87 } // namespace detail_
88 
89 using attribute_t = CUmemPool_attribute;
90 
namespace detail_ {

// Trait mapping each supported memory-pool attribute enum value to the C++
// type in which the wrappers expose that attribute's value. Unspecialized
// base is empty, so unsupported attributes fail to compile.
template <attribute_t attribute> struct attribute_value {};

template <> struct attribute_value<CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES> { using type = bool; };
template <> struct attribute_value<CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC> { using type = bool; };
template <> struct attribute_value<CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES> { using type = bool; };
template <> struct attribute_value<CU_MEMPOOL_ATTR_RELEASE_THRESHOLD> { using type = size_t; };
#if CUDA_VERSION >= 11030
// Memory-footprint statistics attributes were only introduced in CUDA 11.3
template <> struct attribute_value<CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT> { using type = size_t; };
template <> struct attribute_value<CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH> { using type = size_t; };
template <> struct attribute_value<CU_MEMPOOL_ATTR_USED_MEM_CURRENT> { using type = size_t; };
template <> struct attribute_value<CU_MEMPOOL_ATTR_USED_MEM_HIGH> { using type = size_t; };
#endif

// Trait mapping the wrapper-facing ("outer") attribute value type to the raw
// type the driver API call actually reads/writes: int for bool, cuuint64_t
// for size_t, anything else passed through as-is
template <typename T> struct attribute_value_inner_type { using type = T; };
template <> struct attribute_value_inner_type<bool> { using type = int; };
template <> struct attribute_value_inner_type<size_t> { using type = cuuint64_t; };

template <typename T>
using attribute_value_inner_type_t = typename attribute_value_inner_type<T>::type;

} // namespace detail_
114 
115 
116 template <attribute_t attribute>
117 using attribute_value_t = typename detail_::attribute_value<attribute>::type;
118 
119 namespace detail_ {
120 
// Pairs the status of an attribute-getting API call with the obtained value,
// allowing non-throwing retrieval (the value is meaningful only on success)
template<attribute_t attribute>
struct status_and_attribute_value {
	status_t status;
	attribute_value_t<attribute> value;
};
126 
127 template<attribute_t attribute>
128 status_and_attribute_value<attribute> get_attribute_with_status(handle_t pool_handle)
129 {
130  using outer_type = attribute_value_t <attribute>;
131  using inner_type = attribute_value_inner_type_t<outer_type>;
132  inner_type attribute_value;
133  auto status = cuMemPoolGetAttribute(pool_handle, attribute, &attribute_value);
134  return { status, static_cast<outer_type>(attribute_value) };
135 }
136 
137 template<attribute_t attribute>
138 attribute_value_t<attribute> get_attribute(handle_t pool_handle)
139 {
140  auto status_and_attribute_value = get_attribute_with_status<attribute>(pool_handle);
141  throw_if_error_lazy(status_and_attribute_value.status,
142  "Obtaining attribute " + ::std::to_string(static_cast<int>(attribute))
143  + " of " + detail_::identify(pool_handle));
144  return status_and_attribute_value.value;
145 }
146 
147 template<attribute_t attribute>
148 void set_attribute(handle_t pool_handle, attribute_value_t<attribute> value)
149 {
150  using outer_type = attribute_value_t <attribute>;
151  using inner_type = typename attribute_value_inner_type<outer_type>::type;
152  inner_type value_ = static_cast<inner_type>(value);
153  auto status = cuMemPoolSetAttribute(pool_handle, attribute, &value_);
154  throw_if_error_lazy(status, "Setting attribute " + ::std::to_string(static_cast<int>(attribute))
155  + " of " + detail_::identify(pool_handle));
156 }
157 
158 } // namespace detail_
159 
169 pool_t wrap(cuda::device::id_t device_id, pool::handle_t handle, bool owning) noexcept;
170 
171 } // namespace pool
172 
173 
174 namespace detail_ {
175 
176 inline permissions_t get_permissions(cuda::device::id_t device_id, pool::handle_t pool_handle)
177 {
178  CUmemAccess_flags access_flags;
179  auto mem_location = pool::detail_::create_mem_location(device_id);
180  auto status = cuMemPoolGetAccess(&access_flags, pool_handle, &mem_location);
181  throw_if_error_lazy(status,
182  "Determining access information for " + cuda::device::detail_::identify(device_id)
183  + " to " + pool::detail_::identify(pool_handle));
184  return permissions::detail_::from_flags(access_flags);
185 }
186 
187 inline void set_permissions(span<cuda::device::id_t> device_ids, pool::handle_t pool_handle, permissions_t permissions)
188 {
189  if (permissions.write and not permissions.read) {
190  throw ::std::invalid_argument("Memory pool access get_permissions cannot be write-only");
191  }
192 
193  CUmemAccess_flags flags = permissions.read ?
194  (permissions.write ? CU_MEM_ACCESS_FLAGS_PROT_READWRITE : CU_MEM_ACCESS_FLAGS_PROT_READ) :
195  CU_MEM_ACCESS_FLAGS_PROT_NONE;
196 
197  ::std::vector<CUmemAccessDesc> descriptors;
198  descriptors.reserve(device_ids.size());
199  // TODO: This could use a zip iterator
200  for(auto device_id : device_ids) {
201  CUmemAccessDesc desc;
202  desc.flags = flags;
203  desc.location = pool::detail_::create_mem_location(device_id);
204  descriptors.push_back(desc);
205  }
206 
207  auto status = cuMemPoolSetAccess(pool_handle, descriptors.data(), descriptors.size());
208  throw_if_error_lazy(status,
209  "Setting access get_permissions for " + ::std::to_string(descriptors.size())
210  + " devices to " + pool::detail_::identify(pool_handle));
211 }
212 
213 inline void set_permissions(cuda::device::id_t device_id, pool::handle_t pool_handle, permissions_t permissions)
214 {
215  if (permissions.write and not permissions.read) {
216  throw ::std::invalid_argument("Memory pool access get_permissions cannot be write-only");
217  }
218 
219  CUmemAccessDesc desc;
220  desc.flags = permissions.read ?
221  (permissions.write ?
222  CU_MEM_ACCESS_FLAGS_PROT_READWRITE :
223  CU_MEM_ACCESS_FLAGS_PROT_READ) :
224  CU_MEM_ACCESS_FLAGS_PROT_NONE;
225 
226  desc.location = pool::detail_::create_mem_location(device_id);
227  auto status = cuMemPoolSetAccess(pool_handle, &desc, 1);
228  throw_if_error_lazy(status,
229  "Setting access get_permissions for " + cuda::device::detail_::identify(device_id)
230  + " to " + pool::detail_::identify(pool_handle));
231 }
232 
233 } // namespace detail_
234 
235 permissions_t get_permissions(const cuda::device_t& device, const pool_t& pool);
236 void set_permissions(const cuda::device_t& device, const pool_t& pool, permissions_t permissions);
237 template <typename DeviceRange>
238 void get_permissions(DeviceRange devices, const pool_t& pool_handle, permissions_t permissions);
239 
240 namespace pool {
241 
/// A pool's policy regarding when allocated-then-freed memory may be re-used
/// for new allocations (corresponds to the CU_MEMPOOL_ATTR_REUSE_* attributes;
/// see pool_t::reuse_policy() / pool_t::set_reuse_policy())
struct reuse_policy_t {
	// Allow re-use of memory whose free is ordered before the new allocation
	// via event dependencies (CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES)
	bool when_dependent_on_free;

	// Allow re-use of memory whose free has no formal ordering with the new
	// allocation, but has in fact already completed
	// (CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC)
	bool independent_but_actually_freed;

	// Allow the allocation to depend on (i.e. wait for) frees which have not
	// yet completed (CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES)
	bool allow_waiting_for_frees;
};
262 
263 namespace ipc {
264 
265 class imported_ptr_t;
266 
267 } // namespace ipc
268 
269 } // namespace pool
270 
/// A proxy class for a CUDA stream-ordered memory pool on a single device.
/// When constructed as "owning", the wrapped driver-level pool is destroyed
/// along with this object; move-construction transfers that ownership.
class pool_t {

public:
	/// Allocate memory from this pool, ordered after previously-enqueued work
	/// on the given stream
	region_t allocate(const stream_t& stream, size_t num_bytes) const;

	/// Import a pool allocation which another process exported for IPC
	pool::ipc::imported_ptr_t import(const memory::pool::ipc::ptr_handle_t& exported_handle) const;

	/// Release the pool's held (reserved-but-unused) memory back to the OS,
	/// keeping at most min_bytes_to_keep reserved
	void trim(size_t min_bytes_to_keep) const
	{
		auto status = cuMemPoolTrimTo(handle_, min_bytes_to_keep);
		throw_if_error_lazy(status, "Attempting to trim " + pool::detail_::identify(*this)
			+ " down to " + ::std::to_string(min_bytes_to_keep));
	}

	/// Obtain the value of one of this pool's attributes; throws on failure
	template<pool::attribute_t attribute>
	pool::attribute_value_t<attribute> get_attribute() const
	{
		auto attribute_with_status = pool::detail_::get_attribute_with_status<attribute>(handle_);
		throw_if_error_lazy(attribute_with_status.status, "Failed obtaining attribute "
			+ ::std::to_string(static_cast<int>(attribute)) + " of " + pool::detail_::identify(*this));
		return attribute_with_status.value;
	}

	/// Set the value of one of this pool's attributes; throws on failure
	template<pool::attribute_t attribute>
	void set_attribute(const pool::attribute_value_t<attribute>& value) const
	{
		// The driver call takes the attribute's raw ("inner") type,
		// e.g. int for bool-valued attributes
		using outer_type = pool::attribute_value_t <attribute>;
		using inner_type = typename pool::detail_::attribute_value_inner_type<outer_type>::type;
		auto inner_value = static_cast<inner_type>(value);
		auto status = cuMemPoolSetAttribute(handle_, attribute, &inner_value);
		throw_if_error_lazy(status, "Failed setting attribute " + ::std::to_string(static_cast<int>(attribute))
			+ " of " + pool::detail_::identify(*this));
	}

	/// The amount of reserved memory (in bytes) the pool will hold on to,
	/// rather than try to release back to the OS
	size_t release_threshold() const
	{
		return static_cast<size_t>(get_attribute<CU_MEMPOOL_ATTR_RELEASE_THRESHOLD>());
	}

	/// @copydoc release_threshold() - setter counterpart
	void set_release_threshold(size_t threshold) const
	{
		set_attribute<CU_MEMPOOL_ATTR_RELEASE_THRESHOLD>(threshold);
	}

	/// The access permissions a given device has to this pool's allocations
	permissions_t permissions(const cuda::device_t& device)
	{
		return memory::get_permissions(device, *this);
	}

	/// Set the access permissions of a single device to this pool's allocations
	void set_permissions(const cuda::device_t& device, permissions_t permissions)
	{
		return memory::set_permissions(device, *this, permissions);
	}

	/// Set the access permissions of a range of devices to this pool's allocations
	template <typename DeviceRange>
	void set_permissions(DeviceRange devices, permissions_t permissions)
	{
		return memory::set_permissions(devices, *this, permissions);
	}

public: // non-field getters

	/// This pool's policy on when freed memory may be reused for new allocations
	/// (gathered from the three CU_MEMPOOL_ATTR_REUSE_* attributes)
	pool::reuse_policy_t reuse_policy() const
	{
		return {
			get_attribute<CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES>(),
			get_attribute<CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC>(),
			get_attribute<CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES>()
		};
	}

	/// Set this pool's memory-reuse policy (as three separate attributes)
	void set_reuse_policy(pool::reuse_policy_t reuse_policy) const
	{
		set_attribute<CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES>(reuse_policy.when_dependent_on_free);
		set_attribute<CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC>(reuse_policy.independent_but_actually_freed);
		set_attribute<CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES>(reuse_policy.allow_waiting_for_frees);
	}

#if CUDA_VERSION >= 11030
	/// The amount of memory (in bytes) currently reserved by the pool
	/// (attribute only available from CUDA 11.3 onwards)
	size_t backing_memory_size() const {
		return get_attribute<CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT>();
	}
#endif

public: // field getters
	/// The raw driver handle of the wrapped pool
	pool::handle_t handle() const noexcept { return handle_; }
	/// The numeric ID of the device on which this pool's memory is allocated
	cuda::device::id_t device_id() const noexcept { return device_id_; }
	/// A proxy for the device on which this pool's memory is allocated
	cuda::device_t device() const noexcept;
	/// True if this proxy is responsible for destroying the driver-level pool
	bool is_owning() const noexcept { return owning_; }


public: // construction & destruction
	friend pool_t pool::wrap(cuda::device::id_t device_id, pool::handle_t handle, bool owning) noexcept;

	pool_t(const pool_t& other) = delete;

	// Move-construction transfers ownership: the moved-from object is left
	// non-owning so its destructor will not destroy the pool
	pool_t(pool_t&& other) noexcept : pool_t(other.device_id_, other.handle_, other.owning_)
	{
		other.owning_ = false;
	}

	~pool_t() DESTRUCTOR_EXCEPTION_SPEC
	{
		if (not owning_) { return; }
#ifdef THROW_IN_DESTRUCTORS
		pool::detail_::destroy(handle_);
#else
		// Destructors may not throw; failure to destroy is silently ignored
		memory::pool::detail_::destroy_nothrow(handle_);
#endif
	}

protected: // constructors
	// Construction of an "arbitrary" pool proxy is reserved to pool::wrap()
	pool_t(cuda::device::id_t device_id, pool::handle_t handle, bool owning) noexcept
	: device_id_(device_id), handle_(handle), owning_(owning)
	{ }

protected: // data members
	cuda::device::id_t device_id_;
	pool::handle_t handle_;
	bool owning_;
}; // class pool_t
440 
441 inline bool operator==(const pool_t& lhs, const pool_t& rhs)
442 {
443  // Note: Not comparing the ownership status
444  return lhs.device_id() == rhs.device_id() and lhs.handle() == rhs.handle();
445 }
446 
447 inline bool operator!=(const pool_t& lhs, const pool_t& rhs)
448 {
449  return not (lhs == rhs);
450 }
451 
452 namespace pool {
453 
454 inline pool_t wrap(cuda::device::id_t device_id, pool::handle_t handle, bool owning) noexcept
455 {
456  return { device_id, handle, owning };
457 }
458 
459 namespace detail_ {
460 
461 template<shared_handle_kind_t SharedHandleKind = shared_handle_kind_t::no_export>
462 pool_t create(cuda::device::id_t device_id)
463 {
464  auto props = create_raw_properties<SharedHandleKind>(device_id);
465  handle_t handle;
466  auto status = cuMemPoolCreate(&handle, &props);
467  throw_if_error_lazy(status, "Failed creating a memory pool on device " + cuda::device::detail_::identify(device_id));
468  constexpr const bool is_owning { true };
469  return wrap(device_id, handle, is_owning);
470 }
471 
472 inline ::std::string identify(const pool_t& pool)
473 {
474  return identify(pool.handle(), pool.device_id());
475 }
476 
477 } // namespace detail_
478 
479 template<shared_handle_kind_t SharedHandleKind>
480 pool_t create(const cuda::device_t& device);
481 
482 } // namespace pool
483 
484 } // namespace memory
485 
486 } // namespace cuda
487 
488 #endif // CUDA_VERSION >= 11020
489 
490 #endif // CUDA_API_WRAPPERS_MEMORY_POOL_HPP_
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
detail_::region_helper< memory::region_t > region_t
A child class of the generic region_t with some managed-memory-specific functionality.
Definition: memory.hpp:1974
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:852
::std::size_t size_t
A size type for use throughout the wrappers library (except when specific API functions limit the siz...
Definition: types.hpp:78
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we've failed - which also ensures no string is constructed unle...
Definition: error.hpp:327
bool operator==(const context_t &lhs, const context_t &rhs) noexcept
Definition: context.hpp:768
CUarray handle_t
Raw CUDA driver handle for arrays (of any dimension)
Definition: array.hpp:34
array_t< T, NumDimensions > wrap(device::id_t device_id, context::handle_t context_handle, handle_t handle, dimensions_t< NumDimensions > dimensions) noexcept
Wrap an existing CUDA array in an array_t instance.
Definition: array.hpp:271
detail_::all_devices devices()
Definition: devices.hpp:224
Wrapper class for a CUDA device.
Definition: device.hpp:135
CUresult status_t
Indicates either the result (success or error index) of a CUDA Runtime or Driver API call...
Definition: types.hpp:74