cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
memory_pool.hpp
Go to the documentation of this file.
1 
7 #pragma once
8 #ifndef CUDA_API_WRAPPERS_MEMORY_POOL_HPP_
9 #define CUDA_API_WRAPPERS_MEMORY_POOL_HPP_
10 
11 #if CUDA_VERSION >= 11020
12 
13 #include "memory.hpp"
14 
15 namespace cuda {
16 
17 namespace memory {
18 
20 class pool_t;
22 
23 namespace pool {
24 
25 using handle_t = cudaMemPool_t;
26 
27 namespace detail_ {
28 
35 inline CUmemLocation create_mem_location(cuda::device::id_t device_id) noexcept
36 {
37  CUmemLocation result;
38  result.id = device_id;
39  result.type = CU_MEM_LOCATION_TYPE_DEVICE;
40  return result;
41 }
42 
43 #if CUDA_VERSION >= 11020
44 template<pool::shared_handle_kind_t SharedHandleKind = pool::shared_handle_kind_t::no_export>
45 #else
46 template<pool::shared_handle_kind_t SharedHandleKind>
47 #endif
48 CUmemPoolProps create_raw_properties(cuda::device::id_t device_id) noexcept
49 {
50  CUmemPoolProps result;
51 
52  // We set the pool properties structure to 0, since it seems the CUDA driver
53  // isn't too fond of arbitrary values, e.g. in the reserved fields
54  ::std::memset(&result, 0, sizeof(CUmemPoolProps));
55 
56  result.location = create_mem_location(device_id);
57  result.allocType = CU_MEM_ALLOCATION_TYPE_PINNED;
58  result.handleTypes = static_cast<CUmemAllocationHandleType>(SharedHandleKind);
59  result.win32SecurityAttributes = nullptr; // TODO: What about the case of win32_handle ?
60  return result;
61 }
62 
63 inline ::std::string identify(pool::handle_t handle)
64 {
65  return "memory pool at " + cuda::detail_::ptr_as_hex(handle);
66 }
67 
68 inline ::std::string identify(pool::handle_t handle, cuda::device::id_t device_id)
69 {
70  return identify(handle) + " on " + cuda::device::detail_::identify(device_id);
71 }
72 
73 ::std::string identify(const pool_t &pool);
74 
75 
76 
77 } // namespace detail_
78 
79 using attribute_t = CUmemPool_attribute;
80 
81 namespace detail_ {
82 
// Trait mapping each supported memory-pool attribute enumerator to the
// C++-level type in which these wrappers express its value.
template <attribute_t attribute> struct attribute_value {};

// The reuse-policy attributes are on/off switches
template <> struct attribute_value<CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES> { using type = bool; };
template <> struct attribute_value<CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC> { using type = bool; };
template <> struct attribute_value<CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES> { using type = bool; };
// Memory-amount attributes are expressed as sizes
template <> struct attribute_value<CU_MEMPOOL_ATTR_RELEASE_THRESHOLD> { using type = size_t; };
#if CUDA_VERSION >= 11030
template <> struct attribute_value<CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT> { using type = size_t; };
template <> struct attribute_value<CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH> { using type = size_t; };
template <> struct attribute_value<CU_MEMPOOL_ATTR_USED_MEM_CURRENT> { using type = size_t; };
template <> struct attribute_value<CU_MEMPOOL_ATTR_USED_MEM_HIGH> { using type = size_t; };
#endif

// Trait mapping a wrapper-level attribute value type to the raw type the
// CUDA driver call actually reads/writes: bool is carried as int, and
// size_t as cuuint64_t; everything else passes through unchanged.
template <typename T> struct attribute_value_inner_type { using type = T; };
template <> struct attribute_value_inner_type<bool> { using type = int; };
template <> struct attribute_value_inner_type<size_t> { using type = cuuint64_t; };

// Convenience alias for attribute_value_inner_type<T>::type
template <typename T>
using attribute_value_inner_type_t = typename attribute_value_inner_type<T>::type;
102 
103 } // namespace detail_
104 
105 
106 template <attribute_t attribute>
107 using attribute_value_t = typename detail_::attribute_value<attribute>::type;
108 
109 namespace detail_ {
110 
// A driver-call status paired with the attribute value obtained; the value
// is only meaningful when the status indicates success.
template<attribute_t attribute>
struct status_and_attribute_value {
	status_t status;
	attribute_value_t<attribute> value;
};
116 
117 template<attribute_t attribute>
118 status_and_attribute_value<attribute> get_attribute_with_status(handle_t pool_handle)
119 {
120  using outer_type = attribute_value_t <attribute>;
121  using inner_type = attribute_value_inner_type_t<outer_type>;
122  inner_type attribute_value;
123  auto status = cuMemPoolGetAttribute(pool_handle, attribute, &attribute_value);
124  return { status, static_cast<outer_type>(attribute_value) };
125 }
126 
127 template<attribute_t attribute>
128 attribute_value_t<attribute> get_attribute(handle_t pool_handle)
129 {
130  auto status_and_attribute_value = get_attribute_with_status<attribute>(pool_handle);
131  throw_if_error_lazy(status_and_attribute_value.status,
132  "Obtaining attribute " + ::std::to_string(static_cast<int>(attribute))
133  + " of " + detail_::identify(pool_handle));
134  return status_and_attribute_value.value;
135 }
136 
137 template<attribute_t attribute>
138 void set_attribute(handle_t pool_handle, attribute_value_t<attribute> value)
139 {
140  using outer_type = attribute_value_t <attribute>;
141  using inner_type = typename attribute_value_inner_type<outer_type>::type;
142  inner_type value_ = static_cast<inner_type>(value);
143  auto status = cuMemPoolSetAttribute(pool_handle, attribute, &value_);
144  throw_if_error_lazy(status, "Setting attribute " + ::std::to_string(static_cast<int>(attribute))
145  + " of " + detail_::identify(pool_handle));
146 }
147 
148 } // namespace detail_
149 
159 pool_t wrap(cuda::device::id_t device_id, pool::handle_t handle, bool owning) noexcept;
160 
161 } // namespace pool
162 
163 
164 namespace detail_ {
165 
166 inline permissions_t get_permissions(cuda::device::id_t device_id, pool::handle_t pool_handle)
167 {
168  CUmemAccess_flags access_flags;
169  auto mem_location = pool::detail_::create_mem_location(device_id);
170  auto status = cuMemPoolGetAccess(&access_flags, pool_handle, &mem_location);
171  throw_if_error_lazy(status,
172  "Determining access information for " + cuda::device::detail_::identify(device_id)
173  + " to " + pool::detail_::identify(pool_handle));
174  return permissions::detail_::from_flags(access_flags);
175 }
176 
177 inline void set_permissions(span<cuda::device::id_t> device_ids, pool::handle_t pool_handle, permissions_t permissions)
178 {
179  if (permissions.write and not permissions.read) {
180  throw ::std::invalid_argument("Memory pool access get_permissions cannot be write-only");
181  }
182 
183  CUmemAccess_flags flags = permissions.read ?
184  (permissions.write ? CU_MEM_ACCESS_FLAGS_PROT_READWRITE : CU_MEM_ACCESS_FLAGS_PROT_READ) :
185  CU_MEM_ACCESS_FLAGS_PROT_NONE;
186 
187  ::std::vector<CUmemAccessDesc> descriptors;
188  descriptors.reserve(device_ids.size());
189  // TODO: This could use a zip iterator
190  for(auto device_id : device_ids) {
191  CUmemAccessDesc desc;
192  desc.flags = flags;
193  desc.location = pool::detail_::create_mem_location(device_id);
194  descriptors.push_back(desc);
195  }
196 
197  auto status = cuMemPoolSetAccess(pool_handle, descriptors.data(), descriptors.size());
198  throw_if_error_lazy(status,
199  "Setting access get_permissions for " + ::std::to_string(descriptors.size())
200  + " devices to " + pool::detail_::identify(pool_handle));
201 }
202 
203 inline void set_permissions(cuda::device::id_t device_id, pool::handle_t pool_handle, permissions_t permissions)
204 {
205  if (permissions.write and not permissions.read) {
206  throw ::std::invalid_argument("Memory pool access get_permissions cannot be write-only");
207  }
208 
209  CUmemAccessDesc desc;
210  desc.flags = permissions.read ?
211  (permissions.write ?
212  CU_MEM_ACCESS_FLAGS_PROT_READWRITE :
213  CU_MEM_ACCESS_FLAGS_PROT_READ) :
214  CU_MEM_ACCESS_FLAGS_PROT_NONE;
215 
216  desc.location = pool::detail_::create_mem_location(device_id);
217  auto status = cuMemPoolSetAccess(pool_handle, &desc, 1);
218  throw_if_error_lazy(status,
219  "Setting access get_permissions for " + cuda::device::detail_::identify(device_id)
220  + " to " + pool::detail_::identify(pool_handle));
221 }
222 
223 } // namespace detail_
224 
225 permissions_t get_permissions(const cuda::device_t& device, const pool_t& pool);
226 void set_permissions(const cuda::device_t& device, const pool_t& pool, permissions_t permissions);
227 template <typename DeviceRange>
228 void get_permissions(DeviceRange devices, const pool_t& pool_handle, permissions_t permissions);
229 
230 namespace pool {
231 
/// A memory pool's policy regarding reuse of memory which has been freed
/// back to it. Fields correspond to the CU_MEMPOOL_ATTR_REUSE_* attributes;
/// see pool_t::reuse_policy() for the exact field-to-attribute mapping.
struct reuse_policy_t {
	/// Allow an allocation to reuse memory whose free is ordered before it
	/// via event dependencies (CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES)
	bool when_dependent_on_free;

	/// Allow reuse of memory already actually freed, even without an explicit
	/// dependency on the free (CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC)
	bool independent_but_actually_freed;

	/// Allow the driver to insert internal dependencies on pending frees in
	/// order to reuse their memory (CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES)
	bool allow_waiting_for_frees;
};
252 
253 namespace ipc {
254 
255 class imported_ptr_t;
256 
257 } // namespace ipc
258 
259 } // namespace pool
260 
/// A proxy class for a CUDA stream-ordered memory pool, wrapping the
/// driver's cuMemPool* API. When constructed as "owning", the wrapper
/// destroys the underlying pool on destruction; otherwise it merely
/// observes an existing pool.
class pool_t {

public:
	/// Allocate a region of memory from this pool, ordered with respect to
	/// the execution of the given stream (defined elsewhere)
	region_t allocate(const stream_t& stream, size_t num_bytes) const;

	/// Obtain a pointer usable in this process to a pool allocation made in
	/// another process, given its exported IPC handle (defined elsewhere)
	pool::ipc::imported_ptr_t import(const memory::pool::ipc::ptr_handle_t& exported_handle) const;

	/// Release memory held by the pool back to the system, so that at most
	/// @p min_bytes_to_keep bytes remain reserved (per cuMemPoolTrimTo)
	void trim(size_t min_bytes_to_keep) const
	{
		auto status = cuMemPoolTrimTo(handle_, min_bytes_to_keep);
		throw_if_error_lazy(status, "Attempting to trim " + pool::detail_::identify(*this)
			+ " down to " + ::std::to_string(min_bytes_to_keep));
	}

	/// Obtain the value of one of the pool's attributes, throwing on failure
	template<pool::attribute_t attribute>
	pool::attribute_value_t<attribute> get_attribute() const
	{
		auto attribute_with_status = pool::detail_::get_attribute_with_status<attribute>(handle_);
		throw_if_error_lazy(attribute_with_status.status, "Failed obtaining attribute "
			+ ::std::to_string(static_cast<int>(attribute)) + " of " + pool::detail_::identify(*this));
		return attribute_with_status.value;
	}

	/// Set one of the pool's attributes, throwing on failure
	template<pool::attribute_t attribute>
	void set_attribute(const pool::attribute_value_t<attribute>& value) const
	{
		using outer_type = pool::attribute_value_t <attribute>;
		// The driver expects the attribute's raw type (int / cuuint64_t),
		// not the wrapper-level type (bool / size_t)
		using inner_type = typename pool::detail_::attribute_value_inner_type<outer_type>::type;
		auto inner_value = static_cast<inner_type>(value);
		auto status = cuMemPoolSetAttribute(handle_, attribute, &inner_value);
		throw_if_error_lazy(status, "Failed setting attribute " + ::std::to_string(static_cast<int>(attribute))
			+ " of " + pool::detail_::identify(*this));
	}

	/// The amount of reserved memory (in bytes) the pool holds on to
	/// even when trimmed (the CU_MEMPOOL_ATTR_RELEASE_THRESHOLD attribute)
	size_t release_threshold() const
	{
		return static_cast<size_t>(get_attribute<CU_MEMPOOL_ATTR_RELEASE_THRESHOLD>());
	}

	/// Set the pool's release threshold; see @ref release_threshold
	void set_release_threshold(size_t threshold) const
	{
		set_attribute<CU_MEMPOOL_ATTR_RELEASE_THRESHOLD>(threshold);
	}

	/// The access permissions the given device has to allocations from this pool
	permissions_t permissions(const cuda::device_t& device)
	{
		return memory::get_permissions(device, *this);
	}

	/// Set a single device's access permissions to allocations from this pool
	void set_permissions(const cuda::device_t& device, permissions_t permissions)
	{
		return memory::set_permissions(device, *this, permissions);
	}

	/// Set multiple devices' access permissions to allocations from this pool
	template <typename DeviceRange>
	void set_permissions(DeviceRange devices, permissions_t permissions)
	{
		return memory::set_permissions(devices, *this, permissions);
	}

public: // non-field getters

	/// The pool's policy regarding reuse of freed memory, gathered from the
	/// three CU_MEMPOOL_ATTR_REUSE_* attributes
	pool::reuse_policy_t reuse_policy() const
	{
		return {
			get_attribute<CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES>(),
			get_attribute<CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC>(),
			get_attribute<CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES>()
		};
	}

	/// Set the pool's policy regarding reuse of freed memory, applying each
	/// of the three CU_MEMPOOL_ATTR_REUSE_* attributes in turn
	void set_reuse_policy(pool::reuse_policy_t reuse_policy) const
	{
		set_attribute<CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES>(reuse_policy.when_dependent_on_free);
		set_attribute<CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC>(reuse_policy.independent_but_actually_freed);
		set_attribute<CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES>(reuse_policy.allow_waiting_for_frees);
	}

#if CUDA_VERSION >= 11030

	/// The amount of memory currently reserved by the pool from the system
	/// (CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT; available from CUDA 11.3)
	size_t backing_memory_size() const {
		return get_attribute<CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT>();
	}
#endif

public: // field getters
	/// The raw CUDA driver handle of the wrapped pool
	pool::handle_t handle() const noexcept { return handle_; }
	/// Numeric ID of the device on which this pool's memory resides
	cuda::device::id_t device_id() const noexcept { return device_id_; }
	/// A proxy object for the pool's device (defined elsewhere)
	cuda::device_t device() const noexcept;
	/// True if this wrapper is responsible for destroying the pool
	bool is_owning() const noexcept { return owning_; }


public: // construction & destruction
	// wrap() is the only way to obtain a pool_t other than by moving one
	friend pool_t pool::wrap(cuda::device::id_t device_id, pool::handle_t handle, bool owning) noexcept;

	// Non-copyable: copying could cause double-destruction of an owned pool
	pool_t(const pool_t& other) = delete;

	// Moving transfers ownership; the moved-from object becomes non-owning
	pool_t(pool_t&& other) noexcept : pool_t(other.device_id_, other.handle_, other.owning_)
	{
		other.owning_ = false;
	}

	~pool_t()
	{
		if (owning_) {
			cuMemPoolDestroy(handle_); // Note: Ignoring any potential exception
		}
	}

protected: // constructors
	pool_t(cuda::device::id_t device_id, pool::handle_t handle, bool owning) noexcept
	: device_id_(device_id), handle_(handle), owning_(owning)
	{ }

protected: // data members
	cuda::device::id_t device_id_; // device on which the pool's memory resides
	pool::handle_t handle_;        // raw driver handle of the pool
	bool owning_;                  // do we destroy the pool on destruction?
}; // class pool_t
427 
428 inline bool operator==(const pool_t& lhs, const pool_t& rhs)
429 {
430  // Note: Not comparing the ownership status
431  return lhs.device_id() == rhs.device_id() and lhs.handle() == rhs.handle();
432 }
433 
434 inline bool operator!=(const pool_t& lhs, const pool_t& rhs)
435 {
436  return not (lhs == rhs);
437 }
438 
439 namespace pool {
440 
441 inline pool_t wrap(cuda::device::id_t device_id, pool::handle_t handle, bool owning) noexcept
442 {
443  return { device_id, handle, owning };
444 }
445 
446 namespace detail_ {
447 
448 template<shared_handle_kind_t SharedHandleKind = shared_handle_kind_t::no_export>
449 pool_t create(cuda::device::id_t device_id)
450 {
451  auto props = create_raw_properties<SharedHandleKind>(device_id);
452  handle_t handle;
453  auto status = cuMemPoolCreate(&handle, &props);
454  throw_if_error_lazy(status, "Failed creating a memory pool on device " + cuda::device::detail_::identify(device_id));
455  constexpr const bool is_owning { true };
456  return wrap(device_id, handle, is_owning);
457 }
458 
459 inline ::std::string identify(const pool_t& pool)
460 {
461  return identify(pool.handle(), pool.device_id());
462 }
463 
464 } // namespace detail_
465 
/// Create a new memory pool on the specified device (defined elsewhere;
/// see the raw-ID overload in detail_ above)
template<shared_handle_kind_t SharedHandleKind>
pool_t create(const cuda::device_t& device);
468 
469 } // namespace pool
470 
471 } // namespace memory
472 
473 } // namespace cuda
474 
475 #endif // CUDA_VERSION >= 11020
476 
477 #endif // CUDA_API_WRAPPERS_MEMORY_POOL_HPP_
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
detail_::region_helper< memory::region_t > region_t
A child class of the generic region_t with some managed-memory-specific functionality.
Definition: memory.hpp:1960
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:850
::std::size_t size_t
A size type for use throughout the wrappers library (except when specific API functions limit the siz...
Definition: types.hpp:81
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we've failed - which also ensures no string is constructed unle...
Definition: error.hpp:316
bool operator==(const context_t &lhs, const context_t &rhs) noexcept
Definition: context.hpp:762
CUarray handle_t
Raw CUDA driver handle for arrays (of any dimension)
Definition: array.hpp:34
array_t< T, NumDimensions > wrap(device::id_t device_id, context::handle_t context_handle, handle_t handle, dimensions_t< NumDimensions > dimensions) noexcept
Wrap an existing CUDA array in an array_t instance.
Definition: array.hpp:264
detail_::all_devices devices()
Definition: devices.hpp:224
Wrapper class for a CUDA device.
Definition: device.hpp:135
CUresult status_t
Indicates either the result (success or error index) of a CUDA Runtime or Driver API call...
Definition: types.hpp:77