cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
virtual_memory.hpp
Go to the documentation of this file.
1 
4 #ifndef CUDA_API_WRAPPERS_VIRTUAL_MEMORY_HPP_
5 #define CUDA_API_WRAPPERS_VIRTUAL_MEMORY_HPP_
6 
7 // We need this out of the #ifdef, as otherwise we don't know what
8 // the CUDA_VERSION is...
9 #include <cuda.h>
10 
11 #if CUDA_VERSION >= 10020
12 #include "types.hpp"
13 #include "error.hpp"
14 
15 namespace cuda {
16 
18 class device_t;
20 
21 // TODO: Perhaps move this down into the device namespace ?
22 namespace memory {
23 
25 class physical_allocation_t;
27 
28 namespace physical_allocation {
29 
30 using handle_t = CUmemGenericAllocationHandle;
31 
32 namespace detail_ {
33 
34 physical_allocation_t wrap(handle_t handle, size_t size, bool holds_refcount_unit);
35 
36 } // namespace detail_
37 
38 namespace detail_ {
// The kinds of allocation-size granularity one can ask the driver about
// via cuMemGetAllocationGranularity; used by properties_t::granularity()
enum class granularity_kind_t : ::std::underlying_type<CUmemAllocationGranularity_flags_enum>::type {
	minimum_required = CU_MEM_ALLOC_GRANULARITY_MINIMUM,
	recommended_for_performance = CU_MEM_ALLOC_GRANULARITY_RECOMMENDED
};
43 
44 } // namespace detail_
45 
46 // Note: Not inheriting from CUmemAllocationProp_st, since
47 // that structure is a bit messed up
48 struct properties_t {
49  // Note: Specifying a compression type is currently unsupported,
50  // as the driver API does not document semantics for the relevant
51  // properties field
52 
53 public: // getters
54  cuda::device_t device() const;
55 
56  // TODO: Is this only relevant to requests?
57  shared_handle_kind_t requested_kind() const
58  {
59  return shared_handle_kind_t(raw.requestedHandleTypes);
60  };
61 
62 protected: // non-mutators
63  size_t granularity(detail_::granularity_kind_t kind) const {
64  size_t result;
65  auto status = cuMemGetAllocationGranularity(&result, &raw,
66  static_cast<CUmemAllocationGranularity_flags>(kind));
67  throw_if_error_lazy(status, "Could not determine physical allocation granularity");
68  return result;
69  }
70 
71 public: // non-mutators
72  size_t minimum_granularity() const { return granularity(detail_::granularity_kind_t::minimum_required); }
73  size_t recommended_granularity() const { return granularity(detail_::granularity_kind_t::recommended_for_performance); }
74 
75 public:
76  properties_t(CUmemAllocationProp_st raw_properties) : raw(raw_properties)
77  {
78  if (raw.location.type != CU_MEM_LOCATION_TYPE_DEVICE) {
79  throw ::std::runtime_error("Unexpected physical_allocation type - we only know about devices!");
80  }
81  }
82 
83  properties_t(properties_t&&) = default;
84  properties_t(const properties_t&) = default;
85 
86 public:
87  CUmemAllocationProp_st raw;
88 
89 };
90 
91 namespace detail_ {
92 
93 template<physical_allocation::shared_handle_kind_t SharedHandleKind>
94 properties_t create_properties(cuda::device::id_t device_id)
95 {
96  CUmemAllocationProp_st raw_props{};
97  raw_props.type = CU_MEM_ALLOCATION_TYPE_PINNED;
98  raw_props.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
99  raw_props.location.id = static_cast<int>(device_id);
100  raw_props.requestedHandleTypes = static_cast<CUmemAllocationHandleType>(SharedHandleKind);
101  raw_props.win32HandleMetaData = nullptr;
102  return properties_t{raw_props};
103 }
104 
105 } // namespace detail_
106 
107 template<physical_allocation::shared_handle_kind_t SharedHandleKind>
108 properties_t create_properties_for(const device_t& device);
109 
110 } // namespace physical_allocation
111 
112 namespace virtual_ {
113 
114 class reserved_address_range_t;
115 class mapping_t;
116 
117 namespace detail_ {
118 
119 inline status_t cancel_reservation_nothrow(memory::region_t reserved) noexcept
120 {
121  return cuMemAddressFree(memory::device::address(reserved.start()), reserved.size());
122 }
123 
124 inline void cancel_reservation(memory::region_t reserved)
125 {
126  auto status = cancel_reservation_nothrow(reserved);
127  throw_if_error_lazy(status, "Failed freeing a reservation of " + memory::detail_::identify(reserved));
128 }
129 
130 } // namespace detail_
131 
// Alignment, in bytes, of a reserved range of virtual addresses
using alignment_t = size_t;

enum alignment : alignment_t {
	default_ = 0, // 0 lets the driver pick its default alignment
	trivial = 1   // single-byte alignment, i.e. no alignment requirement
};
138 
139 namespace detail_ {
140 
141 reserved_address_range_t wrap(region_t address_range, alignment_t alignment, bool take_ownership);
142 
143 } // namespace detail_
144 
145 
// An owning wrapper for a reserved range of virtual addresses (into which
// physical allocations may subsequently be mapped). An owning instance
// cancels the reservation on destruction; obtain instances via reserve()
// or detail_::wrap().
class reserved_address_range_t {
protected:

	// Construction is restricted to detail_::wrap(), declared a friend below
	reserved_address_range_t(region_t region, alignment_t alignment, bool owning) noexcept
	: region_(region), alignment_(alignment), owning_(owning) { }

public:
	friend reserved_address_range_t detail_::wrap(region_t, alignment_t, bool);

	// Move-construction transfers ownership: the moved-from object will no
	// longer cancel the reservation
	reserved_address_range_t(reserved_address_range_t&& other) noexcept
	: region_(other.region_), alignment_(other.alignment_), owning_(other.owning_)
	{
		other.owning_ = false;
	}

	// Cancels the reservation if owning; whether a failure throws is governed
	// by the library-wide THROW_IN_DESTRUCTORS setting
	~reserved_address_range_t() DESTRUCTOR_EXCEPTION_SPEC
	{
		if (not owning_) { return; }
#if THROW_IN_DESTRUCTORS
		detail_::cancel_reservation(region_);
#else
		detail_::cancel_reservation_nothrow(region_);
#endif
	}

public: // getters
	bool is_owning() const noexcept { return owning_; }
	region_t region() const noexcept{ return region_; }
	alignment_t alignment() const noexcept { return alignment_; }

protected: // data members
	const region_t region_;
	const alignment_t alignment_;
	bool owning_;
};
181 
182 namespace detail_ {
183 
184 inline reserved_address_range_t wrap(region_t address_range, alignment_t alignment, bool take_ownership)
185 {
186  return { address_range, alignment, take_ownership };
187 }
188 
189 } // namespace detail_
190 
191 inline reserved_address_range_t reserve(region_t requested_region, alignment_t alignment = alignment::default_)
192 {
193  unsigned long flags { 0 };
194  CUdeviceptr ptr;
195  auto status = cuMemAddressReserve(&ptr, requested_region.size(), alignment, device::address(requested_region), flags);
196  throw_if_error_lazy(status, "Failed making a reservation of " + cuda::memory::detail_::identify(requested_region)
197  + " with alignment value " + ::std::to_string(alignment));
198  bool is_owning { true };
199  return detail_::wrap(memory::region_t {as_pointer(ptr), requested_region.size() }, alignment, is_owning);
200 }
201 
202 inline reserved_address_range_t reserve(size_t requested_size, alignment_t alignment = alignment::default_)
203 {
204  return reserve(region_t{ nullptr, requested_size }, alignment);
205 }
206 
} // namespace virtual_
208 
209 class physical_allocation_t {
210 protected: // constructors
211  physical_allocation_t(physical_allocation::handle_t handle, size_t size, bool holds_refcount_unit)
212  : handle_(handle), size_(size), holds_refcount_unit_(holds_refcount_unit) { }
213 
214 public: // constructors & destructor
215  physical_allocation_t(const physical_allocation_t& other) noexcept : handle_(other.handle_), size_(other.size_), holds_refcount_unit_(false)
216  { }
217 
218  physical_allocation_t(physical_allocation_t&& other) noexcept : handle_(other.handle_), size_(other.size_), holds_refcount_unit_(other.holds_refcount_unit_)
219  {
220  other.holds_refcount_unit_ = false;
221  }
222 
223  ~physical_allocation_t() DESTRUCTOR_EXCEPTION_SPEC
224  {
225  if (not holds_refcount_unit_) { return; }
226  auto status = cuMemRelease(handle_);
227 #ifdef THROW_IN_DESTRUCTORS
228  throw_if_error_lazy(status, "Failed making a virtual memory physical_allocation of size " + ::std::to_string(size_));
229 #else
230  (void) status;
231 #endif
232  }
233 
234 public: // non-mutators
235  friend physical_allocation_t physical_allocation::detail_::wrap(physical_allocation::handle_t handle, size_t size, bool holds_refcount_unit);
236 
237  size_t size() const noexcept { return size_; }
238  physical_allocation::handle_t handle() const noexcept { return handle_; }
239  bool holds_refcount_unit() const noexcept { return holds_refcount_unit_; }
240 
241  physical_allocation::properties_t properties() const {
242  CUmemAllocationProp raw_properties;
243  auto status = cuMemGetAllocationPropertiesFromHandle(&raw_properties, handle_);
244  throw_if_error_lazy(status, "Obtaining the properties of a virtual memory physical_allocation with handle " + ::std::to_string(handle_));
245  return { raw_properties };
246  }
247 
248  template <physical_allocation::shared_handle_kind_t SharedHandleKind>
249  physical_allocation::shared_handle_t<SharedHandleKind> sharing_handle() const
250  {
251  physical_allocation::shared_handle_t<SharedHandleKind> shared_handle_;
252  static constexpr const unsigned long long flags { 0 };
253  auto result = cuMemExportToShareableHandle(&shared_handle_, handle_, static_cast<CUmemAllocationHandleType>(SharedHandleKind), flags);
254  throw_if_error_lazy(result, "Exporting a (generic CUDA) shared memory physical_allocation to a shared handle");
255  return shared_handle_;
256  }
257 
258 protected: // data members
259  const physical_allocation::handle_t handle_;
260  size_t size_;
261  bool holds_refcount_unit_;
262 };
263 
264 namespace physical_allocation {
265 
266 inline physical_allocation_t create(size_t size, properties_t properties)
267 {
268  static constexpr const unsigned long long flags { 0 };
269  CUmemGenericAllocationHandle handle;
270  auto result = cuMemCreate(&handle, size, &properties.raw, flags);
271  throw_if_error_lazy(result, "Failed making a virtual memory physical_allocation of size " + ::std::to_string(size));
272  static constexpr const bool is_owning { true };
273  return detail_::wrap(handle, size, is_owning);
274 }
275 
276 physical_allocation_t create(size_t size, device_t device);
277 
278 namespace detail_ {
279 
280 inline ::std::string identify(handle_t handle, size_t size) {
281  return ::std::string("physical allocation with handle ") + ::std::to_string(handle)
282  + " of size " + ::std::to_string(size);
283 }
284 
285 inline physical_allocation_t wrap(handle_t handle, size_t size, bool holds_refcount_unit)
286 {
287  return { handle, size, holds_refcount_unit };
288 }
289 
290 inline properties_t properties_of(handle_t handle)
291 {
292  CUmemAllocationProp prop;
293  auto result = cuMemGetAllocationPropertiesFromHandle (&prop, handle);
294  throw_if_error_lazy(result, "Failed obtaining the properties of the virtual memory physical_allocation with handle "
295  + ::std::to_string(handle));
296  return { prop };
297 }
298 
299 } // namespace detail_
300 
312 template <physical_allocation::shared_handle_kind_t SharedHandleKind>
313 physical_allocation_t import(shared_handle_t<SharedHandleKind> shared_handle, size_t size, bool holds_refcount_unit = false)
314 {
315  handle_t result_handle;
316  auto result = cuMemImportFromShareableHandle(
317  &result_handle, reinterpret_cast<void*>(shared_handle), CUmemAllocationHandleType(SharedHandleKind));
318  throw_if_error_lazy(result, "Failed importing a virtual memory physical_allocation from a shared handle ");
319  return physical_allocation::detail_::wrap(result_handle, size, holds_refcount_unit);
320 }
321 
322 namespace detail_ {
323 
324 inline ::std::string identify(physical_allocation_t physical_allocation) {
325  return identify(physical_allocation.handle(), physical_allocation.size());
326 }
327 
328 } // namespace detail_
329 
330 } // namespace physical_allocation
331 
332 /*
333 enum access_mode_t : ::std::underlying_type<CUmemAccess_flags>::type {
334  no_access = CU_MEM_ACCESS_FLAGS_PROT_NONE,
335  read_access = CU_MEM_ACCESS_FLAGS_PROT_READ,
336  read_and_write_access = CU_MEM_ACCESS_FLAGS_PROT_READWRITE,
337  rw_access = read_and_write_access
338 };
339 */
340 
341 namespace virtual_ {
342 namespace mapping {
343 namespace detail_ {
344 
345 inline mapping_t wrap(region_t address_range, bool owning = false);
346 
347 inline ::std::string identify(region_t address_range) {
348  return ::std::string("mapping of ") + memory::detail_::identify(address_range);
349 }
350 
351 } // namespace detail_
352 } // namespace mapping
353 
354 namespace detail_ {
355 
// Query the driver for the given device's access permissions to a region of
// virtual memory; the region must be fully covered by active mappings
// (a cuMemGetAccess precondition - TODO confirm against the driver docs)
inline permissions_t get_permissions(region_t fully_mapped_region, cuda::device::id_t device_id)
{
	CUmemLocation_st location { CU_MEM_LOCATION_TYPE_DEVICE, device_id };
	unsigned long long flags; // out-parameter, written by cuMemGetAccess below
	auto result = cuMemGetAccess(&flags, &location, device::address(fully_mapped_region) );
	throw_if_error_lazy(result, "Failed determining the access mode for "
		+ cuda::device::detail_::identify(device_id)
		+ " to the virtual memory mapping to the range of size "
		+ ::std::to_string(fully_mapped_region.size()) + " bytes at " + cuda::detail_::ptr_as_hex(fully_mapped_region.data()));
	return permissions::detail_::from_flags(static_cast<CUmemAccess_flags>(flags)); // Does this actually work?
}
367 
368 } // namespace detail_
369 
376 permissions_t get_access_mode(region_t fully_mapped_region, const device_t& device);
377 
382 permissions_t get_access_mode(mapping_t mapping, const device_t& device);
383 
390 void set_permissions(region_t fully_mapped_region, const device_t& device, permissions_t access_mode);
391 
396 void set_permissions(mapping_t mapping, const device_t& device, permissions_t access_mode);
398 
405 template <template <typename...> class ContiguousContainer>
407 void set_permissions(
408  region_t fully_mapped_region,
409  const ContiguousContainer<device_t>& devices,
410  permissions_t access_mode);
411 
412 template <template <typename...> class ContiguousContainer>
413 void set_permissions(
414  region_t fully_mapped_region,
415  ContiguousContainer<device_t>&& devices,
416  permissions_t access_mode);
418 
423 template <template <typename...> class ContiguousContainer>
425 inline void set_permissions(
426  mapping_t mapping,
427  const ContiguousContainer<device_t>& devices,
428  permissions_t access_mode);
429 
430 template <template <typename...> class ContiguousContainer>
431 inline void set_permissions(
432  mapping_t mapping,
433  ContiguousContainer<device_t>&& devices,
434  permissions_t access_mode);
436 
437 
438 class mapping_t {
439 protected: // constructors
440  mapping_t(region_t region, bool owning) : address_range_(region), owning_(owning) { }
441 
442 public: // constructors & destructors
443 
444  friend mapping_t mapping::detail_::wrap(region_t address_range, bool owning);
445 
446  mapping_t(const mapping_t& other) noexcept :
447  address_range_(other.address_range()), owning_(false) { }
448 
449  mapping_t(mapping_t&& other) noexcept :
450  address_range_(other.address_range()), owning_(other.owning_)
451  {
452  other.owning_ = false;
453  }
454 
455  region_t address_range() const noexcept { return address_range_; }
456  bool is_owning() const noexcept { return owning_; }
457 
458  permissions_t get_permissions(const device_t& device) const;
459  void set_permissions(const device_t& device, permissions_t access_mode) const;
460 
461  template <template <typename...> class ContiguousContainer>
462  inline void set_permissions(
463  const ContiguousContainer<device_t>& devices,
464  permissions_t access_mode) const;
465 
466  template <template <typename...> class ContiguousContainer>
467  inline void set_permissions(
468  ContiguousContainer<device_t>&& devices,
469  permissions_t access_mode) const;
470 
471  ~mapping_t() noexcept(false)
472  {
473  if (not owning_) { return; }
474  auto result = cuMemUnmap(device::address(address_range_), address_range_.size());
475  throw_if_error_lazy(result, "Failed unmapping " + mapping::detail_::identify(address_range_));
476  }
477 
478 public:
479 #if CUDA_VERSION >= 11000
480 
481  physical_allocation_t allocation() const
482  {
483  CUmemGenericAllocationHandle allocation_handle;
484  auto status = cuMemRetainAllocationHandle(&allocation_handle, address_range_.data());
485  throw_if_error_lazy(status, " Failed obtaining/retaining the physical_allocation handle for the virtual memory "
486  "range mapped to " + cuda::detail_::ptr_as_hex(address_range_.data()) + " of size " +
487  ::std::to_string(address_range_.size()) + " bytes");
488  constexpr const bool increase_refcount{false};
489  return physical_allocation::detail_::wrap(allocation_handle, address_range_.size(), increase_refcount);
490  }
491 #endif
492 protected:
493 
494  region_t address_range_;
495  bool owning_;
496 
497 };
498 
499 namespace mapping {
500 
501 namespace detail_ {
502 
503 mapping_t wrap(region_t range, bool owning)
504 {
505  return { range, owning };
506 }
507 
508 inline ::std::string identify(mapping_t mapping)
509 {
510  return mapping::detail_::identify(mapping.address_range());
511 }
512 
513 } // namespace detail_
514 
515 } // namespace mapping
516 
517 inline mapping_t map(region_t region, physical_allocation_t physical_allocation)
518 {
519  size_t offset_into_allocation { 0 }; // not yet supported, but in the API
520  constexpr const unsigned long long flags { 0 };
521  auto handle = physical_allocation.handle();
522  auto status = cuMemMap(device::address(region), region.size(), offset_into_allocation, handle, flags);
523  throw_if_error_lazy(status, "Failed making a virtual memory mapping of "
524  + physical_allocation::detail_::identify(physical_allocation)
525  + " to the range of size " + ::std::to_string(region.size()) + " bytes at " +
526  cuda::detail_::ptr_as_hex(region.data()));
527  constexpr const bool is_owning { true };
528  return mapping::detail_::wrap(region, is_owning);
529 }
530 
531 } // namespace virtual_
532 } // namespace memory
533 } // namespace cuda
534 
535 #endif // CUDA_VERSION >= 10020
536 #endif // CUDA_API_WRAPPERS_VIRTUAL_MEMORY_HPP_
Alias for the default behavior; see heuristic .
Definition: types.hpp:903
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
detail_::region_helper< memory::region_t > region_t
A child class of the generic region_t with some managed-memory-specific functionality.
Definition: memory.hpp:1974
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:852
::std::size_t size_t
A size type for use throughout the wrappers library (except when specific API functions limit the siz...
Definition: types.hpp:78
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we&#39;ve failed - which also ensures no string is constructed unle...
Definition: error.hpp:327
CUarray handle_t
Raw CUDA driver handle for arrays (of any dimension)
Definition: array.hpp:34
array_t< T, NumDimensions > wrap(device::id_t device_id, context::handle_t context_handle, handle_t handle, dimensions_t< NumDimensions > dimensions) noexcept
Wrap an existing CUDA array in an array_t instance.
Definition: array.hpp:271
address_t address(const void *device_ptr) noexcept
Definition: types.hpp:684
void * as_pointer(device::address_t address) noexcept
Definition: types.hpp:702
detail_::all_devices devices()
Definition: devices.hpp:224
Wrapper class for a CUDA device.
Definition: device.hpp:135
CUresult status_t
Indicates either the result (success or error index) of a CUDA Runtime or Driver API call...
Definition: types.hpp:74