cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
virtual_memory.hpp
Go to the documentation of this file.
1 
4 #ifndef CUDA_API_WRAPPERS_VIRTUAL_MEMORY_HPP_
5 #define CUDA_API_WRAPPERS_VIRTUAL_MEMORY_HPP_
6 
7 // We need this out of the #ifdef, as otherwise we don't know what
8 // the CUDA_VERSION is...
9 #include <cuda.h>
10 
11 #if CUDA_VERSION >= 10020
12 #include "types.hpp"
13 #include "memory.hpp"
14 
15 namespace cuda {
16 // TODO: Perhaps move this down into the device namespace ?
17 namespace memory {
18 
20 class physical_allocation_t;
22 
23 namespace physical_allocation {
24 
25 using handle_t = CUmemGenericAllocationHandle;
26 
27 namespace detail_ {
28 
29 physical_allocation_t wrap(handle_t handle, size_t size, bool holds_refcount_unit);
30 
31 } // namespace detail_
32 
namespace detail_ {

// The kinds of allocation granularity one can query the driver about
// (via cuMemGetAllocationGranularity); values mirror
// CUmemAllocationGranularity_flags.
enum class granularity_kind_t : ::std::underlying_type<CUmemAllocationGranularity_flags_enum>::type {
	// The minimum granularity required for any physical allocation request
	minimum_required = CU_MEM_ALLOC_GRANULARITY_MINIMUM,
	// The granularity recommended by the driver for best performance
	recommended_for_performance = CU_MEM_ALLOC_GRANULARITY_RECOMMENDED
};

} // namespace detail_
40 
41 // Note: Not inheriting from CUmemAllocationProp_st, since
42 // that structure is a bit messed up
43 struct properties_t {
44  // Note: Specifying a compression type is currently unsupported,
45  // as the driver API does not document semantics for the relevant
46  // properties field
47 
48 public: // getters
49  cuda::device_t device() const;
50 
51  // TODO: Is this only relevant to requests?
52  shared_handle_kind_t requested_kind() const
53  {
54  return shared_handle_kind_t(raw.requestedHandleTypes);
55  };
56 
57 protected: // non-mutators
58  size_t granularity(detail_::granularity_kind_t kind) const {
59  size_t result;
60  auto status = cuMemGetAllocationGranularity(&result, &raw,
61  static_cast<CUmemAllocationGranularity_flags>(kind));
62  throw_if_error_lazy(status, "Could not determine physical allocation granularity");
63  return result;
64  }
65 
66 public: // non-mutators
67  size_t minimum_granularity() const { return granularity(detail_::granularity_kind_t::minimum_required); }
68  size_t recommended_granularity() const { return granularity(detail_::granularity_kind_t::recommended_for_performance); }
69 
70 public:
71  properties_t(CUmemAllocationProp_st raw_properties) : raw(raw_properties)
72  {
73  if (raw.location.type != CU_MEM_LOCATION_TYPE_DEVICE) {
74  throw ::std::runtime_error("Unexpected physical_allocation type - we only know about devices!");
75  }
76  }
77 
78  properties_t(properties_t&&) = default;
79  properties_t(const properties_t&) = default;
80 
81 public:
82  CUmemAllocationProp_st raw;
83 
84 };
85 
86 namespace detail_ {
87 
88 template<physical_allocation::shared_handle_kind_t SharedHandleKind>
89 properties_t create_properties(cuda::device::id_t device_id)
90 {
91  CUmemAllocationProp_st raw_props{};
92  raw_props.type = CU_MEM_ALLOCATION_TYPE_PINNED;
93  raw_props.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
94  raw_props.location.id = static_cast<int>(device_id);
95  raw_props.requestedHandleTypes = static_cast<CUmemAllocationHandleType>(SharedHandleKind);
96  raw_props.win32HandleMetaData = nullptr;
97  return properties_t{raw_props};
98 }
99 
100 } // namespace detail_
101 
102 template<physical_allocation::shared_handle_kind_t SharedHandleKind>
103 properties_t create_properties_for(const device_t& device);
104 
105 } // namespace physical_allocation
106 
107 namespace virtual_ {
108 
109 class reserved_address_range_t;
110 class mapping_t;
111 
112 namespace detail_ {
113 
114 inline void cancel_reservation(memory::region_t reserved)
115 {
116  auto status = cuMemAddressFree(memory::device::address(reserved.start()), reserved.size());
117  throw_if_error_lazy(status, "Failed freeing a reservation of " + memory::detail_::identify(reserved));
118 }
119 
120 } // namespace detail_
121 
// Alignment values for virtual address range reservations, in bytes
using alignment_t = size_t;

// Named alignment values of note
enum alignment : alignment_t {
	default_ = 0, // presumably lets the driver pick its default alignment — TODO confirm against cuMemAddressReserve docs
	trivial = 1   // single-byte alignment, i.e. no alignment constraint
};
128 
129 namespace detail_ {
130 
131 reserved_address_range_t wrap(region_t address_range, alignment_t alignment, bool take_ownership);
132 
133 } // namespace detail_
134 
135 
136 class reserved_address_range_t {
137 protected:
138 
139  reserved_address_range_t(region_t region, alignment_t alignment, bool owning) noexcept
140  : region_(region), alignment_(alignment), owning_(owning) { }
141 
142 public:
143  friend reserved_address_range_t detail_::wrap(region_t, alignment_t, bool);
144 
145  reserved_address_range_t(reserved_address_range_t&& other) noexcept
146  : region_(other.region_), alignment_(other.alignment_), owning_(other.owning_)
147  {
148  other.owning_ = false;
149  }
150 
151  ~reserved_address_range_t() noexcept(false)
152  {
153  if (not owning_) { return; }
154  detail_::cancel_reservation(region_);
155  }
156 
157 public: // getters
158  bool is_owning() const noexcept { return owning_; }
159  region_t region() const noexcept{ return region_; }
160  alignment_t alignment() const noexcept { return alignment_; }
161 
162 protected: // data members
163  const region_t region_;
164  const alignment_t alignment_;
165  bool owning_;
166 };
167 
168 namespace detail_ {
169 
170 inline reserved_address_range_t wrap(region_t address_range, alignment_t alignment, bool take_ownership)
171 {
172  return { address_range, alignment, take_ownership };
173 }
174 
175 } // namespace detail_
176 
177 inline reserved_address_range_t reserve(region_t requested_region, alignment_t alignment = alignment::default_)
178 {
179  unsigned long flags { 0 };
180  CUdeviceptr ptr;
181  auto status = cuMemAddressReserve(&ptr, requested_region.size(), alignment, device::address(requested_region), flags);
182  throw_if_error_lazy(status, "Failed making a reservation of " + cuda::memory::detail_::identify(requested_region)
183  + " with alignment value " + ::std::to_string(alignment));
184  bool is_owning { true };
185  return detail_::wrap(memory::region_t {as_pointer(ptr), requested_region.size() }, alignment, is_owning);
186 }
187 
188 inline reserved_address_range_t reserve(size_t requested_size, alignment_t alignment = alignment::default_)
189 {
190  return reserve(region_t{ nullptr, requested_size }, alignment);
191 }
192 
193 } // namespace virtual_
194 
195 class physical_allocation_t {
196 protected: // constructors
197  physical_allocation_t(physical_allocation::handle_t handle, size_t size, bool holds_refcount_unit)
198  : handle_(handle), size_(size), holds_refcount_unit_(holds_refcount_unit) { }
199 
200 public: // constructors & destructor
201  physical_allocation_t(const physical_allocation_t& other) noexcept : handle_(other.handle_), size_(other.size_), holds_refcount_unit_(false)
202  { }
203 
204  physical_allocation_t(physical_allocation_t&& other) noexcept : handle_(other.handle_), size_(other.size_), holds_refcount_unit_(other.holds_refcount_unit_)
205  {
206  other.holds_refcount_unit_ = false;
207  }
208 
209  ~physical_allocation_t() noexcept(false)
210  {
211  if (not holds_refcount_unit_) { return; }
212  auto result = cuMemRelease(handle_);
213  throw_if_error_lazy(result, "Failed making a virtual memory physical_allocation of size " + ::std::to_string(size_));
214  }
215 
216 public: // non-mutators
217  friend physical_allocation_t physical_allocation::detail_::wrap(physical_allocation::handle_t handle, size_t size, bool holds_refcount_unit);
218 
219  size_t size() const noexcept { return size_; }
220  physical_allocation::handle_t handle() const noexcept { return handle_; }
221  bool holds_refcount_unit() const noexcept { return holds_refcount_unit_; }
222 
223  physical_allocation::properties_t properties() const {
224  CUmemAllocationProp raw_properties;
225  auto status = cuMemGetAllocationPropertiesFromHandle(&raw_properties, handle_);
226  throw_if_error_lazy(status, "Obtaining the properties of a virtual memory physical_allocation with handle " + ::std::to_string(handle_));
227  return { raw_properties };
228  }
229 
230  template <physical_allocation::shared_handle_kind_t SharedHandleKind>
231  physical_allocation::shared_handle_t<SharedHandleKind> sharing_handle() const
232  {
233  physical_allocation::shared_handle_t<SharedHandleKind> shared_handle_;
234  static constexpr const unsigned long long flags { 0 };
235  auto result = cuMemExportToShareableHandle(&shared_handle_, handle_, static_cast<CUmemAllocationHandleType>(SharedHandleKind), flags);
236  throw_if_error_lazy(result, "Exporting a (generic CUDA) shared memory physical_allocation to a shared handle");
237  return shared_handle_;
238  }
239 
240 protected: // data members
241  const physical_allocation::handle_t handle_;
242  size_t size_;
243  bool holds_refcount_unit_;
244 };
245 
246 namespace physical_allocation {
247 
248 inline physical_allocation_t create(size_t size, properties_t properties)
249 {
250  static constexpr const unsigned long long flags { 0 };
251  CUmemGenericAllocationHandle handle;
252  auto result = cuMemCreate(&handle, size, &properties.raw, flags);
253  throw_if_error_lazy(result, "Failed making a virtual memory physical_allocation of size " + ::std::to_string(size));
254  static constexpr const bool is_owning { true };
255  return detail_::wrap(handle, size, is_owning);
256 }
257 
258 physical_allocation_t create(size_t size, device_t device);
259 
260 namespace detail_ {
261 
262 inline ::std::string identify(handle_t handle, size_t size) {
263  return ::std::string("physical allocation with handle ") + ::std::to_string(handle)
264  + " of size " + ::std::to_string(size);
265 }
266 
267 inline physical_allocation_t wrap(handle_t handle, size_t size, bool holds_refcount_unit)
268 {
269  return { handle, size, holds_refcount_unit };
270 }
271 
272 inline properties_t properties_of(handle_t handle)
273 {
274  CUmemAllocationProp prop;
275  auto result = cuMemGetAllocationPropertiesFromHandle (&prop, handle);
276  throw_if_error_lazy(result, "Failed obtaining the properties of the virtual memory physical_allocation with handle "
277  + ::std::to_string(handle));
278  return { prop };
279 }
280 
281 } // namespace detail_
282 
294 template <physical_allocation::shared_handle_kind_t SharedHandleKind>
295 physical_allocation_t import(shared_handle_t<SharedHandleKind> shared_handle, size_t size, bool holds_refcount_unit = false)
296 {
297  handle_t result_handle;
298  auto result = cuMemImportFromShareableHandle(
299  &result_handle, reinterpret_cast<void*>(shared_handle), CUmemAllocationHandleType(SharedHandleKind));
300  throw_if_error_lazy(result, "Failed importing a virtual memory physical_allocation from a shared handle ");
301  return physical_allocation::detail_::wrap(result_handle, size, holds_refcount_unit);
302 }
303 
304 namespace detail_ {
305 
306 inline ::std::string identify(physical_allocation_t physical_allocation) {
307  return identify(physical_allocation.handle(), physical_allocation.size());
308 }
309 
310 } // namespace detail_
311 
312 } // namespace physical_allocation
313 
314 /*
315 enum access_mode_t : ::std::underlying_type<CUmemAccess_flags>::type {
316  no_access = CU_MEM_ACCESS_FLAGS_PROT_NONE,
317  read_access = CU_MEM_ACCESS_FLAGS_PROT_READ,
318  read_and_write_access = CU_MEM_ACCESS_FLAGS_PROT_READWRITE,
319  rw_access = read_and_write_access
320 };
321 */
322 
323 namespace virtual_ {
324 namespace mapping {
325 namespace detail_ {
326 
327 inline mapping_t wrap(region_t address_range, bool owning = false);
328 
329 inline ::std::string identify(region_t address_range) {
330  return ::std::string("mapping of ") + memory::detail_::identify(address_range);
331 }
332 
333 } // namespace detail_
334 } // namespace mapping
335 
336 namespace detail_ {
337 
338 inline permissions_t get_permissions(region_t fully_mapped_region, cuda::device::id_t device_id)
339 {
340  CUmemLocation_st location { CU_MEM_LOCATION_TYPE_DEVICE, device_id };
341  unsigned long long flags;
342  auto result = cuMemGetAccess(&flags, &location, device::address(fully_mapped_region) );
343  throw_if_error_lazy(result, "Failed determining the access mode for "
344  + cuda::device::detail_::identify(device_id)
345  + " to the virtual memory mapping to the range of size "
346  + ::std::to_string(fully_mapped_region.size()) + " bytes at " + cuda::detail_::ptr_as_hex(fully_mapped_region.data()));
347  return permissions::detail_::from_flags(static_cast<CUmemAccess_flags>(flags)); // Does this actually work?
348 }
349 
350 } // namespace detail_
351 
358 permissions_t get_access_mode(region_t fully_mapped_region, const device_t& device);
359 
364 permissions_t get_access_mode(mapping_t mapping, const device_t& device);
365 
372 void set_permissions(region_t fully_mapped_region, const device_t& device, permissions_t access_mode);
373 
378 void set_permissions(mapping_t mapping, const device_t& device, permissions_t access_mode);
380 
387 template <template <typename...> class ContiguousContainer>
389 void set_permissions(
390  region_t fully_mapped_region,
391  const ContiguousContainer<device_t>& devices,
392  permissions_t access_mode);
393 
394 template <template <typename...> class ContiguousContainer>
395 void set_permissions(
396  region_t fully_mapped_region,
397  ContiguousContainer<device_t>&& devices,
398  permissions_t access_mode);
400 
405 template <template <typename...> class ContiguousContainer>
407 inline void set_permissions(
408  mapping_t mapping,
409  const ContiguousContainer<device_t>& devices,
410  permissions_t access_mode);
411 
412 template <template <typename...> class ContiguousContainer>
413 inline void set_permissions(
414  mapping_t mapping,
415  ContiguousContainer<device_t>&& devices,
416  permissions_t access_mode);
418 
419 
// A mapping of a physical allocation into a virtual address range; an
// owning instance unmaps the range on destruction.
class mapping_t {
protected: // constructors
	// Construction goes through mapping::detail_::wrap()
	mapping_t(region_t region, bool owning) : address_range_(region), owning_(owning) { }

public: // constructors & destructors

	friend mapping_t mapping::detail_::wrap(region_t address_range, bool owning);

	// Copies never own the mapping - only one wrapper unmaps the range
	mapping_t(const mapping_t& other) noexcept :
		address_range_(other.address_range()), owning_(false) { }

	mapping_t(mapping_t&& other) noexcept :
		address_range_(other.address_range()), owning_(other.owning_)
	{
		// The moved-from object relinquishes responsibility for unmapping
		other.owning_ = false;
	}

	// The virtual address range covered by this mapping
	region_t address_range() const noexcept { return address_range_; }
	// Whether this object will unmap the range on destruction
	bool is_owning() const noexcept { return owning_; }

	// Defined elsewhere (they depend on the full device_t definition)
	permissions_t get_permissions(const device_t& device) const;
	void set_permissions(const device_t& device, permissions_t access_mode) const;

	template <template <typename...> class ContiguousContainer>
	inline void set_permissions(
		const ContiguousContainer<device_t>& devices,
		permissions_t access_mode) const;

	template <template <typename...> class ContiguousContainer>
	inline void set_permissions(
		ContiguousContainer<device_t>&& devices,
		permissions_t access_mode) const;

	// Note: may throw, since unmapping can fail in the driver
	~mapping_t() noexcept(false)
	{
		if (not owning_) { return; }
		auto result = cuMemUnmap(device::address(address_range_), address_range_.size());
		throw_if_error_lazy(result, "Failed unmapping " + mapping::detail_::identify(address_range_));
	}

public:
#if CUDA_VERSION >= 11000

	// Obtain the physical allocation mapped into this address range.
	// Requires CUDA 11.0 or later (cuMemRetainAllocationHandle).
	//
	// NOTE(review): cuMemRetainAllocationHandle returns a handle that the
	// driver docs say must be freed with cuMemRelease, yet the wrapper is
	// created without holding a refcount unit - confirm this does not leak.
	physical_allocation_t allocation() const
	{
		CUmemGenericAllocationHandle allocation_handle;
		auto status = cuMemRetainAllocationHandle(&allocation_handle, address_range_.data());
		throw_if_error_lazy(status, " Failed obtaining/retaining the physical_allocation handle for the virtual memory "
			"range mapped to " + cuda::detail_::ptr_as_hex(address_range_.data()) + " of size " +
			::std::to_string(address_range_.size()) + " bytes");
		constexpr const bool increase_refcount{false};
		return physical_allocation::detail_::wrap(allocation_handle, address_range_.size(), increase_refcount);
	}
#endif
protected:

	region_t address_range_;
	bool owning_;

};
480 
481 namespace mapping {
482 
483 namespace detail_ {
484 
485 mapping_t wrap(region_t range, bool owning)
486 {
487  return { range, owning };
488 }
489 
490 inline ::std::string identify(mapping_t mapping)
491 {
492  return mapping::detail_::identify(mapping.address_range());
493 }
494 
495 } // namespace detail_
496 
497 } // namespace mapping
498 
499 inline mapping_t map(region_t region, physical_allocation_t physical_allocation)
500 {
501  size_t offset_into_allocation { 0 }; // not yet supported, but in the API
502  constexpr const unsigned long long flags { 0 };
503  auto handle = physical_allocation.handle();
504  auto status = cuMemMap(device::address(region), region.size(), offset_into_allocation, handle, flags);
505  throw_if_error_lazy(status, "Failed making a virtual memory mapping of "
506  + physical_allocation::detail_::identify(physical_allocation)
507  + " to the range of size " + ::std::to_string(region.size()) + " bytes at " +
508  cuda::detail_::ptr_as_hex(region.data()));
509  constexpr const bool is_owning { true };
510  return mapping::detail_::wrap(region, is_owning);
511 }
512 
513 } // namespace virtual_
514 } // namespace memory
515 } // namespace cuda
516 
517 #endif // CUDA_VERSION >= 10020
518 #endif // CUDA_API_WRAPPERS_VIRTUAL_MEMORY_HPP_
Alias for the default behavior; see heuristic .
Definition: types.hpp:901
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
detail_::region_helper< memory::region_t > region_t
A child class of the generic region_t with some managed-memory-specific functionality.
Definition: memory.hpp:1960
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:850
::std::size_t size_t
A size type for use throughout the wrappers library (except when specific API functions limit the siz...
Definition: types.hpp:81
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we&#39;ve failed - which also ensures no string is constructed unle...
Definition: error.hpp:316
CUarray handle_t
Raw CUDA driver handle for arrays (of any dimension)
Definition: array.hpp:34
array_t< T, NumDimensions > wrap(device::id_t device_id, context::handle_t context_handle, handle_t handle, dimensions_t< NumDimensions > dimensions) noexcept
Wrap an existing CUDA array in an array_t instance.
Definition: array.hpp:264
address_t address(const void *device_ptr) noexcept
Definition: types.hpp:682
void * as_pointer(device::address_t address) noexcept
Definition: types.hpp:700
detail_::all_devices devices()
Definition: devices.hpp:224
Wrapper class for a CUDA device.
Definition: device.hpp:135