cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
virtual_memory.hpp
Go to the documentation of this file.
1 
4 #ifndef CUDA_API_WRAPPERS_VIRTUAL_MEMORY_HPP_
5 #define CUDA_API_WRAPPERS_VIRTUAL_MEMORY_HPP_
6 
7 // We need this out of the #ifdef, as otherwise we don't know what
8 // the CUDA_VERSION is...
9 #include <cuda.h>
10 
11 #if CUDA_VERSION >= 10020
12 #include "types.hpp"
13 #include "memory.hpp"
14 
15 namespace cuda {
16 // TODO: Perhaps move this down into the device namespace ?
17 namespace memory {
18 
19 class physical_allocation_t;
20 
21 namespace physical_allocation {
22 
23 using handle_t = CUmemGenericAllocationHandle;
24 
25 namespace detail_ {
26 
27 physical_allocation_t wrap(handle_t handle, size_t size, bool holds_refcount_unit);
28 
29 } // namespace detail_
30 
31 namespace detail_ {
// The kinds of allocation-granularity figures obtainable from the driver
// (via cuMemGetAllocationGranularity); mirrors CUmemAllocationGranularity_flags.
enum class granularity_kind_t : ::std::underlying_type<CUmemAllocationGranularity_flags_enum>::type {
	minimum_required = CU_MEM_ALLOC_GRANULARITY_MINIMUM, // the granularity sizes must conform to
	recommended_for_performance = CU_MEM_ALLOC_GRANULARITY_RECOMMENDED // the granularity the driver recommends
};
36 
37 } // namespace detail_
38 
39 // Note: Not inheriting from CUmemAllocationProp_st, since
40 // that structure is a bit messed up
41 struct properties_t {
42  // Note: Specifying a compression type is currently unsupported,
43  // as the driver API does not document semantics for the relevant
44  // properties field
45 
46 public: // getters
47  cuda::device_t device() const;
48 
49  // TODO: Is this only relevant to requests?
50  shared_handle_kind_t requested_kind() const
51  {
52  return shared_handle_kind_t(raw.requestedHandleTypes);
53  };
54 
55 protected: // non-mutators
56  size_t granularity(detail_::granularity_kind_t kind) const {
57  size_t result;
58  auto status = cuMemGetAllocationGranularity(&result, &raw,
59  static_cast<CUmemAllocationGranularity_flags>(kind));
60  throw_if_error_lazy(status, "Could not determine physical allocation granularity");
61  return result;
62  }
63 
64 public: // non-mutators
65  size_t minimum_granularity() const { return granularity(detail_::granularity_kind_t::minimum_required); }
66  size_t recommended_granularity() const { return granularity(detail_::granularity_kind_t::recommended_for_performance); }
67 
68 public:
69  properties_t(CUmemAllocationProp_st raw_properties) : raw(raw_properties)
70  {
71  if (raw.location.type != CU_MEM_LOCATION_TYPE_DEVICE) {
72  throw ::std::runtime_error("Unexpected physical_allocation type - we only know about devices!");
73  }
74  }
75 
76  properties_t(properties_t&&) = default;
77  properties_t(const properties_t&) = default;
78 
79 public:
80  CUmemAllocationProp_st raw;
81 
82 };
83 
84 namespace detail_ {
85 
86 template<physical_allocation::shared_handle_kind_t SharedHandleKind>
87 properties_t create_properties(cuda::device::id_t device_id)
88 {
89  CUmemAllocationProp_st raw_props{};
90  raw_props.type = CU_MEM_ALLOCATION_TYPE_PINNED;
91  raw_props.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
92  raw_props.location.id = static_cast<int>(device_id);
93  raw_props.requestedHandleTypes = static_cast<CUmemAllocationHandleType>(SharedHandleKind);
94  raw_props.win32HandleMetaData = nullptr;
95  return properties_t{raw_props};
96 }
97 
98 } // namespace detail_
99 
100 template<physical_allocation::shared_handle_kind_t SharedHandleKind>
101 properties_t create_properties_for(const device_t& device);
102 
103 } // namespace physical_allocation
104 
105 namespace virtual_ {
106 
107 class reserved_address_range_t;
108 class mapping_t;
109 
110 namespace detail_ {
111 
112 inline void cancel_reservation(memory::region_t reserved)
113 {
114  auto status = cuMemAddressFree(memory::device::address(reserved.start()), reserved.size());
115  throw_if_error_lazy(status, "Failed freeing a reservation of " + memory::detail_::identify(reserved));
116 }
117 
118 } // namespace detail_
119 
/// Type for expressing the alignment requirement of an address-range reservation
using alignment_t = size_t;

/// Named alignment values for common cases
enum alignment : alignment_t {
	default_ = 0, // 0 = the driver-default alignment (per cuMemAddressReserve)
	trivial = 1   // byte alignment, i.e. effectively no alignment requirement
};
126 
127 namespace detail_ {
128 
129 reserved_address_range_t wrap(region_t address_range, alignment_t alignment, bool take_ownership);
130 
131 } // namespace detail_
132 
133 
/// A (possibly-owning) wrapper for a reserved range of virtual addresses;
/// an owning instance frees the reservation on destruction.
class reserved_address_range_t {
protected:

	reserved_address_range_t(region_t region, alignment_t alignment, bool owning) noexcept
	: region_(region), alignment_(alignment), owning_(owning) { }

public:
	friend reserved_address_range_t detail_::wrap(region_t, alignment_t, bool);

	// Move-construction transfers ownership: the moved-from object
	// will no longer free the reservation
	reserved_address_range_t(reserved_address_range_t&& other) noexcept
	: region_(other.region_), alignment_(other.alignment_), owning_(other.owning_)
	{
		other.owning_ = false;
	}

	// Note: deliberately noexcept(false) - freeing the reservation may throw
	~reserved_address_range_t() noexcept(false)
	{
		if (not owning_) { return; }
		detail_::cancel_reservation(region_);
	}

public: // getters
	bool is_owning() const noexcept { return owning_; }
	region_t region() const noexcept{ return region_; }
	alignment_t alignment() const noexcept { return alignment_; }

protected: // data members
	const region_t region_;
	const alignment_t alignment_;
	bool owning_;
};
165 
166 namespace detail_ {
167 
168 inline reserved_address_range_t wrap(region_t address_range, alignment_t alignment, bool take_ownership)
169 {
170  return { address_range, alignment, take_ownership };
171 }
172 
173 } // namespace detail_
174 
175 inline reserved_address_range_t reserve(region_t requested_region, alignment_t alignment = alignment::default_)
176 {
177  unsigned long flags { 0 };
178  CUdeviceptr ptr;
179  auto status = cuMemAddressReserve(&ptr, requested_region.size(), alignment, requested_region.device_address(), flags);
180  throw_if_error_lazy(status, "Failed making a reservation of " + cuda::memory::detail_::identify(requested_region)
181  + " with alignment value " + ::std::to_string(alignment));
182  bool is_owning { true };
183  return detail_::wrap(memory::region_t { ptr, requested_region.size() }, alignment, is_owning);
184 }
185 
186 inline reserved_address_range_t reserve(size_t requested_size, alignment_t alignment = alignment::default_)
187 {
188  return reserve(region_t{ nullptr, requested_size }, alignment);
189 }
190 
} // namespace virtual_
192 
193 class physical_allocation_t {
194 protected: // constructors
195  physical_allocation_t(physical_allocation::handle_t handle, size_t size, bool holds_refcount_unit)
196  : handle_(handle), size_(size), holds_refcount_unit_(holds_refcount_unit) { }
197 
198 public: // constructors & destructor
199  physical_allocation_t(const physical_allocation_t& other) noexcept : handle_(other.handle_), size_(other.size_), holds_refcount_unit_(false)
200  { }
201 
202  physical_allocation_t(physical_allocation_t&& other) noexcept : handle_(other.handle_), size_(other.size_), holds_refcount_unit_(other.holds_refcount_unit_)
203  {
204  other.holds_refcount_unit_ = false;
205  }
206 
207  ~physical_allocation_t() noexcept(false)
208  {
209  if (not holds_refcount_unit_) { return; }
210  auto result = cuMemRelease(handle_);
211  throw_if_error_lazy(result, "Failed making a virtual memory physical_allocation of size " + ::std::to_string(size_));
212  }
213 
214 public: // non-mutators
215  friend physical_allocation_t physical_allocation::detail_::wrap(physical_allocation::handle_t handle, size_t size, bool holds_refcount_unit);
216 
217  size_t size() const noexcept { return size_; }
218  physical_allocation::handle_t handle() const noexcept { return handle_; }
219  bool holds_refcount_unit() const noexcept { return holds_refcount_unit_; }
220 
221  physical_allocation::properties_t properties() const {
222  CUmemAllocationProp raw_properties;
223  auto status = cuMemGetAllocationPropertiesFromHandle(&raw_properties, handle_);
224  throw_if_error_lazy(status, "Obtaining the properties of a virtual memory physical_allocation with handle " + ::std::to_string(handle_));
225  return { raw_properties };
226  }
227 
228  template <physical_allocation::shared_handle_kind_t SharedHandleKind>
229  physical_allocation::shared_handle_t<SharedHandleKind> sharing_handle() const
230  {
231  physical_allocation::shared_handle_t<SharedHandleKind> shared_handle_;
232  static constexpr const unsigned long long flags { 0 };
233  auto result = cuMemExportToShareableHandle(&shared_handle_, handle_, static_cast<CUmemAllocationHandleType>(SharedHandleKind), flags);
234  throw_if_error_lazy(result, "Exporting a (generic CUDA) shared memory physical_allocation to a shared handle");
235  return shared_handle_;
236  }
237 
238 protected: // data members
239  const physical_allocation::handle_t handle_;
240  size_t size_;
241  bool holds_refcount_unit_;
242 };
243 
244 namespace physical_allocation {
245 
246 inline physical_allocation_t create(size_t size, properties_t properties)
247 {
248  static constexpr const unsigned long long flags { 0 };
249  CUmemGenericAllocationHandle handle;
250  auto result = cuMemCreate(&handle, size, &properties.raw, flags);
251  throw_if_error_lazy(result, "Failed making a virtual memory physical_allocation of size " + ::std::to_string(size));
252  static constexpr const bool is_owning { true };
253  return detail_::wrap(handle, size, is_owning);
254 }
255 
256 physical_allocation_t create(size_t size, device_t device);
257 
258 namespace detail_ {
259 
260 inline ::std::string identify(handle_t handle, size_t size) {
261  return ::std::string("physical allocation with handle ") + ::std::to_string(handle)
262  + " of size " + ::std::to_string(size);
263 }
264 
265 inline physical_allocation_t wrap(handle_t handle, size_t size, bool holds_refcount_unit)
266 {
267  return { handle, size, holds_refcount_unit };
268 }
269 
270 inline properties_t properties_of(handle_t handle)
271 {
272  CUmemAllocationProp prop;
273  auto result = cuMemGetAllocationPropertiesFromHandle (&prop, handle);
274  throw_if_error_lazy(result, "Failed obtaining the properties of the virtual memory physical_allocation with handle "
275  + ::std::to_string(handle));
276  return { prop };
277 }
278 
279 } // namespace detail_
280 
/// Import a physical allocation from an OS-shared handle (e.g. one exported
/// by another process via sharing_handle()).
/// @note the caller supplies `size`; it is not validated here - TODO confirm
/// the driver offers no way to recover the true size from the handle.
/// @note when holds_refcount_unit is true, the returned wrapper releases the
/// imported handle on destruction
template <physical_allocation::shared_handle_kind_t SharedHandleKind>
physical_allocation_t import(shared_handle_t<SharedHandleKind> shared_handle, size_t size, bool holds_refcount_unit = false)
{
	handle_t result_handle;
	auto result = cuMemImportFromShareableHandle(
		&result_handle, reinterpret_cast<void*>(shared_handle), CUmemAllocationHandleType(SharedHandleKind));
	throw_if_error_lazy(result, "Failed importing a virtual memory physical_allocation from a shared handle ");
	return physical_allocation::detail_::wrap(result_handle, size, holds_refcount_unit);
}
301 
302 namespace detail_ {
303 
304 inline ::std::string identify(physical_allocation_t physical_allocation) {
305  return identify(physical_allocation.handle(), physical_allocation.size());
306 }
307 
308 } // namespace detail_
309 
310 } // namespace physical_allocation
311 
312 /*
313 enum access_mode_t : ::std::underlying_type<CUmemAccess_flags>::type {
314  no_access = CU_MEM_ACCESS_FLAGS_PROT_NONE,
315  read_access = CU_MEM_ACCESS_FLAGS_PROT_READ,
316  read_and_write_access = CU_MEM_ACCESS_FLAGS_PROT_READWRITE,
317  rw_access = read_and_write_access
318 };
319 */
320 
321 namespace virtual_ {
322 namespace mapping {
323 namespace detail_ {
324 
325 inline mapping_t wrap(region_t address_range, bool owning = false);
326 
327 inline ::std::string identify(region_t address_range) {
328  return ::std::string("mapping of ") + memory::detail_::identify(address_range);
329 }
330 
331 } // namespace detail_
332 } // namespace mapping
333 
334 namespace detail_ {
335 
// Determine the access permissions the given device has to a region which
// is assumed to be fully covered by a virtual memory mapping
inline access_permissions_t get_access_mode(region_t fully_mapped_region, cuda::device::id_t device_id)
{
	CUmemLocation_st location { CU_MEM_LOCATION_TYPE_DEVICE, device_id };
	unsigned long long flags; // out-parameter, written by cuMemGetAccess below
	auto result = cuMemGetAccess(&flags, &location, fully_mapped_region.device_address() );
	throw_if_error_lazy(result, "Failed determining the access mode for "
		+ cuda::device::detail_::identify(device_id)
		+ " to the virtual memory mapping to the range of size "
		+ ::std::to_string(fully_mapped_region.size()) + " bytes at " + cuda::detail_::ptr_as_hex(fully_mapped_region.data()));
	return access_permissions_t::from_access_flags(static_cast<CUmemAccess_flags>(flags)); // Does this actually work?
}
347 
348 } // namespace detail_
349 
356 access_permissions_t get_access_mode(region_t fully_mapped_region, const device_t& device);
357 
362 access_permissions_t get_access_mode(mapping_t mapping, const device_t& device);
363 
370 void set_access_mode(region_t fully_mapped_region, const device_t& device, access_permissions_t access_mode);
371 
376 void set_access_mode(mapping_t mapping, const device_t& device, access_permissions_t access_mode);
378 
385 template <template <typename...> class ContiguousContainer>
387 void set_access_mode(
388  region_t fully_mapped_region,
389  const ContiguousContainer<device_t>& devices,
390  access_permissions_t access_mode);
391 
392 template <template <typename...> class ContiguousContainer>
393 void set_access_mode(
394  region_t fully_mapped_region,
395  ContiguousContainer<device_t>&& devices,
396  access_permissions_t access_mode);
398 
403 template <template <typename...> class ContiguousContainer>
405 inline void set_access_mode(
406  mapping_t mapping,
407  const ContiguousContainer<device_t>& devices,
408  access_permissions_t access_mode);
409 
410 template <template <typename...> class ContiguousContainer>
411 inline void set_access_mode(
412  mapping_t mapping,
413  ContiguousContainer<device_t>&& devices,
414  access_permissions_t access_mode);
416 
417 
/// A mapping of a physical allocation into a (previously reserved) range of
/// virtual addresses; an owning instance unmaps the range on destruction.
class mapping_t {
protected: // constructors
	mapping_t(region_t region, bool owning) : address_range_(region), owning_(owning) { }

public: // constructors & destructors

	friend mapping_t mapping::detail_::wrap(region_t address_range, bool owning);

	// Copies are non-owning: they never unmap the range
	mapping_t(const mapping_t& other) noexcept :
		address_range_(other.address_range()), owning_(false) { }

	// Move-construction transfers ownership of the mapping
	mapping_t(mapping_t&& other) noexcept :
		address_range_(other.address_range()), owning_(other.owning_)
	{
		other.owning_ = false;
	}

	/// The range of virtual addresses covered by this mapping
	region_t address_range() const noexcept { return address_range_; }
	/// True if this object unmaps the range on destruction
	bool is_owning() const noexcept { return owning_; }

	access_permissions_t get_access_mode(const device_t& device) const;
	void set_access_mode(const device_t& device, access_permissions_t access_mode) const;

	template <template <typename...> class ContiguousContainer>
	inline void set_access_mode(
		const ContiguousContainer<device_t>& devices,
		access_permissions_t access_mode) const;

	template <template <typename...> class ContiguousContainer>
	inline void set_access_mode(
		ContiguousContainer<device_t>&& devices,
		access_permissions_t access_mode) const;

	// Note: deliberately noexcept(false) - unmapping may throw
	~mapping_t() noexcept(false)
	{
		if (not owning_) { return; }
		auto result = cuMemUnmap(address_range_.device_address(), address_range_.size());
		throw_if_error_lazy(result, "Failed unmapping " + mapping::detail_::identify(address_range_));
	}

public:
#if CUDA_VERSION >= 11000

	/// The physical allocation mapped into this address range.
	// NOTE(review): cuMemRetainAllocationHandle retains (increments the
	// refcount of) the handle, yet the wrapper is constructed with
	// increase_refcount (i.e. holds_refcount_unit) = false, so no matching
	// cuMemRelease will ever be issued - verify this does not leak a
	// refcount unit on the allocation.
	physical_allocation_t allocation() const
	{
		CUmemGenericAllocationHandle allocation_handle;
		auto status = cuMemRetainAllocationHandle(&allocation_handle, address_range_.data());
		throw_if_error_lazy(status, " Failed obtaining/retaining the physical_allocation handle for the virtual memory "
			"range mapped to " + cuda::detail_::ptr_as_hex(address_range_.data()) + " of size " +
			::std::to_string(address_range_.size()) + " bytes");
		constexpr const bool increase_refcount{false};
		return physical_allocation::detail_::wrap(allocation_handle, address_range_.size(), increase_refcount);
	}
#endif
protected:

	region_t address_range_;
	bool owning_;

};
478 
479 namespace mapping {
480 
481 namespace detail_ {
482 
483 mapping_t wrap(region_t range, bool owning)
484 {
485  return { range, owning };
486 }
487 
488 inline ::std::string identify(mapping_t mapping)
489 {
490  return mapping::detail_::identify(mapping.address_range());
491 }
492 
493 } // namespace detail_
494 
495 } // namespace mapping
496 
497 inline mapping_t map(region_t region, physical_allocation_t physical_allocation)
498 {
499  size_t offset_into_allocation { 0 }; // not yet supported, but in the API
500  constexpr const unsigned long long flags { 0 };
501  auto handle = physical_allocation.handle();
502  auto status = cuMemMap(region.device_address(), region.size(), offset_into_allocation, handle, flags);
503  throw_if_error_lazy(status, "Failed making a virtual memory mapping of "
504  + physical_allocation::detail_::identify(physical_allocation)
505  + " to the range of size " + ::std::to_string(region.size()) + " bytes at " +
506  cuda::detail_::ptr_as_hex(region.data()));
507  constexpr const bool is_owning { true };
508  return mapping::detail_::wrap(region, is_owning);
509 }
510 
511 } // namespace virtual_
512 } // namespace memory
513 } // namespace cuda
514 
515 #endif // CUDA_VERSION >= 10020
516 #endif // CUDA_API_WRAPPERS_VIRTUAL_MEMORY_HPP_
Alias for the default behavior; see heuristic .
Definition: types.hpp:805
array_t< T, NumDimensions > wrap(device::id_t device_id, context::handle_t context_handle, handle_t handle, dimensions_t< NumDimensions > dimensions) noexcept
Wrap an existing CUDA array in an array_t instance.
Definition: array.hpp:248
All definitions and functionality wrapping the CUDA Runtime API.
Definition: array.hpp:22
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:752
Definition: kernel_launch.hpp:77
address_t address(const void *device_ptr) noexcept
Return a pointers address as a numeric value of the type appropriate for device.
Definition: types.hpp:621
Representation, allocation and manipulation of CUDA-related memory, of different kinds.