4 #ifndef CUDA_API_WRAPPERS_VIRTUAL_MEMORY_HPP_ 5 #define CUDA_API_WRAPPERS_VIRTUAL_MEMORY_HPP_ 11 #if CUDA_VERSION >= 10020 25 class physical_allocation_t;
28 namespace physical_allocation {
30 using handle_t = CUmemGenericAllocationHandle;
34 physical_allocation_t
wrap(
handle_t handle,
size_t size,
bool holds_refcount_unit);
39 enum class granularity_kind_t : ::std::underlying_type<CUmemAllocationGranularity_flags_enum>::type {
40 minimum_required = CU_MEM_ALLOC_GRANULARITY_MINIMUM,
41 recommended_for_performance = CU_MEM_ALLOC_GRANULARITY_RECOMMENDED
57 shared_handle_kind_t requested_kind()
const 59 return shared_handle_kind_t(raw.requestedHandleTypes);
63 size_t granularity(detail_::granularity_kind_t kind)
const {
65 auto status = cuMemGetAllocationGranularity(&result, &raw,
66 static_cast<CUmemAllocationGranularity_flags>(kind));
72 size_t minimum_granularity()
const {
return granularity(detail_::granularity_kind_t::minimum_required); }
73 size_t recommended_granularity()
const {
return granularity(detail_::granularity_kind_t::recommended_for_performance); }
76 properties_t(CUmemAllocationProp_st raw_properties) : raw(raw_properties)
78 if (raw.location.type != CU_MEM_LOCATION_TYPE_DEVICE) {
79 throw ::std::runtime_error(
"Unexpected physical_allocation type - we only know about devices!");
83 properties_t(properties_t&&) =
default;
84 properties_t(
const properties_t&) =
default;
87 CUmemAllocationProp_st raw;
93 template<physical_allocation::shared_handle_kind_t SharedHandleKind>
96 CUmemAllocationProp_st raw_props{};
97 raw_props.type = CU_MEM_ALLOCATION_TYPE_PINNED;
98 raw_props.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
99 raw_props.location.id =
static_cast<int>(device_id);
100 raw_props.requestedHandleTypes =
static_cast<CUmemAllocationHandleType
>(SharedHandleKind);
101 raw_props.win32HandleMetaData =
nullptr;
102 return properties_t{raw_props};
107 template<physical_allocation::shared_handle_kind_t SharedHandleKind>
108 properties_t create_properties_for(
const device_t& device);
114 class reserved_address_range_t;
126 auto status = cancel_reservation_nothrow(reserved);
127 throw_if_error_lazy(status,
"Failed freeing a reservation of " + memory::detail_::identify(reserved));
132 using alignment_t =
size_t;
134 enum alignment : alignment_t {
141 reserved_address_range_t
wrap(
region_t address_range, alignment_t alignment,
bool take_ownership);
146 class reserved_address_range_t {
149 reserved_address_range_t(
region_t region, alignment_t alignment,
bool owning) noexcept
150 : region_(region), alignment_(alignment), owning_(owning) { }
155 reserved_address_range_t(reserved_address_range_t&& other) noexcept
156 : region_(other.region_), alignment_(other.alignment_), owning_(other.owning_)
158 other.owning_ =
false;
161 ~reserved_address_range_t() DESTRUCTOR_EXCEPTION_SPEC
163 if (not owning_) {
return; }
164 #if THROW_IN_DESTRUCTORS 165 detail_::cancel_reservation(region_);
167 detail_::cancel_reservation_nothrow(region_);
172 bool is_owning() const noexcept {
return owning_; }
173 region_t region() const noexcept{
return region_; }
174 alignment_t alignment() const noexcept {
return alignment_; }
178 const alignment_t alignment_;
184 inline reserved_address_range_t
wrap(
region_t address_range, alignment_t alignment,
bool take_ownership)
186 return { address_range, alignment, take_ownership };
// Reserve a range of virtual addresses via cuMemAddressReserve(), passing the
// requested region's size and address together with the given alignment; a
// failure of the driver call is reported through throw_if_error_lazy().
//
// NOTE(review): `flags` is declared `unsigned long` here, while the other
// driver calls in this file (cuMemCreate, cuMemMap, cuMemExportToShareableHandle)
// declare theirs as `unsigned long long` - confirm against the signature of
// cuMemAddressReserve and align the type.
// NOTE(review): the declaration of `ptr` and the return statement are not
// visible in this excerpt.
191 inline reserved_address_range_t reserve(
region_t requested_region, alignment_t alignment = alignment::default_)
193 unsigned long flags { 0 };
195 auto status = cuMemAddressReserve(&ptr, requested_region.size(), alignment,
device::address(requested_region), flags);
196 throw_if_error_lazy(status,
"Failed making a reservation of " + cuda::memory::detail_::identify(requested_region)
197 +
" with alignment value " + ::std::to_string(alignment));
198 bool is_owning {
true };
202 inline reserved_address_range_t reserve(
size_t requested_size, alignment_t alignment = alignment::default_)
204 return reserve(
region_t{
nullptr, requested_size }, alignment);
209 class physical_allocation_t {
212 : handle_(handle), size_(size), holds_refcount_unit_(holds_refcount_unit) { }
215 physical_allocation_t(
const physical_allocation_t& other) noexcept : handle_(other.handle_), size_(other.size_), holds_refcount_unit_(
false)
218 physical_allocation_t(physical_allocation_t&& other) noexcept : handle_(other.handle_), size_(other.size_), holds_refcount_unit_(other.holds_refcount_unit_)
220 other.holds_refcount_unit_ =
false;
223 ~physical_allocation_t() DESTRUCTOR_EXCEPTION_SPEC
225 if (not holds_refcount_unit_) {
return; }
226 auto status = cuMemRelease(handle_);
227 #ifdef THROW_IN_DESTRUCTORS 228 throw_if_error_lazy(status,
"Failed making a virtual memory physical_allocation of size " + ::std::to_string(size_));
237 size_t size() const noexcept {
return size_; }
239 bool holds_refcount_unit() const noexcept {
return holds_refcount_unit_; }
241 physical_allocation::properties_t properties()
const {
242 CUmemAllocationProp raw_properties;
243 auto status = cuMemGetAllocationPropertiesFromHandle(&raw_properties, handle_);
244 throw_if_error_lazy(status,
"Obtaining the properties of a virtual memory physical_allocation with handle " + ::std::to_string(handle_));
245 return { raw_properties };
248 template <physical_allocation::shared_handle_kind_t SharedHandleKind>
249 physical_allocation::shared_handle_t<SharedHandleKind> sharing_handle()
const 251 physical_allocation::shared_handle_t<SharedHandleKind> shared_handle_;
252 static constexpr
const unsigned long long flags { 0 };
253 auto result = cuMemExportToShareableHandle(&shared_handle_, handle_, static_cast<CUmemAllocationHandleType>(SharedHandleKind), flags);
254 throw_if_error_lazy(result,
"Exporting a (generic CUDA) shared memory physical_allocation to a shared handle");
255 return shared_handle_;
261 bool holds_refcount_unit_;
264 namespace physical_allocation {
// Create a physical allocation of `size` with the given properties, by calling
// cuMemCreate() with no flags set; a failure of the driver call is reported
// through throw_if_error_lazy().
// NOTE(review): the return statement is not visible in this excerpt;
// presumably the new handle is wrapped as an owning physical_allocation_t
// (see `is_owning`) - confirm.
266 inline physical_allocation_t create(
size_t size, properties_t properties)
268 static constexpr
const unsigned long long flags { 0 };
269 CUmemGenericAllocationHandle handle;
270 auto result = cuMemCreate(&handle, size, &properties.raw, flags);
271 throw_if_error_lazy(result,
"Failed making a virtual memory physical_allocation of size " + ::std::to_string(size));
272 static constexpr
const bool is_owning {
true };
276 physical_allocation_t create(
size_t size, device_t device);
280 inline ::std::string identify(
handle_t handle,
size_t size) {
281 return ::std::string(
"physical allocation with handle ") + ::std::to_string(handle)
282 +
" of size " + ::std::to_string(size);
285 inline physical_allocation_t
wrap(
handle_t handle,
size_t size,
bool holds_refcount_unit)
287 return { handle, size, holds_refcount_unit };
// Obtain the properties of the physical allocation with the given raw handle,
// via cuMemGetAllocationPropertiesFromHandle(); a failure of the driver call
// is reported through throw_if_error_lazy().
// NOTE(review): the return statement is not visible in this excerpt.
290 inline properties_t properties_of(
handle_t handle)
292 CUmemAllocationProp prop;
293 auto result = cuMemGetAllocationPropertiesFromHandle (&prop, handle);
294 throw_if_error_lazy(result,
"Failed obtaining the properties of the virtual memory physical_allocation with handle " 295 + ::std::to_string(handle));
// Import a physical allocation from a shareable handle of kind
// SharedHandleKind, via cuMemImportFromShareableHandle(); a failure of the
// driver call is reported through throw_if_error_lazy().
// The shared handle is passed to the driver reinterpreted as a `void*`.
// NOTE(review): the declaration of `result_handle` and the return statement
// are not visible in this excerpt; `size` and `holds_refcount_unit` are
// presumably forwarded to the resulting wrapper - confirm.
312 template <physical_allocation::shared_handle_kind_t SharedHandleKind>
313 physical_allocation_t
import(shared_handle_t<SharedHandleKind> shared_handle,
size_t size,
bool holds_refcount_unit =
false)
316 auto result = cuMemImportFromShareableHandle(
317 &result_handle, reinterpret_cast<void*>(shared_handle), CUmemAllocationHandleType(SharedHandleKind));
318 throw_if_error_lazy(result,
"Failed importing a virtual memory physical_allocation from a shared handle ");
324 inline ::std::string identify(physical_allocation_t physical_allocation) {
325 return identify(physical_allocation.handle(), physical_allocation.size());
345 inline mapping_t
wrap(
region_t address_range,
bool owning =
false);
347 inline ::std::string identify(
region_t address_range) {
348 return ::std::string(
"mapping of ") + memory::detail_::identify(address_range);
358 CUmemLocation_st location { CU_MEM_LOCATION_TYPE_DEVICE, device_id };
359 unsigned long long flags;
360 auto result = cuMemGetAccess(&flags, &location,
device::address(fully_mapped_region) );
362 + cuda::device::detail_::identify(device_id)
363 +
" to the virtual memory mapping to the range of size " 364 + ::std::to_string(fully_mapped_region.size()) +
" bytes at " + cuda::detail_::ptr_as_hex(fully_mapped_region.data()));
365 return permissions::detail_::from_flags(static_cast<CUmemAccess_flags>(flags));
376 permissions_t get_access_mode(
region_t fully_mapped_region,
const device_t& device);
382 permissions_t get_access_mode(mapping_t mapping,
const device_t& device);
390 void set_permissions(
region_t fully_mapped_region,
const device_t& device, permissions_t access_mode);
396 void set_permissions(mapping_t mapping,
const device_t& device, permissions_t access_mode);
405 template <
template <
typename...>
class ContiguousContainer>
407 void set_permissions(
409 const ContiguousContainer<device_t>&
devices,
410 permissions_t access_mode);
412 template <
template <
typename...>
class ContiguousContainer>
413 void set_permissions(
415 ContiguousContainer<device_t>&&
devices,
416 permissions_t access_mode);
423 template <
template <
typename...>
class ContiguousContainer>
425 inline void set_permissions(
427 const ContiguousContainer<device_t>&
devices,
428 permissions_t access_mode);
430 template <
template <
typename...>
class ContiguousContainer>
431 inline void set_permissions(
433 ContiguousContainer<device_t>&&
devices,
434 permissions_t access_mode);
440 mapping_t(
region_t region,
bool owning) : address_range_(region), owning_(owning) { }
446 mapping_t(
const mapping_t& other) noexcept :
447 address_range_(other.address_range()), owning_(
false) { }
449 mapping_t(mapping_t&& other) noexcept :
450 address_range_(other.address_range()), owning_(other.owning_)
452 other.owning_ =
false;
455 region_t address_range() const noexcept {
return address_range_; }
456 bool is_owning() const noexcept {
return owning_; }
458 permissions_t get_permissions(
const device_t& device)
const;
459 void set_permissions(
const device_t& device, permissions_t access_mode)
const;
461 template <
template <
typename...>
class ContiguousContainer>
462 inline void set_permissions(
463 const ContiguousContainer<device_t>&
devices,
464 permissions_t access_mode)
const;
466 template <
template <
typename...>
class ContiguousContainer>
467 inline void set_permissions(
468 ContiguousContainer<device_t>&&
devices,
469 permissions_t access_mode)
const;
471 ~mapping_t() noexcept(false)
473 if (not owning_) {
return; }
474 auto result = cuMemUnmap(
device::address(address_range_), address_range_.size());
475 throw_if_error_lazy(result,
"Failed unmapping " + mapping::detail_::identify(address_range_));
// Obtain the physical allocation backing this mapping: calls
// cuMemRetainAllocationHandle() on the start of the mapped address range and
// reports a failure of the driver call through throw_if_error_lazy().
// Only compiled when CUDA_VERSION >= 11000.
//
// NOTE(review): the return statement is not visible in this excerpt. If the
// handle obtained here must later be given back with cuMemRelease(), then
// wrapping it with a `false` flag (`increase_refcount`) would mean it is never
// released - confirm against the driver API documentation and the return
// statement.
479 #if CUDA_VERSION >= 11000 481 physical_allocation_t allocation()
const 483 CUmemGenericAllocationHandle allocation_handle;
484 auto status = cuMemRetainAllocationHandle(&allocation_handle, address_range_.data());
485 throw_if_error_lazy(status,
" Failed obtaining/retaining the physical_allocation handle for the virtual memory " 486 "range mapped to " + cuda::detail_::ptr_as_hex(address_range_.data()) +
" of size " +
487 ::std::to_string(address_range_.size()) +
" bytes");
488 constexpr
const bool increase_refcount{
false};
505 return { range, owning };
508 inline ::std::string identify(mapping_t mapping)
510 return mapping::detail_::identify(mapping.address_range());
// Map a physical allocation onto an address range, by calling cuMemMap() with
// the region's address and size, a zero offset into the allocation, the
// allocation's handle and no flags set.
// NOTE(review): the beginning of the error-reporting statement and the return
// statement are not visible in this excerpt; presumably the result is an
// owning mapping_t over `region` (see `is_owning`) - confirm.
517 inline mapping_t map(
region_t region, physical_allocation_t physical_allocation)
519 size_t offset_into_allocation { 0 };
520 constexpr
const unsigned long long flags { 0 };
521 auto handle = physical_allocation.handle();
522 auto status = cuMemMap(
device::address(region), region.size(), offset_into_allocation, handle, flags);
524 + physical_allocation::detail_::identify(physical_allocation)
525 +
" to the range of size " + ::std::to_string(region.size()) +
" bytes at " +
526 cuda::detail_::ptr_as_hex(region.data()));
527 constexpr
const bool is_owning {
true };
535 #endif // CUDA_VERSION >= 10020 536 #endif // CUDA_API_WRAPPERS_VIRTUAL_MEMORY_HPP_ Alias for the default behavior; see heuristic .
Definition: types.hpp:903
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
detail_::region_helper< memory::region_t > region_t
A child class of the generic region_t with some managed-memory-specific functionality.
Definition: memory.hpp:1974
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:852
::std::size_t size_t
A size type for use throughout the wrappers library (except when specific API functions limit the siz...
Definition: types.hpp:78
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we've failed - which also ensures no string is constructed unle...
Definition: error.hpp:327
CUarray handle_t
Raw CUDA driver handle for arrays (of any dimension)
Definition: array.hpp:34
array_t< T, NumDimensions > wrap(device::id_t device_id, context::handle_t context_handle, handle_t handle, dimensions_t< NumDimensions > dimensions) noexcept
Wrap an existing CUDA array in an array_t instance.
Definition: array.hpp:271
address_t address(const void *device_ptr) noexcept
Definition: types.hpp:684
void * as_pointer(device::address_t address) noexcept
Definition: types.hpp:702
detail_::all_devices devices()
Definition: devices.hpp:224
Wrapper class for a CUDA device.
Definition: device.hpp:135
CUresult status_t
Indicates either the result (success or error index) of a CUDA Runtime or Driver API call...
Definition: types.hpp:74