4 #ifndef CUDA_API_WRAPPERS_VIRTUAL_MEMORY_HPP_ 5 #define CUDA_API_WRAPPERS_VIRTUAL_MEMORY_HPP_ 11 #if CUDA_VERSION >= 10020 19 class physical_allocation_t;
21 namespace physical_allocation {
23 using handle_t = CUmemGenericAllocationHandle;
27 physical_allocation_t
wrap(handle_t handle,
size_t size,
bool holds_refcount_unit);
/// The kind of granularity figure one may ask about regarding physical
/// allocations: a hard minimum requirement vs. a performance recommendation.
enum class granularity_kind_t : ::std::underlying_type<CUmemAllocationGranularity_flags_enum>::type {
	minimum_required            = CU_MEM_ALLOC_GRANULARITY_MINIMUM,
	recommended_for_performance = CU_MEM_ALLOC_GRANULARITY_RECOMMENDED
};
47 cuda::device_t device()
const;
50 shared_handle_kind_t requested_kind()
const 52 return shared_handle_kind_t(raw.requestedHandleTypes);
56 size_t granularity(detail_::granularity_kind_t kind)
const {
58 auto status = cuMemGetAllocationGranularity(&result, &raw,
59 static_cast<CUmemAllocationGranularity_flags>(kind));
60 throw_if_error_lazy(status,
"Could not determine physical allocation granularity");
65 size_t minimum_granularity()
const {
return granularity(detail_::granularity_kind_t::minimum_required); }
66 size_t recommended_granularity()
const {
return granularity(detail_::granularity_kind_t::recommended_for_performance); }
69 properties_t(CUmemAllocationProp_st raw_properties) : raw(raw_properties)
71 if (raw.location.type != CU_MEM_LOCATION_TYPE_DEVICE) {
72 throw ::std::runtime_error(
"Unexpected physical_allocation type - we only know about devices!");
76 properties_t(properties_t&&) =
default;
77 properties_t(
const properties_t&) =
default;
80 CUmemAllocationProp_st raw;
86 template<physical_allocation::shared_handle_kind_t SharedHandleKind>
89 CUmemAllocationProp_st raw_props{};
90 raw_props.type = CU_MEM_ALLOCATION_TYPE_PINNED;
91 raw_props.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
92 raw_props.location.id =
static_cast<int>(device_id);
93 raw_props.requestedHandleTypes =
static_cast<CUmemAllocationHandleType
>(SharedHandleKind);
94 raw_props.win32HandleMetaData =
nullptr;
95 return properties_t{raw_props};
100 template<physical_allocation::shared_handle_kind_t SharedHandleKind>
101 properties_t create_properties_for(
const device_t& device);
107 class reserved_address_range_t;
112 inline void cancel_reservation(memory::region_t reserved)
115 throw_if_error_lazy(status,
"Failed freeing a reservation of " + memory::detail_::identify(reserved));
120 using alignment_t = size_t;
122 enum alignment : alignment_t {
129 reserved_address_range_t
wrap(region_t address_range, alignment_t alignment,
bool take_ownership);
134 class reserved_address_range_t {
137 reserved_address_range_t(region_t region, alignment_t alignment,
bool owning) noexcept
138 : region_(region), alignment_(alignment), owning_(owning) { }
141 friend reserved_address_range_t detail_::wrap(region_t, alignment_t,
bool);
143 reserved_address_range_t(reserved_address_range_t&& other) noexcept
144 : region_(other.region_), alignment_(other.alignment_), owning_(other.owning_)
146 other.owning_ =
false;
149 ~reserved_address_range_t() noexcept(false)
151 if (not owning_) {
return; }
152 detail_::cancel_reservation(region_);
156 bool is_owning() const noexcept {
return owning_; }
157 region_t region() const noexcept{
return region_; }
158 alignment_t alignment() const noexcept {
return alignment_; }
161 const region_t region_;
162 const alignment_t alignment_;
168 inline reserved_address_range_t
wrap(region_t address_range, alignment_t alignment,
bool take_ownership)
170 return { address_range, alignment, take_ownership };
175 inline reserved_address_range_t reserve(region_t requested_region, alignment_t alignment = alignment::default_)
177 unsigned long flags { 0 };
179 auto status = cuMemAddressReserve(&ptr, requested_region.size(), alignment, requested_region.device_address(), flags);
180 throw_if_error_lazy(status,
"Failed making a reservation of " + cuda::memory::detail_::identify(requested_region)
181 +
" with alignment value " + ::std::to_string(alignment));
182 bool is_owning {
true };
183 return detail_::wrap(memory::region_t { ptr, requested_region.size() }, alignment, is_owning);
186 inline reserved_address_range_t reserve(
size_t requested_size, alignment_t alignment = alignment::default_)
188 return reserve(region_t{
nullptr, requested_size }, alignment);
193 class physical_allocation_t {
195 physical_allocation_t(physical_allocation::handle_t handle,
size_t size,
bool holds_refcount_unit)
196 : handle_(handle), size_(size), holds_refcount_unit_(holds_refcount_unit) { }
199 physical_allocation_t(
const physical_allocation_t& other) noexcept : handle_(other.handle_), size_(other.size_), holds_refcount_unit_(
false)
202 physical_allocation_t(physical_allocation_t&& other) noexcept : handle_(other.handle_), size_(other.size_), holds_refcount_unit_(other.holds_refcount_unit_)
204 other.holds_refcount_unit_ =
false;
207 ~physical_allocation_t() noexcept(false)
209 if (not holds_refcount_unit_) {
return; }
210 auto result = cuMemRelease(handle_);
211 throw_if_error_lazy(result,
"Failed making a virtual memory physical_allocation of size " + ::std::to_string(size_));
215 friend physical_allocation_t physical_allocation::detail_::wrap(physical_allocation::handle_t handle,
size_t size,
bool holds_refcount_unit);
217 size_t size() const noexcept {
return size_; }
218 physical_allocation::handle_t handle() const noexcept {
return handle_; }
219 bool holds_refcount_unit() const noexcept {
return holds_refcount_unit_; }
221 physical_allocation::properties_t properties()
const {
222 CUmemAllocationProp raw_properties;
223 auto status = cuMemGetAllocationPropertiesFromHandle(&raw_properties, handle_);
224 throw_if_error_lazy(status,
"Obtaining the properties of a virtual memory physical_allocation with handle " + ::std::to_string(handle_));
225 return { raw_properties };
228 template <physical_allocation::shared_handle_kind_t SharedHandleKind>
229 physical_allocation::shared_handle_t<SharedHandleKind> sharing_handle()
const 231 physical_allocation::shared_handle_t<SharedHandleKind> shared_handle_;
232 static constexpr
const unsigned long long flags { 0 };
233 auto result = cuMemExportToShareableHandle(&shared_handle_, handle_, static_cast<CUmemAllocationHandleType>(SharedHandleKind), flags);
234 throw_if_error_lazy(result,
"Exporting a (generic CUDA) shared memory physical_allocation to a shared handle");
235 return shared_handle_;
239 const physical_allocation::handle_t handle_;
241 bool holds_refcount_unit_;
244 namespace physical_allocation {
246 inline physical_allocation_t create(
size_t size, properties_t properties)
248 static constexpr
const unsigned long long flags { 0 };
249 CUmemGenericAllocationHandle handle;
250 auto result = cuMemCreate(&handle, size, &properties.raw, flags);
251 throw_if_error_lazy(result,
"Failed making a virtual memory physical_allocation of size " + ::std::to_string(size));
252 static constexpr
const bool is_owning {
true };
253 return detail_::wrap(handle, size, is_owning);
256 physical_allocation_t create(
size_t size, device_t device);
260 inline ::std::string identify(handle_t handle,
size_t size) {
261 return ::std::string(
"physical allocation with handle ") + ::std::to_string(handle)
262 +
" of size " + ::std::to_string(size);
265 inline physical_allocation_t
wrap(handle_t handle,
size_t size,
bool holds_refcount_unit)
267 return { handle, size, holds_refcount_unit };
270 inline properties_t properties_of(handle_t handle)
272 CUmemAllocationProp prop;
273 auto result = cuMemGetAllocationPropertiesFromHandle (&prop, handle);
274 throw_if_error_lazy(result,
"Failed obtaining the properties of the virtual memory physical_allocation with handle " 275 + ::std::to_string(handle));
292 template <physical_allocation::shared_handle_kind_t SharedHandleKind>
293 physical_allocation_t
import(shared_handle_t<SharedHandleKind> shared_handle,
size_t size,
bool holds_refcount_unit =
false)
295 handle_t result_handle;
296 auto result = cuMemImportFromShareableHandle(
297 &result_handle, reinterpret_cast<void*>(shared_handle), CUmemAllocationHandleType(SharedHandleKind));
298 throw_if_error_lazy(result,
"Failed importing a virtual memory physical_allocation from a shared handle ");
299 return physical_allocation::detail_::wrap(result_handle, size, holds_refcount_unit);
304 inline ::std::string identify(physical_allocation_t physical_allocation) {
305 return identify(physical_allocation.handle(), physical_allocation.size());
325 inline mapping_t
wrap(region_t address_range,
bool owning =
false);
327 inline ::std::string identify(region_t address_range) {
328 return ::std::string(
"mapping of ") + memory::detail_::identify(address_range);
336 inline access_permissions_t get_access_mode(region_t fully_mapped_region,
cuda::device::id_t device_id)
338 CUmemLocation_st location { CU_MEM_LOCATION_TYPE_DEVICE, device_id };
339 unsigned long long flags;
340 auto result = cuMemGetAccess(&flags, &location, fully_mapped_region.device_address() );
341 throw_if_error_lazy(result,
"Failed determining the access mode for " 342 + cuda::device::detail_::identify(device_id)
343 +
" to the virtual memory mapping to the range of size " 344 + ::std::to_string(fully_mapped_region.size()) +
" bytes at " + cuda::detail_::ptr_as_hex(fully_mapped_region.data()));
345 return access_permissions_t::from_access_flags(static_cast<CUmemAccess_flags>(flags));
356 access_permissions_t get_access_mode(region_t fully_mapped_region,
const device_t& device);
362 access_permissions_t get_access_mode(mapping_t mapping,
const device_t& device);
370 void set_access_mode(region_t fully_mapped_region,
const device_t& device, access_permissions_t access_mode);
376 void set_access_mode(mapping_t mapping,
const device_t& device, access_permissions_t access_mode);
385 template <
template <
typename...>
class ContiguousContainer>
387 void set_access_mode(
388 region_t fully_mapped_region,
389 const ContiguousContainer<device_t>& devices,
390 access_permissions_t access_mode);
392 template <
template <
typename...>
class ContiguousContainer>
393 void set_access_mode(
394 region_t fully_mapped_region,
395 ContiguousContainer<device_t>&& devices,
396 access_permissions_t access_mode);
403 template <
template <
typename...>
class ContiguousContainer>
405 inline void set_access_mode(
407 const ContiguousContainer<device_t>& devices,
408 access_permissions_t access_mode);
410 template <
template <
typename...>
class ContiguousContainer>
411 inline void set_access_mode(
413 ContiguousContainer<device_t>&& devices,
414 access_permissions_t access_mode);
420 mapping_t(region_t region,
bool owning) : address_range_(region), owning_(owning) { }
424 friend mapping_t mapping::detail_::wrap(region_t address_range,
bool owning);
426 mapping_t(
const mapping_t& other) noexcept :
427 address_range_(other.address_range()), owning_(
false) { }
429 mapping_t(mapping_t&& other) noexcept :
430 address_range_(other.address_range()), owning_(other.owning_)
432 other.owning_ =
false;
435 region_t address_range() const noexcept {
return address_range_; }
436 bool is_owning() const noexcept {
return owning_; }
438 access_permissions_t get_access_mode(
const device_t& device)
const;
439 void set_access_mode(
const device_t& device, access_permissions_t access_mode)
const;
441 template <
template <
typename...>
class ContiguousContainer>
442 inline void set_access_mode(
443 const ContiguousContainer<device_t>& devices,
444 access_permissions_t access_mode)
const;
446 template <
template <
typename...>
class ContiguousContainer>
447 inline void set_access_mode(
448 ContiguousContainer<device_t>&& devices,
449 access_permissions_t access_mode)
const;
451 ~mapping_t() noexcept(false)
453 if (not owning_) {
return; }
454 auto result = cuMemUnmap(address_range_.device_address(), address_range_.size());
455 throw_if_error_lazy(result,
"Failed unmapping " + mapping::detail_::identify(address_range_));
459 #if CUDA_VERSION >= 11000 461 physical_allocation_t allocation()
const 463 CUmemGenericAllocationHandle allocation_handle;
464 auto status = cuMemRetainAllocationHandle(&allocation_handle, address_range_.data());
465 throw_if_error_lazy(status,
" Failed obtaining/retaining the physical_allocation handle for the virtual memory " 466 "range mapped to " + cuda::detail_::ptr_as_hex(address_range_.data()) +
" of size " +
467 ::std::to_string(address_range_.size()) +
" bytes");
468 constexpr
const bool increase_refcount{
false};
469 return physical_allocation::detail_::wrap(allocation_handle, address_range_.size(), increase_refcount);
474 region_t address_range_;
483 mapping_t
wrap(region_t range,
bool owning)
485 return { range, owning };
488 inline ::std::string identify(mapping_t mapping)
490 return mapping::detail_::identify(mapping.address_range());
497 inline mapping_t map(region_t region, physical_allocation_t physical_allocation)
499 size_t offset_into_allocation { 0 };
500 constexpr
const unsigned long long flags { 0 };
501 auto handle = physical_allocation.handle();
502 auto status = cuMemMap(region.device_address(), region.size(), offset_into_allocation, handle, flags);
503 throw_if_error_lazy(status,
"Failed making a virtual memory mapping of " 504 + physical_allocation::detail_::identify(physical_allocation)
505 +
" to the range of size " + ::std::to_string(region.size()) +
" bytes at " +
506 cuda::detail_::ptr_as_hex(region.data()));
507 constexpr
const bool is_owning {
true };
508 return mapping::detail_::wrap(region, is_owning);
515 #endif // CUDA_VERSION >= 10020 516 #endif // CUDA_API_WRAPPERS_VIRTUAL_MEMORY_HPP_ Alias for the default behavior; see heuristic .
Definition: types.hpp:805
array_t< T, NumDimensions > wrap(device::id_t device_id, context::handle_t context_handle, handle_t handle, dimensions_t< NumDimensions > dimensions) noexcept
Wrap an existing CUDA array in an array_t instance.
Definition: array.hpp:248
All definitions and functionality wrapping the CUDA Runtime API.
Definition: array.hpp:22
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:752
Definition: kernel_launch.hpp:77
address_t address(const void *device_ptr) noexcept
Return a pointer's address as a numeric value of the type appropriate for device.
Definition: types.hpp:621
Representation, allocation and manipulation of CUDA-related memory, of different kinds.