4 #ifndef CUDA_API_WRAPPERS_VIRTUAL_MEMORY_HPP_ 5 #define CUDA_API_WRAPPERS_VIRTUAL_MEMORY_HPP_ 11 #if CUDA_VERSION >= 10020 20 class physical_allocation_t;
// Namespace for CUDA virtual-memory physical allocations (cuMemCreate et al.).
// NOTE(review): extraction artifact — stray original-file line numbers (e.g. "23")
// are embedded throughout this text; the code is not compilable as-is.
23 namespace physical_allocation {
// Raw CUDA driver handle type for a physical allocation.
25 using handle_t = CUmemGenericAllocationHandle;
// Wrap an existing raw allocation handle in a physical_allocation_t; when
// holds_refcount_unit is true, the wrapper releases the handle on destruction
// (see ~physical_allocation_t below, which calls cuMemRelease).
29 physical_allocation_t
wrap(
handle_t handle,
size_t size,
bool holds_refcount_unit);
// Kinds of allocation-granularity queries supported by
// cuMemGetAllocationGranularity: the minimum required vs. the recommended size.
// NOTE(review): the enum's closing brace was dropped by the extraction.
34 enum class granularity_kind_t : ::std::underlying_type<CUmemAllocationGranularity_flags_enum>::type {
35 minimum_required = CU_MEM_ALLOC_GRANULARITY_MINIMUM,
36 recommended_for_performance = CU_MEM_ALLOC_GRANULARITY_RECOMMENDED
52 shared_handle_kind_t requested_kind()
const 54 return shared_handle_kind_t(raw.requestedHandleTypes);
// Determine an allocation granularity (in bytes) implied by these properties,
// of the requested kind (minimum required vs. recommended for performance).
// NOTE(review): extraction dropped lines here — the declaration of `result`,
// the error check and the return statement are not visible.
58 size_t granularity(detail_::granularity_kind_t kind)
const {
60 auto status = cuMemGetAllocationGranularity(&result, &raw,
61 static_cast<CUmemAllocationGranularity_flags>(kind));
67 size_t minimum_granularity()
const {
return granularity(detail_::granularity_kind_t::minimum_required); }
68 size_t recommended_granularity()
const {
return granularity(detail_::granularity_kind_t::recommended_for_performance); }
71 properties_t(CUmemAllocationProp_st raw_properties) : raw(raw_properties)
73 if (raw.location.type != CU_MEM_LOCATION_TYPE_DEVICE) {
74 throw ::std::runtime_error(
"Unexpected physical_allocation type - we only know about devices!");
// Copying or moving a properties object just duplicates the raw structure.
78 properties_t(properties_t&&) =
default;
79 properties_t(
const properties_t&) =
default;
// The underlying CUDA driver properties structure, exposed directly.
82 CUmemAllocationProp_st raw;
// Build creation properties for a pinned, device-located allocation on the
// device with the given id, requesting SharedHandleKind as the OS-shareable
// handle kind (win32 handle metadata left unset).
// NOTE(review): the enclosing function's signature line was dropped by the
// extraction; only the template header and the body are visible.
88 template<physical_allocation::shared_handle_kind_t SharedHandleKind>
91 CUmemAllocationProp_st raw_props{};
92 raw_props.type = CU_MEM_ALLOCATION_TYPE_PINNED;
93 raw_props.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
94 raw_props.location.id =
static_cast<int>(device_id);
95 raw_props.requestedHandleTypes =
static_cast<CUmemAllocationHandleType
>(SharedHandleKind);
96 raw_props.win32HandleMetaData =
nullptr;
97 return properties_t{raw_props};
// Convenience overload: build creation properties for a device_t wrapper.
102 template<physical_allocation::shared_handle_kind_t SharedHandleKind>
103 properties_t create_properties_for(
const device_t& device);
109 class reserved_address_range_t;
// Error-reporting tail of detail_::cancel_reservation; the function's
// signature and its cuMemAddressFree call were dropped by the extraction.
117 throw_if_error_lazy(status,
"Failed freeing a reservation of " + memory::detail_::identify(reserved));
// Alignment (in bytes) for address-range reservations.
122 using alignment_t =
size_t;
// Named alignment values; the enumerators were dropped by the extraction
// (at least alignment::default_ exists — it is used as a default argument below).
124 enum alignment : alignment_t {
// Wrap an already-reserved address range in an owning or non-owning
// reserved_address_range_t; no new reservation is made.
131 reserved_address_range_t
wrap(
region_t address_range, alignment_t alignment,
bool take_ownership);
136 class reserved_address_range_t {
139 reserved_address_range_t(
region_t region, alignment_t alignment,
bool owning) noexcept
140 : region_(region), alignment_(alignment), owning_(owning) { }
145 reserved_address_range_t(reserved_address_range_t&& other) noexcept
146 : region_(other.region_), alignment_(other.alignment_), owning_(other.owning_)
148 other.owning_ =
false;
151 ~reserved_address_range_t() noexcept(false)
153 if (not owning_) {
return; }
154 detail_::cancel_reservation(region_);
158 bool is_owning() const noexcept {
return owning_; }
159 region_t region() const noexcept{
return region_; }
160 alignment_t alignment() const noexcept {
return alignment_; }
164 const alignment_t alignment_;
170 inline reserved_address_range_t
wrap(
region_t address_range, alignment_t alignment,
bool take_ownership)
172 return { address_range, alignment, take_ownership };
// Reserve a range of virtual address space at the requested region's base
// address, with the given alignment.
// NOTE(review): extraction dropped lines here — the declaration of `ptr`
// (the out-parameter for cuMemAddressReserve, presumably a CUdeviceptr) and
// the final wrap/return statement are not visible.
177 inline reserved_address_range_t reserve(
region_t requested_region, alignment_t alignment = alignment::default_)
179 unsigned long flags { 0 };
181 auto status = cuMemAddressReserve(&ptr, requested_region.size(), alignment,
device::address(requested_region), flags);
182 throw_if_error_lazy(status,
"Failed making a reservation of " + cuda::memory::detail_::identify(requested_region)
183 +
" with alignment value " + ::std::to_string(alignment));
184 bool is_owning {
true };
188 inline reserved_address_range_t reserve(
size_t requested_size, alignment_t alignment = alignment::default_)
190 return reserve(
region_t{
nullptr, requested_size }, alignment);
// RAII wrapper around a CUDA physical memory allocation handle.
195 class physical_allocation_t {
// NOTE(review): the constructor's signature line was dropped by the
// extraction; only its member-initializer list and empty body are visible.
198 : handle_(handle), size_(size), holds_refcount_unit_(holds_refcount_unit) { }
201 physical_allocation_t(
const physical_allocation_t& other) noexcept : handle_(other.handle_), size_(other.size_), holds_refcount_unit_(
false)
204 physical_allocation_t(physical_allocation_t&& other) noexcept : handle_(other.handle_), size_(other.size_), holds_refcount_unit_(other.holds_refcount_unit_)
206 other.holds_refcount_unit_ =
false;
209 ~physical_allocation_t() noexcept(false)
211 if (not holds_refcount_unit_) {
return; }
212 auto result = cuMemRelease(handle_);
213 throw_if_error_lazy(result,
"Failed making a virtual memory physical_allocation of size " + ::std::to_string(size_));
219 size_t size() const noexcept {
return size_; }
221 bool holds_refcount_unit() const noexcept {
return holds_refcount_unit_; }
223 physical_allocation::properties_t properties()
const {
224 CUmemAllocationProp raw_properties;
225 auto status = cuMemGetAllocationPropertiesFromHandle(&raw_properties, handle_);
226 throw_if_error_lazy(status,
"Obtaining the properties of a virtual memory physical_allocation with handle " + ::std::to_string(handle_));
227 return { raw_properties };
230 template <physical_allocation::shared_handle_kind_t SharedHandleKind>
231 physical_allocation::shared_handle_t<SharedHandleKind> sharing_handle()
const 233 physical_allocation::shared_handle_t<SharedHandleKind> shared_handle_;
234 static constexpr
const unsigned long long flags { 0 };
235 auto result = cuMemExportToShareableHandle(&shared_handle_, handle_, static_cast<CUmemAllocationHandleType>(SharedHandleKind), flags);
236 throw_if_error_lazy(result,
"Exporting a (generic CUDA) shared memory physical_allocation to a shared handle");
237 return shared_handle_;
243 bool holds_refcount_unit_;
246 namespace physical_allocation {
// Create a new physical allocation of the given size with the given
// properties, via cuMemCreate; throws on failure.
// NOTE(review): the final wrap/return statement was dropped by the extraction
// (is_owning suggests the new handle is wrapped as owning).
248 inline physical_allocation_t create(
size_t size, properties_t properties)
250 static constexpr
const unsigned long long flags { 0 };
251 CUmemGenericAllocationHandle handle;
252 auto result = cuMemCreate(&handle, size, &properties.raw, flags);
253 throw_if_error_lazy(result,
"Failed making a virtual memory physical_allocation of size " + ::std::to_string(size));
254 static constexpr
const bool is_owning {
true };
// Create a new physical allocation of the given size on the given device.
258 physical_allocation_t create(
size_t size, device_t device);
262 inline ::std::string identify(
handle_t handle,
size_t size) {
263 return ::std::string(
"physical allocation with handle ") + ::std::to_string(handle)
264 +
" of size " + ::std::to_string(size);
267 inline physical_allocation_t
wrap(
handle_t handle,
size_t size,
bool holds_refcount_unit)
269 return { handle, size, holds_refcount_unit };
// Obtain the creation properties of the physical allocation with the given
// raw handle; throws on failure.
// NOTE(review): the return statement was dropped by the extraction.
272 inline properties_t properties_of(
handle_t handle)
274 CUmemAllocationProp prop;
275 auto result = cuMemGetAllocationPropertiesFromHandle (&prop, handle);
276 throw_if_error_lazy(result,
"Failed obtaining the properties of the virtual memory physical_allocation with handle " 277 + ::std::to_string(handle));
// Import a physical allocation exported by another process as an OS-specific
// shared handle, via cuMemImportFromShareableHandle.
// NOTE(review): extraction dropped lines here — the declaration of
// `result_handle` and the final wrap/return statement are not visible.
294 template <physical_allocation::shared_handle_kind_t SharedHandleKind>
295 physical_allocation_t
import(shared_handle_t<SharedHandleKind> shared_handle,
size_t size,
bool holds_refcount_unit =
false)
298 auto result = cuMemImportFromShareableHandle(
299 &result_handle, reinterpret_cast<void*>(shared_handle), CUmemAllocationHandleType(SharedHandleKind));
300 throw_if_error_lazy(result,
"Failed importing a virtual memory physical_allocation from a shared handle ");
306 inline ::std::string identify(physical_allocation_t physical_allocation) {
307 return identify(physical_allocation.handle(), physical_allocation.size());
// Wrap an address range which is already mapped to a physical allocation in a
// mapping_t object, owning (unmaps on destruction) or not.
327 inline mapping_t
wrap(
region_t address_range,
bool owning =
false);
329 inline ::std::string identify(
region_t address_range) {
330 return ::std::string(
"mapping of ") + memory::detail_::identify(address_range);
// Body fragment: query a device's access permissions to a fully-mapped region
// via cuMemGetAccess, and decode the returned flags.
// NOTE(review): extraction dropped lines here — the function signature and the
// opening of the throw_if_error_lazy error-message expression are missing
// (the visible lines starting with "+" are its continuation).
340 CUmemLocation_st location { CU_MEM_LOCATION_TYPE_DEVICE, device_id };
341 unsigned long long flags;
342 auto result = cuMemGetAccess(&flags, &location,
device::address(fully_mapped_region) );
344 + cuda::device::detail_::identify(device_id)
345 +
" to the virtual memory mapping to the range of size " 346 + ::std::to_string(fully_mapped_region.size()) +
" bytes at " + cuda::detail_::ptr_as_hex(fully_mapped_region.data()));
347 return permissions::detail_::from_flags(static_cast<CUmemAccess_flags>(flags));
// Determine the access permissions a device has to a fully-mapped region.
358 permissions_t get_access_mode(
region_t fully_mapped_region,
const device_t& device);
// Determine the access permissions a device has to an existing mapping.
364 permissions_t get_access_mode(mapping_t mapping,
const device_t& device);
// Set a single device's access permissions to a fully-mapped region.
372 void set_permissions(
region_t fully_mapped_region,
const device_t& device, permissions_t access_mode);
// Set a single device's access permissions to an existing mapping.
378 void set_permissions(mapping_t mapping,
const device_t& device, permissions_t access_mode);
// Set the same access permissions for a whole container of devices.
// NOTE(review): a parameter line (presumably the region/mapping) was dropped
// by the extraction between the "(" and the container parameter.
387 template <
template <
typename...>
class ContiguousContainer>
389 void set_permissions(
391 const ContiguousContainer<device_t>&
devices,
392 permissions_t access_mode);
// Rvalue-container overload of the above.
394 template <
template <
typename...>
class ContiguousContainer>
395 void set_permissions(
397 ContiguousContainer<device_t>&&
devices,
398 permissions_t access_mode);
// Further container overloads (same NOTE(review) about the dropped parameter
// line applies).
405 template <
template <
typename...>
class ContiguousContainer>
407 inline void set_permissions(
409 const ContiguousContainer<device_t>&
devices,
410 permissions_t access_mode);
412 template <
template <
typename...>
class ContiguousContainer>
413 inline void set_permissions(
415 ContiguousContainer<device_t>&&
devices,
416 permissions_t access_mode);
422 mapping_t(
region_t region,
bool owning) : address_range_(region), owning_(owning) { }
428 mapping_t(
const mapping_t& other) noexcept :
429 address_range_(other.address_range()), owning_(
false) { }
431 mapping_t(mapping_t&& other) noexcept :
432 address_range_(other.address_range()), owning_(other.owning_)
434 other.owning_ =
false;
437 region_t address_range() const noexcept {
return address_range_; }
438 bool is_owning() const noexcept {
return owning_; }
// The access permissions a given device has to this mapping.
440 permissions_t get_permissions(
const device_t& device)
const;
// Set a single device's access permissions to this mapping.
441 void set_permissions(
const device_t& device, permissions_t access_mode)
const;
// Set the same access permissions, to this mapping, for a whole container
// of devices.
443 template <
template <
typename...>
class ContiguousContainer>
444 inline void set_permissions(
445 const ContiguousContainer<device_t>&
devices,
446 permissions_t access_mode)
const;
// Rvalue-container overload of the above.
448 template <
template <
typename...>
class ContiguousContainer>
449 inline void set_permissions(
450 ContiguousContainer<device_t>&&
devices,
451 permissions_t access_mode)
const;
453 ~mapping_t() noexcept(false)
455 if (not owning_) {
return; }
456 auto result = cuMemUnmap(
device::address(address_range_), address_range_.size());
457 throw_if_error_lazy(result,
"Failed unmapping " + mapping::detail_::identify(address_range_));
// Obtain the physical allocation this range is mapped to, via
// cuMemRetainAllocationHandle (available from CUDA 11.0); throws on failure.
// NOTE(review): the return statement was dropped by the extraction;
// increase_refcount suggests the handle is wrapped without taking an extra
// refcount unit — confirm against the full source.
461 #if CUDA_VERSION >= 11000 463 physical_allocation_t allocation()
const 465 CUmemGenericAllocationHandle allocation_handle;
466 auto status = cuMemRetainAllocationHandle(&allocation_handle, address_range_.data());
467 throw_if_error_lazy(status,
" Failed obtaining/retaining the physical_allocation handle for the virtual memory " 468 "range mapped to " + cuda::detail_::ptr_as_hex(address_range_.data()) +
" of size " +
469 ::std::to_string(address_range_.size()) +
" bytes");
470 constexpr
const bool increase_refcount{
false};
// NOTE(review): orphaned fragment — this return belongs to the definition of
// mapping-related wrap() (its signature was dropped by the extraction).
487 return { range, owning };
490 inline ::std::string identify(mapping_t mapping)
492 return mapping::detail_::identify(mapping.address_range());
// Map a reserved region of virtual address space onto a physical allocation
// (from the allocation's start), via cuMemMap.
// NOTE(review): extraction dropped lines here — the opening of the
// throw_if_error_lazy error-message expression (the visible "+" lines are its
// continuation) and the final wrap/return statement are not visible.
499 inline mapping_t map(
region_t region, physical_allocation_t physical_allocation)
501 size_t offset_into_allocation { 0 };
502 constexpr
const unsigned long long flags { 0 };
503 auto handle = physical_allocation.handle();
504 auto status = cuMemMap(
device::address(region), region.size(), offset_into_allocation, handle, flags);
506 + physical_allocation::detail_::identify(physical_allocation)
507 +
" to the range of size " + ::std::to_string(region.size()) +
" bytes at " +
508 cuda::detail_::ptr_as_hex(region.data()));
509 constexpr
const bool is_owning {
true };
517 #endif // CUDA_VERSION >= 10020 518 #endif // CUDA_API_WRAPPERS_VIRTUAL_MEMORY_HPP_ Alias for the default behavior; see heuristic .
Definition: types.hpp:901
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
detail_::region_helper< memory::region_t > region_t
A child class of the generic region_t with some managed-memory-specific functionality.
Definition: memory.hpp:1960
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:850
::std::size_t size_t
A size type for use throughout the wrappers library (except when specific API functions limit the siz...
Definition: types.hpp:81
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we've failed - which also ensures no string is constructed unle...
Definition: error.hpp:316
CUarray handle_t
Raw CUDA driver handle for arrays (of any dimension)
Definition: array.hpp:34
array_t< T, NumDimensions > wrap(device::id_t device_id, context::handle_t context_handle, handle_t handle, dimensions_t< NumDimensions > dimensions) noexcept
Wrap an existing CUDA array in an array_t instance.
Definition: array.hpp:264
address_t address(const void *device_ptr) noexcept
Definition: types.hpp:682
void * as_pointer(device::address_t address) noexcept
Definition: types.hpp:700
detail_::all_devices devices()
Definition: devices.hpp:224
Wrapper class for a CUDA device.
Definition: device.hpp:135