8 #ifndef CUDA_API_WRAPPERS_MEMORY_POOL_HPP_ 9 #define CUDA_API_WRAPPERS_MEMORY_POOL_HPP_ 11 #if CUDA_VERSION >= 11020 38 result.id = device_id;
39 result.type = CU_MEM_LOCATION_TYPE_DEVICE;
43 #if CUDA_VERSION >= 11020 44 template<pool::shared_handle_kind_t SharedHandleKind = pool::shared_handle_kind_t::no_export>
46 template<pool::shared_handle_kind_t SharedHandleKind>
50 CUmemPoolProps result;
54 ::std::memset(&result, 0,
sizeof(CUmemPoolProps));
56 result.location = create_mem_location(device_id);
57 result.allocType = CU_MEM_ALLOCATION_TYPE_PINNED;
58 result.handleTypes =
static_cast<CUmemAllocationHandleType
>(SharedHandleKind);
59 result.win32SecurityAttributes =
nullptr;
65 return "memory pool at " + cuda::detail_::ptr_as_hex(handle);
70 return identify(handle) +
" on " + cuda::device::detail_::identify(device_id);
73 ::std::string identify(
const pool_t &pool);
77 return cuMemPoolDestroy(handle);
82 auto status = destroy_nothrow(handle);
89 using attribute_t = CUmemPool_attribute;
93 template <attribute_t attribute>
struct attribute_value {};
95 template <>
struct attribute_value<CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES> {
using type = bool; };
96 template <>
struct attribute_value<CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC> {
using type = bool; };
97 template <>
struct attribute_value<CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES> {
using type = bool; };
98 template <>
struct attribute_value<CU_MEMPOOL_ATTR_RELEASE_THRESHOLD> {
using type =
size_t; };
#if CUDA_VERSION >= 11030
// Memory-amount attributes, only available from CUDA 11.3 onwards; all are
// sizes (current / high-watermark amounts of reserved and used memory)
template <>
struct attribute_value<CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT> { using type = size_t; };
template <>
struct attribute_value<CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH> { using type = size_t; };
template <>
struct attribute_value<CU_MEMPOOL_ATTR_USED_MEM_CURRENT> { using type = size_t; };
template <>
struct attribute_value<CU_MEMPOOL_ATTR_USED_MEM_HIGH> { using type = size_t; };
// NOTE(review): this #endif restored — the conditional opened above had no
// matching #endif in the visible text
#endif // CUDA_VERSION >= 11030
106 template <
typename T>
struct attribute_value_inner_type {
using type = T; };
107 template <>
struct attribute_value_inner_type<bool> {
using type = int; };
108 template <>
struct attribute_value_inner_type<
size_t> {
using type = cuuint64_t; };
110 template <
typename T>
111 using attribute_value_inner_type_t =
typename attribute_value_inner_type<T>::type;
116 template <attribute_t attribute>
117 using attribute_value_t =
typename detail_::attribute_value<attribute>::type;
121 template<attribute_t attribute>
122 struct status_and_attribute_value {
124 attribute_value_t<attribute> value;
127 template<attribute_t attribute>
128 status_and_attribute_value<attribute> get_attribute_with_status(
handle_t pool_handle)
130 using outer_type = attribute_value_t <attribute>;
131 using inner_type = attribute_value_inner_type_t<outer_type>;
132 inner_type attribute_value;
133 auto status = cuMemPoolGetAttribute(pool_handle, attribute, &attribute_value);
134 return { status,
static_cast<outer_type
>(attribute_value) };
137 template<attribute_t attribute>
138 attribute_value_t<attribute> get_attribute(
handle_t pool_handle)
140 auto status_and_attribute_value = get_attribute_with_status<attribute>(pool_handle);
142 "Obtaining attribute " + ::std::to_string(static_cast<int>(attribute))
143 +
" of " + detail_::identify(pool_handle));
144 return status_and_attribute_value.value;
147 template<attribute_t attribute>
148 void set_attribute(
handle_t pool_handle, attribute_value_t<attribute> value)
150 using outer_type = attribute_value_t <attribute>;
151 using inner_type =
typename attribute_value_inner_type<outer_type>::type;
152 inner_type value_ =
static_cast<inner_type
>(value);
153 auto status = cuMemPoolSetAttribute(pool_handle, attribute, &value_);
154 throw_if_error_lazy(status,
"Setting attribute " + ::std::to_string(static_cast<int>(attribute))
155 +
" of " + detail_::identify(pool_handle));
178 CUmemAccess_flags access_flags;
179 auto mem_location = pool::detail_::create_mem_location(device_id);
180 auto status = cuMemPoolGetAccess(&access_flags, pool_handle, &mem_location);
182 "Determining access information for " + cuda::device::detail_::identify(device_id)
183 +
" to " + pool::detail_::identify(pool_handle));
184 return permissions::detail_::from_flags(access_flags);
187 inline void set_permissions(span<cuda::device::id_t> device_ids,
pool::handle_t pool_handle, permissions_t permissions)
189 if (permissions.write and not permissions.read) {
190 throw ::std::invalid_argument(
"Memory pool access get_permissions cannot be write-only");
193 CUmemAccess_flags flags = permissions.read ?
194 (permissions.write ? CU_MEM_ACCESS_FLAGS_PROT_READWRITE : CU_MEM_ACCESS_FLAGS_PROT_READ) :
195 CU_MEM_ACCESS_FLAGS_PROT_NONE;
197 ::std::vector<CUmemAccessDesc> descriptors;
198 descriptors.reserve(device_ids.size());
200 for(
auto device_id : device_ids) {
201 CUmemAccessDesc desc;
203 desc.location = pool::detail_::create_mem_location(device_id);
204 descriptors.push_back(desc);
207 auto status = cuMemPoolSetAccess(pool_handle, descriptors.data(), descriptors.size());
209 "Setting access get_permissions for " + ::std::to_string(descriptors.size())
210 +
" devices to " + pool::detail_::identify(pool_handle));
215 if (permissions.write and not permissions.read) {
216 throw ::std::invalid_argument(
"Memory pool access get_permissions cannot be write-only");
219 CUmemAccessDesc desc;
220 desc.flags = permissions.read ?
222 CU_MEM_ACCESS_FLAGS_PROT_READWRITE :
223 CU_MEM_ACCESS_FLAGS_PROT_READ) :
224 CU_MEM_ACCESS_FLAGS_PROT_NONE;
226 desc.location = pool::detail_::create_mem_location(device_id);
227 auto status = cuMemPoolSetAccess(pool_handle, &desc, 1);
229 "Setting access get_permissions for " + cuda::device::detail_::identify(device_id)
230 +
" to " + pool::detail_::identify(pool_handle));
235 permissions_t get_permissions(
const cuda::device_t& device,
const pool_t& pool);
236 void set_permissions(
const cuda::device_t& device,
const pool_t& pool, permissions_t permissions);
237 template <
typename DeviceRange>
238 void get_permissions(DeviceRange
devices,
const pool_t& pool_handle, permissions_t permissions);
/// A memory pool's policy regarding when not-yet-fully-released allocations
/// may be reused to satisfy new allocation requests. Each field corresponds
/// to one boolean pool attribute (see reuse_policy() / set_reuse_policy()).
struct reuse_policy_t {
	// Corresponds to CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES
	bool when_dependent_on_free;

	// Corresponds to CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC
	bool independent_but_actually_freed;

	// Corresponds to CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES
	bool allow_waiting_for_frees;
};
// NOTE(review): closing brace restored — it was lost in extraction
265 class imported_ptr_t;
278 region_t allocate(
const stream_t& stream,
size_t num_bytes)
const;
280 pool::ipc::imported_ptr_t
import(
const memory::pool::ipc::ptr_handle_t& exported_handle)
const;
282 void trim(
size_t min_bytes_to_keep)
const 284 auto status = cuMemPoolTrimTo(handle_, min_bytes_to_keep);
286 +
" down to " + ::std::to_string(min_bytes_to_keep));
289 template<pool::attribute_t attribute>
290 pool::attribute_value_t<attribute> get_attribute()
const 292 auto attribute_with_status = pool::detail_::get_attribute_with_status<attribute>(handle_);
294 + ::std::to_string(static_cast<int>(attribute)) +
" of " + pool::detail_::identify(*
this));
295 return attribute_with_status.value;
298 template<pool::attribute_t attribute>
299 void set_attribute(
const pool::attribute_value_t<attribute>& value)
const 301 using outer_type = pool::attribute_value_t <attribute>;
302 using inner_type =
typename pool::detail_::attribute_value_inner_type<outer_type>::type;
303 auto inner_value =
static_cast<inner_type
>(value);
304 auto status = cuMemPoolSetAttribute(handle_, attribute, &inner_value);
305 throw_if_error_lazy(status,
"Failed setting attribute " + ::std::to_string(static_cast<int>(attribute))
306 +
" of " + pool::detail_::identify(*
this));
309 size_t release_threshold()
const 311 return static_cast<size_t>(get_attribute<CU_MEMPOOL_ATTR_RELEASE_THRESHOLD>());
314 void set_release_threshold(
size_t threshold)
const 316 set_attribute<CU_MEMPOOL_ATTR_RELEASE_THRESHOLD>(threshold);
321 return memory::get_permissions(device, *
this);
338 void set_permissions(
const cuda::device_t& device, permissions_t permissions)
340 return memory::set_permissions(device, *
this, permissions);
347 template <
typename DeviceRange>
348 void set_permissions(DeviceRange
devices, permissions_t permissions)
350 return memory::set_permissions(devices, *
this, permissions);
360 pool::reuse_policy_t reuse_policy()
const 363 get_attribute<CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES>(),
364 get_attribute<CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC>(),
365 get_attribute<CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES>()
373 void set_reuse_policy(pool::reuse_policy_t reuse_policy)
const 375 set_attribute<CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES>(reuse_policy.when_dependent_on_free);
376 set_attribute<CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC>(reuse_policy.independent_but_actually_freed);
377 set_attribute<CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES>(reuse_policy.allow_waiting_for_frees);
380 #if CUDA_VERSION >= 11030 385 size_t backing_memory_size()
const {
386 return get_attribute<CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT>();
407 bool is_owning() const noexcept {
return owning_; }
413 pool_t(
const pool_t& other) =
delete;
415 pool_t(pool_t&& other) noexcept : pool_t(other.device_id_, other.handle_, other.owning_)
417 other.owning_ =
false;
420 ~pool_t() DESTRUCTOR_EXCEPTION_SPEC
422 if (not owning_) {
return; }
423 #ifdef THROW_IN_DESTRUCTORS 424 pool::detail_::destroy(handle_);
426 memory::pool::detail_::destroy_nothrow(handle_);
432 : device_id_(device_id), handle_(handle), owning_(owning)
441 inline bool operator==(
const pool_t& lhs,
const pool_t& rhs)
444 return lhs.device_id() == rhs.device_id() and lhs.handle() == rhs.handle();
447 inline bool operator!=(
const pool_t& lhs,
const pool_t& rhs)
449 return not (lhs == rhs);
456 return { device_id, handle, owning };
461 template<shared_handle_kind_t SharedHandleKind = shared_handle_kind_t::no_export>
464 auto props = create_raw_properties<SharedHandleKind>(device_id);
466 auto status = cuMemPoolCreate(&handle, &props);
467 throw_if_error_lazy(status,
"Failed creating a memory pool on device " + cuda::device::detail_::identify(device_id));
468 constexpr
const bool is_owning {
true };
469 return wrap(device_id, handle, is_owning);
472 inline ::std::string identify(
const pool_t& pool)
474 return identify(pool.handle(), pool.device_id());
479 template<shared_handle_kind_t SharedHandleKind>
488 #endif // CUDA_VERSION >= 11020 490 #endif // CUDA_API_WRAPPERS_MEMORY_POOL_HPP_ Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
detail_::region_helper< memory::region_t > region_t
A child class of the generic region_t with some managed-memory-specific functionality.
Definition: memory.hpp:1974
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:852
::std::size_t size_t
A size type for use throughout the wrappers library (except when specific API functions limit the siz...
Definition: types.hpp:78
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we've failed - which also ensures no string is constructed unle...
Definition: error.hpp:327
bool operator==(const context_t &lhs, const context_t &rhs) noexcept
Definition: context.hpp:768
CUarray handle_t
Raw CUDA driver handle for arrays (of any dimension)
Definition: array.hpp:34
array_t< T, NumDimensions > wrap(device::id_t device_id, context::handle_t context_handle, handle_t handle, dimensions_t< NumDimensions > dimensions) noexcept
Wrap an existing CUDA array in an array_t instance.
Definition: array.hpp:271
detail_::all_devices devices()
Definition: devices.hpp:224
Wrapper class for a CUDA device.
Definition: device.hpp:135
CUresult status_t
Indicates either the result (success or error index) of a CUDA Runtime or Driver API call...
Definition: types.hpp:74