#ifndef CUDA_API_WRAPPERS_MEMORY_POOL_HPP_
#define CUDA_API_WRAPPERS_MEMORY_POOL_HPP_

#if CUDA_VERSION >= 11020

inline CUmemLocation create_mem_location(cuda::device::id_t device_id) noexcept
{
	CUmemLocation result;
	result.id = device_id;
	result.type = CU_MEM_LOCATION_TYPE_DEVICE;
	return result;
}

#if CUDA_VERSION >= 11020
template<pool::shared_handle_kind_t SharedHandleKind = pool::shared_handle_kind_t::no_export>
#else
template<pool::shared_handle_kind_t SharedHandleKind>
#endif
CUmemPoolProps create_raw_properties(cuda::device::id_t device_id) noexcept
{
	CUmemPoolProps result;

	// Clear the structure, so that any fields we do not set explicitly - including
	// fields added in later CUDA versions - are zeroed out, as the driver expects
	::std::memset(&result, 0, sizeof(CUmemPoolProps));

	result.location = create_mem_location(device_id);
	result.allocType = CU_MEM_ALLOCATION_TYPE_PINNED;
	result.handleTypes = static_cast<CUmemAllocationHandleType>(SharedHandleKind);
	result.win32SecurityAttributes = nullptr;
	return result;
}
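// Usage sketch for the helper above (not part of the library): this is roughly what
// pool::create(), further down in this header, passes to the driver. Here, device_id
// is assumed to be a valid device index and the driver assumed to be initialized.
//
//   auto props = create_raw_properties<pool::shared_handle_kind_t::no_export>(device_id);
//   CUmemoryPool raw_handle;
//   auto status = cuMemPoolCreate(&raw_handle, &props);  // the same call create() wraps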
inline ::std::string identify(pool::handle_t handle)
{
	return "memory pool at " + cuda::detail_::ptr_as_hex(handle);
}

inline ::std::string identify(pool::handle_t handle, cuda::device::id_t device_id)
{
	return identify(handle) + " on " + cuda::device::detail_::identify(device_id);
}

// Declaration only; the definition appears further down, after pool_t
::std::string identify(const pool_t& pool);
using attribute_t = CUmemPool_attribute;

template <attribute_t attribute> struct attribute_value {};
template <> struct attribute_value<CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES>   { using type = bool;   };
template <> struct attribute_value<CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC>         { using type = bool;   };
template <> struct attribute_value<CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES> { using type = bool;   };
template <> struct attribute_value<CU_MEMPOOL_ATTR_RELEASE_THRESHOLD>                 { using type = size_t; };
#if CUDA_VERSION >= 11030
template <> struct attribute_value<CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT>              { using type = size_t; };
template <> struct attribute_value<CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH>                 { using type = size_t; };
template <> struct attribute_value<CU_MEMPOOL_ATTR_USED_MEM_CURRENT>                  { using type = size_t; };
template <> struct attribute_value<CU_MEMPOOL_ATTR_USED_MEM_HIGH>                     { using type = size_t; };
#endif // CUDA_VERSION >= 11030
// The driver's getter/setter functions marshal these "inner" types, rather than the
// more idiomatic types exposed to the library's users
template <typename T> struct attribute_value_inner_type         { using type = T;          };
template <>           struct attribute_value_inner_type<bool>   { using type = int;        };
template <>           struct attribute_value_inner_type<size_t> { using type = cuuint64_t; };

template <typename T>
using attribute_value_inner_type_t = typename attribute_value_inner_type<T>::type;
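// Illustration (a sketch, not library code) of the two-level type mapping: the
// user-facing type of CU_MEMPOOL_ATTR_RELEASE_THRESHOLD is size_t, while the value
// actually passed to and from the driver for it is a cuuint64_t:
//
//   static_assert(::std::is_same<
//       attribute_value_inner_type_t<attribute_value<CU_MEMPOOL_ATTR_RELEASE_THRESHOLD>::type>,
//       cuuint64_t>::value, "release threshold values are marshalled as cuuint64_t");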
/// The type of the value of the memory pool attribute specified by the template argument
template <attribute_t attribute>
using attribute_value_t = typename detail_::attribute_value<attribute>::type;
template<attribute_t attribute>
struct status_and_attribute_value {
	status_t status;
	attribute_value_t<attribute> value;
};

template<attribute_t attribute>
status_and_attribute_value<attribute> get_attribute_with_status(handle_t pool_handle)
{
	using outer_type = attribute_value_t<attribute>;
	using inner_type = attribute_value_inner_type_t<outer_type>;
	inner_type attribute_value;
	auto status = cuMemPoolGetAttribute(pool_handle, attribute, &attribute_value);
	return { status, static_cast<outer_type>(attribute_value) };
}
template<attribute_t attribute>
attribute_value_t<attribute> get_attribute(handle_t pool_handle)
{
	auto status_and_attribute_value = get_attribute_with_status<attribute>(pool_handle);
	throw_if_error_lazy(status_and_attribute_value.status,
		"Obtaining attribute " + ::std::to_string(static_cast<int>(attribute))
		+ " of " + detail_::identify(pool_handle));
	return status_and_attribute_value.value;
}
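// Usage sketch for the raw-handle getter above and its set_attribute counterpart
// defined right below (pool_handle assumed to be a valid CUmemoryPool); the pool_t
// class further down offers the same functionality in wrapper form:
//
//   auto threshold = get_attribute<CU_MEMPOOL_ATTR_RELEASE_THRESHOLD>(pool_handle);
//   set_attribute<CU_MEMPOOL_ATTR_RELEASE_THRESHOLD>(pool_handle, 2 * threshold);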
template<attribute_t attribute>
void set_attribute(handle_t pool_handle, attribute_value_t<attribute> value)
{
	using outer_type = attribute_value_t<attribute>;
	using inner_type = typename attribute_value_inner_type<outer_type>::type;
	inner_type value_ = static_cast<inner_type>(value);
	auto status = cuMemPoolSetAttribute(pool_handle, attribute, &value_);
	throw_if_error_lazy(status,
		"Setting attribute " + ::std::to_string(static_cast<int>(attribute))
		+ " of " + detail_::identify(pool_handle));
}
inline permissions_t get_permissions(cuda::device::id_t device_id, pool::handle_t pool_handle)
{
	CUmemAccess_flags access_flags;
	auto mem_location = pool::detail_::create_mem_location(device_id);
	auto status = cuMemPoolGetAccess(&access_flags, pool_handle, &mem_location);
	throw_if_error_lazy(status,
		"Determining access information for " + cuda::device::detail_::identify(device_id)
		+ " to " + pool::detail_::identify(pool_handle));
	return permissions::detail_::from_flags(access_flags);
}
inline void set_permissions(span<cuda::device::id_t> device_ids, pool::handle_t pool_handle, permissions_t permissions)
{
	if (permissions.write and not permissions.read) {
		throw ::std::invalid_argument("Memory pool access permissions cannot be write-only");
	}

	CUmemAccess_flags flags = permissions.read ?
		(permissions.write ? CU_MEM_ACCESS_FLAGS_PROT_READWRITE : CU_MEM_ACCESS_FLAGS_PROT_READ) :
		CU_MEM_ACCESS_FLAGS_PROT_NONE;

	::std::vector<CUmemAccessDesc> descriptors;
	descriptors.reserve(device_ids.size());

	for (auto device_id : device_ids) {
		CUmemAccessDesc desc;
		desc.flags = flags;
		desc.location = pool::detail_::create_mem_location(device_id);
		descriptors.push_back(desc);
	}

	auto status = cuMemPoolSetAccess(pool_handle, descriptors.data(), descriptors.size());
	throw_if_error_lazy(status,
		"Setting access permissions for " + ::std::to_string(descriptors.size())
		+ " devices to " + pool::detail_::identify(pool_handle));
}
inline void set_permissions(cuda::device::id_t device_id, pool::handle_t pool_handle, permissions_t permissions)
{
	if (permissions.write and not permissions.read) {
		throw ::std::invalid_argument("Memory pool access permissions cannot be write-only");
	}

	CUmemAccessDesc desc;
	desc.flags = permissions.read ?
		(permissions.write ?
			CU_MEM_ACCESS_FLAGS_PROT_READWRITE :
			CU_MEM_ACCESS_FLAGS_PROT_READ) :
		CU_MEM_ACCESS_FLAGS_PROT_NONE;
	desc.location = pool::detail_::create_mem_location(device_id);
	auto status = cuMemPoolSetAccess(pool_handle, &desc, 1);
	throw_if_error_lazy(status,
		"Setting access permissions for " + cuda::device::detail_::identify(device_id)
		+ " to " + pool::detail_::identify(pool_handle));
}
/// Determine what access permissions work executing on @p device has to allocations made from @p pool
permissions_t get_permissions(const cuda::device_t& device, const pool_t& pool);

/// Set the access permissions which work executing on @p device (or on each device in @p devices)
/// will have to allocations made from @p pool
void set_permissions(const cuda::device_t& device, const pool_t& pool, permissions_t permissions);

template <typename DeviceRange>
void set_permissions(DeviceRange devices, const pool_t& pool, permissions_t permissions);
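// Usage sketch for the declarations above (a hypothetical snippet, not library code;
// device is assumed to be a valid cuda::device_t, pool a valid pool_t, and these
// functions reachable as cuda::memory::get_permissions / cuda::memory::set_permissions):
//
//   permissions_t read_only;
//   read_only.read  = true;
//   read_only.write = false;
//   cuda::memory::set_permissions(device, pool, read_only);
//   assert(cuda::memory::get_permissions(device, pool).read);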
struct reuse_policy_t {
	/// Allow re-use of memory freed in another stream, when event dependencies order the free before the allocation
	bool when_dependent_on_free;
	/// Allow re-use of memory whose free has already completed by allocation time, even without an explicit dependency
	bool independent_but_actually_freed;
	/// Allow the driver to make an allocation wait on pending frees in other streams
	bool allow_waiting_for_frees;
};
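// Example values (a sketch, not library constants): the most permissive policy, giving
// the pool maximum freedom to reuse freed memory, and the most conservative one:
//
//   constexpr const reuse_policy_t allow_all_reuse      { true,  true,  true  };
//   constexpr const reuse_policy_t only_certainly_freed { false, false, false };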
// Forward declaration; the full definition of this class resides in the pool::ipc sub-namespace
class imported_ptr_t;
/// A wrapper around a CUDA memory pool: a mechanism for stream-ordered allocation
/// and de-allocation of device memory
class pool_t {
public: // methods
	/// Allocate a region of memory from this pool, in stream order, on the stream's device
	region_t allocate(const stream_t& stream, size_t num_bytes) const;

	/// Obtain a locally-usable pointer to memory allocated from a pool in another process
	/// and shared via the inter-process communication (IPC) mechanism
	pool::ipc::imported_ptr_t import(const memory::pool::ipc::ptr_handle_t& exported_handle) const;

	/// Release, back to the OS, reserved memory which no live allocation requires,
	/// keeping at least @p min_bytes_to_keep bytes reserved
	void trim(size_t min_bytes_to_keep) const
	{
		auto status = cuMemPoolTrimTo(handle_, min_bytes_to_keep);
		throw_if_error_lazy(status, "Failed trimming " + pool::detail_::identify(*this)
			+ " down to " + ::std::to_string(min_bytes_to_keep));
	}
	/// Get the value of one of this pool's (boolean or numeric) attributes
	template<pool::attribute_t attribute>
	pool::attribute_value_t<attribute> get_attribute() const
	{
		auto attribute_with_status = pool::detail_::get_attribute_with_status<attribute>(handle_);
		throw_if_error_lazy(attribute_with_status.status, "Failed obtaining attribute "
			+ ::std::to_string(static_cast<int>(attribute)) + " of " + pool::detail_::identify(*this));
		return attribute_with_status.value;
	}
	/// Set the value of one of this pool's (boolean or numeric) attributes
	template<pool::attribute_t attribute>
	void set_attribute(const pool::attribute_value_t<attribute>& value) const
	{
		using outer_type = pool::attribute_value_t<attribute>;
		using inner_type = typename pool::detail_::attribute_value_inner_type<outer_type>::type;
		auto inner_value = static_cast<inner_type>(value);
		auto status = cuMemPoolSetAttribute(handle_, attribute, &inner_value);
		throw_if_error_lazy(status,
			"Failed setting attribute " + ::std::to_string(static_cast<int>(attribute))
			+ " of " + pool::detail_::identify(*this));
	}
	/// The amount of memory the pool may keep cached after its allocations are freed,
	/// rather than releasing it back to the OS
	size_t release_threshold() const
	{
		return static_cast<size_t>(get_attribute<CU_MEMPOOL_ATTR_RELEASE_THRESHOLD>());
	}

	/// Set the amount of memory the pool may keep cached after its allocations are freed,
	/// rather than releasing it back to the OS
	void set_release_threshold(size_t threshold) const
	{
		set_attribute<CU_MEMPOOL_ATTR_RELEASE_THRESHOLD>(threshold);
	}
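	// Usage sketch (pool assumed to be a valid pool_t): let the pool cache up to 64 MiB of
	// freed memory rather than returning it to the OS at synchronization points; trim()
	// (above) can later release that cached memory on demand:
	//
	//   pool.set_release_threshold(64 * 1024 * 1024);
	//   // ... allocate, free, synchronize ...
	//   pool.trim(0);  // release everything not backing a live allocation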
	/// Determine the access permissions which work executing on @p device has to allocations from this pool
	permissions_t get_permissions(const cuda::device_t& device) const
	{
		return memory::get_permissions(device, *this);
	}

	/// Set the access permissions which work executing on @p device will have to allocations from this pool
	void set_permissions(const cuda::device_t& device, permissions_t permissions) const
	{
		return memory::set_permissions(device, *this, permissions);
	}

	/// Set the access permissions which work executing on each of @p devices will have to allocations from this pool
	template <typename DeviceRange>
	void set_permissions(DeviceRange devices, permissions_t permissions) const
	{
		return memory::set_permissions(devices, *this, permissions);
	}
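	// Usage sketch (a hypothetical snippet; pool assumed to be a valid pool_t, and peers
	// a range of cuda::device_t's other than the pool's own device, e.g. a vector
	// populated from cuda::devices()):
	//
	//   permissions_t read_write;
	//   read_write.read  = true;
	//   read_write.write = true;
	//   pool.set_permissions(peers, read_write);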
	/// Get the current set of policy decisions on re-use of memory freed back into this pool
	pool::reuse_policy_t reuse_policy() const
	{
		return {
			get_attribute<CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES>(),
			get_attribute<CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC>(),
			get_attribute<CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES>()
		};
	}

	/// Set the policy decisions on re-use of memory freed back into this pool
	void set_reuse_policy(pool::reuse_policy_t reuse_policy) const
	{
		set_attribute<CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES>(reuse_policy.when_dependent_on_free);
		set_attribute<CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC>(reuse_policy.independent_but_actually_freed);
		set_attribute<CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES>(reuse_policy.allow_waiting_for_frees);
	}
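	// Usage sketch (pool assumed valid): only allow reuse of freed memory when event
	// dependencies already order the free before the new allocation:
	//
	//   pool.set_reuse_policy(pool::reuse_policy_t{ true, false, false });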
#if CUDA_VERSION >= 11030
	/// The amount of memory the driver currently has reserved (from the OS) to back this pool's allocations
	size_t backing_memory_size() const
	{
		return get_attribute<CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT>();
	}
#endif // CUDA_VERSION >= 11030

public: // field getters
	pool::handle_t handle() const noexcept { return handle_; }
	cuda::device::id_t device_id() const noexcept { return device_id_; }
	bool is_owning() const noexcept { return owning_; }
public: // constructors and destructor
	/// Copying is disallowed, so that no two wrapper objects own the same pool
	pool_t(const pool_t& other) = delete;

	pool_t(pool_t&& other) noexcept : pool_t(other.device_id_, other.handle_, other.owning_)
	{
		other.owning_ = false;
	}

	~pool_t()
	{
		if (owning_) {
			cuMemPoolDestroy(handle_);
		}
	}

protected: // constructors
	// Note: the pool::wrap() and pool::create() free functions below are the expected
	// way of obtaining pool_t instances via this constructor
	pool_t(cuda::device::id_t device_id, pool::handle_t handle, bool owning) noexcept
		: device_id_(device_id), handle_(handle), owning_(owning)
	{ }

protected: // data members
	cuda::device::id_t device_id_;
	pool::handle_t handle_;
	bool owning_;
}; // class pool_t
inline bool operator==(const pool_t& lhs, const pool_t& rhs)
{
	return lhs.device_id() == rhs.device_id() and lhs.handle() == rhs.handle();
}

inline bool operator!=(const pool_t& lhs, const pool_t& rhs)
{
	return not (lhs == rhs);
}
/// Wrap an existing CUDA memory pool handle in a pool_t wrapper object
inline pool_t wrap(cuda::device::id_t device_id, pool::handle_t handle, bool owning) noexcept
{
	return { device_id, handle, owning };
}
/// Create a new memory pool for allocating memory on, and accessible to, the device with the specified id
template<shared_handle_kind_t SharedHandleKind = shared_handle_kind_t::no_export>
pool_t create(cuda::device::id_t device_id)
{
	auto props = create_raw_properties<SharedHandleKind>(device_id);
	handle_t handle;
	auto status = cuMemPoolCreate(&handle, &props);
	throw_if_error_lazy(status,
		"Failed creating a memory pool on device " + cuda::device::detail_::identify(device_id));
	constexpr const bool is_owning { true };
	return wrap(device_id, handle, is_owning);
}
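// Usage sketch (a hypothetical snippet, not library code; device and stream are assumed
// to be valid cuda::device_t and cuda::stream_t objects, and create() assumed reachable
// as cuda::memory::pool::create):
//
//   auto pool   = cuda::memory::pool::create(device.id());
//   auto region = pool.allocate(stream, 1024 * 1024);   // stream-ordered allocation
//   // ... enqueue kernels on `stream` that use region.start() ...
//   // ... later, free the region back to the pool in stream order
//   //     (e.g. with the driver's cuMemFreeAsync), once that work is done ...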
inline ::std::string identify(const pool_t& pool)
{
	return identify(pool.handle(), pool.device_id());
}
/// Create a new memory pool for the specified device (the device_t-taking counterpart
/// of the overload defined above)
template<shared_handle_kind_t SharedHandleKind>
pool_t create(const cuda::device_t& device);
#endif // CUDA_VERSION >= 11020

#endif // CUDA_API_WRAPPERS_MEMORY_POOL_HPP_