cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
pointer.hpp
Go to the documentation of this file.
1 
15 #pragma once
16 #ifndef CUDA_API_WRAPPERS_POINTER_HPP_
17 #define CUDA_API_WRAPPERS_POINTER_HPP_
18 
19 #include "constants.hpp"
20 #include "error.hpp"
21 #include "types.hpp"
22 
23 #ifndef NDEBUG
24 #include <cassert>
25 #endif
26 
27 namespace cuda {
28 
30 class device_t;
31 class context_t;
33 
34 namespace memory {
35 
41 enum type_t : ::std::underlying_type<CUmemorytype>::type {
42  host_ = CU_MEMORYTYPE_HOST,
43  device_ = CU_MEMORYTYPE_DEVICE,
44  array = CU_MEMORYTYPE_ARRAY,
45  unified_ = CU_MEMORYTYPE_UNIFIED,
46  managed_ = CU_MEMORYTYPE_UNIFIED, // an alias (more like the runtime API name)
47  non_cuda = ~(::std::underlying_type<CUmemorytype>::type{0})
48 };
49 
50 namespace pointer {
51 
52 namespace detail_ {
53 
54 // Note: We could theoretically template this, but - there don't seem to be a lot of "clients" for this
55 // function right now, and I would rather not drag in <tuple>
56 void get_attributes(unsigned num_attributes, pointer::attribute_t* attributes, void** value_ptrs, const void* ptr);
57 
58 template <attribute_t attribute> struct attribute_value {};
59 template <> struct attribute_value<CU_POINTER_ATTRIBUTE_CONTEXT> { using type = context::handle_t;};
60 template <> struct attribute_value<CU_POINTER_ATTRIBUTE_MEMORY_TYPE> { using type = memory::type_t;};
61 template <> struct attribute_value<CU_POINTER_ATTRIBUTE_DEVICE_POINTER> { using type = void*;};
62 template <> struct attribute_value<CU_POINTER_ATTRIBUTE_HOST_POINTER> { using type = void*;};
63 template <> struct attribute_value<CU_POINTER_ATTRIBUTE_P2P_TOKENS> { using type = struct CUDA_POINTER_ATTRIBUTE_P2P_TOKEN;};
64 template <> struct attribute_value<CU_POINTER_ATTRIBUTE_SYNC_MEMOPS> { using type = int;};
65 template <> struct attribute_value<CU_POINTER_ATTRIBUTE_BUFFER_ID> { using type = unsigned long long;};
66 template <> struct attribute_value<CU_POINTER_ATTRIBUTE_IS_MANAGED> { using type = int;};
67 #if CUDA_VERSION >= 9020
68 template <> struct attribute_value<CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL> { using type = cuda::device::id_t;};
69 #if CUDA_VERSION >= 10020
70 template <> struct attribute_value<CU_POINTER_ATTRIBUTE_RANGE_START_ADDR> { using type = void*;};
71 template <> struct attribute_value<CU_POINTER_ATTRIBUTE_RANGE_SIZE> { using type = size_t;};
72 template <> struct attribute_value<CU_POINTER_ATTRIBUTE_MAPPED> { using type = int;};
73 template <> struct attribute_value<CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE> { using type = int;};
74 template <> struct attribute_value<CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES> { using type = uint64_t;};
75 #if CUDA_VERSION >= 11030
76 template <> struct attribute_value<CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE> { using type = pool::handle_t;};
77 #endif // CUDA_VERSION >= 11030
78 #endif // CUDA_VERSION >= 10020
79 #endif // CUDA_VERSION >= 9020
80 
81 template <CUpointer_attribute attribute>
82 using attribute_value_t = typename attribute_value<attribute>::type;
83 
84 template<attribute_t attribute>
85 struct status_and_attribute_value {
86  status_t status;
87  attribute_value_t<attribute> value;
88 };
89 
90 template<attribute_t attribute>
91 status_and_attribute_value<attribute> get_attribute_with_status(const void *ptr);
92 
93 template <attribute_t attribute>
94 attribute_value_t<attribute> get_attribute(const void* ptr);
95 
96 inline context::handle_t context_handle_of(const void* ptr)
97 {
98  return pointer::detail_::get_attribute<CU_POINTER_ATTRIBUTE_CONTEXT>(ptr);
99 }
100 
101 inline cuda::device::id_t device_id_of(const void* ptr);
102 
103 } // namespace detail_
104 
105 } // namespace pointer
106 
112 inline memory::type_t type_of(const void* ptr)
113 {
114  auto result = pointer::detail_::get_attribute_with_status<CU_POINTER_ATTRIBUTE_MEMORY_TYPE>(ptr);
115  // Note: As of CUDA 12, CUDA treats passing a non-CUDA-allocated pointer to the memory type check
116  // as an error, though it really should not be
117  return (result.status == status::named_t::invalid_value) ?
118  memory::type_t::non_cuda : result.value;
119 }
120 
123 context_t context_of(void const* ptr);
124 
130 template <typename T>
131 class pointer_t {
132 public: // getters and operators
133 
138  T* get() const { return ptr_; }
140  operator T*() const { return ptr_; }
142 
143 protected:
144  template <pointer::attribute_t attribute>
145  pointer::detail_::attribute_value_t<attribute> get_attribute() const
146  {
147  return pointer::detail_::get_attribute<attribute>(ptr_);
148  }
149 
150 public: // other non-mutators
151 
155  device_t device() const;
156 
161  context_t context() const;
162 
170  T* get_for_device() const
171  {
172  return pointer::detail_::get_attribute<CU_POINTER_ATTRIBUTE_DEVICE_POINTER>(ptr_);
173  }
174 
182  T* get_for_host() const
183  {
184  return pointer::detail_::get_attribute<CU_POINTER_ATTRIBUTE_HOST_POINTER>(ptr_);
185  }
186 
187 #if CUDA_VERSION >= 10020
188  region_t containing_range() const
189  {
190  // TODO: Consider checking the alignment
191  auto range_start = pointer::detail_::get_attribute<CU_POINTER_ATTRIBUTE_RANGE_START_ADDR>(ptr_);
192  auto range_size = pointer::detail_::get_attribute<CU_POINTER_ATTRIBUTE_RANGE_SIZE>(ptr_);
193  return { range_start, range_size};
194  }
195 #endif
196 
209  {
210  pointer::attribute_t attributes[] = {
211  CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
212  CU_POINTER_ATTRIBUTE_HOST_POINTER,
213  CU_POINTER_ATTRIBUTE_DEVICE_POINTER
214  };
215  type_t memory_type;
216  T* host_ptr;
217  T* device_ptr;
218  void* value_ptrs[] = { &memory_type, &host_ptr, &device_ptr };
219  pointer::detail_::get_attributes(3, attributes, value_ptrs, ptr_);
220 
221 #ifndef NDEBUG
222  assert(host_ptr == ptr_ or device_ptr == ptr_);
223 #endif
224  return { ptr_ == host_ptr ? device_ptr : host_ptr };
225  }
226 
227 public: // constructors
229  pointer_t(T* ptr) noexcept : ptr_(ptr) { }
230  pointer_t(const pointer_t& other) noexcept = default;
231  pointer_t(pointer_t&& other) noexcept = default;
232 
233 protected: // data members
234  T* const ptr_;
235 };
236 
237 namespace pointer {
238 
245 template<typename T>
246 inline pointer_t<T> wrap(T* ptr) noexcept { return { ptr }; }
247 
248 } // namespace pointer
249 } // namespace memory
250 } // namespace cuda
251 
252 #endif // CUDA_API_WRAPPERS_POINTER_HPP_
range::handle_t range_start(const CharT *description, range::type_t type=range::type_t::unspecified, color_t color=color_t::LightRed())
Mark the beginning of a range on the profiler timeline, giving it also a color and some descriptive t...
Definition: profiling.hpp:203
Wrapper class for a CUDA context.
Definition: context.hpp:244
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
detail_::region_helper< memory::region_t > region_t
A child class of the generic region_t with some managed-memory-specific functionality.
Definition: memory.hpp:1960
CUcontext handle_t
Raw CUDA driver handle for a context; see context_t.
Definition: types.hpp:878
pointer_t(T *ptr) noexcept
Wrap a raw pointer in this class.
Definition: pointer.hpp:229
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:850
memory::type_t type_of(const void *ptr)
Determine the type of memory at a given address vis-a-vis the CUDA ecosystem: Was it allocated by the...
Definition: pointer.hpp:112
context_t context_of(const void *ptr)
Obtain (a non-owning wrapper for) the CUDA context with which a memory address is associated (e...
Definition: pointer.hpp:50
CUpointer_attribute attribute_t
Raw CUDA driver choice type for attributes of pointers.
Definition: types.hpp:662
::std::size_t size_t
A size type for use throughout the wrappers library (except when specific API functions limit the siz...
Definition: types.hpp:81
pointer_t other_side_of_region_pair() const
Definition: pointer.hpp:208
A convenience wrapper around a raw pointer "known" to the CUDA runtime and which thus has various kin...
Definition: pointer.hpp:131
CUarray handle_t
Raw CUDA driver handle for arrays (of any dimension)
Definition: array.hpp:34
Facilities for exception-based handling of Runtime and Driver API errors, including a basic exception...
Fundamental CUDA-related constants and enumerations, not dependent on any more complex abstractions...
T * get_for_device() const
Definition: pointer.hpp:170
type_t
The CUDA execution ecosystem involves different memory spaces in their relation to a GPU device or th...
Definition: pointer.hpp:41
T * get_for_host() const
Definition: pointer.hpp:182
Wrapper class for a CUDA device.
Definition: device.hpp:135
Fundamental CUDA-related type definitions.
CUresult status_t
Indicates either the result (success or error index) of a CUDA Runtime or Driver API call...
Definition: types.hpp:77