eyalroz/cuda-api-wrappers/pointer_8hpp_source.html

 #pragma once
 #ifndef CUDA_API_WRAPPERS_POINTER_HPP_
 #define CUDA_API_WRAPPERS_POINTER_HPP_

 #include "constants.hpp"
 #include "error.hpp"
 #include "types.hpp"

 #ifndef NDEBUG
 #include <cassert>
 #endif

 namespace cuda {

 class device_t;
 class context_t;

 namespace memory {

 /**
  * Kinds of memory an address may belong to, as far as CUDA is concerned.
  *
  * The enumerators mirror the driver's `CUmemorytype` values and share its
  * underlying integer type, plus one extra value (@ref non_cuda) which does
  * not come from a `CU_MEMORYTYPE_*` constant.
  */
 enum type_t : ::std::underlying_type<CUmemorytype>::type {
     host_    = CU_MEMORYTYPE_HOST,
     device_  = CU_MEMORYTYPE_DEVICE,
     array    = CU_MEMORYTYPE_ARRAY,
     unified_ = CU_MEMORYTYPE_UNIFIED,
     // Same value as unified_ - an alias (more like the runtime API name)
     managed_ = unified_,
     // Bitwise complement of zero in the underlying type; type_of() yields this
     // value when the driver answers the memory-type query with invalid_value
     non_cuda = ~(::std::underlying_type<CUmemorytype>::type{0})
 };

 namespace pointer {

 namespace detail_ {

 /**
  * Obtain several attributes of a single pointer with one call.
  *
  * Declaration only - the definition is not part of this header section.
  *
  * @param num_attributes number of entries in both @p attributes and @p value_ptrs
  * @param attributes which attributes to obtain
  * @param value_ptrs for each requested attribute, in the same order, the address
  *     of a variable to receive that attribute's value
  * @param ptr the address whose attributes are being queried
  *
  * NOTE(review): presumably wraps the driver's cuPointerGetAttributes() and
  * throws on failure - confirm against the definition.
  */
 // Note: We could theoretically template this, but - there don't seem to be a lot of "clients" for this
 // function right now, and I would rather not drag in <tuple>
 void get_attributes(unsigned num_attributes, pointer::attribute_t* attributes, void** value_ptrs, const void* ptr);

 /**
  * Compile-time map from a pointer attribute to the C++ type of that
  * attribute's value.
  *
  * The primary template has no `type` member, so naming
  * `attribute_value<a>::type` for an attribute without a specialization
  * below is ill-formed. Specializations for attributes which only exist in
  * newer CUDA versions are guarded by nested `CUDA_VERSION` checks.
  */
 template <attribute_t attribute> struct attribute_value {};
 template <> struct attribute_value<CU_POINTER_ATTRIBUTE_CONTEXT>                    { using type = context::handle_t;};
 template <> struct attribute_value<CU_POINTER_ATTRIBUTE_MEMORY_TYPE>                { using type = memory::type_t;};
 template <> struct attribute_value<CU_POINTER_ATTRIBUTE_DEVICE_POINTER>             { using type = void*;};
 template <> struct attribute_value<CU_POINTER_ATTRIBUTE_HOST_POINTER>               { using type = void*;};
 // Note: This must name the driver's own CUDA_POINTER_ATTRIBUTE_P2P_TOKENS type (plural).
 // An elaborated `struct <undeclared name>` specifier would silently declare a brand-new
 // incomplete type here, which could never be used to hold the attribute's value.
 template <> struct attribute_value<CU_POINTER_ATTRIBUTE_P2P_TOKENS>                 { using type = CUDA_POINTER_ATTRIBUTE_P2P_TOKENS;};
 template <> struct attribute_value<CU_POINTER_ATTRIBUTE_SYNC_MEMOPS>                { using type = int;};
 template <> struct attribute_value<CU_POINTER_ATTRIBUTE_BUFFER_ID>                  { using type = unsigned long long;};
 template <> struct attribute_value<CU_POINTER_ATTRIBUTE_IS_MANAGED>                 { using type = int;};
 #if CUDA_VERSION >= 9020
 template <> struct attribute_value<CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL>             { using type = cuda::device::id_t;};
 #if CUDA_VERSION >= 10020
 template <> struct attribute_value<CU_POINTER_ATTRIBUTE_RANGE_START_ADDR>           { using type = void*;};
 template <> struct attribute_value<CU_POINTER_ATTRIBUTE_RANGE_SIZE>                 { using type = size_t;};
 template <> struct attribute_value<CU_POINTER_ATTRIBUTE_MAPPED>                     { using type = int;};
 template <> struct attribute_value<CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE> { using type = int;};
 template <> struct attribute_value<CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES>       { using type = uint64_t;};
 #if CUDA_VERSION >= 11030
 template <> struct attribute_value<CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE>             { using type = pool::handle_t;};
 #endif // CUDA_VERSION >= 11030
 #endif // CUDA_VERSION >= 10020
 #endif // CUDA_VERSION >= 9020

 template <CUpointer_attribute attribute>
 using attribute_value_t = typename attribute_value<attribute>::type;

 /**
  * A status code bundled together with an attribute value of the type
  * appropriate for the attribute; this is what
  * @ref get_attribute_with_status returns.
  */
 template<attribute_t attribute>
 struct status_and_attribute_value {
     // Status of the attribute query
     status_t status;
     // The attribute's value.
     // NOTE(review): presumably only meaningful when `status` indicates success - confirm
     attribute_value_t<attribute> value;
 };

 /**
  * Obtain a single attribute of a pointer, together with the status of the
  * query, so that the caller can inspect the status itself (as type_of() does).
  *
  * Declaration only - the definition is not part of this header section.
  *
  * @tparam attribute the attribute to obtain
  * @param ptr the address whose attribute is being queried
  * @return the query's status and the attribute's value
  */
 template<attribute_t attribute>
 status_and_attribute_value<attribute> get_attribute_with_status(const void *ptr);

 /**
  * Obtain a single attribute of a pointer.
  *
  * Declaration only - the definition is not part of this header section.
  *
  * @tparam attribute the attribute to obtain
  * @param ptr the address whose attribute is being queried
  * @return the attribute's value, typed according to @ref attribute_value
  *
  * NOTE(review): presumably throws when the underlying query fails - confirm
  * against the definition.
  */
 template <attribute_t attribute>
 attribute_value_t<attribute> get_attribute(const void* ptr);

 /**
  * Obtain the raw handle of the context recorded in a pointer's
  * `CU_POINTER_ATTRIBUTE_CONTEXT` attribute.
  *
  * @param ptr the address to query
  * @return the value of the pointer's context attribute
  */
 inline context::handle_t context_handle_of(const void* ptr)
 {
     constexpr const auto context_attribute = CU_POINTER_ATTRIBUTE_CONTEXT;
     return pointer::detail_::get_attribute<context_attribute>(ptr);
 }

 /**
  * Obtain the id of a device associated with a pointer.
  *
  * Declaration only - the definition is not part of this header section.
  *
  * NOTE(review): this is declared `inline`, so a definition must be visible
  * in every translation unit which uses it - confirm another header of the
  * library provides one.
  *
  * @param ptr the address to query
  */
 inline cuda::device::id_t device_id_of(const void* ptr);

 } // namespace detail_

 } // namespace pointer

 /**
  * Determine the type of memory at a given address vis-a-vis the CUDA
  * ecosystem.
  *
  * @param ptr the address to classify
  * @return @ref type_t::non_cuda if the memory-type query ended with the
  *     `invalid_value` status; otherwise, the memory type which the query produced
  *
  * NOTE(review): no status other than `invalid_value` is examined here, so for any
  * other failure the queried value is returned as-is - confirm this is intended.
  */
 inline memory::type_t type_of(const void* ptr)
 {
     auto queried = pointer::detail_::get_attribute_with_status<CU_POINTER_ATTRIBUTE_MEMORY_TYPE>(ptr);
     // Note: As of CUDA 12, CUDA treats passing a non-CUDA-allocated pointer to the memory type check
     // as an error, though it really should not be
     if (queried.status == status::named_t::invalid_value) {
         return memory::type_t::non_cuda;
     }
     return queried.value;
 }

 /**
  * Obtain (a non-owning wrapper for) the CUDA context with which a memory
  * address is associated.
  *
  * Declaration only - the definition is not part of this header section.
  *
  * @param ptr the address to query
  */
 context_t context_of(void const* ptr);

 template <typename T>
 class pointer_t {
 public: // getters and operators

     T* get() const { return ptr_; }
     operator T*() const { return ptr_; }

 protected:
     template <pointer::attribute_t attribute>
     pointer::detail_::attribute_value_t<attribute> get_attribute() const
     {
         return pointer::detail_::get_attribute<attribute>(ptr_);
     }

 public: // other non-mutators

     device_t device() const;

     context_t context() const;

     T* get_for_device() const
     {
         return pointer::detail_::get_attribute<CU_POINTER_ATTRIBUTE_DEVICE_POINTER>(ptr_);
     }

     T* get_for_host() const
     {
         return pointer::detail_::get_attribute<CU_POINTER_ATTRIBUTE_HOST_POINTER>(ptr_);
     }

 #if CUDA_VERSION >= 10020
     region_t containing_range() const
     {
         // TODO: Consider checking the alignment
         auto range_start = pointer::detail_::get_attribute<CU_POINTER_ATTRIBUTE_RANGE_START_ADDR>(ptr_);
         auto range_size = pointer::detail_::get_attribute<CU_POINTER_ATTRIBUTE_RANGE_SIZE>(ptr_);
         return { range_start, range_size};
     }
 #endif

     pointer_t other_side_of_region_pair() const
     {
         pointer::attribute_t attributes[] = {
         CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
         CU_POINTER_ATTRIBUTE_HOST_POINTER,
         CU_POINTER_ATTRIBUTE_DEVICE_POINTER
         };
         type_t memory_type;
         T* host_ptr;
         T* device_ptr;
         void* value_ptrs[] = { &memory_type, &host_ptr, &device_ptr };
         pointer::detail_::get_attributes(3, attributes, value_ptrs, ptr_);

 #ifndef NDEBUG
         assert(host_ptr == ptr_ or device_ptr == ptr_);
 #endif
         return { ptr_ == host_ptr ? device_ptr : host_ptr };
     }

 public: // constructors
     pointer_t(T* ptr) noexcept : ptr_(ptr) { }
     pointer_t(const pointer_t& other) noexcept = default;
     pointer_t(pointer_t&& other) noexcept = default;

 protected: // data members
     T* const ptr_;
 };

 namespace pointer {

 /**
  * Wrap a raw pointer in a @ref pointer_t, with the element type deduced
  * from the argument.
  *
  * @param ptr the raw pointer to wrap
  * @return a wrapper holding @p ptr
  */
 template <typename T>
 inline pointer_t<T> wrap(T* ptr) noexcept
 {
     return pointer_t<T>{ ptr };
 }

 } // namespace pointer
 } // namespace memory
 } // namespace cuda

 #endif // CUDA_API_WRAPPERS_POINTER_HPP_
cuda::profiling::mark::range_start
range::handle_t range_start(const CharT *description, range::type_t type=range::type_t::unspecified, color_t color=color_t::LightRed())
Mark the beginning of a range on the profiler timeline, giving it also a color and some descriptive t...
Definition: profiling.hpp:203

cuda::context_t
Wrapper class for a CUDA context.
Definition: context.hpp:244

cuda
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22

cuda::memory::managed::region_t
detail_::region_helper< memory::region_t > region_t
A child class of the generic region_t with some managed-memory-specific functionality.
Definition: memory.hpp:1960

cuda::context::handle_t
CUcontext handle_t
Raw CUDA driver handle for a context; see context_t.
Definition: types.hpp:878

cuda::memory::pointer_t::pointer_t
pointer_t(T *ptr) noexcept
Wrap a raw pointer in this class.
Definition: pointer.hpp:229

cuda::device::id_t
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:850

cuda::memory::type_of
memory::type_t type_of(const void *ptr)
Determine the type of memory at a given address vis-a-vis the CUDA ecosystem: Was it allocated by the...
Definition: pointer.hpp:112

cuda::memory::context_of
context_t context_of(const void *ptr)
Obtain (a non-owning wrapper for) the CUDA context with which a memory address is associated (e...
Definition: pointer.hpp:50

cuda::memory::pointer::attribute_t
CUpointer_attribute attribute_t
Raw CUDA driver choice type for attributes of pointers.
Definition: types.hpp:662

cuda::size_t
::std::size_t size_t
A size type for use throughout the wrappers library (except when specific API functions limit the siz...
Definition: types.hpp:81

cuda::memory::pointer_t::other_side_of_region_pair
pointer_t other_side_of_region_pair() const
Definition: pointer.hpp:208

cuda::memory::pointer_t
A convenience wrapper around a raw pointer "known" to the CUDA runtime and which thus has various kin...
Definition: pointer.hpp:131

cuda::array::handle_t
CUarray handle_t
Raw CUDA driver handle for arrays (of any dimension)
Definition: array.hpp:34

error.hpp
Facilities for exception-based handling of Runtime and Driver API errors, including a basic exception...

constants.hpp
Fundamental CUDA-related constants and enumerations, not dependent on any more complex abstractions...

cuda::memory::pointer_t::get_for_device
T * get_for_device() const
Definition: pointer.hpp:170

cuda::memory::type_t
type_t
The CUDA execution ecosystem involves different memory spaces in their relation to a GPU device or th...
Definition: pointer.hpp:41

cuda::memory::pointer_t::get_for_host
T * get_for_host() const
Definition: pointer.hpp:182

cuda::device_t
Wrapper class for a CUDA device.
Definition: device.hpp:135

types.hpp
Fundamental CUDA-related type definitions.

cuda::status_t
CUresult status_t
Indicates either the result (success or error index) of a CUDA Runtime or Driver API call...
Definition: types.hpp:77