19 #ifndef CUDA_API_WRAPPERS_COMMON_TYPES_HPP_ 20 #define CUDA_API_WRAPPERS_COMMON_TYPES_HPP_ 27 #include "detail/preamble.hpp" 28 #include "detail/optional.hpp" 29 #include "detail/optional_ref.hpp" 30 #include "detail/span.hpp" 31 #include "detail/region.hpp" 32 #include "detail/type_traits.hpp" 35 #include <builtin_types.h> 39 #include <type_traits> 63 template <
typename T,
size_t N>
101 template<dimensionality_t NumDimensions>
112 : width(width_), height(height_), depth(depth_) { }
113 constexpr __host__ __device__
dimensions_t(cudaExtent e)
114 : dimensions_t(e.width, e.height, e.depth) { }
115 constexpr __host__ __device__ dimensions_t(
const dimensions_t& other)
116 : dimensions_t(other.width, other.height, other.depth) { }
117 constexpr __host__ __device__ dimensions_t(dimensions_t&& other)
118 : dimensions_t(other.width, other.height, other.depth) { }
119 constexpr __host__ __device__ dimensions_t(
dimension_t linear_size)
120 : dimensions_t(linear_size, 1, 1) { }
122 CPP14_CONSTEXPR dimensions_t& operator=(
const dimensions_t& other) =
default;
123 CPP14_CONSTEXPR dimensions_t& operator=(dimensions_t&& other) =
default;
125 constexpr __host__ __device__
operator cudaExtent()
const 127 return { width, height, depth };
134 constexpr __host__ __device__
size_t volume()
const {
return width * height * depth; }
137 constexpr __host__ __device__
size_t size()
const {
return volume(); }
143 return (width > 1) + (height> 1) + (depth > 1);
147 static constexpr __host__ __device__ dimensions_t cube(
dimension_t x) {
return dimensions_t{ x, x, x }; }
151 static constexpr __host__ __device__ dimensions_t
zero() {
return cube(0); }
162 : width(width_), height(height_) { }
164 : dimensions_t(other.width, other.height) { }
165 constexpr __host__ __device__ dimensions_t(dimensions_t&& other)
166 : dimensions_t(other.width, other.height) { }
167 constexpr __host__ __device__ dimensions_t(
dimension_t linear_size)
168 : dimensions_t(linear_size, 1) { }
170 CPP14_CONSTEXPR __host__ __device__ dimensions_t& operator=(
const dimensions_t& other)
172 width = other.width; height = other.height;
176 CPP14_CONSTEXPR __host__ __device__ dimensions_t& operator=(dimensions_t&& other)
178 width = other.width; height = other.height;
183 constexpr __host__ __device__
size_t area()
const {
return width * height; }
186 constexpr __host__ __device__
size_t size()
const {
return area(); }
192 return (width > 1) + (height> 1);
198 static constexpr __host__ __device__ dimensions_t square(
dimension_t x) {
return dimensions_t{ x, x }; }
202 static constexpr __host__ __device__ dimensions_t
zero() {
return square(0); }
251 #if CUDA_VERSION >= 10000 257 #if CUDA_VERSION >= 10000 261 enum class mode_t : ::std::underlying_type<CUstreamCaptureMode>::type {
262 global = CU_STREAM_CAPTURE_MODE_GLOBAL,
263 thread = CU_STREAM_CAPTURE_MODE_THREAD_LOCAL,
264 thread_local_ = thread,
265 relaxed = CU_STREAM_CAPTURE_MODE_RELAXED
268 enum class state_t : ::std::underlying_type<CUstreamCaptureStatus>::type {
269 active = CU_STREAM_CAPTURE_STATUS_ACTIVE,
271 invalidated = CU_STREAM_CAPTURE_STATUS_INVALIDATED,
272 none = CU_STREAM_CAPTURE_STATUS_NONE,
278 inline bool is_capturing(capture::state_t status) noexcept
280 return status == capture::state_t::active;
283 #endif // CUDA_VERSION >= 10000 326 : x(x_), y(y_), z(z_) { }
332 constexpr __host__ __device__
operator uint3(
void)
const {
return { x, y, z }; }
336 __host__ __device__
operator dim3(
void)
const noexcept {
return { x, y, z }; }
339 constexpr __host__ __device__
size_t volume() const noexcept {
return static_cast<size_t>(x) * y * z; }
345 return (z > 1) + (y > 1) + (x > 1);
367 (rhs.x % lhs.x == 0) and
368 (rhs.y % lhs.y == 0) and
369 (rhs.z % lhs.z == 0);
377 return (i == 0) ? x :
381 CPP14_CONSTEXPR
dimension_t& operator[](
int i) noexcept {
382 return (i == 0) ? x :
390 constexpr
inline bool operator==(
const dim3& lhs,
const dim3& rhs) noexcept
392 return lhs.x == rhs.x and lhs.y == rhs.y and lhs.z == rhs.z;
394 constexpr
inline bool operator!=(
const dim3& lhs,
const dim3& rhs) noexcept
396 return not (lhs == rhs);
400 return lhs.x == rhs.x and lhs.y == rhs.y and lhs.z == rhs.z;
404 return not (lhs == rhs);
432 dimension_type x, y, z;
435 dimension_type width_, dimension_type height_, dimension_type depth_) noexcept
436 : x(width_), y(height_), z(depth_) { }
439 : x(dims.x), y(dims.y), z(dims.z) { }
442 : x(dims.x), y(dims.y), z(dims.z) { }
456 constexpr __host__ __device__
size_t volume()
const noexcept {
return x * y * z; }
457 constexpr __host__ __device__
size_t size()
const noexcept {
return volume(); }
458 constexpr __host__ __device__
dimensionality_t dimensionality()
const noexcept
460 return ((x > 1) + (y > 1) + (z > 1));
468 return (i == 0) ? x :
472 CPP14_CONSTEXPR dimension_type& operator[](
int i) noexcept {
473 return (i == 0) ? x :
483 return (lhs.x == rhs.x) and (lhs.y == rhs.y) and (lhs.z == rhs.z);
488 return not (lhs == rhs);
513 constexpr
size_t volume() const noexcept {
return flatten().volume(); }
516 constexpr
size_t dimensionality() const noexcept {
return flatten().dimensionality(); }
526 #if __cplusplus >= 202002L 533 #if __cplusplus < 202002L 537 return (lhs.grid == rhs.grid) and (lhs.block == rhs.block);
542 return not (lhs == rhs);
545 #endif // __cplusplus < 202002L 557 #if CUDA_VERSION >= 10020 566 struct permissions_t {
572 operator CUmemAccess_flags()
const noexcept
575 (write ? CU_MEM_ACCESS_FLAGS_PROT_READWRITE : CU_MEM_ACCESS_FLAGS_PROT_READ) :
576 CU_MEM_ACCESS_FLAGS_PROT_NONE;
581 namespace permissions {
583 constexpr
inline permissions_t none() {
return permissions_t{
false,
false }; }
584 constexpr
inline permissions_t read_only() {
return permissions_t{
true,
false }; }
585 constexpr
inline permissions_t write_only() {
return permissions_t{
false,
true }; }
586 constexpr
inline permissions_t read_and_write() {
return permissions_t{
true,
true }; }
591 inline permissions_t from_flags(CUmemAccess_flags access_flags)
593 bool read = (access_flags & CU_MEM_ACCESS_FLAGS_PROT_READ);
594 bool write = (access_flags & CU_MEM_ACCESS_FLAGS_PROT_READWRITE);
595 return permissions_t{read, write};
603 namespace physical_allocation {
607 enum class shared_handle_kind_t : ::std::underlying_type<CUmemAllocationHandleType>::type {
608 #if CUDA_VERSION >= 11020 609 no_export = CU_MEM_HANDLE_TYPE_NONE,
611 posix_file_descriptor = CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR,
612 win32_handle = CU_MEM_HANDLE_TYPE_WIN32,
613 win32_kmt = CU_MEM_HANDLE_TYPE_WIN32_KMT,
618 template<shared_handle_kind_t SharedHandleKind>
struct shared_handle_type_helper;
620 template <>
struct shared_handle_type_helper<shared_handle_kind_t::posix_file_descriptor> {
using type = int; };
621 #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) 622 template <>
struct shared_handle_type_helper<shared_handle_kind_t::win32_handle> {
using type =
void *; };
629 template<shared_handle_kind_t SharedHandleKind>
630 using shared_handle_t =
typename detail_::shared_handle_type_helper<SharedHandleKind>::type;
633 #endif // CUDA_VERSION >= 10020 634 #if CUDA_VERSION >= 11020 644 using shared_handle_kind_t = physical_allocation::shared_handle_kind_t;
648 using physical_allocation::shared_handle_t;
654 using ptr_handle_t = CUmemPoolPtrExportData;
659 #endif // CUDA_VERSION >= 11020 676 static_assert(
sizeof(
void *) ==
sizeof(
device::address_t),
"Unexpected address size");
686 static_assert(
sizeof(
void*) ==
sizeof(
address_t),
"Incompatible sizes for a void pointer and memory::device::address_t");
687 return reinterpret_cast<address_t>(device_ptr);
704 static_assert(
sizeof(
void*) ==
sizeof(
device::address_t),
"Incompatible sizes for a void pointer and memory::device::address_t");
705 return reinterpret_cast<void*
>(
address);
732 using size_t = unsigned;
757 to_supporters_of_concurrent_managed_access,
762 #if CUDA_VERSION >= 11070 763 enum class barrier_scope_t : ::std::underlying_type<CUstreamMemoryBarrier_flags>::type {
767 device = CU_STREAM_MEMORY_BARRIER_TYPE_GPU,
768 system = CU_STREAM_MEMORY_BARRIER_TYPE_SYS
770 #endif // CUDA_VERSION >= 11070 772 #if CUDA_VERSION >= 10000 782 struct subregion_spec_t {
789 #endif // CUDA_VERSION >= 10000 817 equal = equal_l1_and_shared_memory,
818 prefer_shared = prefer_shared_memory_over_l1,
819 prefer_l1 = prefer_l1_over_shared_memory,
833 : ::std::underlying_type<CUsharedconfig>::type
835 device_default = CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE,
836 four_bytes_per_bank = CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE,
837 eight_bytes_per_bank = CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE
864 namespace peer_to_peer {
882 using flags_t = unsigned;
943 using flags_t = context::flags_t;
945 namespace primary_context {
/// A static_cast that is only permitted when it changes nothing but the
/// value category: T and U must be the same type up to references.
/// Using it with genuinely different types is a compile-time error.
template <typename T, typename U>
inline T identity_cast(U&& x)
{
	static_assert(::std::is_same<
			typename ::std::remove_reference<T>::type,
			typename ::std::remove_reference<U>::type
		>::value, // `>::value,` restored; dropped in the garbled original
		"Casting to a different type - don't use identity_cast");
	return static_cast<T>(::std::forward<U>(x));
}
993 using handle_t = CUfunction;
997 #if CUDA_VERSION >= 10000 1006 #if CUDA_VERSION >= 13010 1007 using id_t = unsigned;
1014 using handle_t = CUgraphNode;
1016 using const_handle_t = CUgraphNode_st
const *;
1018 constexpr
const const_handle_t no_handle =
nullptr;
1027 namespace template_ {
1030 using handle_t = CUgraph;
1031 constexpr
const handle_t null_handle =
nullptr;
1040 namespace instance {
1043 using handle_t = CUgraphExec;
1049 #endif // CUDA_VERSION >= 10000 1053 #endif // CUDA_API_WRAPPERS_COMMON_TYPES_HPP_ int attribute_value_t
All CUDA device attributes (cuda::device::attribute_t) have a value of this type. ...
Definition: types.hpp:862
constexpr size_t dimensionality() const noexcept
Definition: types.hpp:516
Keep control and spin-check for result availability.
Definition: types.hpp:914
int attribute_value_t
The uniform type the CUDA driver uses for all kernel attributes; it is typically more appropriate to ...
Definition: types.hpp:990
Alias for the default behavior; see heuristic .
Definition: types.hpp:903
cuda::context::handle_t handle_t
Raw CUDA driver handle for a device's primary context.
Definition: types.hpp:948
decltype(dim3::x) dimension_t
CUDA kernels are launched in grids of blocks of threads, in 3 dimensions.
Definition: types.hpp:296
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
int priority_t
CUDA streams have a scheduling priority, with lower values meaning higher priority.
Definition: types.hpp:243
Divide the cache resources to maximize available L1 cache at the expense of shared memory...
The full set of possible configuration parameters for launching a kernel on a GPU.
Definition: launch_configuration.hpp:69
typename cuda::rtc::detail_::types< Kind >::handle_type handle_t
Raw program handle used by the NVIDIA run-time compilation libraries's API calls: // The NVRTC librar...
Definition: types.hpp:124
CUcontext handle_t
Raw CUDA driver handle for a context; see {context_t}.
Definition: types.hpp:880
dimension_t width
The three constituent individual dimensions, named.
Definition: types.hpp:109
dimension_t block_dimension_t
CUDA kernels are launched in grids of blocks of threads, in 3 dimensions.
Definition: types.hpp:309
Default behavior; yield or spin based on a heuristic.
Definition: types.hpp:898
Yield control while waiting for results.
Definition: types.hpp:933
CUmemLocation location_t
Used in a limited number of API functions which can relate both to CUDA device memory and system memo...
Definition: types.hpp:555
constexpr dimension_type operator[](int i) const noexcept
Provides array-like access to the dimensions in different axes.
Definition: types.hpp:467
CUuuid uuid_t
The CUDA-driver-specific representation of a UUID value; see also {device_t::uuid()}.
Definition: types.hpp:973
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:852
static constexpr bool divides(dimensions_t lhs, dimensions_t rhs)
Definition: types.hpp:364
constexpr dimension_t operator[](int i) const noexcept
Provides array-like access to the dimensions in different axes.
Definition: types.hpp:376
context::host_thread_sync_scheduling_policy_t host_thread_sync_scheduling_policy_t
Definition: types.hpp:953
A richer (kind-of-a-)wrapper for CUDA's dim3 class, used to specify dimensions for blocks (in terms o...
Definition: types.hpp:322
CUevent handle_t
The CUDA driver's raw handle for events.
Definition: types.hpp:214
multiprocessor_shared_memory_bank_size_option_t
A physical core (SM)'s shared memory has multiple "banks"; at most one datum per bank may be accessed...
Definition: types.hpp:832
CUdevice_attribute attribute_t
CUDA devices have both "attributes" and "properties".
Definition: types.hpp:858
void point(const CharT *description, color_t color=color_t::Black())
Mark a single point on the profiler timeline, giving it also a color and some descriptive text...
Definition: profiling.hpp:185
unsigned size_t
Each physical core ("Symmetric Multiprocessor") on an nVIDIA GPU has a space of shared memory (see th...
Definition: types.hpp:732
dimension_t width
The two constituent individual dimensions, named; no "depth" for the 2D case.
Definition: types.hpp:159
host_thread_sync_scheduling_policy_t
Scheduling policies the CUDA driver may use when the host-side thread it is running in needs to wait ...
Definition: types.hpp:886
Block the thread until results are available.
Definition: types.hpp:922
size_t dimensionality_t
The index or number of dimensions of an entity (as opposed to the extent in any dimension) - typicall...
Definition: types.hpp:82
CUpointer_attribute attribute_t
Raw CUDA driver choice type for attributes of pointers.
Definition: types.hpp:664
::std::size_t size_t
A size type for use throughout the wrappers library (except when specific API functions limit the siz...
Definition: types.hpp:78
address_t address(memory::const_region_t region) noexcept
Definition: types.hpp:695
CUstreamCallback callback_t
The CUDA driver's raw handle for a host-side callback function.
Definition: types.hpp:254
multiprocessor_cache_preference_t
L1-vs-shared-memory balance option.
Definition: types.hpp:806
Composite dimensions for a grid - in terms of blocks, then also down into the block dimensions comple...
Definition: types.hpp:505
CUdevice_P2PAttribute attribute_t
While individual CUDA devices have individual "attributes" (attribute_t), there are also attributes c...
Definition: types.hpp:871
bool operator==(const context_t &lhs, const context_t &rhs) noexcept
Definition: context.hpp:768
CUarray handle_t
Raw CUDA driver handle for arrays (of any dimension)
Definition: array.hpp:34
size_t overall_dimension_t
Dimension of a grid in threads along one axis, i.e.
Definition: types.hpp:423
Dimensions of a grid in threads, i.e.
Definition: types.hpp:429
detail_::region_helper< memory::const_region_t > const_region_t
A child class of the generic const_region_t with some managed-memory-specific functionality.
Definition: memory.hpp:1976
size_t dimension_t
An individual dimension extent for an array.
Definition: types.hpp:91
CUstream handle_t
The CUDA driver's raw handle for streams.
Definition: types.hpp:236
void * as_pointer(device::address_t address) noexcept
Definition: types.hpp:702
the scheduling priority of a stream created without specifying any other priority value ...
Definition: types.hpp:246
constexpr overall_dimensions_t flatten() const noexcept
Definition: types.hpp:510
CUdeviceptr address_t
The numeric type which can represent the range of memory addresses on a CUDA device.
Definition: types.hpp:674
constexpr size_t volume() const noexcept
Definition: types.hpp:513
see heuristic
Definition: types.hpp:936
Divide the cache resources equally between actual L1 cache and shared memory.
static constexpr composite_dimensions_t point() noexcept
A named constructor idiom for the composite dimensions of a single-block grid with a single-thread bl...
Definition: types.hpp:520
Divide the cache resources to maximize available shared memory at the expense of L1 cache...
No preference for more L1 cache or for more shared memory; the API can do as it please.
initial_visibility_t
The choices of which categories CUDA devices must a managed memory region be visible to...
Definition: types.hpp:755
CUresult status_t
Indicates either the result (success or error index) of a CUDA Runtime or Driver API call...
Definition: types.hpp:74
CUDA's array memory-objects are multi-dimensional; but their dimensions, or extents, are not the same as cuda::grid::dimensions_t ; they may be much larger in each axis.
Definition: types.hpp:102
CUfunction_attribute attribute_t
Raw CUDA driver selector of a kernel attribute.
Definition: types.hpp:985
void zero(void *start, size_t num_bytes, optional_ref< const stream_t > stream={})
Sets all bytes in a region of memory to 0 (zero)
Definition: memory.hpp:418