#ifndef CUDA_API_WRAPPERS_COMMON_TYPES_HPP_
#define CUDA_API_WRAPPERS_COMMON_TYPES_HPP_

#if (__cplusplus < 201103L && (!defined(_MSVC_LANG) || _MSVC_LANG < 201103L))
#error "The CUDA API headers can only be compiled with C++11 or a later version of the C++ language standard"
#endif

#include <builtin_types.h>

#include <type_traits>
#include <vector>

#ifndef CPP14_CONSTEXPR
#if __cplusplus >= 201402L
#define CPP14_CONSTEXPR constexpr
#else
#define CPP14_CONSTEXPR
#endif
#endif

#ifdef NDEBUG
#define NOEXCEPT_IF_NDEBUG noexcept(true)
#else
#define NOEXCEPT_IF_NDEBUG noexcept(false)
#endif

template <bool B>
using bool_constant = ::std::integral_constant<bool, B>;
using true_type  = bool_constant<true>;
using false_type = bool_constant<false>;
template <bool...> struct bool_pack;

template <bool... bs>
using all_true = ::std::is_same<bool_pack<bs..., true>, bool_pack<true, bs...>>;
template <bool B, typename T = void>
using enable_if_t = typename ::std::enable_if<B, T>::type;

template <typename T>
using remove_reference_t = typename ::std::remove_reference<T>::type;
template <typename, typename = void>
struct has_data_method : ::std::false_type { };

template <typename T>
struct has_data_method<T, cuda::detail_::void_t<decltype(::std::declval<T>().data())>> : ::std::true_type { };

template <typename, typename = void>
struct has_value_type_member : ::std::false_type { };

template <typename T>
struct has_value_type_member<T, cuda::detail_::void_t<typename T::value_type>> : ::std::true_type { };
template <typename T>
struct is_kinda_like_contiguous_container :
    ::std::integral_constant<bool,
        has_data_method<typename ::std::remove_reference<T>::type>::value
        and has_value_type_member<typename ::std::remove_reference<T>::type>::value
    > { };
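// Illustrative check (a sketch, not part of the original header): ::std::vector
// exposes both a data() method and a value_type member, so it qualifies as
// "kinda like" a contiguous container; a plain int does not.
//
//     static_assert(is_kinda_like_contiguous_container<::std::vector<int>>::value, "");
//     static_assert(not is_kinda_like_contiguous_container<int>::value, "");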
using size_t = ::std::size_t;

using dimension_t = size_t;
// CUDA's array memory-objects are multi-dimensional; their dimensions, or extents,
// are not the same as cuda::grid::dimensions_t, and may be much larger in each axis.
template <dimensionality_t NumDimensions>
struct dimensions_t;

template <>
struct dimensions_t<3> {
    dimension_t width, height, depth;

    constexpr __host__ __device__ dimensions_t(dimension_t width_, dimension_t height_, dimension_t depth_)
        : width(width_), height(height_), depth(depth_) { }
    constexpr __host__ __device__ dimensions_t(cudaExtent e)
        : dimensions_t(e.width, e.height, e.depth) { }
    constexpr __host__ __device__ dimensions_t(const dimensions_t& other)
        : dimensions_t(other.width, other.height, other.depth) { }
    constexpr __host__ __device__ dimensions_t(dimensions_t&& other)
        : dimensions_t(other.width, other.height, other.depth) { }
    constexpr __host__ __device__ dimensions_t(dimension_t linear_size)
        : dimensions_t(linear_size, 1, 1) { }

    CPP14_CONSTEXPR dimensions_t& operator=(const dimensions_t& other) = default;
    CPP14_CONSTEXPR dimensions_t& operator=(dimensions_t&& other) = default;

    constexpr __host__ __device__ operator cudaExtent() const
    {
        return { width, height, depth };
    }

    constexpr __host__ __device__ size_t volume() const { return width * height * depth; }
    constexpr __host__ __device__ size_t size() const { return volume(); }
    constexpr __host__ __device__ dimensionality_t dimensionality() const
    {
        return ((width > 1) + (height > 1) + (depth > 1));
    }

    static constexpr __host__ __device__ dimensions_t cube(dimension_t x) { return dimensions_t{ x, x, x }; }
    static constexpr __host__ __device__ dimensions_t zero() { return cube(0); }
};
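// Illustrative usage (a sketch, assuming these definitions live in the cuda::array
// namespace, as in the cuda-api-wrappers library):
//
//     cuda::array::dimensions_t<3> extents { 640, 480, 4 };
//     // extents.volume() == 640 * 480 * 4; extents.dimensionality() == 3
//     auto cubic = cuda::array::dimensions_t<3>::cube(16);   // 16 x 16 x 16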
template <>
struct dimensions_t<2> {
    dimension_t width, height;

    constexpr __host__ __device__ dimensions_t(dimension_t width_, dimension_t height_)
        : width(width_), height(height_) { }
    constexpr __host__ __device__ dimensions_t(const dimensions_t& other)
        : dimensions_t(other.width, other.height) { }
    constexpr __host__ __device__ dimensions_t(dimensions_t&& other)
        : dimensions_t(other.width, other.height) { }
    constexpr __host__ __device__ dimensions_t(dimension_t linear_size)
        : dimensions_t(linear_size, 1) { }

    CPP14_CONSTEXPR __host__ __device__ dimensions_t& operator=(const dimensions_t& other)
    {
        width = other.width; height = other.height;
        return *this;
    }
    CPP14_CONSTEXPR __host__ __device__ dimensions_t& operator=(dimensions_t&& other)
    {
        width = other.width; height = other.height;
        return *this;
    }

    constexpr __host__ __device__ size_t area() const { return width * height; }
    constexpr __host__ __device__ size_t size() const { return area(); }
    constexpr __host__ __device__ dimensionality_t dimensionality() const
    {
        return ((width > 1) + (height > 1));
    }

    static constexpr __host__ __device__ dimensions_t square(dimension_t x) { return dimensions_t{ x, x }; }
    static constexpr __host__ __device__ dimensions_t zero() { return square(0); }
};
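// Illustrative usage (a sketch): the single-argument constructor treats its argument
// as a linear extent, i.e. a width with a height of 1.
//
//     dimensions_t<2> scanline { 1024 };   // width 1024, height 1
//     // scanline.area() == 1024; scanline.dimensionality() == 1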
#if CUDA_VERSION >= 10000
using callback_t = CUhostFn;
#else
using callback_t = CUstreamCallback;
#endif
// A richer (kind-of-a-)wrapper for CUDA's dim3 class, used to specify dimensions
// for blocks and for grids (in terms of blocks).
struct dimensions_t {
    dimension_t x, y, z;

    constexpr __host__ __device__ dimensions_t(dimension_t x_, dimension_t y_, dimension_t z_)
        : x(x_), y(y_), z(z_) { }
    constexpr __host__ __device__ dimensions_t(const uint3& v) : dimensions_t(v.x, v.y, v.z) { }
    constexpr __host__ __device__ dimensions_t(const dim3& dims) : dimensions_t(dims.x, dims.y, dims.z) { }
    constexpr __host__ __device__ dimensions_t(dim3&& dims) : dimensions_t(dims.x, dims.y, dims.z) { }

    constexpr __host__ __device__ operator uint3(void) const { return { x, y, z }; }
    // Not constexpr, since dim3's constructors are not constexpr
    __host__ __device__ operator dim3(void) const { return { x, y, z }; }

    constexpr __host__ __device__ size_t volume() const { return static_cast<size_t>(x) * y * z; }
    constexpr __host__ __device__ dimensionality_t dimensionality() const
    {
        return ((z > 1) + (y > 1) + (x > 1));
    }

    static constexpr __host__ __device__ dimensions_t cube(dimension_t x)   { return dimensions_t{ x, x, x }; }
    static constexpr __host__ __device__ dimensions_t square(dimension_t x) { return dimensions_t{ x, x, 1 }; }
    static constexpr __host__ __device__ dimensions_t line(dimension_t x)   { return dimensions_t{ x, 1, 1 }; }
    static constexpr __host__ __device__ dimensions_t point()               { return dimensions_t{ 1, 1, 1 }; }

    static bool divides(dimensions_t lhs, dimensions_t rhs)
    {
        return
            (rhs.x % lhs.x == 0) and
            (rhs.y % lhs.y == 0) and
            (rhs.z % lhs.z == 0);
    }
};
constexpr inline bool operator==(const dim3& lhs, const dim3& rhs) noexcept
{
    return lhs.x == rhs.x and lhs.y == rhs.y and lhs.z == rhs.z;
}
constexpr inline bool operator!=(const dim3& lhs, const dim3& rhs) noexcept
{
    return not (lhs == rhs);
}

constexpr inline bool operator==(const dimensions_t& lhs, const dimensions_t& rhs) noexcept
{
    return lhs.x == rhs.x and lhs.y == rhs.y and lhs.z == rhs.z;
}
constexpr inline bool operator!=(const dimensions_t& lhs, const dimensions_t& rhs) noexcept
{
    return not (lhs == rhs);
}
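// Illustrative usage (a sketch): this type converts implicitly to and from dim3,
// so it can be passed wherever raw launch dimensions are expected.
//
//     dimensions_t block_dims = dimensions_t::square(16);   // 16 x 16 x 1
//     dim3 raw_dims = block_dims;                           // implicit conversion to dim3
//     // block_dims.volume() == 256, block_dims.dimensionality() == 2
//     // dimensions_t::divides({ 2, 2, 1 }, block_dims) is true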
struct overall_dimensions_t;

// Composite dimensions for a grid: the dimensions of the grid in terms of blocks,
// together with the dimensions of its blocks in terms of threads.
struct composite_dimensions_t {
    dimensions_t       grid;
    block_dimensions_t block;

    constexpr overall_dimensions_t flatten() const;
    constexpr size_t volume() const;
    constexpr size_t dimensionality() const;

    static constexpr composite_dimensions_t point()
    {
        return { dimensions_t::point(), block_dimensions_t::point() };
    }
};

constexpr bool operator==(composite_dimensions_t lhs, composite_dimensions_t rhs) noexcept
{
    return (lhs.grid == rhs.grid) and (lhs.block == rhs.block);
}

constexpr bool operator!=(composite_dimensions_t lhs, composite_dimensions_t rhs) noexcept
{
    return not (lhs == rhs);
}
// Dimension of a grid in threads along one axis
using overall_dimension_t = size_t;

// Dimensions of a grid in threads (rather than in blocks)
struct overall_dimensions_t {
    using dimension_type = overall_dimension_t;
    dimension_type x, y, z;

    constexpr __host__ __device__ overall_dimensions_t(
        dimension_type width_, dimension_type height_, dimension_type depth_) noexcept
        : x(width_), y(height_), z(depth_) { }
    constexpr __host__ __device__ overall_dimensions_t(const dim3& dims) noexcept
        : x(dims.x), y(dims.y), z(dims.z) { }
    constexpr __host__ __device__ overall_dimensions_t(const dimensions_t& dims) noexcept
        : x(dims.x), y(dims.y), z(dims.z) { }
    constexpr __host__ __device__ size_t volume() const noexcept { return x * y * z; }
    constexpr __host__ __device__ size_t size() const noexcept { return volume(); }
    constexpr __host__ __device__ dimensionality_t dimensionality() const noexcept
    {
        return ((x > 1) + (y > 1) + (z > 1));
    }
};

constexpr bool operator==(overall_dimensions_t lhs, overall_dimensions_t rhs) noexcept
{
    return (lhs.x == rhs.x) and (lhs.y == rhs.y) and (lhs.z == rhs.z);
}

constexpr bool operator!=(overall_dimensions_t lhs, overall_dimensions_t rhs) noexcept
{
    return not (lhs == rhs);
}
constexpr size_t composite_dimensions_t::volume() const { return flatten().volume(); }
constexpr size_t composite_dimensions_t::dimensionality() const { return flatten().dimensionality(); }
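// Illustrative usage (a sketch): a composite dimension pairs the grid's dimensions
// (in blocks) with the blocks' dimensions (in threads); its volume is the overall
// number of threads in the launch.
//
//     composite_dimensions_t launch_dims { dimensions_t{ 10, 1, 1 }, block_dimensions_t{ 256, 1, 1 } };
//     // launch_dims.volume() == 10 * 256 == 2560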
#if CUDA_VERSION >= 10020
enum : bool {
    read_enabled   = true,
    read_disabled  = false,
    write_enabled  = true,
    write_disabled = false
};

struct access_permissions_t {
    bool read;
    bool write;

    operator CUmemAccess_flags() const noexcept
    {
        return read ?
            (write ? CU_MEM_ACCESS_FLAGS_PROT_READWRITE : CU_MEM_ACCESS_FLAGS_PROT_READ) :
            CU_MEM_ACCESS_FLAGS_PROT_NONE;
    }

    static access_permissions_t from_access_flags(CUmemAccess_flags access_flags)
    {
        access_permissions_t result;
        result.read  = (access_flags & CU_MEM_ACCESS_FLAGS_PROT_READ);
        result.write = (access_flags & CU_MEM_ACCESS_FLAGS_PROT_READWRITE);
        return result;
    }

    static constexpr access_permissions_t read_and_write()
    {
        return access_permissions_t{ read_enabled, write_enabled };
    }
};
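// Illustrative usage (a sketch): converting between the wrapper and the raw driver flags.
//
//     access_permissions_t permissions = access_permissions_t::read_and_write();
//     CUmemAccess_flags raw_flags = permissions;   // CU_MEM_ACCESS_FLAGS_PROT_READWRITE
//     auto round_tripped = access_permissions_t::from_access_flags(raw_flags);
//     // round_tripped.read and round_tripped.write are both true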
namespace physical_allocation {

enum class shared_handle_kind_t : ::std::underlying_type<CUmemAllocationHandleType>::type {
#if CUDA_VERSION >= 11020
    no_export             = CU_MEM_HANDLE_TYPE_NONE,
#endif
    posix_file_descriptor = CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR,
    win32_handle          = CU_MEM_HANDLE_TYPE_WIN32,
    win32_kmt             = CU_MEM_HANDLE_TYPE_WIN32_KMT,
};

namespace detail_ {

template <shared_handle_kind_t SharedHandleKind> struct shared_handle_type_helper;

template <> struct shared_handle_type_helper<shared_handle_kind_t::posix_file_descriptor> { using type = int; };
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
template <> struct shared_handle_type_helper<shared_handle_kind_t::win32_handle> { using type = void *; };
#endif

} // namespace detail_

template <shared_handle_kind_t SharedHandleKind>
using shared_handle_t = typename detail_::shared_handle_type_helper<SharedHandleKind>::type;
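// Illustrative usage (a sketch): on POSIX systems, the shared handle for an
// exportable physical allocation is a plain file descriptor.
//
//     using shared_fd_t = shared_handle_t<shared_handle_kind_t::posix_file_descriptor>;
//     static_assert(::std::is_same<shared_fd_t, int>::value, "expected a file-descriptor handle");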
} // namespace physical_allocation
#endif // CUDA_VERSION >= 10020

#if CUDA_VERSION >= 11020
using handle_t = CUmemoryPool;
using shared_handle_kind_t = physical_allocation::shared_handle_kind_t;
using physical_allocation::shared_handle_t;
#endif // CUDA_VERSION >= 11020

using attribute_t = CUpointer_attribute;
static_assert(sizeof(void *) == sizeof(device::address_t), "Unexpected address size");
// Return a pointer's address as a numeric value of the type appropriate for the device
inline address_t address(const void* device_ptr) noexcept
{
    static_assert(sizeof(void*) == sizeof(address_t),
        "Incompatible sizes for a void pointer and memory::device::address_t");
    return reinterpret_cast<address_t>(device_ptr);
}
// The converse: reinterpret a numeric device address as a (void) pointer
inline void* as_pointer(device::address_t address) noexcept
{
    static_assert(sizeof(void*) == sizeof(device::address_t),
        "Incompatible sizes for a void pointer and memory::device::address_t");
    return reinterpret_cast<void*>(address);
}
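// Illustrative round-trip (a sketch): converting a pointer to a numeric device
// address and back again.
//
//     void* device_ptr = /* ... some device pointer ... */;
//     auto numeric     = address(device_ptr);   // a device::address_t, i.e. CUdeviceptr
//     void* same_ptr   = as_pointer(numeric);   // same_ptr == device_ptr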
using size_t = unsigned;

enum class initial_visibility_t {
    to_all_devices,
    to_supporters_of_concurrent_managed_access,
};

using range_attribute_t = CUmem_range_attribute;

#if CUDA_VERSION >= 11070
enum class barrier_scope_t : typename ::std::underlying_type<CUstreamMemoryBarrier_flags>::type {
    device = CU_STREAM_MEMORY_BARRIER_TYPE_GPU,
    system = CU_STREAM_MEMORY_BARRIER_TYPE_SYS
};
#endif // CUDA_VERSION >= 11070

#if CUDA_VERSION >= 10000
using handle_t = CUexternalMemory;
struct subregion_spec_t {
    size_t offset;
    size_t size;
};
#endif // CUDA_VERSION >= 10000

// A physical core (SM)'s shared memory has multiple "banks"; at most one datum
// per bank may be accessed simultaneously.
enum multiprocessor_shared_memory_bank_size_option_t
    : ::std::underlying_type<CUsharedconfig>::type
{
    device_default       = CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE,
    four_bytes_per_bank  = CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE,
    eight_bytes_per_bank = CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE
};
namespace peer_to_peer {

// An attribute of the peer-to-peer relationship between a pair of CUDA devices
using attribute_t = CUdevice_P2PAttribute;

} // namespace peer_to_peer

namespace context {

using handle_t = CUcontext;
using flags_t = unsigned;

} // namespace context

using flags_t = context::flags_t;

namespace primary_context {

using handle_t = cuda::context::handle_t;

} // namespace primary_context

using native_word_t = unsigned;
template <typename T, typename U>
inline T identity_cast(U&& x)
{
    static_assert(
        ::std::is_same<
            typename ::std::remove_reference<T>::type,
            typename ::std::remove_reference<U>::type
        >::value,
        "Casting to a different type - don't use identity_cast");
    return static_cast<T>(::std::forward<U>(x));
}
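// Illustrative usage (a sketch): identity_cast only compiles when the source and
// destination types match up to reference qualification, e.g. when adding or
// removing an lvalue reference without risking an actual conversion.
//
//     int i = 42;
//     int& r = identity_cast<int&>(i);          // fine - same underlying type
//     // double d = identity_cast<double>(i);   // would fail the static_assert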
using uuid_t = CUuuid;

using handle_t = CUmodule;

using attribute_t = CUfunction_attribute;
using attribute_value_t = int;

using handle_t = CUfunction;

template <typename T>
using dynarray = ::std::vector<T>;
#endif // CUDA_API_WRAPPERS_COMMON_TYPES_HPP_