cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
types.hpp
1 
18 #pragma once
19 #ifndef CUDA_API_WRAPPERS_COMMON_TYPES_HPP_
20 #define CUDA_API_WRAPPERS_COMMON_TYPES_HPP_
21 
22 #ifdef _MSC_VER
23 // See @url https://stackoverflow.com/q/4913922/1593077
24 #define NOMINMAX
25 #endif
26 
27 #if (__cplusplus < 201103L && (!defined(_MSVC_LANG) || _MSVC_LANG < 201103L))
28 #error "The CUDA API headers can only be compiled with C++11 or a later version of the C++ language standard"
29 #endif
30 
31 #include "detail/optional.hpp"
32 #include "detail/optional_ref.hpp"
33 #include "detail/span.hpp"
34 #include "detail/region.hpp"
35 #include "detail/type_traits.hpp"
36 
37 #ifndef __CUDACC__
38 #include <builtin_types.h>
39 #endif
40 #include <cuda.h>
41 
42 #include <type_traits>
43 #include <utility>
44 #include <cassert>
45 #include <cstddef> // for ::std::size_t
46 #include <cstdint>
47 #include <vector>
48 #ifndef NDEBUG
49 #include <stdexcept>
50 #endif
51 
52 #ifndef __CUDACC__
53 #ifndef __device__
54 #define __device__
55 #define __host__
56 #endif
57 #endif
58 
59 
61 namespace cuda {
62 
63 // This alias for plain C arrays is required due to an MSVC bug, which makes MSVC
64 // reject plain C-array reference parameters to functions in some circumstances;
65 // see: https://developercommunity.visualstudio.com/t/MSVC-rejects-syntax-of-reference-to-C-ar/10792039
66 template <typename T, size_t N>
67 using c_array = T[N];
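// Illustrative usage sketch (added commentary, not part of the original header):
// with the c_array alias, a function can accept a reference to a fixed-size C array
// even under the affected MSVC versions:
//
//     void fill_ones(cuda::c_array<int, 4>& arr)   // i.e. int (&arr)[4]
//     {
//         for (auto& element : arr) { element = 1; }
//     }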
68 
77 using status_t = CUresult;
78 
81 using size_t = ::std::size_t;
82 
85 using dimensionality_t = size_t;
86 
91 namespace array {
92 
94 using dimension_t = size_t;
95 
104 template<dimensionality_t NumDimensions>
105 struct dimensions_t;
106 
108 template<>
109 struct dimensions_t<3> // this almost-inherits cudaExtent
110 {
112  dimension_t width, height, depth;
113 
114  constexpr __host__ __device__ dimensions_t(dimension_t width_, dimension_t height_, dimension_t depth_)
115  : width(width_), height(height_), depth(depth_) { }
116  constexpr __host__ __device__ dimensions_t(cudaExtent e)
117  : dimensions_t(e.width, e.height, e.depth) { }
118  constexpr __host__ __device__ dimensions_t(const dimensions_t& other)
119  : dimensions_t(other.width, other.height, other.depth) { }
120  constexpr __host__ __device__ dimensions_t(dimensions_t&& other)
121  : dimensions_t(other.width, other.height, other.depth) { }
122  constexpr __host__ __device__ dimensions_t(dimension_t linear_size)
123  : dimensions_t(linear_size, 1, 1) { }
124 
125  CPP14_CONSTEXPR dimensions_t& operator=(const dimensions_t& other) = default;
126  CPP14_CONSTEXPR dimensions_t& operator=(dimensions_t&& other) = default;
127 
128  constexpr __host__ __device__ operator cudaExtent() const
129  {
130  return { width, height, depth };
131  // Note: We're not using make_cudaExtent here because:
132  // 1. It's not constexpr and
133  // 2. It doesn't do anything except construct the plain struct - as of CUDA 10 at least
134  }
135 
137  constexpr __host__ __device__ size_t volume() const { return width * height * depth; }
138 
140  constexpr __host__ __device__ size_t size() const { return volume(); }
141 
144  constexpr __host__ __device__ dimensionality_t dimensionality() const
145  {
146  return ((width > 1) + (height > 1) + (depth > 1));
147  }
148 
150  static constexpr __host__ __device__ dimensions_t cube(dimension_t x) { return dimensions_t{ x, x, x }; }
151 
153  // A named constructor idiom for an empty (zero-extent) set of dimensions
154  static constexpr __host__ __device__ dimensions_t zero() { return cube(0); }
155 };
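// Illustrative sketch (added commentary, not part of the original header): for a
// 64 x 32 x 1 extent, volume() multiplies all extents, while dimensionality() only
// counts the axes whose extent exceeds 1:
//
//     constexpr cuda::array::dimensions_t<3> dims { 64, 32, 1 };
//     static_assert(dims.volume() == 64 * 32, "");
//     static_assert(dims.dimensionality() == 2, "");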
156 
158 template<>
159 struct dimensions_t<2>
160 {
162  dimension_t width, height;
163 
164  constexpr __host__ __device__ dimensions_t(dimension_t width_, dimension_t height_)
165  : width(width_), height(height_) { }
166  constexpr __host__ __device__ dimensions_t(const dimensions_t& other)
167  : dimensions_t(other.width, other.height) { }
168  constexpr __host__ __device__ dimensions_t(dimensions_t&& other)
169  : dimensions_t(other.width, other.height) { }
170  constexpr __host__ __device__ dimensions_t(dimension_t linear_size)
171  : dimensions_t(linear_size, 1) { }
172 
173  CPP14_CONSTEXPR __host__ __device__ dimensions_t& operator=(const dimensions_t& other)
174  {
175  width = other.width; height = other.height;
176  return *this;
177 
178  }
179  CPP14_CONSTEXPR __host__ __device__ dimensions_t& operator=(dimensions_t&& other)
180  {
181  width = other.width; height = other.height;
182  return *this;
183  }
184 
186  constexpr __host__ __device__ size_t area() const { return width * height; }
187 
189  constexpr __host__ __device__ size_t size() const { return area(); }
190 
193  constexpr __host__ __device__ dimensionality_t dimensionality() const
194  {
195  return ((width > 1) + (height > 1));
196  }
197 
198  // Named constructor idioms
199 
201  static constexpr __host__ __device__ dimensions_t square(dimension_t x) { return dimensions_t{ x, x }; }
202 
204  // A named constructor idiom for an empty (zero-extent) set of dimensions
205  static constexpr __host__ __device__ dimensions_t zero() { return square(0); }
206 };
207 
208 } // namespace array
209 
214 namespace event {
215 
217 using handle_t = CUevent;
218 
223 namespace ipc {
224 
226 using handle_t = CUipcEventHandle;
227 
228 } // namespace ipc
229 
230 } // namespace event
231 
236 namespace stream {
237 
239 using handle_t = CUstream;
240 
246 using priority_t = int;
247 enum : priority_t {
249  default_priority = 0
250 };
251 
252 
254 #if CUDA_VERSION >= 10000
255 using callback_t = CUhostFn;
256 #else
257 using callback_t = CUstreamCallback;
258 #endif
259 
260 #if CUDA_VERSION >= 10000
261 
262 namespace capture {
263 
264 enum class mode_t : ::std::underlying_type<CUstreamCaptureMode>::type {
265  global = CU_STREAM_CAPTURE_MODE_GLOBAL,
266  thread = CU_STREAM_CAPTURE_MODE_THREAD_LOCAL,
267  thread_local_ = thread,
268  relaxed = CU_STREAM_CAPTURE_MODE_RELAXED
269 };
270 
271 enum class state_t : ::std::underlying_type<CUstreamCaptureStatus>::type {
272  active = CU_STREAM_CAPTURE_STATUS_ACTIVE,
273  capturing = active,
274  invalidated = CU_STREAM_CAPTURE_STATUS_INVALIDATED,
275  none = CU_STREAM_CAPTURE_STATUS_NONE,
276  not_capturing = none
277 };
278 
279 } // namespace capture
280 
281 inline bool is_capturing(capture::state_t status) noexcept
282 {
283  return status == capture::state_t::active;
284 }
285 
286 #endif // CUDA_VERSION >= 10000
287 
288 } // namespace stream
289 
290 namespace grid {
291 
299 using dimension_t = decltype(dim3::x);
300 
312 using block_dimension_t = dimension_t;
313 
314 
325 struct dimensions_t // this almost-inherits dim3
326 {
327  dimension_t x, y, z;
328  constexpr __host__ __device__ dimensions_t(dimension_t x_ = 1, dimension_t y_ = 1, dimension_t z_ = 1) noexcept
329  : x(x_), y(y_), z(z_) { }
330 
331  constexpr __host__ __device__ dimensions_t(const uint3& v) noexcept : dimensions_t(v.x, v.y, v.z) { }
332  constexpr __host__ __device__ dimensions_t(const dim3& dims) noexcept : dimensions_t(dims.x, dims.y, dims.z) { }
333  constexpr __host__ __device__ dimensions_t(dim3&& dims) noexcept : dimensions_t(dims.x, dims.y, dims.z) { }
334 
335  constexpr __host__ __device__ operator uint3(void) const { return { x, y, z }; }
336 
337  // This _should_ have been constexpr, but nVIDIA have not marked the dim3 constructors
338  // as constexpr, so it isn't
339  __host__ __device__ operator dim3(void) const noexcept { return { x, y, z }; }
340 
342  constexpr __host__ __device__ size_t volume() const noexcept { return static_cast<size_t>(x) * y * z; }
343 
346  constexpr __host__ __device__ dimensionality_t dimensionality() const noexcept
347  {
348  return ((z > 1) + (y > 1) + (x > 1));
349  }
350 
351  // Named constructor idioms
352 
354  static constexpr __host__ __device__ dimensions_t cube(dimension_t x) noexcept { return dimensions_t{ x, x, x }; }
355 
357  static constexpr __host__ __device__ dimensions_t square(dimension_t x) noexcept { return dimensions_t{ x, x, 1 }; }
358 
360  static constexpr __host__ __device__ dimensions_t line(dimension_t x) noexcept { return dimensions_t{ x, 1, 1 }; }
361 
363  static constexpr __host__ __device__ dimensions_t point() noexcept { return dimensions_t{ 1, 1, 1 }; }
364 
367  static constexpr bool divides(dimensions_t lhs, dimensions_t rhs)
368  {
369  return
370  (rhs.x % lhs.x == 0) and
371  (rhs.y % lhs.y == 0) and
372  (rhs.z % lhs.z == 0);
373  }
374 
379  constexpr dimension_t operator[](int i) const noexcept {
380  return (i == 0) ? x :
381  (i == 1) ? y :
382  z;
383  }
384  CPP14_CONSTEXPR dimension_t& operator[](int i) noexcept {
385  return (i == 0) ? x :
386  (i == 1) ? y :
387  z;
388  }
390 };
391 
393 constexpr inline bool operator==(const dim3& lhs, const dim3& rhs) noexcept
394 {
395  return lhs.x == rhs.x and lhs.y == rhs.y and lhs.z == rhs.z;
396 }
397 constexpr inline bool operator!=(const dim3& lhs, const dim3& rhs) noexcept
398 {
399  return not (lhs == rhs);
400 }
401 constexpr inline bool operator==(const dimensions_t& lhs, const dimensions_t& rhs) noexcept
402 {
403  return lhs.x == rhs.x and lhs.y == rhs.y and lhs.z == rhs.z;
404 }
405 constexpr inline bool operator!=(const dimensions_t& lhs, const dimensions_t& rhs) noexcept
406 {
407  return not (lhs == rhs);
408 }
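// Illustrative sketch (added commentary, not part of the original header):
// grid::dimensions_t is constructible from, and convertible to, dim3; divides()
// checks element-wise divisibility, e.g. whether block dimensions evenly tile a
// larger set of dimensions:
//
//     constexpr cuda::grid::dimensions_t block_dims { 32, 8, 1 };
//     constexpr cuda::grid::dimensions_t total_dims { 64, 16, 1 };
//     static_assert(cuda::grid::dimensions_t::divides(block_dims, total_dims), "");
//     dim3 raw_dims = total_dims; // implicit conversion, usable in raw CUDA calls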
410 
411 
419 using block_dimensions_t = dimensions_t;
420 
426 using overall_dimension_t = size_t;
427 
432 struct overall_dimensions_t
433 {
434  using dimension_type = overall_dimension_t;
435  dimension_type x, y, z;
436 
437  constexpr __host__ __device__ overall_dimensions_t(
438  dimension_type width_, dimension_type height_, dimension_type depth_) noexcept
439  : x(width_), y(height_), z(depth_) { }
440 
441  constexpr __host__ __device__ overall_dimensions_t(const dim3& dims) noexcept
442  : x(dims.x), y(dims.y), z(dims.z) { }
443 
444  constexpr __host__ __device__ overall_dimensions_t(dim3&& dims) noexcept
445  : x(dims.x), y(dims.y), z(dims.z) { }
446 
447  constexpr __host__ __device__ overall_dimensions_t(const overall_dimensions_t& other) noexcept
448  : overall_dimensions_t(other.x, other.y, other.z) { }
449 
450  constexpr __host__ __device__ overall_dimensions_t(overall_dimensions_t&& other) noexcept
451  : overall_dimensions_t(other.x, other.y, other.z) { }
452 
453  explicit constexpr __host__ __device__ overall_dimensions_t(dimensions_t dims) noexcept
454  : overall_dimensions_t(dims.x, dims.y, dims.z) { }
455 
456  CPP14_CONSTEXPR overall_dimensions_t& operator=(const overall_dimensions_t& other) noexcept = default;
457  CPP14_CONSTEXPR overall_dimensions_t& operator=(overall_dimensions_t&& other) noexcept = default;
458 
459  constexpr __host__ __device__ size_t volume() const noexcept { return x * y * z; }
460  constexpr __host__ __device__ size_t size() const noexcept { return volume(); }
461  constexpr __host__ __device__ dimensionality_t dimensionality() const noexcept
462  {
463  return ((x > 1) + (y > 1) + (z > 1));
464  }
465 
470  constexpr dimension_type operator[](int i) const noexcept {
471  return (i == 0) ? x :
472  (i == 1) ? y :
473  z;
474  }
475  CPP14_CONSTEXPR dimension_type& operator[](int i) noexcept {
476  return (i == 0) ? x :
477  (i == 1) ? y :
478  z;
479  }
481 };
482 
484 constexpr bool operator==(overall_dimensions_t lhs, overall_dimensions_t rhs) noexcept
485 {
486  return (lhs.x == rhs.x) and (lhs.y == rhs.y) and (lhs.z == rhs.z);
487 }
488 
489 constexpr bool operator!=(overall_dimensions_t lhs, overall_dimensions_t rhs) noexcept
490 {
491  return not (lhs == rhs);
492 }
493 
494 constexpr overall_dimensions_t operator*(dimensions_t grid_dims, block_dimensions_t block_dims) noexcept
495 {
496  return overall_dimensions_t {
497  grid_dims.x * overall_dimension_t { block_dims.x },
498  grid_dims.y * overall_dimension_t { block_dims.y },
499  grid_dims.z * overall_dimension_t { block_dims.z },
500  };
501 }
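// Worked example (added commentary, not part of the original header): multiplying
// grid dimensions (counted in blocks) by block dimensions (counted in threads)
// yields the overall dimensions of the grid in threads:
//
//     constexpr cuda::grid::dimensions_t       grid_dims  { 2, 2, 1 };
//     constexpr cuda::grid::block_dimensions_t block_dims { 32, 8, 1 };
//     constexpr auto overall = grid_dims * block_dims; // overall_dimensions_t{ 64, 16, 1 }
//     static_assert(overall.volume() == 1024, "");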
503 
508 struct composite_dimensions_t {
509  grid::dimensions_t grid;
510  block_dimensions_t block;
511 
513  constexpr overall_dimensions_t flatten() const noexcept { return grid * block; }
514 
516  constexpr size_t volume() const noexcept { return flatten().volume(); }
517 
519  constexpr size_t dimensionality() const noexcept { return flatten().dimensionality(); }
520 
523  static constexpr composite_dimensions_t point() noexcept
524  {
525  return composite_dimensions_t{ dimensions_t::point(), block_dimensions_t::point() };
526  }
527 
529 #if __cplusplus >= 202002L
530  constexpr bool operator==(const composite_dimensions_t&) const noexcept = default;
531  constexpr bool operator!=(const composite_dimensions_t&) const noexcept = default;
532 #endif
533 };
535 
536 #if __cplusplus < 202002L
537 constexpr bool operator==(composite_dimensions_t lhs, composite_dimensions_t rhs) noexcept
539 {
540  return (lhs.grid == rhs.grid) and (lhs.block == rhs.block);
541 }
542 
543 constexpr bool operator!=(composite_dimensions_t lhs, composite_dimensions_t rhs) noexcept
544 {
545  return not (lhs == rhs);
546 }
548 #endif // __cplusplus < 202002L
549 
550 } // namespace grid
551 
553 namespace memory {
554 
555 #if CUDA_VERSION >= 10020
556 
564 struct permissions_t {
565  bool read;
566  bool write;
567 
570  operator CUmemAccess_flags() const noexcept
571  {
572  return read ?
573  (write ? CU_MEM_ACCESS_FLAGS_PROT_READWRITE : CU_MEM_ACCESS_FLAGS_PROT_READ) :
574  CU_MEM_ACCESS_FLAGS_PROT_NONE;
575  }
576 
577 };
578 
579 namespace permissions {
580 
581 constexpr inline permissions_t none() { return permissions_t{ false, false }; }
582 constexpr inline permissions_t read_only() { return permissions_t{ true, false }; }
583 constexpr inline permissions_t write_only() { return permissions_t{ false, true }; }
584 constexpr inline permissions_t read_and_write() { return permissions_t{ true, true }; }
585 
586 namespace detail_ {
587 
589 inline permissions_t from_flags(CUmemAccess_flags access_flags)
590 {
591  bool read = (access_flags & CU_MEM_ACCESS_FLAGS_PROT_READ);
592  bool write = ((access_flags & CU_MEM_ACCESS_FLAGS_PROT_READWRITE) == CU_MEM_ACCESS_FLAGS_PROT_READWRITE); // the READWRITE value includes the READ bit, so match it exactly
593  return permissions_t{read, write};
594 }
595 
596 } // namespace detail_
597 
598 } // namespace permissions
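// Usage sketch (added commentary, not part of the original header): the named
// constructors build permissions_t values, which convert implicitly to the driver's
// CUmemAccess_flags:
//
//     auto perms = cuda::memory::permissions::read_and_write();
//     CUmemAccess_flags raw_flags = perms; // CU_MEM_ACCESS_FLAGS_PROT_READWRITE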
599 
600 
601 namespace physical_allocation {
602 
605 enum class shared_handle_kind_t : ::std::underlying_type<CUmemAllocationHandleType>::type {
606 #if CUDA_VERSION >= 11020
607  no_export = CU_MEM_HANDLE_TYPE_NONE,
608 #endif
609  posix_file_descriptor = CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR,
610  win32_handle = CU_MEM_HANDLE_TYPE_WIN32,
611  win32_kmt = CU_MEM_HANDLE_TYPE_WIN32_KMT,
612 };
613 
614 namespace detail_ {
615 
616 template<shared_handle_kind_t SharedHandleKind> struct shared_handle_type_helper;
617 
618 template <> struct shared_handle_type_helper<shared_handle_kind_t::posix_file_descriptor> { using type = int; };
619 #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
620 template <> struct shared_handle_type_helper<shared_handle_kind_t::win32_handle> { using type = void *; };
621 #endif
622 // TODO: What about WIN32_KMT?
623 
624 } // namespace detail_
625 
627 template<shared_handle_kind_t SharedHandleKind>
628 using shared_handle_t = typename detail_::shared_handle_type_helper<SharedHandleKind>::type;
629 
630 } // namespace physical_allocation
631 #endif // CUDA_VERSION >= 10020
632 #if CUDA_VERSION >= 11020
633 
635 namespace pool {
636 
638 using handle_t = CUmemoryPool;
639 
642 using shared_handle_kind_t = physical_allocation::shared_handle_kind_t;
643 
646 using physical_allocation::shared_handle_t;
647 
648 namespace ipc {
649 
652 using ptr_handle_t = CUmemPoolPtrExportData;
653 
654 } // namespace ipc
655 
656 } // namespace pool
657 #endif // CUDA_VERSION >= 11020
658 
659 namespace pointer {
660 
662 using attribute_t = CUpointer_attribute;
663 
664 } // namespace pointer
665 
666 namespace device {
667 
671 
672 using address_t = CUdeviceptr;
673 
674 static_assert(sizeof(void *) == sizeof(device::address_t), "Unexpected address size");
675 
682 inline address_t address(const void* device_ptr) noexcept
683 {
684  static_assert(sizeof(void*) == sizeof(address_t), "Incompatible sizes for a void pointer and memory::device::address_t");
685  return reinterpret_cast<address_t>(device_ptr);
686 }
687 
693 inline address_t address(memory::const_region_t region) noexcept { return address(region.start()); }
694 
695 } // namespace device
696 
700 inline void* as_pointer(device::address_t address) noexcept
701 {
702  static_assert(sizeof(void*) == sizeof(device::address_t), "Incompatible sizes for a void pointer and memory::device::address_t");
703  return reinterpret_cast<void*>(address);
704 }
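// Round-trip sketch (added commentary, not part of the original header): address()
// and as_pointer() convert between a plain void* and the driver's numeric
// CUdeviceptr without altering the value:
//
//     void* p = nullptr; // in practice, a pointer obtained from a device allocation
//     cuda::memory::device::address_t addr = cuda::memory::device::address(p);
//     assert(cuda::memory::as_pointer(addr) == p);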
705 
716 namespace shared {
717 
730 using size_t = unsigned;
731 
732 } // namespace shared
733 
750 namespace managed {
751 
753 enum class initial_visibility_t {
754  to_all_devices,
755  to_supporters_of_concurrent_managed_access,
756 };
757 
758 } // namespace managed
759 
760 #if CUDA_VERSION >= 11070
761 enum class barrier_scope_t : typename ::std::underlying_type<CUstreamMemoryBarrier_flags>::type {
765  device = CU_STREAM_MEMORY_BARRIER_TYPE_GPU,
766  system = CU_STREAM_MEMORY_BARRIER_TYPE_SYS
767 };
768 #endif // CUDA_VERSION >= 11070
769 
770 #if CUDA_VERSION >= 10000
771 namespace external {
773 
775 using handle_t = CUexternalMemory;
776 
780 struct subregion_spec_t {
781  size_t offset;
782  size_t size;
783 };
784 
785 } // namespace external
786 
787 #endif // CUDA_VERSION >= 10000
788 
789 } // namespace memory
790 
796 
804 enum class multiprocessor_cache_preference_t : ::std::underlying_type<CUfunc_cache_enum>::type {
806  no_preference = CU_FUNC_CACHE_PREFER_NONE,
808  equal_l1_and_shared_memory = CU_FUNC_CACHE_PREFER_EQUAL,
810  prefer_shared_memory_over_l1 = CU_FUNC_CACHE_PREFER_SHARED,
812  prefer_l1_over_shared_memory = CU_FUNC_CACHE_PREFER_L1,
813  // aliases
814  none = no_preference,
815  equal = equal_l1_and_shared_memory,
816  prefer_shared = prefer_shared_memory_over_l1,
817  prefer_l1 = prefer_l1_over_shared_memory,
818 };
819 
830 enum class multiprocessor_shared_memory_bank_size_option_t
831  : ::std::underlying_type<CUsharedconfig>::type
832 {
833  device_default = CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE,
834  four_bytes_per_bank = CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE,
835  eight_bytes_per_bank = CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE
836 };
837 
842 namespace device {
843 
850 using id_t = CUdevice;
851 
856 using attribute_t = CUdevice_attribute;
860 using attribute_value_t = int;
861 
862 namespace peer_to_peer {
863 
869 using attribute_t = CUdevice_P2PAttribute;
870 
871 } // namespace peer_to_peer
872 
873 } // namespace device
874 
875 namespace context {
876 
878 using handle_t = CUcontext;
879 
880 using flags_t = unsigned;
881 
884 enum host_thread_sync_scheduling_policy_t : unsigned int {
885 
896  heuristic = CU_CTX_SCHED_AUTO,
897 
901  default_ = heuristic,
902 
912  spin = CU_CTX_SCHED_SPIN,
913 
920  block = CU_CTX_SCHED_BLOCKING_SYNC,
921 
931  yield = CU_CTX_SCHED_YIELD,
932 
935 };
936 
937 } // namespace context
938 
939 namespace device {
940 
941 using flags_t = context::flags_t;
942 
943 namespace primary_context {
944 
946 using handle_t = context::handle_t;
947 
948 } // namespace primary_context
949 
951 using host_thread_sync_scheduling_policy_t = context::host_thread_sync_scheduling_policy_t;
952 
953 } // namespace device
954 
955 namespace detail_ {
956 
957 template <typename T, typename U>
958 inline T identity_cast(U&& x)
959 {
960  static_assert(::std::is_same<
961  typename ::std::remove_reference<T>::type,
962  typename ::std::remove_reference<U>::type
963  >::value,
964  "Casting to a different type - don't use identity_cast");
965  return static_cast<T>(::std::forward<U>(x));
966 }
967 
968 } // namespace detail_
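// Sketch of the intent (added commentary, not part of the original header):
// identity_cast only compiles when the source and target types agree up to
// reference qualification, and otherwise triggers the static_assert:
//
//     int x = 42;
//     int y = cuda::detail_::identity_cast<int>(x);        // fine
//     // auto d = cuda::detail_::identity_cast<double>(x); // would fail to compile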
969 
971 using uuid_t = CUuuid;
972 
973 namespace module {
974 
976 using handle_t = CUmodule;
977 
978 } // namespace module
979 
980 namespace kernel {
981 
983 using attribute_t = CUfunction_attribute;
984 
988 using attribute_value_t = int;
989 
990 // A raw CUDA driver handle for a kernel; prefer using the @ref cuda::kernel_t type.
991 using handle_t = CUfunction;
992 
993 } // namespace kernel
994 
995 #if CUDA_VERSION >= 10000
996 
1002 namespace graph {
1003 
1004 namespace node {
1005 
1007 using handle_t = CUgraphNode;
1009 using const_handle_t = CUgraphNode_st const *;
1010 
1011 constexpr const const_handle_t no_handle = nullptr;
1012 
1013 } // namespace node
1014 
1020 namespace template_ {
1021 
1023 using handle_t = CUgraph;
1024 constexpr const handle_t null_handle = nullptr;
1025 
1026 } // namespace template_
1027 
1033 namespace instance {
1034 
1036 using handle_t = CUgraphExec;
1037 
1038 } // namespace instance
1039 
1040 } // namespace graph
1041 
1042 #endif // CUDA_VERSION >= 10000
1043 
1044 } // namespace cuda
1045 
1046 #endif // CUDA_API_WRAPPERS_COMMON_TYPES_HPP_