cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
types.hpp
Go to the documentation of this file.
1 
18 #pragma once
19 #ifndef CUDA_API_WRAPPERS_COMMON_TYPES_HPP_
20 #define CUDA_API_WRAPPERS_COMMON_TYPES_HPP_
21 
22 #ifdef _MSC_VER
23 // See @url https://stackoverflow.com/q/4913922/1593077
24 #define NOMINMAX
25 #endif
26 
27 #include "detail/preamble.hpp"
28 #include "detail/optional.hpp"
29 #include "detail/optional_ref.hpp"
30 #include "detail/span.hpp"
31 #include "detail/region.hpp"
32 #include "detail/type_traits.hpp"
33 
34 #ifndef __CUDACC__
35 #include <builtin_types.h>
36 #endif
37 #include <cuda.h>
38 
39 #include <type_traits>
40 #include <utility>
41 #include <cassert>
42 #include <cstddef> // for ::std::size_t
43 #include <cstdint>
44 #include <vector>
45 #ifndef NDEBUG
46 #include <stdexcept>
47 #endif
48 
49 #ifndef __CUDACC__
50 #ifndef __device__
51 #define __device__
52 #define __host__
53 #endif
54 #endif
55 
56 
58 namespace cuda {
59 
// This alias for plain C arrays is required due to an MSVC bug, making it fail to
// accept straight up C array reference parameters to functions under some circumstances;
// see: https://developercommunity.visualstudio.com/t/MSVC-rejects-syntax-of-reference-to-C-ar/10792039
/// A named alias for the built-in C array type `T[N]`, usable in parameter lists
template <typename T, size_t N>
using c_array = T[N];
65 
74 using status_t = CUresult;
75 
78 using size_t = ::std::size_t;
79 
83 
88 namespace array {
89 
92 
101 template<dimensionality_t NumDimensions>
103 
template<>
struct dimensions_t<3> // this almost-inherits cudaExtent
{
	/// The three constituent individual dimensions, named
	dimension_t width, height, depth;

	/// Construct from the three individual dimension extents
	constexpr __host__ __device__ dimensions_t(dimension_t width_, dimension_t height_, dimension_t depth_)
	: width(width_), height(height_), depth(depth_) { }
	/// Implicit conversion from the CUDA runtime's own 3D extent structure
	constexpr __host__ __device__ dimensions_t(cudaExtent e)
	: dimensions_t(e.width, e.height, e.depth) { }
	constexpr __host__ __device__ dimensions_t(const dimensions_t& other)
	: dimensions_t(other.width, other.height, other.depth) { }
	constexpr __host__ __device__ dimensions_t(dimensions_t&& other)
	: dimensions_t(other.width, other.height, other.depth) { }
	/// A single linear size is interpreted as the 1D case: { size, 1, 1 }
	constexpr __host__ __device__ dimensions_t(dimension_t linear_size)
	: dimensions_t(linear_size, 1, 1) { }

	CPP14_CONSTEXPR dimensions_t& operator=(const dimensions_t& other) = default;
	CPP14_CONSTEXPR dimensions_t& operator=(dimensions_t&& other) = default;

	/// Conversion to the CUDA runtime's own 3D extent structure
	constexpr __host__ __device__ operator cudaExtent() const
	{
		return { width, height, depth };
		// Note: We're not using make_cudaExtent here because:
		// 1. It's not constexpr and
		// 2. It doesn't do anything except construct the plain struct - as of CUDA 10 at least
	}

	/// The total number of elements covered by these dimensions
	constexpr __host__ __device__ size_t volume() const { return width * height * depth; }

	/// Synonym of @ref volume
	constexpr __host__ __device__ size_t size() const { return volume(); }

	/// The number of axes whose extent exceeds 1 (so, a value between 0 and 3)
	constexpr __host__ __device__ dimensionality_t dimensionality() const
	{
		return (width > 1) + (height> 1) + (depth > 1);
	}

	// Named constructor idioms

	/// Dimensions of a cube with edge length @p x
	static constexpr __host__ __device__ dimensions_t cube(dimension_t x) { return dimensions_t{ x, x, x }; }

	/// Dimensions of nothing: zero in all
	// dimensions
	static constexpr __host__ __device__ dimensions_t zero() { return cube(0); }
};
153 
template<>
struct dimensions_t<2>
{
	// NOTE(review): the data-member declaration line ("dimension_t width, height;")
	// appears to have been elided from this extract of the file; the members are
	// used throughout the methods below - confirm against the full source.

	/// Construct from the two individual dimension extents
	constexpr __host__ __device__ dimensions_t(dimension_t width_, dimension_t height_)
	: width(width_), height(height_) { }
	constexpr __host__ __device__ dimensions_t(const dimensions_t& other)
	: dimensions_t(other.width, other.height) { }
	constexpr __host__ __device__ dimensions_t(dimensions_t&& other)
	: dimensions_t(other.width, other.height) { }
	/// A single linear size is interpreted as the 1D case: { size, 1 }
	constexpr __host__ __device__ dimensions_t(dimension_t linear_size)
	: dimensions_t(linear_size, 1) { }

	CPP14_CONSTEXPR __host__ __device__ dimensions_t& operator=(const dimensions_t& other)
	{
		width = other.width; height = other.height;
		return *this;

	}
	CPP14_CONSTEXPR __host__ __device__ dimensions_t& operator=(dimensions_t&& other)
	{
		width = other.width; height = other.height;
		return *this;
	}

	/// The total number of elements covered by these dimensions
	constexpr __host__ __device__ size_t area() const { return width * height; }

	/// Synonym of @ref area
	constexpr __host__ __device__ size_t size() const { return area(); }

	/// The number of axes whose extent exceeds 1 (so, a value between 0 and 2)
	constexpr __host__ __device__ dimensionality_t dimensionality() const
	{
		return (width > 1) + (height> 1);
	}

	// Named constructor idioms

	/// Dimensions of a square with edge length @p x
	static constexpr __host__ __device__ dimensions_t square(dimension_t x) { return dimensions_t{ x, x }; }

	/// Dimensions of nothing: zero in both
	// dimensions
	static constexpr __host__ __device__ dimensions_t zero() { return square(0); }
};
204 
205 } // namespace array
206 
namespace event {

/// The CUDA driver's raw handle for events
using handle_t = CUevent;

/// Definitions related to inter-process sharing of events
namespace ipc {

/// The CUDA driver's raw handle type for passing an event between processes
using handle_t = CUipcEventHandle;

} // namespace ipc

} // namespace event
228 
namespace stream {

/// The CUDA driver's raw handle for streams
using handle_t = CUstream;

/// CUDA streams have a scheduling priority; note that lower numeric values
/// denote higher priorities
using priority_t = int;
// NOTE(review): the priority constant(s) of this anonymous enum (e.g. a default
// stream priority value) were elided from this extract - confirm against the
// full source
enum : priority_t {
};


/// The raw handle type for a host-side callback function enqueued on a stream
/// (the newer CUhostFn when available, the older CUstreamCallback otherwise)
#if CUDA_VERSION >= 10000
using callback_t = CUhostFn;
#else
using callback_t = CUstreamCallback;
#endif

#if CUDA_VERSION >= 10000

/// Definitions related to stream capture (recording of stream-enqueued work)
namespace capture {

/// Possible modes of stream capture; wraps the driver's CUstreamCaptureMode
enum class mode_t : ::std::underlying_type<CUstreamCaptureMode>::type {
	global = CU_STREAM_CAPTURE_MODE_GLOBAL,
	thread = CU_STREAM_CAPTURE_MODE_THREAD_LOCAL,
	thread_local_ = thread, // trailing underscore since thread_local is a C++ keyword
	relaxed = CU_STREAM_CAPTURE_MODE_RELAXED
};

/// Capture state of a stream, wrapping the driver's CUstreamCaptureStatus,
/// with aliases for the capturing and non-capturing states
enum class state_t : ::std::underlying_type<CUstreamCaptureStatus>::type {
	active = CU_STREAM_CAPTURE_STATUS_ACTIVE,
	capturing = active,
	invalidated = CU_STREAM_CAPTURE_STATUS_INVALIDATED,
	none = CU_STREAM_CAPTURE_STATUS_NONE,
	not_capturing = none
};

} // namespace capture

/// @return true if the given state is the active/capturing state
inline bool is_capturing(capture::state_t status) noexcept
{
	return status == capture::state_t::active;
}

#endif // CUDA_VERSION >= 10000

} // namespace stream
286 
287 namespace grid {
288 
296 using dimension_t = decltype(dim3::x);
297 
310 
311 
struct dimensions_t // this almost-inherits dim3
{
	/// The dimensions in each of the three axes
	dimension_t x, y, z;
	/// Omitted dimensions default to an extent of 1, i.e. they don't add elements
	constexpr __host__ __device__ dimensions_t(dimension_t x_ = 1, dimension_t y_ = 1, dimension_t z_ = 1) noexcept
	: x(x_), y(y_), z(z_) { }

	/// Implicit conversions from CUDA's built-in 3-component vector types
	constexpr __host__ __device__ dimensions_t(const uint3& v) noexcept : dimensions_t(v.x, v.y, v.z) { }
	constexpr __host__ __device__ dimensions_t(const dim3& dims) noexcept : dimensions_t(dims.x, dims.y, dims.z) { }
	constexpr __host__ __device__ dimensions_t(dim3&& dims) noexcept : dimensions_t(dims.x, dims.y, dims.z) { }

	constexpr __host__ __device__ operator uint3(void) const { return { x, y, z }; }

	// This _should_ have been constexpr, but nVIDIA have not marked the dim3 constructors
	// as constexpr, so it isn't
	__host__ __device__ operator dim3(void) const noexcept { return { x, y, z }; }

	/// Total number of elements; x is widened to size_t first, so the product
	/// does not overflow the (32-bit) dimension_t type
	constexpr __host__ __device__ size_t volume() const noexcept { return static_cast<size_t>(x) * y * z; }

	/// The number of axes whose extent exceeds 1 (so, a value between 0 and 3)
	constexpr __host__ __device__ dimensionality_t dimensionality() const noexcept
	{
		return (z > 1) + (y > 1) + (x > 1);
	}

	// Named constructor idioms

	/// Dimensions of a cube with edge length @p x
	static constexpr __host__ __device__ dimensions_t cube(dimension_t x) noexcept { return dimensions_t{ x, x, x }; }

	/// Dimensions of a square with edge length @p x (z extent of 1)
	static constexpr __host__ __device__ dimensions_t square(dimension_t x) noexcept { return dimensions_t{ x, x, 1 }; }

	/// Dimensions of a line of length @p x (y and z extents of 1)
	static constexpr __host__ __device__ dimensions_t line(dimension_t x) noexcept { return dimensions_t{ x, 1, 1 }; }

	/// Dimensions of a single point (1 in every axis)
	static constexpr __host__ __device__ dimensions_t point() noexcept { return dimensions_t{ 1, 1, 1 }; }

	/// True if @p lhs divides @p rhs in every axis
	/// @note undefined behavior (modulo by zero) if any @p lhs extent is 0
	static constexpr bool divides(dimensions_t lhs, dimensions_t rhs)
	{
		return
		(rhs.x % lhs.x == 0) and
		(rhs.y % lhs.y == 0) and
		(rhs.z % lhs.z == 0);
	}

	/// Array-like access to the extents by axis index; note any index other
	/// than 0 or 1 yields z - there is no range checking
	constexpr dimension_t operator[](int i) const noexcept {
		return (i == 0) ? x :
		(i == 1) ? y :
		z;
	}
	CPP14_CONSTEXPR dimension_t& operator[](int i) noexcept {
		return (i == 0) ? x :
		(i == 1) ? y :
		z;
	}
};
388 
390 constexpr inline bool operator==(const dim3& lhs, const dim3& rhs) noexcept
391 {
392  return lhs.x == rhs.x and lhs.y == rhs.y and lhs.z == rhs.z;
393 }
394 constexpr inline bool operator!=(const dim3& lhs, const dim3& rhs) noexcept
395 {
396  return not (lhs == rhs);
397 }
398 constexpr inline bool operator==(const dimensions_t& lhs, const dimensions_t& rhs) noexcept
399 {
400  return lhs.x == rhs.x and lhs.y == rhs.y and lhs.z == rhs.z;
401 }
402 constexpr inline bool operator!=(const dimensions_t& lhs, const dimensions_t& rhs) noexcept
403 {
404  return not (lhs == rhs);
405 }
407 
408 
417 
424 
430 {
431  using dimension_type = overall_dimension_t;
432  dimension_type x, y, z;
433 
434  constexpr __host__ __device__ overall_dimensions_t(
435  dimension_type width_, dimension_type height_, dimension_type depth_) noexcept
436  : x(width_), y(height_), z(depth_) { }
437 
438  constexpr __host__ __device__ overall_dimensions_t(const dim3& dims) noexcept
439  : x(dims.x), y(dims.y), z(dims.z) { }
440 
441  constexpr __host__ __device__ overall_dimensions_t(dim3&& dims) noexcept
442  : x(dims.x), y(dims.y), z(dims.z) { }
443 
444  constexpr __host__ __device__ overall_dimensions_t(const overall_dimensions_t& other) noexcept
445  : overall_dimensions_t(other.x, other.y, other.z) { }
446 
447  constexpr __host__ __device__ overall_dimensions_t(overall_dimensions_t&& other) noexcept
448  : overall_dimensions_t(other.x, other.y, other.z) { }
449 
450  explicit constexpr __host__ __device__ overall_dimensions_t(dimensions_t dims) noexcept
451  : overall_dimensions_t(dims.x, dims.y, dims.z) { }
452 
453  CPP14_CONSTEXPR overall_dimensions_t& operator=(const overall_dimensions_t& other) noexcept = default;
454  CPP14_CONSTEXPR overall_dimensions_t& operator=(overall_dimensions_t&& other) noexcept = default;
455 
456  constexpr __host__ __device__ size_t volume() const noexcept { return x * y * z; }
457  constexpr __host__ __device__ size_t size() const noexcept { return volume(); }
458  constexpr __host__ __device__ dimensionality_t dimensionality() const noexcept
459  {
460  return ((x > 1) + (y > 1) + (z > 1));
461  }
462 
467  constexpr dimension_type operator[](int i) const noexcept {
468  return (i == 0) ? x :
469  (i == 1) ? y :
470  z;
471  }
472  CPP14_CONSTEXPR dimension_type& operator[](int i) noexcept {
473  return (i == 0) ? x :
474  (i == 1) ? y :
475  z;
476  }
478 };
479 
/// Element-wise equality for overall (thread-count) grid dimensions
constexpr bool operator==(overall_dimensions_t lhs, overall_dimensions_t rhs) noexcept
{
	return (lhs.x == rhs.x) and (lhs.y == rhs.y) and (lhs.z == rhs.z);
}

/// Inequality for overall (thread-count) grid dimensions
constexpr bool operator!=(overall_dimensions_t lhs, overall_dimensions_t rhs) noexcept
{
	return not (lhs == rhs);
}

/// The overall grid dimensions in threads: the element-wise product of the
/// dimensions in blocks and the per-block dimensions in threads; each factor
/// is widened to overall_dimension_t before multiplying
constexpr overall_dimensions_t operator*(dimensions_t grid_dims, block_dimensions_t block_dims) noexcept
{
	return overall_dimensions_t {
		grid_dims.x * overall_dimension_t { block_dims.x },
		grid_dims.y * overall_dimension_t { block_dims.y },
		grid_dims.z * overall_dimension_t { block_dims.z },
	};
}
500 
506  grid::dimensions_t grid;
508 
510  constexpr overall_dimensions_t flatten() const noexcept { return grid * block; }
511 
513  constexpr size_t volume() const noexcept { return flatten().volume(); }
514 
516  constexpr size_t dimensionality() const noexcept { return flatten().dimensionality(); }
517 
520  static constexpr composite_dimensions_t point() noexcept
521  {
523  }
524 
526 #if __cplusplus >= 202002L
527  constexpr bool operator==(const composite_dimensions_t&) const noexcept = default;
528  constexpr bool operator!=(const composite_dimensions_t&) const noexcept = default;
529 #endif
530 };
532 
#if __cplusplus < 202002L
/// Pre-C++20 fallback for the defaulted comparison operators: two composite
/// dimensions are equal when both their grid and block dimensions are equal
constexpr bool operator==(composite_dimensions_t lhs, composite_dimensions_t rhs) noexcept
{
	return (lhs.grid == rhs.grid) and (lhs.block == rhs.block);
}

constexpr bool operator!=(composite_dimensions_t lhs, composite_dimensions_t rhs) noexcept
{
	return not (lhs == rhs);
}
#endif // __cplusplus < 202002L
546 
547 } // namespace grid
548 
550 namespace memory {
551 
555 using location_t = CUmemLocation;
556 
557 #if CUDA_VERSION >= 10020
558 
/// Memory access permissions: whether reading and/or writing is allowed
struct permissions_t {
	bool read;
	bool write;

	/// Convert to the CUDA driver's raw access-protection flags.
	///
	/// @note this mapping is lossy for the write-only case: when read is
	/// false, the write member is ignored and the result is
	/// CU_MEM_ACCESS_FLAGS_PROT_NONE.
	operator CUmemAccess_flags() const noexcept
	{
		return read ?
		(write ? CU_MEM_ACCESS_FLAGS_PROT_READWRITE : CU_MEM_ACCESS_FLAGS_PROT_READ) :
		CU_MEM_ACCESS_FLAGS_PROT_NONE;
	}

};
580 
581 namespace permissions {
582 
/// Permissions allowing neither reading nor writing
constexpr inline permissions_t none() { return permissions_t{ false, false }; }
/// Permissions allowing reading, but not writing
constexpr inline permissions_t read_only() { return permissions_t{ true, false }; }
/// Permissions allowing writing, but not reading
constexpr inline permissions_t write_only() { return permissions_t{ false, true }; }
/// Permissions allowing both reading and writing
constexpr inline permissions_t read_and_write() { return permissions_t{ true, true }; }
587 
588 namespace detail_ {
589 
591 inline permissions_t from_flags(CUmemAccess_flags access_flags)
592 {
593  bool read = (access_flags & CU_MEM_ACCESS_FLAGS_PROT_READ);
594  bool write = (access_flags & CU_MEM_ACCESS_FLAGS_PROT_READWRITE);
595  return permissions_t{read, write};
596 }
597 
598 } // namespace detail_
599 
600 } // namespace permissions
601 
602 
namespace physical_allocation {

/// The kinds of OS-specific shared handles by which a physical allocation may be
/// exported; wraps the driver's CUmemAllocationHandleType
enum class shared_handle_kind_t : ::std::underlying_type<CUmemAllocationHandleType>::type {
#if CUDA_VERSION >= 11020
	no_export = CU_MEM_HANDLE_TYPE_NONE,
#endif
	posix_file_descriptor = CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR,
	win32_handle = CU_MEM_HANDLE_TYPE_WIN32,
	win32_kmt = CU_MEM_HANDLE_TYPE_WIN32_KMT,
};

namespace detail_ {

// Maps each shared handle kind to the concrete OS-level handle type used for it
template<shared_handle_kind_t SharedHandleKind> struct shared_handle_type_helper;

template <> struct shared_handle_type_helper<shared_handle_kind_t::posix_file_descriptor> { using type = int; };
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
template <> struct shared_handle_type_helper<shared_handle_kind_t::win32_handle> { using type = void *; };
#endif
// TODO: What about WIN32_KMT?

} // namespace detail_

/// The OS-specific handle type corresponding to a given shared handle kind
template<shared_handle_kind_t SharedHandleKind>
using shared_handle_t = typename detail_::shared_handle_type_helper<SharedHandleKind>::type;

} // namespace physical_allocation
633 #endif // CUDA_VERSION >= 10020
634 #if CUDA_VERSION >= 11020
635 
637 namespace pool {
638 
640 using handle_t = CUmemoryPool;
641 
644 using shared_handle_kind_t = physical_allocation::shared_handle_kind_t;
645 
648 using physical_allocation::shared_handle_t;
649 
650 namespace ipc {
651 
654 using ptr_handle_t = CUmemPoolPtrExportData;
655 
656 } // namespace ipc
657 
658 } // namespace pool
659 #endif // CUDA_VERSION >= 11020
660 
661 namespace pointer {
662 
664 using attribute_t = CUpointer_attribute;
665 
666 } // namespace pointer
667 
668 namespace device {
669 
673 
674 using address_t = CUdeviceptr;
675 
676 static_assert(sizeof(void *) == sizeof(device::address_t), "Unexpected address size");
677 
/// Reinterpret a raw (void) pointer as the numeric address type the CUDA driver
/// uses for device-accessible memory
inline address_t address(const void* device_ptr) noexcept
{
	static_assert(sizeof(void*) == sizeof(address_t), "Incompatible sizes for a void pointer and memory::device::address_t");
	return reinterpret_cast<address_t>(device_ptr);
}
689 
695 inline address_t address(memory::const_region_t region) noexcept { return address(region.start()); }
696 
697 } // namespace device
698 
/// Reinterpret a numeric CUDA device address as a raw (void) pointer - the
/// inverse of @ref device::address
inline void* as_pointer(device::address_t address) noexcept
{
	static_assert(sizeof(void*) == sizeof(device::address_t), "Incompatible sizes for a void pointer and memory::device::address_t");
	return reinterpret_cast<void*>(address);
}
707 
718 namespace shared {
719 
732 using size_t = unsigned;
733 
734 } // namespace shared
735 
752 namespace managed {
753 
756  to_all_devices,
757  to_supporters_of_concurrent_managed_access,
758 };
759 
760 } // namespace managed
761 
#if CUDA_VERSION >= 11070
/// Scope of a stream memory barrier: just the individual device (GPU), or the
/// entire system; wraps the driver's CUstreamMemoryBarrier_flags
enum class barrier_scope_t : ::std::underlying_type<CUstreamMemoryBarrier_flags>::type {
	device = CU_STREAM_MEMORY_BARRIER_TYPE_GPU,
	system = CU_STREAM_MEMORY_BARRIER_TYPE_SYS
};
#endif // CUDA_VERSION >= 11070
771 
772 #if CUDA_VERSION >= 10000
773 namespace external {
775 
777 using handle_t = CUexternalMemory;
778 
/// Specification of a subregion within a larger memory region: an offset into
/// the region and a size (presumably both in bytes - confirm against the
/// external-memory API calls which consume this)
struct subregion_spec_t {
	size_t offset;
	size_t size;
};
786 
787 } // namespace external
788 
789 #endif // CUDA_VERSION >= 10000
790 
791 } // namespace memory
792 
798 
/// The balance between L1 cache and shared memory a multiprocessor may be set
/// to prefer; wraps the driver's CUfunc_cache_enum
enum class multiprocessor_cache_preference_t : ::std::underlying_type<CUfunc_cache_enum>::type {
	/// No preference for more L1 cache or for more shared memory
	no_preference = CU_FUNC_CACHE_PREFER_NONE,
	/// Divide the cache resources equally between actual L1 cache and shared memory
	equal_l1_and_shared_memory = CU_FUNC_CACHE_PREFER_EQUAL,
	/// Maximize available shared memory at the expense of L1 cache
	prefer_shared_memory_over_l1 = CU_FUNC_CACHE_PREFER_SHARED,
	/// Maximize available L1 cache at the expense of shared memory
	prefer_l1_over_shared_memory = CU_FUNC_CACHE_PREFER_L1,
	// aliases
	none = no_preference,
	equal = equal_l1_and_shared_memory,
	prefer_shared = prefer_shared_memory_over_l1,
	prefer_l1 = prefer_l1_over_shared_memory,
};
821 
833  : ::std::underlying_type<CUsharedconfig>::type
834 {
835  device_default = CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE,
836  four_bytes_per_bank = CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE,
837  eight_bytes_per_bank = CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE
838 };
839 
844 namespace device {
845 
852 using id_t = CUdevice;
853 
858 using attribute_t = CUdevice_attribute;
862 using attribute_value_t = int;
863 
864 namespace peer_to_peer {
865 
871 using attribute_t = CUdevice_P2PAttribute;
872 
873 } // namespace peer_to_peer
874 
875 } // namespace device
876 
877 namespace context {
878 
880 using handle_t = CUcontext;
881 
882 using flags_t = unsigned;
883 
887 
898  heuristic = CU_CTX_SCHED_AUTO,
899 
904 
914  spin = CU_CTX_SCHED_SPIN,
915 
922  block = CU_CTX_SCHED_BLOCKING_SYNC,
923 
933  yield = CU_CTX_SCHED_YIELD,
934 
937 };
938 
939 } // namespace context
940 
941 namespace device {
942 
943 using flags_t = context::flags_t;
944 
945 namespace primary_context {
946 
949 
950 } // namespace primary_context
951 
954 
955 } // namespace device
956 
957 namespace detail_ {
958 
/// A static_cast which is only allowed to change reference-ness / value
/// category, never the underlying type - verified at compile time.
template <typename T, typename U>
inline T identity_cast(U&& x)
{
	using stripped_target_type = typename ::std::remove_reference<T>::type;
	using stripped_source_type = typename ::std::remove_reference<U>::type;
	static_assert(
		::std::is_same<stripped_target_type, stripped_source_type>::value,
		"Casting to a different type - don't use identity_cast");
	return static_cast<T>(::std::forward<U>(x));
}
969 
970 } // namespace detail_
971 
973 using uuid_t = CUuuid;
974 
975 namespace module {
976 
978 using handle_t = CUmodule;
979 
980 } // namespace module
981 
982 namespace kernel {
983 
985 using attribute_t = CUfunction_attribute;
986 
990 using attribute_value_t = int;
991 
992 // A raw CUDA driver handle for a kernel; prefer using the @ref cuda::kernel_t type.
993 using handle_t = CUfunction;
994 
995 } // namespace kernel
996 
997 #if CUDA_VERSION >= 10000
998 
1004 namespace graph {
1005 
1006 #if CUDA_VERSION >= 13010
1007 using id_t = unsigned;
1009 #endif
1010 
1011 namespace node {
1012 
1014 using handle_t = CUgraphNode;
1016 using const_handle_t = CUgraphNode_st const *;
1017 
1018 constexpr const const_handle_t no_handle = nullptr;
1019 
1020 } // namespace node
1021 
1027 namespace template_ {
1028 
1030 using handle_t = CUgraph;
1031 constexpr const handle_t null_handle = nullptr;
1032 
1033 } // namespace template_
1034 
1040 namespace instance {
1041 
1043 using handle_t = CUgraphExec;
1044 
1045 } // namespace instance
1046 
1047 } // namespace graph
1048 
1049 #endif // CUDA_VERSION >= 10000
1050 
1051 } // namespace cuda
1052 
1053 #endif // CUDA_API_WRAPPERS_COMMON_TYPES_HPP_
int attribute_value_t
All CUDA device attributes (cuda::device::attribute_t) have a value of this type. ...
Definition: types.hpp:862
constexpr size_t dimensionality() const noexcept
Definition: types.hpp:516
Keep control and spin-check for result availability.
Definition: types.hpp:914
int attribute_value_t
The uniform type the CUDA driver uses for all kernel attributes; it is typically more appropriate to ...
Definition: types.hpp:990
Alias for the default behavior; see heuristic .
Definition: types.hpp:903
cuda::context::handle_t handle_t
Raw CUDA driver handle for a device's primary context.
Definition: types.hpp:948
decltype(dim3::x) dimension_t
CUDA kernels are launched in grids of blocks of threads, in 3 dimensions.
Definition: types.hpp:296
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
int priority_t
CUDA streams have a scheduling priority, with lower values meaning higher priority.
Definition: types.hpp:243
Divide the cache resources to maximize available L1 cache at the expense of shared memory...
The full set of possible configuration parameters for launching a kernel on a GPU.
Definition: launch_configuration.hpp:69
typename cuda::rtc::detail_::types< Kind >::handle_type handle_t
Raw program handle used by the NVIDIA run-time compilation libraries' API calls: // The NVRTC librar...
Definition: types.hpp:124
CUcontext handle_t
Raw CUDA driver handle for a context; see {context_t}.
Definition: types.hpp:880
dimension_t width
The three constituent individual dimensions, named.
Definition: types.hpp:109
dimension_t block_dimension_t
CUDA kernels are launched in grids of blocks of threads, in 3 dimensions.
Definition: types.hpp:309
Default behavior; yield or spin based on a heuristic.
Definition: types.hpp:898
Yield control while waiting for results.
Definition: types.hpp:933
CUmemLocation location_t
Used in a limited number of API functions which can relate both to CUDA device memory and system memo...
Definition: types.hpp:555
constexpr dimension_type operator[](int i) const noexcept
Provides array-like access to the dimensions in different axes.
Definition: types.hpp:467
CUuuid uuid_t
The CUDA-driver-specific representation of a UUID value; see also {device_t::uuid()}.
Definition: types.hpp:973
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:852
static constexpr bool divides(dimensions_t lhs, dimensions_t rhs)
Definition: types.hpp:364
constexpr dimension_t operator[](int i) const noexcept
Provides array-like access to the dimensions in different axes.
Definition: types.hpp:376
context::host_thread_sync_scheduling_policy_t host_thread_sync_scheduling_policy_t
Definition: types.hpp:953
A richer (kind-of-a-)wrapper for CUDA's dim3 class, used to specify dimensions for blocks (in terms o...
Definition: types.hpp:322
CUevent handle_t
The CUDA driver&#39;s raw handle for events.
Definition: types.hpp:214
multiprocessor_shared_memory_bank_size_option_t
A physical core (SM)'s shared memory has multiple "banks"; at most one datum per bank may be accessed...
Definition: types.hpp:832
CUdevice_attribute attribute_t
CUDA devices have both "attributes" and "properties".
Definition: types.hpp:858
void point(const CharT *description, color_t color=color_t::Black())
Mark a single point on the profiler timeline, giving it also a color and some descriptive text...
Definition: profiling.hpp:185
unsigned size_t
Each physical core ("Symmetric Multiprocessor") on an nVIDIA GPU has a space of shared memory (see th...
Definition: types.hpp:732
dimension_t width
The two constituent individual dimensions, named; no "depth" for the 2D case.
Definition: types.hpp:159
host_thread_sync_scheduling_policy_t
Scheduling policies the CUDA driver may use when the host-side thread it is running in needs to wait ...
Definition: types.hpp:886
Block the thread until results are available.
Definition: types.hpp:922
size_t dimensionality_t
The index or number of dimensions of an entity (as opposed to the extent in any dimension) - typicall...
Definition: types.hpp:82
CUpointer_attribute attribute_t
Raw CUDA driver choice type for attributes of pointers.
Definition: types.hpp:664
::std::size_t size_t
A size type for use throughout the wrappers library (except when specific API functions limit the siz...
Definition: types.hpp:78
address_t address(memory::const_region_t region) noexcept
Definition: types.hpp:695
CUstreamCallback callback_t
The CUDA driver&#39;s raw handle for a host-side callback function.
Definition: types.hpp:254
multiprocessor_cache_preference_t
L1-vs-shared-memory balance option.
Definition: types.hpp:806
Composite dimensions for a grid - in terms of blocks, then also down into the block dimensions comple...
Definition: types.hpp:505
CUdevice_P2PAttribute attribute_t
While Individual CUDA devices have individual "attributes" (attribute_t), there are also attributes c...
Definition: types.hpp:871
bool operator==(const context_t &lhs, const context_t &rhs) noexcept
Definition: context.hpp:768
CUarray handle_t
Raw CUDA driver handle for arrays (of any dimension)
Definition: array.hpp:34
size_t overall_dimension_t
Dimension of a grid in threads along one axis, i.e.
Definition: types.hpp:423
Dimensions of a grid in threads, i.e.
Definition: types.hpp:429
detail_::region_helper< memory::const_region_t > const_region_t
A child class of the generic const_region_t with some managed-memory-specific functionality.
Definition: memory.hpp:1976
size_t dimension_t
An individual dimension extent for an array.
Definition: types.hpp:91
CUstream handle_t
The CUDA driver's raw handle for streams.
Definition: types.hpp:236
void * as_pointer(device::address_t address) noexcept
Definition: types.hpp:702
the scheduling priority of a stream created without specifying any other priority value ...
Definition: types.hpp:246
constexpr overall_dimensions_t flatten() const noexcept
Definition: types.hpp:510
CUdeviceptr address_t
The numeric type which can represent the range of memory addresses on a CUDA device.
Definition: types.hpp:674
constexpr size_t volume() const noexcept
Definition: types.hpp:513
see heuristic
Definition: types.hpp:936
Divide the cache resources equally between actual L1 cache and shared memory.
static constexpr composite_dimensions_t point() noexcept
A named constructor idiom for the composite dimensions of a single-block grid with a single-thread bl...
Definition: types.hpp:520
Divide the cache resources to maximize available shared memory at the expense of L1 cache...
No preference for more L1 cache or for more shared memory; the API can do as it please.
initial_visibility_t
The choices of which categories CUDA devices must a managed memory region be visible to...
Definition: types.hpp:755
CUresult status_t
Indicates either the result (success or error index) of a CUDA Runtime or Driver API call...
Definition: types.hpp:74
CUDA's array memory-objects are multi-dimensional; but their dimensions, or extents, are not the same as cuda::grid::dimensions_t ; they may be much larger in each axis.
Definition: types.hpp:102
CUfunction_attribute attribute_t
Raw CUDA driver selector of a kernel attribute.
Definition: types.hpp:985
void zero(void *start, size_t num_bytes, optional_ref< const stream_t > stream={})
Sets all bytes in a region of memory to 0 (zero)
Definition: memory.hpp:418