cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
types.hpp

#pragma once
#ifndef CUDA_API_WRAPPERS_COMMON_TYPES_HPP_
#define CUDA_API_WRAPPERS_COMMON_TYPES_HPP_

#if (__cplusplus < 201103L && (!defined(_MSVC_LANG) || _MSVC_LANG < 201103L))
#error "The CUDA API headers can only be compiled with C++11 or a later version of the C++ language standard"
#endif

#include "detail/optional.hpp"
#include "detail/span.hpp"

#ifndef __CUDACC__
#include <builtin_types.h>
#endif
#include <cuda.h>

#include <type_traits>
#include <utility>
#include <cassert>
#include <cstddef> // for ::std::size_t
#include <cstdint>
#include <vector>
#ifndef NDEBUG
#include <stdexcept>
#endif

#ifndef __CUDACC__
#ifndef __device__
#define __device__
#define __host__
#endif
#endif

#ifndef CPP14_CONSTEXPR
#if __cplusplus >= 201402L
#define CPP14_CONSTEXPR constexpr
#else
#define CPP14_CONSTEXPR
#endif
#endif

#ifdef NDEBUG
#define NOEXCEPT_IF_NDEBUG noexcept(true)
#else
#define NOEXCEPT_IF_NDEBUG noexcept(false)
#endif

#ifdef _MSC_VER
/*
 * Microsoft Visual C++ (up to v2017) does not support the C++
 * keywords `and`, `or` and `not`; apparently, including the
 * following header works around this.
 */
#include <ciso646>
#endif

namespace cuda {

namespace detail_ {

template <bool B>
using bool_constant = ::std::integral_constant<bool, B>;

using true_type  = bool_constant<true>;
using false_type = bool_constant<false>;

template<bool...> struct bool_pack;

template<bool... bs>
using all_true = ::std::is_same<bool_pack<bs..., true>, bool_pack<true, bs...>>;

// This is available in C++17 as ::std::void_t, but we're only assuming C++11
template<typename...>
using void_t = void;

// This is available in C++14
template<bool B, typename T = void>
using enable_if_t = typename ::std::enable_if<B, T>::type;

template<typename T>
using remove_reference_t = typename ::std::remove_reference<T>::type;

// The primary template handles types which have no data() method...
template <typename, typename = void>
struct has_data_method : ::std::false_type { };

// ... while the specialization recognizes types which do have one
template <typename T>
struct has_data_method<T, cuda::detail_::void_t<decltype(::std::declval<T>().data())>> : ::std::true_type { };

// The primary template handles types which have no value_type member...
template <typename, typename = void>
struct has_value_type_member : ::std::false_type { };

// ... while the specialization recognizes types which do have one
template <typename T>
struct has_value_type_member<T, cuda::detail_::void_t<typename T::value_type>> : ::std::true_type { };

// TODO: Consider either beefing up this type trait, or ditching it in favor
// of something simpler - perhaps something from the standard library
template <typename T>
struct is_kinda_like_contiguous_container :
    ::std::integral_constant<bool,
        has_data_method<typename ::std::remove_reference<T>::type>::value
        and has_value_type_member<typename ::std::remove_reference<T>::type>::value
    > {};
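
// Illustrative usage (an editor's sketch, not part of the original header):
// ::std::vector has both a data() method and a value_type member, so it
// qualifies; a raw pointer has neither.
//
//     static_assert(is_kinda_like_contiguous_container<::std::vector<int>>::value, "");
//     static_assert(not is_kinda_like_contiguous_container<int*>::value, "");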

} // namespace detail_

/// Indicates either the result (success or error index) of a CUDA Runtime
/// or Driver API call
using status_t = CUresult;

using size_t = ::std::size_t;

/// The index or number of dimensions of an entity (as opposed to the extent
/// in any dimension) - typically just 0, 1, 2 or 3
using dimensionality_t = size_t;

namespace array {

using dimension_t = size_t;

/// CUDA's array memory-objects are multi-dimensional; but their dimensions,
/// or extents, are not the same as cuda::grid::dimensions_t; they may be much
/// larger in each axis.
template<dimensionality_t NumDimensions>
struct dimensions_t;

template<>
struct dimensions_t<3> // this almost-inherits cudaExtent
{
    dimension_t width, height, depth;

    constexpr __host__ __device__ dimensions_t(dimension_t width_, dimension_t height_, dimension_t depth_)
        : width(width_), height(height_), depth(depth_) { }
    constexpr __host__ __device__ dimensions_t(cudaExtent e)
        : dimensions_t(e.width, e.height, e.depth) { }
    constexpr __host__ __device__ dimensions_t(const dimensions_t& other)
        : dimensions_t(other.width, other.height, other.depth) { }
    constexpr __host__ __device__ dimensions_t(dimensions_t&& other)
        : dimensions_t(other.width, other.height, other.depth) { }
    constexpr __host__ __device__ dimensions_t(dimension_t linear_size)
        : dimensions_t(linear_size, 1, 1) { }

    CPP14_CONSTEXPR dimensions_t& operator=(const dimensions_t& other) = default;
    CPP14_CONSTEXPR dimensions_t& operator=(dimensions_t&& other) = default;

    constexpr __host__ __device__ operator cudaExtent() const
    {
        return { width, height, depth };
        // Note: We're not using make_cudaExtent here because:
        // 1. It's not constexpr and
        // 2. It doesn't do anything except construct the plain struct - as of CUDA 10 at least
    }

    constexpr __host__ __device__ size_t volume() const { return width * height * depth; }
    constexpr __host__ __device__ size_t size() const { return volume(); }
    constexpr __host__ __device__ dimensionality_t dimensionality() const
    {
        return ((width > 1) + (height > 1) + (depth > 1));
    }

    // Named constructor idioms

    static constexpr __host__ __device__ dimensions_t cube(dimension_t x) { return dimensions_t{ x, x, x }; }
    static constexpr __host__ __device__ dimensions_t zero() { return cube(0); }
};
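
// Illustrative usage (an editor's sketch, not part of the original header):
//
//     dimensions_t<3> dims { 64, 32, 8 };    // width, height, depth
//     assert(dims.volume() == 64 * 32 * 8);
//     assert(dims.dimensionality() == 3);
//     cudaExtent extent = dims;              // converts to the plain CUDA struct
//     auto cube = dimensions_t<3>::cube(16); // 16 x 16 x 16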

template<>
struct dimensions_t<2>
{
    dimension_t width, height;

    constexpr __host__ __device__ dimensions_t(dimension_t width_, dimension_t height_)
        : width(width_), height(height_) { }
    constexpr __host__ __device__ dimensions_t(const dimensions_t& other)
        : dimensions_t(other.width, other.height) { }
    constexpr __host__ __device__ dimensions_t(dimensions_t&& other)
        : dimensions_t(other.width, other.height) { }
    constexpr __host__ __device__ dimensions_t(dimension_t linear_size)
        : dimensions_t(linear_size, 1) { }

    CPP14_CONSTEXPR __host__ __device__ dimensions_t& operator=(const dimensions_t& other)
    {
        width = other.width; height = other.height;
        return *this;
    }
    CPP14_CONSTEXPR __host__ __device__ dimensions_t& operator=(dimensions_t&& other)
    {
        width = other.width; height = other.height;
        return *this;
    }

    constexpr __host__ __device__ size_t area() const { return width * height; }
    constexpr __host__ __device__ size_t size() const { return area(); }
    constexpr __host__ __device__ dimensionality_t dimensionality() const
    {
        return ((width > 1) + (height > 1));
    }

    // Named constructor idioms

    static constexpr __host__ __device__ dimensions_t square(dimension_t x) { return dimensions_t{ x, x }; }
    static constexpr __host__ __device__ dimensions_t zero() { return square(0); }
};
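
// Illustrative usage (an editor's sketch, not part of the original header):
// the single-argument constructor lets a linear size act as a degenerate
// 2D extent:
//
//     dimensions_t<2> dims { 1024 }; // 1024 x 1
//     assert(dims.area() == 1024 and dims.dimensionality() == 1);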

} // namespace array

namespace event {

/// The CUDA API's raw handle for events
using handle_t = CUevent;

namespace ipc {

/// The CUDA API's raw handle for events shared between processes (IPC)
using handle_t = CUipcEventHandle;

} // namespace ipc

} // namespace event

namespace stream {

/// The CUDA API's raw handle for streams
using handle_t = CUstream;

/// CUDA streams have a scheduling priority, with _lower_ numeric values
/// meaning _higher_ priority
using priority_t = int;
enum : priority_t {
    /// the scheduling priority of a stream created without specifying
    /// any other priority value
    default_priority = 0
};

namespace detail_ {

#if CUDA_VERSION >= 10000
using callback_t = CUhostFn;
#else
using callback_t = CUstreamCallback;
#endif

} // namespace detail_

} // namespace stream

namespace grid {

/// CUDA kernels are launched in grids of blocks of threads, in 3 dimensions;
/// this type expresses the dimension of a grid along one axis, in blocks
using dimension_t = decltype(dim3::x);

/// CUDA kernels are launched in grids of blocks of threads, in 3 dimensions;
/// this type expresses the dimension of a block along one axis, in threads
using block_dimension_t = dimension_t;

/// A richer (kind-of-a-)wrapper for CUDA's dim3 class, used to specify
/// dimensions of blocks (in terms of threads) and of grids (in terms of blocks)
struct dimensions_t // this almost-inherits dim3
{
    dimension_t x, y, z;
    constexpr __host__ __device__ dimensions_t(dimension_t x_ = 1, dimension_t y_ = 1, dimension_t z_ = 1)
        : x(x_), y(y_), z(z_) { }

    constexpr __host__ __device__ dimensions_t(const uint3& v) : dimensions_t(v.x, v.y, v.z) { }
    constexpr __host__ __device__ dimensions_t(const dim3& dims) : dimensions_t(dims.x, dims.y, dims.z) { }
    constexpr __host__ __device__ dimensions_t(dim3&& dims) : dimensions_t(dims.x, dims.y, dims.z) { }

    constexpr __host__ __device__ operator uint3(void) const { return { x, y, z }; }

    // This _should_ have been constexpr, but NVIDIA has not marked the dim3
    // constructors as constexpr, so it isn't
    __host__ __device__ operator dim3(void) const { return { x, y, z }; }

    constexpr __host__ __device__ size_t volume() const { return static_cast<size_t>(x) * y * z; }
    constexpr __host__ __device__ dimensionality_t dimensionality() const
    {
        return ((z > 1) + (y > 1) + (x > 1));
    }

    // Named constructor idioms

    static constexpr __host__ __device__ dimensions_t cube(dimension_t x)   { return dimensions_t{ x, x, x }; }
    static constexpr __host__ __device__ dimensions_t square(dimension_t x) { return dimensions_t{ x, x, 1 }; }
    static constexpr __host__ __device__ dimensions_t line(dimension_t x)   { return dimensions_t{ x, 1, 1 }; }
    static constexpr __host__ __device__ dimensions_t point()               { return dimensions_t{ 1, 1, 1 }; }

    static bool divides(dimensions_t lhs, dimensions_t rhs)
    {
        return
            (rhs.x % lhs.x == 0) and
            (rhs.y % lhs.y == 0) and
            (rhs.z % lhs.z == 0);
    }
};
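
// Illustrative usage (an editor's sketch, not part of the original header):
//
//     dimensions_t grid_dims { 128 }; // 128 x 1 x 1, via the default arguments
//     dim3 native = grid_dims;        // converts to CUDA's dim3
//     assert(grid_dims.dimensionality() == 1);
//     assert(dimensions_t::divides({ 2, 2, 1 }, { 8, 4, 1 })); // 2|8, 2|4, 1|1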

constexpr inline bool operator==(const dim3& lhs, const dim3& rhs) noexcept
{
    return lhs.x == rhs.x and lhs.y == rhs.y and lhs.z == rhs.z;
}
constexpr inline bool operator!=(const dim3& lhs, const dim3& rhs) noexcept
{
    return not (lhs == rhs);
}
constexpr inline bool operator==(const dimensions_t& lhs, const dimensions_t& rhs) noexcept
{
    return lhs.x == rhs.x and lhs.y == rhs.y and lhs.z == rhs.z;
}
constexpr inline bool operator!=(const dimensions_t& lhs, const dimensions_t& rhs) noexcept
{
    return not (lhs == rhs);
}

/// Dimensions of a block within a grid (in threads); represented the same
/// way as the grid's own dimensions
using block_dimensions_t = dimensions_t;


struct overall_dimensions_t;

/// Composite dimensions for a grid - in terms of blocks, then also down into
/// the block dimensions, completing the picture in terms of threads
struct composite_dimensions_t {
    grid::dimensions_t grid;
    block_dimensions_t block;

    constexpr overall_dimensions_t flatten() const;
    constexpr size_t volume() const;
    constexpr size_t dimensionality() const;

    static constexpr composite_dimensions_t point()
    {
        return { dimensions_t::point(), block_dimensions_t::point() };
    }
};

constexpr bool operator==(composite_dimensions_t lhs, composite_dimensions_t rhs) noexcept
{
    return (lhs.grid == rhs.grid) and (lhs.block == rhs.block);
}

constexpr bool operator!=(composite_dimensions_t lhs, composite_dimensions_t rhs) noexcept
{
    return not (lhs == rhs);
}

/// Dimension of a grid in threads along one axis, i.e. the product of the
/// grid dimension and the block dimension along that axis
using overall_dimension_t = size_t;

/// Dimensions of a grid in threads, i.e. the dimensions of the grid (in
/// blocks) times those of each block
struct overall_dimensions_t
{
    using dimension_type = overall_dimension_t;
    dimension_type x, y, z;

    constexpr __host__ __device__ overall_dimensions_t(
        dimension_type width_, dimension_type height_, dimension_type depth_) noexcept
        : x(width_), y(height_), z(depth_) { }

    constexpr __host__ __device__ overall_dimensions_t(const dim3& dims) noexcept
        : x(dims.x), y(dims.y), z(dims.z) { }

    constexpr __host__ __device__ overall_dimensions_t(dim3&& dims) noexcept
        : x(dims.x), y(dims.y), z(dims.z) { }

    constexpr __host__ __device__ overall_dimensions_t(const overall_dimensions_t& other) noexcept
        : overall_dimensions_t(other.x, other.y, other.z) { }

    constexpr __host__ __device__ overall_dimensions_t(overall_dimensions_t&& other) noexcept
        : overall_dimensions_t(other.x, other.y, other.z) { }

    explicit constexpr __host__ __device__ overall_dimensions_t(dimensions_t dims) noexcept
        : overall_dimensions_t(dims.x, dims.y, dims.z) { }

    CPP14_CONSTEXPR overall_dimensions_t& operator=(const overall_dimensions_t& other) noexcept = default;
    CPP14_CONSTEXPR overall_dimensions_t& operator=(overall_dimensions_t&& other) noexcept = default;

    constexpr __host__ __device__ size_t volume() const noexcept { return x * y * z; }
    constexpr __host__ __device__ size_t size() const noexcept { return volume(); }
    constexpr __host__ __device__ dimensionality_t dimensionality() const noexcept
    {
        return ((x > 1) + (y > 1) + (z > 1));
    }
};

constexpr bool operator==(overall_dimensions_t lhs, overall_dimensions_t rhs) noexcept
{
    return (lhs.x == rhs.x) and (lhs.y == rhs.y) and (lhs.z == rhs.z);
}

constexpr bool operator!=(overall_dimensions_t lhs, overall_dimensions_t rhs) noexcept
{
    return not (lhs == rhs);
}

constexpr overall_dimensions_t operator*(dimensions_t grid_dims, block_dimensions_t block_dims) noexcept
{
    return overall_dimensions_t {
        grid_dims.x * overall_dimension_t { block_dims.x },
        grid_dims.y * overall_dimension_t { block_dims.y },
        grid_dims.z * overall_dimension_t { block_dims.z },
    };
}

constexpr overall_dimensions_t composite_dimensions_t::flatten() const { return grid * block; }
constexpr size_t composite_dimensions_t::volume() const { return flatten().volume(); }
constexpr size_t composite_dimensions_t::dimensionality() const { return flatten().dimensionality(); }
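
// Illustrative usage (an editor's sketch, not part of the original header):
// a grid of 2 x 2 blocks, each of 32 x 8 threads, covers 64 x 16 threads overall:
//
//     composite_dimensions_t cd { { 2, 2, 1 }, { 32, 8, 1 } };
//     assert(cd.flatten() == overall_dimensions_t(64, 16, 1));
//     assert(cd.volume() == 64 * 16);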

} // namespace grid

namespace memory {

#if CUDA_VERSION >= 10020
enum : bool {
    read_enabled   = true,
    read_disabled  = false,
    write_enabled  = true,
    write_disabled = false
};

struct access_permissions_t {
    bool read : 1;
    bool write : 1;

    operator CUmemAccess_flags() const noexcept
    {
        return read ?
            (write ? CU_MEM_ACCESS_FLAGS_PROT_READWRITE : CU_MEM_ACCESS_FLAGS_PROT_READ) :
            CU_MEM_ACCESS_FLAGS_PROT_NONE;
    }

    static access_permissions_t from_access_flags(CUmemAccess_flags access_flags)
    {
        access_permissions_t result;
        result.read = (access_flags & CU_MEM_ACCESS_FLAGS_PROT_READ);
        // Note: There is no separate write-only flag; write permission is
        // indicated by both bits of the READWRITE flag being set, so we must
        // not treat a read-only flag value as granting write access
        result.write = ((access_flags & CU_MEM_ACCESS_FLAGS_PROT_READWRITE) == CU_MEM_ACCESS_FLAGS_PROT_READWRITE);
        return result;
    }

    static constexpr access_permissions_t read_and_write()
    {
        return access_permissions_t{ read_enabled, write_enabled };
    }
};
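
// Illustrative usage (an editor's sketch, not part of the original header):
//
//     auto perms = access_permissions_t::from_access_flags(CU_MEM_ACCESS_FLAGS_PROT_READ);
//     assert(perms.read and not perms.write);
//     CUmemAccess_flags flags = access_permissions_t::read_and_write();
//     assert(flags == CU_MEM_ACCESS_FLAGS_PROT_READWRITE);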

namespace physical_allocation {

// TODO: Consider simply aliasing CUmemAllocationHandleType and using
// constexpr constants or anonymous enums
enum class shared_handle_kind_t : ::std::underlying_type<CUmemAllocationHandleType>::type {
#if CUDA_VERSION >= 11020
    no_export             = CU_MEM_HANDLE_TYPE_NONE,
#endif
    posix_file_descriptor = CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR,
    win32_handle          = CU_MEM_HANDLE_TYPE_WIN32,
    win32_kmt             = CU_MEM_HANDLE_TYPE_WIN32_KMT,
};

namespace detail_ {

template<shared_handle_kind_t SharedHandleKind> struct shared_handle_type_helper;

template <> struct shared_handle_type_helper<shared_handle_kind_t::posix_file_descriptor> { using type = int; };
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
template <> struct shared_handle_type_helper<shared_handle_kind_t::win32_handle> { using type = void *; };
#endif
// TODO: What about WIN32_KMT?

} // namespace detail_

template<shared_handle_kind_t SharedHandleKind>
using shared_handle_t = typename detail_::shared_handle_type_helper<SharedHandleKind>::type;
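
// Illustrative usage (an editor's sketch, not part of the original header):
// on Linux, a physical allocation is exported/imported as a POSIX file descriptor:
//
//     static_assert(::std::is_same<
//         shared_handle_t<shared_handle_kind_t::posix_file_descriptor>,
//         int>::value, "");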

} // namespace physical_allocation
#endif // CUDA_VERSION >= 10020
#if CUDA_VERSION >= 11020

namespace pool {

using handle_t = CUmemoryPool;
using shared_handle_kind_t = physical_allocation::shared_handle_kind_t;
using physical_allocation::shared_handle_t;

} // namespace pool
#endif // CUDA_VERSION >= 11020

namespace pointer {

using attribute_t = CUpointer_attribute;

} // namespace pointer

namespace device {

/// The numeric type which can represent the range of memory addresses on a CUDA device
using address_t = CUdeviceptr;

static_assert(sizeof(void *) == sizeof(device::address_t), "Unexpected address size");

/// Return a pointer's address as a numeric value of the type appropriate
/// for device memory
inline address_t address(const void* device_ptr) noexcept
{
    static_assert(sizeof(void*) == sizeof(address_t), "Incompatible sizes for a void pointer and memory::device::address_t");
    return reinterpret_cast<address_t>(device_ptr);
}

} // namespace device

inline void* as_pointer(device::address_t address) noexcept
{
    static_assert(sizeof(void*) == sizeof(device::address_t), "Incompatible sizes for a void pointer and memory::device::address_t");
    return reinterpret_cast<void*>(address);
}
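
// Illustrative usage (an editor's sketch, not part of the original header):
// round-tripping between a host-side pointer and the driver's numeric address type:
//
//     void* ptr = /* ... some device memory pointer ... */;
//     device::address_t addr = device::address(ptr);
//     assert(as_pointer(addr) == ptr);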

namespace shared {

using size_t = unsigned;

} // namespace shared

namespace managed {

enum class initial_visibility_t {
    to_all_devices,
    to_supporters_of_concurrent_managed_access,
};

using range_attribute_t = CUmem_range_attribute;

} // namespace managed

#if CUDA_VERSION >= 11070
enum class barrier_scope_t : typename ::std::underlying_type<CUstreamMemoryBarrier_flags>::type {
    device = CU_STREAM_MEMORY_BARRIER_TYPE_GPU,
    system = CU_STREAM_MEMORY_BARRIER_TYPE_SYS
};
#endif // CUDA_VERSION >= 11070

#if CUDA_VERSION >= 10000

namespace external {

using handle_t = CUexternalMemory;

struct subregion_spec_t {
    size_t offset;
    size_t size;
};

} // namespace external

#endif // CUDA_VERSION >= 10000

} // namespace memory

/// L1-vs-shared-memory balance option
enum class multiprocessor_cache_preference_t : ::std::underlying_type<CUfunc_cache_enum>::type {
    /// No preference for more L1 cache or for more shared memory; the API can do as it pleases
    no_preference = CU_FUNC_CACHE_PREFER_NONE,
    /// Divide the cache resources equally between actual L1 cache and shared memory
    equal_l1_and_shared_memory = CU_FUNC_CACHE_PREFER_EQUAL,
    /// Divide the cache resources to maximize available shared memory at the expense of L1 cache
    prefer_shared_memory_over_l1 = CU_FUNC_CACHE_PREFER_SHARED,
    /// Divide the cache resources to maximize available L1 cache at the expense of shared memory
    prefer_l1_over_shared_memory = CU_FUNC_CACHE_PREFER_L1,
    // aliases
    none = no_preference,
    prefer_shared = prefer_shared_memory_over_l1,
    prefer_l1 = prefer_l1_over_shared_memory,
};

/// A physical core (SM)'s shared memory has multiple "banks"; at most one
/// datum per bank may be accessed simultaneously
enum multiprocessor_shared_memory_bank_size_option_t
    : ::std::underlying_type<CUsharedconfig>::type
{
    device_default = CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE,
    four_bytes_per_bank = CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE,
    eight_bytes_per_bank = CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE
};

namespace device {

/// Numeric ID of a CUDA device used by the CUDA Runtime API
using id_t = CUdevice;

/// CUDA devices have both "attributes" and "properties"; this type
/// identifies individual device attributes
using attribute_t = CUdevice_attribute;
/// All CUDA device attributes (cuda::device::attribute_t) have a value of this type
using attribute_value_t = int;

namespace peer_to_peer {

/// While individual CUDA devices have individual "attributes" (attribute_t),
/// there are also attributes characterizing pairs of devices
using attribute_t = CUdevice_P2PAttribute;

} // namespace peer_to_peer

} // namespace device

namespace context {

using handle_t = CUcontext;

using flags_t = unsigned;

/// Scheduling policies the Runtime API may use when the host-side thread it
/// is running in needs to wait for results from a certain device
enum host_thread_sync_scheduling_policy_t : unsigned int {

    /// Default behavior; yield or spin based on a heuristic.
    heuristic = CU_CTX_SCHED_AUTO,

    /// Alias for the default behavior; see heuristic.
    default_ = heuristic,

    /// Keep control and spin-check for result availability.
    spin = CU_CTX_SCHED_SPIN,

    /// Block the thread until results are available.
    block = CU_CTX_SCHED_BLOCKING_SYNC,

    /// Yield control while waiting for results.
    yield = CU_CTX_SCHED_YIELD,

    /// see heuristic
    automatic = heuristic,
};

} // namespace context

namespace device {

using flags_t = context::flags_t;

namespace primary_context {

using handle_t = cuda::context::handle_t;

} // namespace primary_context

} // namespace device

using native_word_t = unsigned;
858 
859 namespace detail_ {
860 
861 template <typename T, typename U>
862 inline T identity_cast(U&& x)
863 {
864  static_assert(::std::is_same<
865  typename ::std::remove_reference<T>::type,
866  typename ::std::remove_reference<U>::type
867  >::value,
868  "Casting to a different type - don't use identity_cast");
869  return static_cast<T>(::std::forward<U>(x));
870 }
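
// Illustrative usage (an editor's sketch, not part of the original header):
// identity_cast only forwards a value; the static_assert rejects any attempt
// to change the (unreferenced) type:
//
//     int i = 5;
//     int j = identity_cast<int>(i); // fine
//     // identity_cast<long>(i);     // would fail the static_assert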

} // namespace detail_

using uuid_t = CUuuid;

namespace module {

using handle_t = CUmodule;

} // namespace module

namespace kernel {

using attribute_t = CUfunction_attribute;
using attribute_value_t = int;

// TODO: Is this really only for kernels, or can any device-side function be
// represented by a CUfunction?
using handle_t = CUfunction;

} // namespace kernel

// The C++ standard library doesn't offer ::std::dynarray (although it almost did),
// and we won't introduce our own here. So...
template <typename T>
using dynarray = ::std::vector<T>;

} // namespace cuda

#ifndef __CUDACC__
#ifndef __device__
#define __device__
#define __host__
#endif
#endif

#endif // CUDA_API_WRAPPERS_COMMON_TYPES_HPP_