cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
context.hpp
Go to the documentation of this file.
1 
6 #pragma once
7 #ifndef CUDA_API_WRAPPERS_CONTEXT_HPP_
8 #define CUDA_API_WRAPPERS_CONTEXT_HPP_
9 
10 #include "current_context.hpp"
11 #include "versions.hpp"
12 #include "error.hpp"
13 #include "constants.hpp"
14 #include "types.hpp"
15 
16 #include <string>
17 #include <utility>
18 
19 namespace cuda {
20 
22 class device_t;
23 class event_t;
24 class context_t;
25 class stream_t;
26 class module_t;
28 
29 namespace link {
30 struct options_t;
31 } // namespace link
32 
33 namespace context {
34 
37 using limit_t = CUlimit;
38 
42 
44 using shared_memory_bank_size_t = CUsharedconfig;
45 
53 
56 
61  constexpr bool is_trivial() const
62  {
63  return least == stream::default_priority and greatest == stream::default_priority;
64  }
65 };
66 
84  device::id_t device_id,
85  context::handle_t context_id,
86  bool take_ownership = false) noexcept;
87 
88 namespace detail_ {
89 
90 ::std::string identify(const context_t& context);
91 
92 inline limit_value_t get_limit(limit_t limit_id)
93 {
94  limit_value_t limit_value;
95  auto status = cuCtxGetLimit(&limit_value, limit_id);
96  throw_if_error_lazy(status, "Failed obtaining CUDA context limit value");
97  return limit_value;
98 }
99 
100 inline void set_limit(limit_t limit_id, limit_value_t new_value)
101 {
102  auto status = cuCtxSetLimit(limit_id, new_value);
103  throw_if_error_lazy(status, "Failed obtaining CUDA context limit value");
104 }
105 
106 constexpr flags_t inline make_flags(
107  host_thread_sync_scheduling_policy_t sync_scheduling_policy,
108  bool keep_larger_local_mem_after_resize)
109 {
110  return
111  sync_scheduling_policy // this enum value is also a valid bitmask
112  | (keep_larger_local_mem_after_resize ? CU_CTX_LMEM_RESIZE_TO_MAX : 0);
113 }
114 
115 // consider renaming this: device_id_of
116 inline device::id_t get_device_id(handle_t context_handle)
117 {
118  auto needed_push = current::detail_::push_if_not_on_top(context_handle);
119  auto device_id = current::detail_::get_device_id();
120  if (needed_push) {
121  current::detail_::pop();
122  }
123  return device_id;
124 }
125 
126 
127 context_t from_handle(
128  context::handle_t context_handle,
129  bool take_ownership = false);
130 
131 inline size_t total_memory(handle_t handle)
132 {
133  size_t total_mem_in_bytes;
134  auto status = cuMemGetInfo(nullptr, &total_mem_in_bytes);
135  throw_if_error_lazy(status, "Failed determining amount of total memory for " + identify(handle));
136  return total_mem_in_bytes;
137 
138 }
139 
140 inline size_t free_memory(handle_t handle)
141 {
142  size_t free_mem_in_bytes;
143  auto status = cuMemGetInfo(&free_mem_in_bytes, nullptr);
144  throw_if_error_lazy(status, "Failed determining amount of free memory for " + identify(handle));
145  return free_mem_in_bytes;
146 }
147 
148 inline void set_cache_preference(handle_t handle, multiprocessor_cache_preference_t preference)
149 {
150  auto status = cuCtxSetCacheConfig(static_cast<CUfunc_cache>(preference));
151  throw_if_error_lazy(status,
152  "Setting the multiprocessor L1/Shared Memory cache distribution preference to " +
153  ::std::to_string(static_cast<unsigned>(preference)) + " for " + identify(handle));
154 }
155 
156 inline multiprocessor_cache_preference_t cache_preference(handle_t handle)
157 {
158  CUfunc_cache preference;
159  auto status = cuCtxGetCacheConfig(&preference);
160  throw_if_error_lazy(status,
161  "Obtaining the multiprocessor L1/Shared Memory cache distribution preference for " + identify(handle));
162  return static_cast<multiprocessor_cache_preference_t>(preference);
163 }
164 
165 #if CUDA_VERSION < 12030
166 inline shared_memory_bank_size_t shared_memory_bank_size(handle_t handle)
167 {
168  CUsharedconfig bank_size;
169  auto status = cuCtxGetSharedMemConfig(&bank_size);
170  throw_if_error_lazy(status, "Obtaining the multiprocessor shared memory bank size for " + identify(handle));
171  return static_cast<shared_memory_bank_size_t>(bank_size);
172 }
173 #endif // CUDA_VERSION < 12030
174 
175 #if CUDA_VERSION < 12030
176 inline void set_shared_memory_bank_size(handle_t handle, shared_memory_bank_size_t bank_size)
177 {
178  auto status = cuCtxSetSharedMemConfig(static_cast<CUsharedconfig>(bank_size));
179  throw_if_error_lazy(status, "Setting the multiprocessor shared memory bank size for " + identify(handle));
180 }
181 #endif // CUDA_VERSION < 12030
182 
183 
184 inline void synchronize(context::handle_t handle)
185 {
186  CAW_SET_SCOPE_CONTEXT(handle);
187  context::current::detail_::synchronize(handle);
188 }
189 
190 inline void synchronize(device::id_t device_id, context::handle_t handle)
191 {
192  CAW_SET_SCOPE_CONTEXT(handle);
193  context::current::detail_::synchronize(device_id, handle);
194 }
195 
196 inline status_t destroy_nothrow(handle_t handle) noexcept
197 {
198  return cuCtxDestroy(handle);
199 }
200 
201 inline void destroy(handle_t handle)
202 {
203  auto status = destroy_nothrow(handle);
204  throw_if_error_lazy(status, "Failed destroying " + identify(handle));
205 }
206 
207 inline void destroy(handle_t handle, device::id_t device_index)
208 {
209  auto status = destroy_nothrow(handle);
210  throw_if_error_lazy(status, "Failed destroying " + identify(handle, device_index));
211 }
212 
213 inline context::flags_t get_flags(handle_t handle)
214 {
215  CAW_SET_SCOPE_CONTEXT(handle);
216  return context::current::detail_::get_flags();
217 }
218 
219 } // namespace detail_
220 
221 } // namespace context
222 
232 inline void synchronize(const context_t& context);
233 
249 class context_t {
250 protected: // types
252  using flags_type = context::flags_t;
253 
254 public: // types
255 
256  static_assert(
257  ::std::is_same<::std::underlying_type<CUsharedconfig>::type, ::std::underlying_type<cudaSharedMemConfig>::type>::value,
258  "Unexpected difference between enumerators used for the same purpose by the CUDA runtime and the CUDA driver");
259 
260 public: // inner classes
261 
268 #if CUDA_VERSION >= 11040
269  public: // data types
270  using execution_graph_related_attribute_t = CUgraphMem_attribute;
271 #endif // CUDA_VERSION >= 11040
272 
273 
274  protected: // data members
275  const device::id_t device_id_;
276  const context::handle_t context_handle_;
277 
278  public:
279  global_memory_type(device::id_t device_id, context::handle_t context_handle)
280  : device_id_(device_id), context_handle_(context_handle)
281  {}
282 
284  device_t associated_device() const;
285 
287  context_t associated_context() const;
288 
295  memory::region_t allocate(size_t size_in_bytes) const;
296 
317  memory::region_t allocate_managed(
318  size_t size_in_bytes,
320  cuda::memory::managed::initial_visibility_t::to_supporters_of_concurrent_managed_access) const;
321 
325  size_t amount_total() const
326  {
327  CAW_SET_SCOPE_CONTEXT(context_handle_);
328  return context::detail_::total_memory(context_handle_);
329  }
330 
334  size_t amount_free() const
335  {
336  CAW_SET_SCOPE_CONTEXT(context_handle_);
337  return context::detail_::free_memory(context_handle_);
338  }
339 
340 #if CUDA_VERSION >= 11040
341 
346  void free_unused_execution_graph_memory() const
348  {
349  auto status = cuDeviceGraphMemTrim(device_id_);
350  throw_if_error_lazy(status,
351  "Trimming memory used for CUDA execution graphs on " + device::detail_::identify(device_id_));
352  }
353 
357  size_t get_execution_graph_related_attribute(execution_graph_related_attribute_t attribute) const
358  {
359  cuuint64_t result;
360  auto status = cuDeviceGetGraphMemAttribute(device_id_, attribute, &result);
361  throw_if_error_lazy(status, "Failed obtaining an execution-graph-related memory attribute for "
362  + device::detail_::identify(device_id_));
363  return result;
364  }
365 
366  void reset_execution_graph_usage_high_watermark() const
367  {
368  cuuint64_t value_{0};
369  auto status = cuDeviceSetGraphMemAttribute(device_id_, CU_GRAPH_MEM_ATTR_USED_MEM_HIGH, &value_);
370  throw_if_error_lazy(status, "Failed setting an execution-graph-related memory attribute for "
371  + device::detail_::identify(device_id_));
372  }
374 #endif // CUDA_VERSION >= 11040
375  }; // class global_memory_type
376 
377 
378 public: // data member non-mutator getters
379 
380  context::handle_t handle() const noexcept { return handle_; }
381  device::id_t device_id() const noexcept { return device_id_; }
382  device_t device() const;
383 
386  bool is_owning() const noexcept { return owning_; }
387 
392  size_t total_memory() const
393  {
394  CAW_SET_SCOPE_CONTEXT(handle_);
395  return context::detail_::total_memory(handle_);
396  }
397 
404  size_t free_memory() const
405  {
406  CAW_SET_SCOPE_CONTEXT(handle_);
407  return context::detail_::free_memory(handle_);
408  }
409 
410 public: // other non-mutator methods
411 
412  stream_t default_stream() const;
413 
414  template <typename Kernel, typename ... KernelParameters>
415  void launch(
416  Kernel kernel,
417  launch_configuration_t launch_configuration,
418  KernelParameters... parameters) const;
419 
425  {
426  CAW_SET_SCOPE_CONTEXT(handle_);
427  return context::detail_::cache_preference(handle_);
428  }
429 
431  size_t stack_size() const
432  {
433  CAW_SET_SCOPE_CONTEXT(handle_);
434  return context::detail_::get_limit(CU_LIMIT_STACK_SIZE);
435  }
436 
440  {
441  CAW_SET_SCOPE_CONTEXT(handle_);
442  return context::detail_::get_limit(CU_LIMIT_PRINTF_FIFO_SIZE);
443  }
444 
448  {
449  CAW_SET_SCOPE_CONTEXT(handle_);
450  return context::detail_::get_limit(CU_LIMIT_MALLOC_HEAP_SIZE);
451  }
452 
461  {
462  CAW_SET_SCOPE_CONTEXT(handle_);
463  return context::detail_::get_limit(CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH);
464  }
465 
468  {
469  return { device_id_, handle_ };
470  }
471 
478  {
479  CAW_SET_SCOPE_CONTEXT(handle_);
480  return context::detail_::get_limit(CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT);
481  }
482 
483 #if CUDA_VERSION >= 10000
484 
491  context::limit_value_t l2_fetch_granularity() const
492  {
493  CAW_SET_SCOPE_CONTEXT(handle_);
494  return context::detail_::get_limit(CU_LIMIT_MAX_L2_FETCH_GRANULARITY);
495  }
496 #endif
497 
498 #if CUDA_VERSION < 12030
499 
506  {
507  CAW_SET_SCOPE_CONTEXT(handle_);
508  return context::detail_::shared_memory_bank_size(handle_);
509  }
510 #endif // CUDA_VERSION < 12030
511 
514  bool is_current() const
515  {
516  return context::current::detail_::is_(handle_);
517  }
518 
520  bool is_primary() const;
521 
524  {
525  CAW_SET_SCOPE_CONTEXT(handle_);
527  auto status = cuCtxGetStreamPriorityRange(&result.least, &result.greatest);
528  throw_if_error_lazy(status, "Obtaining the priority range for streams within " +
529  context::detail_::identify(*this));
530  return result;
531  }
532 
536  {
537  CAW_SET_SCOPE_CONTEXT(handle_);
538  return context::detail_::get_limit(limit_id);
539  }
540 
548  {
549  unsigned int raw_version;
550  auto status = cuCtxGetApiVersion(handle_, &raw_version);
551  throw_if_error_lazy(status, "Failed obtaining the API version for " + context::detail_::identify(*this));
552  return version_t::from_single_number(static_cast<combined_version_t>(raw_version));
553  }
554 
555 protected:
557  context::flags_t flags() const
558  {
559  return context::detail_::get_flags(handle_);
560  }
562 
563 public: // methods which mutate the context, but not its wrapper
572  {
573  return context::host_thread_sync_scheduling_policy_t(flags() & CU_CTX_SCHED_MASK);
574  }
575 
576  bool keeping_larger_local_mem_after_resize() const
577  {
578  return flags() & CU_CTX_LMEM_RESIZE_TO_MAX;
579  }
580 
583  stream_t create_stream(
584  bool will_synchronize_with_default_stream,
586 
589  event_t create_event(
590  bool uses_blocking_sync = event::sync_by_busy_waiting, // Yes, that's the runtime default
591  bool records_timing = event::do_record_timings,
593 
597  template <typename ContiguousContainer,
598  cuda::detail_::enable_if_t<detail_::is_kinda_like_contiguous_container<ContiguousContainer>::value, bool> = true>
599  module_t create_module(ContiguousContainer module_data, const link::options_t& link_options) const;
600 
601  template <typename ContiguousContainer,
602  cuda::detail_::enable_if_t<detail_::is_kinda_like_contiguous_container<ContiguousContainer>::value, bool> = true>
603  module_t create_module(ContiguousContainer module_data) const;
605 
606 public: // Methods which don't mutate the context, but affect the device itself
607 
608 
610  void enable_access_to(const context_t& peer) const;
611 
613  void disable_access_to(const context_t& peer) const;
614 
617  {
618  CAW_SET_SCOPE_CONTEXT(handle_);
619 #if (CUDA_VERSION >= 11000)
620  auto status = cuCtxResetPersistingL2Cache();
621  throw_if_error_lazy(status, "Failed resetting/clearing the persisting L2 cache memory");
622 #endif
623  throw cuda::runtime_error(
624  cuda::status::insufficient_driver,
625  "Resetting/clearing the persisting L2 cache memory is not supported when compiling CUDA versions lower than 11.0");
626  }
627 
628 public: // other methods which don't mutate this class as a reference, but do mutate the context
629 
630 #if CUDA_VERSION < 12030
631 
638  {
639  CAW_SET_SCOPE_CONTEXT(handle_);
640  context::detail_::set_shared_memory_bank_size(handle_, bank_size);
641  }
642 #endif // CUDA_VERSION < 12030
643 
651  {
652  CAW_SET_SCOPE_CONTEXT(handle_);
653  context::detail_::set_cache_preference(handle_, preference);
654  }
655 
658  void set_limit(context::limit_t limit_id, context::limit_value_t new_value) const
659  {
660  CAW_SET_SCOPE_CONTEXT(handle_);
661  return context::detail_::set_limit(limit_id, new_value);
662  }
663 
667  void stack_size(context::limit_value_t new_value) const
668  {
669  return set_limit(CU_LIMIT_STACK_SIZE, new_value);
670  }
671 
672  void printf_buffer_size(context::limit_value_t new_value) const
673  {
674  return set_limit(CU_LIMIT_PRINTF_FIFO_SIZE, new_value);
675  }
676 
677  void memory_allocation_heap_size(context::limit_value_t new_value) const
678  {
679  return set_limit(CU_LIMIT_MALLOC_HEAP_SIZE, new_value);
680  }
681 
682  void set_maximum_depth_of_child_grid_sync_calls(context::limit_value_t new_value) const
683  {
684  return set_limit(CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH, new_value);
685  }
686 
687  void set_maximum_outstanding_kernel_launches(context::limit_value_t new_value) const
688  {
689  return set_limit(CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT, new_value);
690  }
691 
698  void synchronize() const
699  {
700  cuda::synchronize(*this);
701  }
702 
703 protected: // constructors
704 
705  context_t(
706  device::id_t device_id,
707  context::handle_t context_id,
708  bool take_ownership) noexcept
709  : device_id_(device_id), handle_(context_id), owning_(take_ownership)
710  { }
711 
712 public: // friendship
713 
715  friend context_t context::wrap(
716  device::id_t device_id,
717  context::handle_t context_id,
718  bool take_ownership) noexcept;
720 
721 public: // constructors and destructor
722 
723  context_t(const context_t& other) :
724  context_t(other.device_id_, other.handle_, false)
725  { };
726 
727  context_t(context_t&& other) noexcept:
728  context_t(other.device_id_, other.handle_, other.owning_)
729  {
730  other.owning_ = false;
731  };
732 
733  ~context_t() DESTRUCTOR_EXCEPTION_SPEC
734  {
735  if (not owning_) { return; }
736 #if THROW_IN_DESTRUCTORS
737  context::detail_::destroy(handle_, device_id_);
738 #else
739  context::detail_::destroy_nothrow(handle_);
740 #endif
741  }
742 
743 public: // operators
744 
745  context_t& operator=(const context_t&) = delete;
746  context_t& operator=(context_t&& other) noexcept
747  {
748  ::std::swap(device_id_, other.device_id_);
749  ::std::swap(handle_, other.handle_);
750  ::std::swap(owning_, other.owning_);
751  return *this;
752  }
753 
754 protected: // data members
755  device::id_t device_id_;
756  context::handle_t handle_;
758  bool owning_;
759  // this field is mutable only for enabling move construction; other
760  // than in that case it must not be altered
761 
762  // TODO: Should we hold a field indicating whether this context is
763  // primary or not?
764 };
765 
768 inline bool operator==(const context_t& lhs, const context_t& rhs) noexcept
769 {
770  // Note: Contexts on different devices cannot have the same context handle,
771  // so this is redundant, but let's be extra safe:
772  return lhs.device_id() == rhs.device_id() and lhs.handle() == rhs.handle();
773 }
774 
775 inline bool operator!=(const context_t& lhs, const context_t& rhs) noexcept
776 {
777  return not (lhs == rhs);
778 }
780 
781 namespace context {
782 
786 inline context_t wrap(
789  device::id_t device_id,
790  handle_t context_id,
791  bool take_ownership) noexcept
792 {
793  return { device_id, context_id, take_ownership };
794 }
795 
796 namespace detail_ {
797 
798 inline context_t from_handle(
799  context::handle_t context_handle,
800  bool take_ownership)
801 {
802  device::id_t device_id = get_device_id(context_handle);
803  return wrap(device_id, context_handle, take_ownership);
804 }
805 
806 inline handle_t create_and_push(
807  device::id_t device_id,
808  host_thread_sync_scheduling_policy_t sync_scheduling_policy = automatic,
809  bool keep_larger_local_mem_after_resize = false)
810 {
811  auto flags = context::detail_::make_flags(
812  sync_scheduling_policy,
813  keep_larger_local_mem_after_resize);
814  handle_t handle;
815 #if CUDA_VERSION >= 13000
816  // TODO: Introduce support for setting meaningful values for these parameters
817  CUctxCreateParams creation_params = {};
818  auto status = cuCtxCreate(&handle, &creation_params, flags, device_id);
819 #else
820  auto status = cuCtxCreate(&handle, flags, device_id);
821 #endif
822  throw_if_error_lazy(status, "failed creating a CUDA context associated with "
823  + device::detail_::identify(device_id));
824  return handle;
825 }
826 
827 } // namespace detail_
828 
850  const device_t& device,
851  host_thread_sync_scheduling_policy_t sync_scheduling_policy = heuristic,
852  bool keep_larger_local_mem_after_resize = false);
853 
864 context_t create_and_push(
865  const device_t& device,
866  host_thread_sync_scheduling_policy_t sync_scheduling_policy = heuristic,
867  bool keep_larger_local_mem_after_resize = false);
868 
869 namespace current {
870 
876 inline context_t get()
877 {
878  auto handle = detail_::get_handle();
879  if (handle == context::detail_::none) {
880  throw ::std::runtime_error("Attempt to obtain the current CUDA context when no context is current.");
881  }
882  return context::detail_::from_handle(handle);
883 }
884 
892 inline void set(const context_t& context)
893 {
894  return detail_::set(context.handle());
895 }
896 
899 inline bool push_if_not_on_top(const context_t& context)
900 {
901  return context::current::detail_::push_if_not_on_top(context.handle());
902 }
903 
911 inline void push(const context_t& context)
912 {
913  return context::current::detail_::push(context.handle());
914 }
915 
922 inline context_t pop()
923 {
924  static constexpr const bool do_not_take_ownership { false };
925  // Unfortunately, since we don't store the device IDs of contexts
926  // on the stack, this incurs an extra API call beyond just the popping...
927  auto handle = context::current::detail_::pop();
928  auto device_id = context::detail_::get_device_id(handle);
929  return context::wrap(device_id, handle, do_not_take_ownership);
930 }
931 
932 namespace detail_ {
933 
937 handle_t push_default_if_missing();
938 
945 inline context_t get_with_fallback_push()
946 {
947  auto handle = push_default_if_missing();
948  return context::detail_::from_handle(handle);
949 }
950 
951 
952 } // namespace detail_
953 
954 } // namespace current
955 
957 bool is_primary(const context_t& context);
958 
959 namespace detail_ {
960 
961 inline ::std::string identify(const context_t& context)
962 {
963  return identify(context.handle(), context.device_id());
964 }
965 
966 } // namespace detail_
967 
968 } // namespace context
969 
980 inline void synchronize(const context_t& context)
981 {
982  context::detail_::synchronize(context.device_id(), context.handle());
983 }
984 
985 } // namespace cuda
986 
987 #endif // CUDA_API_WRAPPERS_CONTEXT_HPP_
void reset_persisting_l2_cache() const
Clear the L2 cache memory which persists between invocations of kernels.
Definition: context.hpp:616
version_t api_version() const
Returns a version number corresponding to the capabilities of this context, which can be used can use...
Definition: context.hpp:547
size_t stack_size() const
Definition: context.hpp:431
Proxy class for a CUDA stream.
Definition: stream.hpp:258
CUsharedconfig shared_memory_bank_size_t
Choice of the number of bytes in each bank of the shared memory.
Definition: context.hpp:44
Wrapper class for a CUDA context.
Definition: context.hpp:249
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
int priority_t
CUDA streams have a scheduling priority, with lower values meaning higher priority.
Definition: types.hpp:243
detail_::region_helper< memory::region_t > region_t
A child class of the generic region_t with some managed-memory-specific functionality.
Definition: memory.hpp:1974
The full set of possible configuration parameters for launching a kernel on a GPU.
Definition: launch_configuration.hpp:69
CUcontext handle_t
Raw CUDA driver handle for a context; see {context_t}.
Definition: types.hpp:880
Wrapper class for a CUDA event.
Definition: event.hpp:147
A class to create a faux member in a context_t, in lieu of an in-class namespace (which C++ does not ...
Definition: context.hpp:267
A range of priorities supported by a CUDA context; ranges from the higher numeric value to the lower...
Definition: context.hpp:50
context::limit_value_t get_limit(context::limit_t limit_id) const
Get one of the configurable limits for this context (and events, streams, kernels, etc.
Definition: context.hpp:535
context::stream_priority_range_t stream_priority_range() const
Get the range of priority values one can set for streams in this context.
Definition: context.hpp:523
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:852
Wrapper class for a CUDA code module.
Definition: module.hpp:126
size_t amount_free() const
Amount of free global memory on the CUDA device's primary context.
Definition: context.hpp:334
stream::priority_t least
Higher numeric value, lower priority.
Definition: context.hpp:52
CUlimit limit_t
Features of contexts which can be configured individually during a context's lifetime.
Definition: context.hpp:37
void launch(Kernel &&kernel, launch_configuration_t launch_configuration, KernelParameters &&... parameters)
Variant of enqueue_launch for use with the default stream in the current context. ...
Definition: kernel_launch.hpp:396
void set_limit(context::limit_t limit_id, context::limit_value_t new_value) const
Set one of the configurable limits for this context (and events, streams, kernels, etc.
Definition: context.hpp:658
void set_shared_memory_bank_size(context::shared_memory_bank_size_t bank_size) const
Sets the shared memory bank size, described in this Parallel-for-all blog entry
Definition: context.hpp:637
constexpr bool is_trivial() const
When true, stream prioritization is not supported, i.e.
Definition: context.hpp:61
bool push_if_not_on_top(const context_t &context)
Push a (reference to a) context onto the top of the context stack - unless that context is already at...
Definition: context.hpp:899
void synchronize() const
Avoid executing any additional instructions on this thread until all work on all streams in this cont...
Definition: context.hpp:698
Wrappers for Runtime API functions involving versions - of the CUDA runtime and of the CUDA driver...
size_t amount_total() const
Amount of total global memory on the CUDA device's primary context.
Definition: context.hpp:325
host_thread_sync_scheduling_policy_t
Scheduling policies the CUDA driver may use when the host-side thread it is running in needs to wait ...
Definition: types.hpp:886
context::limit_value_t memory_allocation_heap_size() const
Definition: context.hpp:447
::std::size_t size_t
A size type for use throughout the wrappers library (except when specific API functions limit the siz...
Definition: types.hpp:78
global_memory_type memory() const
Get a wrapper object for this context's associated device-global memory.
Definition: context.hpp:467
context_t pop()
Pop the top off of the context stack.
Definition: context.hpp:922
size_t limit_value_t
Type for the actual values for context (see limit_t for the possible kinds of limits whose value can ...
Definition: context.hpp:41
A (base?) class for exceptions raised by CUDA code; these errors are thrown by essentially all CUDA R...
Definition: error.hpp:282
void synchronize(const context_t &context)
Waits for all previously-scheduled tasks on all streams (= queues) in a CUDA context to conclude...
Definition: context.hpp:980
multiprocessor_cache_preference_t
L1-vs-shared-memory balance option.
Definition: types.hpp:806
void push(const context_t &context)
Push a (reference to a) context onto the top of the context stack.
Definition: context.hpp:911
context::host_thread_sync_scheduling_policy_t sync_scheduling_policy() const
Gets the synchronization policy to be used for threads synchronizing with this CUDA context...
Definition: context.hpp:571
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we've failed - which also ensures no string is constructed unle...
Definition: error.hpp:327
bool operator==(const context_t &lhs, const context_t &rhs) noexcept
Definition: context.hpp:768
multiprocessor_cache_preference_t cache_preference() const
Determines the balance between L1 space and shared memory space set for kernels executing within this...
Definition: context.hpp:424
static version_t from_single_number(combined_version_t combined_version) noexcept
Parse the combined single-number representation, separating it.
Definition: versions.hpp:46
Facilities for exception-based handling of Runtime and Driver API errors, including a basic exception...
A structure representing a CUDA release version.
Definition: versions.hpp:39
Can be shared between processes. Must not be able to record timings.
Definition: constants.hpp:96
Fundamental CUDA-related constants and enumerations, not dependent on any more complex abstractions...
the scheduling priority of a stream created without specifying any other priority value ...
Definition: types.hpp:246
size_t total_memory() const
The amount of total global device memory available to this context, including memory already allocate...
Definition: context.hpp:392
The thread calling event_.synchronize() will enter a busy-wait loop; this (might) minimize delay betw...
Definition: constants.hpp:70
bool is_owning() const noexcept
Definition: context.hpp:386
context::limit_value_t maximum_outstanding_kernel_launches() const
Definition: context.hpp:477
Can only be used by the process which created it.
Definition: constants.hpp:95
size_t free_memory() const
The amount of unallocated global device memory available to this context and not yet allocated...
Definition: context.hpp:404
void set_cache_preference(multiprocessor_cache_preference_t preference) const
Controls the balance between L1 space and shared memory space for kernels executing within this conte...
Definition: context.hpp:650
bool is_primary(const context_t &context)
Definition: context.hpp:49
bool is_current() const
Definition: context.hpp:514
context::limit_value_t maximum_depth_of_child_grid_sync_calls() const
Definition: context.hpp:460
void stack_size(context::limit_value_t new_value) const
Set the limit on the size of the stack a kernel thread can use when running.
Definition: context.hpp:667
context::shared_memory_bank_size_t shared_memory_bank_size() const
Returns the shared memory bank size, as described in this Parallel-for-all blog entry ...
Definition: context.hpp:505
Wrapper class for a CUDA device.
Definition: device.hpp:135
initial_visibility_t
The choices of which categories CUDA devices must a managed memory region be visible to...
Definition: types.hpp:755
Fundamental CUDA-related type definitions.
CUresult status_t
Indicates either the result (success or error index) of a CUDA Runtime or Driver API call...
Definition: types.hpp:74
stream::priority_t greatest
Lower numeric value, higher priority.
Definition: context.hpp:55
context::limit_value_t printf_buffer_size() const
Definition: context.hpp:439