cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
context.hpp
Go to the documentation of this file.
1 
6 #pragma once
7 #ifndef CUDA_API_WRAPPERS_CONTEXT_HPP_
8 #define CUDA_API_WRAPPERS_CONTEXT_HPP_
9 
10 #include "current_context.hpp"
11 #include "versions.hpp"
12 #include "error.hpp"
13 #include "constants.hpp"
14 #include "types.hpp"
15 
16 #include <string>
17 #include <utility>
18 
19 namespace cuda {
20 
// Forward declarations of the library's main proxy classes, so they can be
// referenced here before their full definitions (in their respective headers).
class device_t;
class event_t;
class context_t;
class stream_t;
class module_t;

namespace link {
// Options for linking compiled device code; full definition elsewhere
struct options_t;
} // namespace link
32 
namespace context {

/// Features of contexts which can be configured individually during a
/// context's lifetime
using limit_t = CUlimit;

/// Choice of the number of bytes in each bank of the shared memory
using shared_memory_bank_size_t = CUsharedconfig;

	/// True when stream prioritization is effectively unsupported, i.e. when
	/// the priority range is degenerate: both of its ends are the default
	/// stream priority.
	constexpr bool is_trivial() const
	{
		return least == stream::default_priority and greatest == stream::default_priority;
	}
}; // struct stream_priority_range_t
66 
84  device::id_t device_id,
85  context::handle_t context_id,
86  bool take_ownership = false) noexcept;
87 
namespace detail_ {

// Produce a human-readable identification of a context, for error messages
::std::string identify(const context_t& context);
92 inline limit_value_t get_limit(limit_t limit_id)
93 {
94  limit_value_t limit_value;
95  auto status = cuCtxGetLimit(&limit_value, limit_id);
96  throw_if_error_lazy(status, "Failed obtaining CUDA context limit value");
97  return limit_value;
98 }
99 
100 inline void set_limit(limit_t limit_id, limit_value_t new_value)
101 {
102  auto status = cuCtxSetLimit(limit_id, new_value);
103  throw_if_error_lazy(status, "Failed obtaining CUDA context limit value");
104 }
105 
// Combine the individual context settings into the single flags word which
// the driver's context-creation call expects.
constexpr flags_t inline make_flags(
	host_thread_sync_scheduling_policy_t sync_scheduling_policy,
	bool keep_larger_local_mem_after_resize)
{
	return
		sync_scheduling_policy // this enum value is also a valid bitmask
		| (keep_larger_local_mem_after_resize ? CU_CTX_LMEM_RESIZE_TO_MAX : 0);
}
114 
// consider renaming this: device_id_of
// Determine which device a context is associated with, by briefly making the
// context current (only if it is not already at the top of the stack) and
// querying the driver; the context stack is restored before returning.
inline device::id_t get_device_id(handle_t context_handle)
{
	auto needed_push = current::detail_::push_if_not_on_top(context_handle);
	auto device_id = current::detail_::get_device_id();
	if (needed_push) {
		// undo our push, restoring the caller's context stack
		current::detail_::pop();
	}
	return device_id;
}
125 
126 
// Construct a context_t proxy from a raw handle, determining the associated
// device via an extra driver call; defined further below in this file.
context_t from_handle(
	context::handle_t context_handle,
	bool take_ownership = false);
130 
131 inline size_t total_memory(handle_t handle)
132 {
133  size_t total_mem_in_bytes;
134  auto status = cuMemGetInfo(nullptr, &total_mem_in_bytes);
135  throw_if_error_lazy(status, "Failed determining amount of total memory for " + identify(handle));
136  return total_mem_in_bytes;
137 
138 }
139 
140 inline size_t free_memory(handle_t handle)
141 {
142  size_t free_mem_in_bytes;
143  auto status = cuMemGetInfo(&free_mem_in_bytes, nullptr);
144  throw_if_error_lazy(status, "Failed determining amount of free memory for " + identify(handle));
145  return free_mem_in_bytes;
146 }
147 
// Set the preferred balance between L1 cache and shared memory for kernels in
// the current context; the handle parameter is only used for error reporting.
inline void set_cache_preference(handle_t handle, multiprocessor_cache_preference_t preference)
{
	auto status = cuCtxSetCacheConfig(static_cast<CUfunc_cache>(preference));
	throw_if_error_lazy(status,
		"Setting the multiprocessor L1/Shared Memory cache distribution preference to " +
		::std::to_string(static_cast<unsigned>(preference)) + " for " + identify(handle));
}
155 
156 inline multiprocessor_cache_preference_t cache_preference(handle_t handle)
157 {
158  CUfunc_cache preference;
159  auto status = cuCtxGetCacheConfig(&preference);
160  throw_if_error_lazy(status,
161  "Obtaining the multiprocessor L1/Shared Memory cache distribution preference for " + identify(handle));
162  return static_cast<multiprocessor_cache_preference_t>(preference);
163 }
164 
#if CUDA_VERSION < 12030
// Obtain the shared memory bank size of the current context; the handle
// parameter is only used for error reporting. The driver API no longer
// offers this call for CUDA 12.3 and later - hence the version guard.
inline shared_memory_bank_size_t shared_memory_bank_size(handle_t handle)
{
	CUsharedconfig bank_size;
	auto status = cuCtxGetSharedMemConfig(&bank_size);
	throw_if_error_lazy(status, "Obtaining the multiprocessor shared memory bank size for " + identify(handle));
	return static_cast<shared_memory_bank_size_t>(bank_size);
}
#endif // CUDA_VERSION < 12030
174 
#if CUDA_VERSION < 12030
// Set the shared memory bank size of the current context; the handle
// parameter is only used for error reporting. The driver API no longer
// offers this call for CUDA 12.3 and later - hence the version guard.
inline void set_shared_memory_bank_size(handle_t handle, shared_memory_bank_size_t bank_size)
{
	auto status = cuCtxSetSharedMemConfig(static_cast<CUsharedconfig>(bank_size));
	throw_if_error_lazy(status, "Setting the multiprocessor shared memory bank size for " + identify(handle));
}
#endif // CUDA_VERSION < 12030
182 
183 
// Wait for all previously-scheduled work in the context to conclude; the
// context is made current for the scope of the call (if it wasn't already).
inline void synchronize(context::handle_t handle)
{
	CAW_SET_SCOPE_CONTEXT(handle);
	context::current::detail_::synchronize(handle);
}
189 
// Overload which also forwards the device ID - presumably so the callee can
// produce a more informative error message without an extra driver call.
inline void synchronize(device::id_t device_id, context::handle_t handle)
{
	CAW_SET_SCOPE_CONTEXT(handle);
	context::current::detail_::synchronize(device_id, handle);
}
195 
// Destroy the driver context with the given raw handle; throws on failure
inline void destroy(handle_t handle)
{
	auto status = cuCtxDestroy(handle);
	throw_if_error_lazy(status, "Failed destroying " + identify(handle));
}

// Overload which also knows the device index, allowing for a more
// informative error message on failure
inline void destroy(handle_t handle, device::id_t device_index)
{
	auto status = cuCtxDestroy(handle);
	throw_if_error_lazy(status, "Failed destroying " + identify(handle, device_index));
}
207 
// Obtain the flags of the context with the given handle; the context is made
// current for the scope of the call, since the driver only exposes the flags
// of the current context.
inline context::flags_t get_flags(handle_t handle)
{
	CAW_SET_SCOPE_CONTEXT(handle);
	return context::current::detail_::get_flags();
}
213 
214 } // namespace detail_
215 
216 } // namespace context
217 
227 inline void synchronize(const context_t& context);
228 
244 class context_t {
245 protected: // types
247  using flags_type = context::flags_t;
248 
249 public: // types
250 
251  static_assert(
252  ::std::is_same<::std::underlying_type<CUsharedconfig>::type, ::std::underlying_type<cudaSharedMemConfig>::type>::value,
253  "Unexpected difference between enumerators used for the same purpose by the CUDA runtime and the CUDA driver");
254 
255 public: // inner classes
256 
#if CUDA_VERSION >= 11040
	public: // data types
		/// Attributes of the memory the device reserves for execution graphs
		using execution_graph_related_attribute_t = CUgraphMem_attribute;
#endif // CUDA_VERSION >= 11040


	protected: // data members
		// The memory proxy keeps its own copies of the device/context
		// identifiers rather than a back-reference to its parent context_t
		const device::id_t device_id_;
		const context::handle_t context_handle_;

	public:
		/// Construct the faux-member memory proxy for a given device and context
		global_memory_type(device::id_t device_id, context::handle_t context_handle)
		: device_id_(device_id), context_handle_(context_handle)
		{}
277 
279  device_t associated_device() const;
280 
282  context_t associated_context() const;
283 
290  memory::region_t allocate(size_t size_in_bytes);
291 
312  memory::region_t allocate_managed(
313  size_t size_in_bytes,
315  cuda::memory::managed::initial_visibility_t::to_supporters_of_concurrent_managed_access);
316 
	/// Amount of total global memory on the associated device, in bytes
	size_t amount_total() const
	{
		CAW_SET_SCOPE_CONTEXT(context_handle_);
		return context::detail_::total_memory(context_handle_);
	}

	/// Amount of free (unallocated) global memory on the associated device, in bytes
	size_t amount_free() const
	{
		CAW_SET_SCOPE_CONTEXT(context_handle_);
		return context::detail_::free_memory(context_handle_);
	}
334 
#if CUDA_VERSION >= 11040

	/// Release memory the device has reserved for CUDA execution graphs but
	/// which is not currently in use
	void free_unused_execution_graph_memory() const
	{
		auto status = cuDeviceGraphMemTrim(device_id_);
		throw_if_error_lazy(status,
			"Trimming memory used for CUDA execution graphs on " + device::detail_::identify(device_id_));
	}

	/// Obtain one of the attributes of the memory the device uses for
	/// execution graphs; see @ref execution_graph_related_attribute_t
	size_t get_execution_graph_related_attribute(execution_graph_related_attribute_t attribute) const
	{
		cuuint64_t result;
		auto status = cuDeviceGetGraphMemAttribute(device_id_, attribute, &result);
		throw_if_error_lazy(status, "Failed obtaining an execution-graph-related memory attribute for "
			+ device::detail_::identify(device_id_));
		return result;
	}

	/// Reset (to zero) the high-watermark of memory used for execution graphs
	void reset_execution_graph_usage_high_watermark() const
	{
		cuuint64_t value_{0};
		auto status = cuDeviceSetGraphMemAttribute(device_id_, CU_GRAPH_MEM_ATTR_USED_MEM_HIGH, &value_);
		throw_if_error_lazy(status, "Failed setting an execution-graph-related memory attribute for "
			+ device::detail_::identify(device_id_));
	}
#endif // CUDA_VERSION >= 11040
}; // class global_memory_type
371 
372 
public: // data member non-mutator getters

	/// The raw CUDA driver handle of this context
	context::handle_t handle() const noexcept { return handle_; }
	/// The numeric ID of the device with which this context is associated
	device::id_t device_id() const noexcept { return device_id_; }
	/// A proxy object for the device with which this context is associated
	device_t device() const;

	/// True if this wrapper is responsible for destroying the context on destruction
	bool is_owning() const noexcept { return owning_; }

	/// The amount of total global memory, in bytes, on this context's device
	/// (including memory which has already been allocated)
	size_t total_memory() const
	{
		CAW_SET_SCOPE_CONTEXT(handle_);
		return context::detail_::total_memory(handle_);
	}

	/// The amount of not-yet-allocated global memory, in bytes, on this
	/// context's device
	size_t free_memory() const
	{
		CAW_SET_SCOPE_CONTEXT(handle_);
		return context::detail_::free_memory(handle_);
	}
404 
public: // other non-mutator methods

	/// A proxy for this context's default stream
	stream_t default_stream() const;

	/// Launch a kernel within this context (declaration only; see the
	/// kernel-launch header for the definition and full semantics)
	template <typename Kernel, typename ... KernelParameters>
	void launch(
		Kernel kernel,
		launch_configuration_t launch_configuration,
		KernelParameters... parameters) const;
414 
420  {
421  CAW_SET_SCOPE_CONTEXT(handle_);
422  return context::detail_::cache_preference(handle_);
423  }
424 
	/// The stack size limit, in bytes, for kernel threads in this context
	size_t stack_size() const
	{
		CAW_SET_SCOPE_CONTEXT(handle_);
		return context::detail_::get_limit(CU_LIMIT_STACK_SIZE);
	}
431 
435  {
436  CAW_SET_SCOPE_CONTEXT(handle_);
437  return context::detail_::get_limit(CU_LIMIT_PRINTF_FIFO_SIZE);
438  }
439 
443  {
444  CAW_SET_SCOPE_CONTEXT(handle_);
445  return context::detail_::get_limit(CU_LIMIT_MALLOC_HEAP_SIZE);
446  }
447 
456  {
457  CAW_SET_SCOPE_CONTEXT(handle_);
458  return context::detail_::get_limit(CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH);
459  }
460 
463  {
464  return { device_id_, handle_ };
465  }
466 
473  {
474  CAW_SET_SCOPE_CONTEXT(handle_);
475  return context::detail_::get_limit(CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT);
476  }
477 
478 #if CUDA_VERSION >= 10000
479 
486  context::limit_value_t l2_fetch_granularity() const
487  {
488  CAW_SET_SCOPE_CONTEXT(handle_);
489  return context::detail_::get_limit(CU_LIMIT_MAX_L2_FETCH_GRANULARITY);
490  }
491 #endif
492 
493 #if CUDA_VERSION < 12030
494 
501  {
502  CAW_SET_SCOPE_CONTEXT(handle_);
503  return context::detail_::shared_memory_bank_size(handle_);
504  }
505 #endif // CUDA_VERSION < 12030
506 
	/// True if this context is the current context on the calling host thread
	bool is_current() const
	{
		return context::current::detail_::is_(handle_);
	}

	/// True if this is its device's primary context (defined elsewhere)
	bool is_primary() const;
516 
519  {
520  CAW_SET_SCOPE_CONTEXT(handle_);
522  auto status = cuCtxGetStreamPriorityRange(&result.least, &result.greatest);
523  throw_if_error_lazy(status, "Obtaining the priority range for streams within " +
524  context::detail_::identify(*this));
525  return result;
526  }
527 
531  {
532  CAW_SET_SCOPE_CONTEXT(handle_);
533  return context::detail_::get_limit(limit_id);
534  }
535 
543  {
544  unsigned int raw_version;
545  auto status = cuCtxGetApiVersion(handle_, &raw_version);
546  throw_if_error_lazy(status, "Failed obtaining the API version for " + context::detail_::identify(*this));
547  return version_t::from_single_number(static_cast<combined_version_t>(raw_version));
548  }
549 
protected:
	/// The raw creation-flags word of this context; individual settings
	/// are extracted from it by the public getters
	context::flags_t flags() const
	{
		return context::detail_::get_flags(handle_);
	}
557 
558 public: // methods which mutate the context, but not its wrapper
567  {
568  return context::host_thread_sync_scheduling_policy_t(flags() & CU_CTX_SCHED_MASK);
569  }
570 
	/// True if the context was created with the CU_CTX_LMEM_RESIZE_TO_MAX
	/// flag, i.e. set to keep larger local memory allocations after resizes
	bool keeping_larger_local_mem_after_resize() const
	{
		return flags() & CU_CTX_LMEM_RESIZE_TO_MAX;
	}
575 
578  stream_t create_stream(
579  bool will_synchronize_with_default_stream,
581 
584  event_t create_event(
585  bool uses_blocking_sync = event::sync_by_busy_waiting, // Yes, that's the runtime default
586  bool records_timing = event::do_record_timings,
588 
	/// Create a module of compiled code within this context from an in-memory
	/// image, applying the specified link options (declaration only)
	template <typename ContiguousContainer,
		cuda::detail_::enable_if_t<detail_::is_kinda_like_contiguous_container<ContiguousContainer>::value, bool> = true>
	module_t create_module(ContiguousContainer module_data, const link::options_t& link_options) const;

	/// Create a module of compiled code within this context from an in-memory
	/// image (declaration only)
	template <typename ContiguousContainer,
		cuda::detail_::enable_if_t<detail_::is_kinda_like_contiguous_container<ContiguousContainer>::value, bool> = true>
	module_t create_module(ContiguousContainer module_data) const;
600 
public: // Methods which don't mutate the context, but affect the device itself


	/// Allow work in this context to access memory allocated in the peer
	/// context (declaration only; defined elsewhere)
	void enable_access_to(const context_t& peer) const;

	/// Revoke this context's access to memory allocated in the peer context
	/// (declaration only; defined elsewhere)
	void disable_access_to(const context_t& peer) const;
609 
612  {
613  CAW_SET_SCOPE_CONTEXT(handle_);
614 #if (CUDA_VERSION >= 11000)
615  auto status = cuCtxResetPersistingL2Cache();
616  throw_if_error_lazy(status, "Failed resetting/clearing the persisting L2 cache memory");
617 #endif
618  throw cuda::runtime_error(
619  cuda::status::insufficient_driver,
620  "Resetting/clearing the persisting L2 cache memory is not supported when compiling CUDA versions lower than 11.0");
621  }
622 
623 public: // other methods which don't mutate this class as a reference, but do mutate the context
624 
625 #if CUDA_VERSION < 12030
626 
633  {
634  CAW_SET_SCOPE_CONTEXT(handle_);
635  context::detail_::set_shared_memory_bank_size(handle_, bank_size);
636  }
637 #endif // CUDA_VERSION < 12030
638 
646  {
647  CAW_SET_SCOPE_CONTEXT(handle_);
648  context::detail_::set_cache_preference(handle_, preference);
649  }
650 
	/// Set one of the configurable limits of this context (affecting the
	/// events, streams, kernels etc. within it)
	void set_limit(context::limit_t limit_id, context::limit_value_t new_value) const
	{
		CAW_SET_SCOPE_CONTEXT(handle_);
		return context::detail_::set_limit(limit_id, new_value);
	}

	/// Set the limit on the stack size, in bytes, which a kernel thread may use
	void stack_size(context::limit_value_t new_value) const
	{
		return set_limit(CU_LIMIT_STACK_SIZE, new_value);
	}

	/// Set the size, in bytes, of the FIFO buffer backing in-kernel printf()
	void printf_buffer_size(context::limit_value_t new_value) const
	{
		return set_limit(CU_LIMIT_PRINTF_FIFO_SIZE, new_value);
	}

	/// Set the size, in bytes, of the heap backing in-kernel malloc()
	void memory_allocation_heap_size(context::limit_value_t new_value) const
	{
		return set_limit(CU_LIMIT_MALLOC_HEAP_SIZE, new_value);
	}

	/// Set the limit on the synchronization depth of grids launched from the
	/// device (CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH)
	void set_maximum_depth_of_child_grid_sync_calls(context::limit_value_t new_value) const
	{
		return set_limit(CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH, new_value);
	}

	/// Set the limit on the number of outstanding device-side kernel launches
	/// (CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT)
	void set_maximum_outstanding_kernel_launches(context::limit_value_t new_value) const
	{
		return set_limit(CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT, new_value);
	}

	/// Block the calling host thread until all previously-scheduled work on
	/// all of this context's streams has concluded
	void synchronize() const
	{
		cuda::synchronize(*this);
	}
697 
protected: // constructors

	// Note: this constructor does not create a driver context - it merely
	// wraps an already-existing handle; the publicly-accessible way of
	// invoking it is the context::wrap() friend function.
	context_t(
		device::id_t device_id,
		context::handle_t context_id,
		bool take_ownership) noexcept
	: device_id_(device_id), handle_(context_id), owning_(take_ownership)
	{ }

public: // friendship

	/// The sanctioned way of constructing a context_t proxy from a raw handle
	friend context_t context::wrap(
		device::id_t device_id,
		context::handle_t context_id,
		bool take_ownership) noexcept;

public: // constructors and destructor

	/// Copying yields a non-owning reference to the same context; ownership
	/// is never duplicated
	context_t(const context_t& other) :
		context_t(other.device_id_, other.handle_, false)
	{ };

	/// Move-construction transfers ownership (if any) away from the
	/// moved-from wrapper
	context_t(context_t&& other) noexcept:
		context_t(other.device_id_, other.handle_, other.owning_)
	{
		other.owning_ = false;
	};

	/// Destroys the wrapped driver context - but only when owning it
	~context_t()
	{
		if (owning_) {
			cuCtxDestroy(handle_);
			// Note: "Swallowing" any potential error to avoid ::std::terminate(); also,
			// because the context cannot possibly exist after this call.
		}
	}

public: // operators

	// Copy-assignment is deleted (copy-construction yields a non-owning
	// reference instead); move-assignment swaps all state with the source.
	context_t& operator=(const context_t&) = delete;
	context_t& operator=(context_t&& other) noexcept
	{
		::std::swap(device_id_, other.device_id_);
		::std::swap(handle_, other.handle_);
		::std::swap(owning_, other.owning_);
		return *this;
	}

protected: // data members
	device::id_t device_id_;
	context::handle_t handle_;
	bool owning_;
	// this field is mutable only for enabling move construction; other
	// than in that case it must not be altered

	// TODO: Should we hold a field indicating whether this context is
	// primary or not?
};
759 
762 inline bool operator==(const context_t& lhs, const context_t& rhs) noexcept
763 {
764  // Note: Contexts on different devices cannot have the same context handle,
765  // so this is redundant, but let's be extra safe:
766  return lhs.device_id() == rhs.device_id() and lhs.handle() == rhs.handle();
767 }
768 
769 inline bool operator!=(const context_t& lhs, const context_t& rhs) noexcept
770 {
771  return not (lhs == rhs);
772 }
774 
namespace context {

/// Wrap an existing CUDA context in a context_t proxy instance
///
/// @param take_ownership when true, the proxy's destructor will destroy the
/// wrapped driver context
inline context_t wrap(
	device::id_t device_id,
	handle_t context_id,
	bool take_ownership) noexcept
{
	// This friend function may invoke context_t's protected constructor
	return { device_id, context_id, take_ownership };
}
789 
namespace detail_ {

// Wrap a raw context handle, determining its associated device via an
// extra driver API call
inline context_t from_handle(
	context::handle_t context_handle,
	bool take_ownership)
{
	device::id_t device_id = get_device_id(context_handle);
	return wrap(device_id, context_handle, take_ownership);
}
799 
// Create a new driver context on the given device; cuCtxCreate also makes
// the new context current on the calling thread - hence "and_push"
inline handle_t create_and_push(
	device::id_t device_id,
	host_thread_sync_scheduling_policy_t sync_scheduling_policy = automatic,
	bool keep_larger_local_mem_after_resize = false)
{
	auto flags = context::detail_::make_flags(
		sync_scheduling_policy,
		keep_larger_local_mem_after_resize);
	handle_t handle;
	auto status = cuCtxCreate(&handle, flags, device_id);
	throw_if_error_lazy(status, "failed creating a CUDA context associated with "
		+ device::detail_::identify(device_id));
	return handle;
}
814 
815 } // namespace detail_
816 
/// Create a new context on the given device (declaration only; contrast
/// with @ref create_and_push regarding the context stack)
context_t create(
	const device_t& device,
	host_thread_sync_scheduling_policy_t sync_scheduling_policy = heuristic,
	bool keep_larger_local_mem_after_resize = false);

/// Create a new context on the given device and push it onto the calling
/// thread's context stack (declaration only)
context_t create_and_push(
	const device_t& device,
	host_thread_sync_scheduling_policy_t sync_scheduling_policy = heuristic,
	bool keep_larger_local_mem_after_resize = false);
856 
namespace current {

/// Obtain a non-owning proxy for the context current on the calling host thread
///
/// @throws ::std::runtime_error when no context is current
inline context_t get()
{
	auto handle = detail_::get_handle();
	if (handle == context::detail_::none) {
		throw ::std::runtime_error("Attempt to obtain the current CUDA context when no context is current.");
	}
	// from_handle's default is to not take ownership
	return context::detail_::from_handle(handle);
}
872 
/// Make the given context the current one on the calling host thread
inline void set(const context_t& context)
{
	return detail_::set(context.handle());
}
884 
/// Push a (reference to a) context onto the top of the context stack -
/// unless that context is already at the top
///
/// @return true if a push actually occurred
inline bool push_if_not_on_top(const context_t& context)
{
	return context::current::detail_::push_if_not_on_top(context.handle());
}
891 
/// Push a (reference to a) context onto the top of the calling thread's
/// context stack
inline void push(const context_t& context)
{
	return context::current::detail_::push(context.handle());
}
903 
/// Pop the top context off of the calling thread's context stack
///
/// @return a non-owning proxy for the popped context
inline context_t pop()
{
	static constexpr const bool do_not_take_ownership { false };
	// Unfortunately, since we don't store the device IDs of contexts
	// on the stack, this incurs an extra API call beyond just the popping...
	auto handle = context::current::detail_::pop();
	auto device_id = context::detail_::get_device_id(handle);
	return context::wrap(device_id, handle, do_not_take_ownership);
}
919 
namespace detail_ {

// Ensure some context is current on the calling thread, pushing a default
// one if necessary; returns the now-current context's handle (declaration
// only; defined elsewhere)
handle_t push_default_if_missing();

// Obtain the current context - first pushing a default context if no
// context is current, so this never fails for lack of a current context
inline context_t get_with_fallback_push()
{
	auto handle = push_default_if_missing();
	return context::detail_::from_handle(handle);
}
938 
939 
940 } // namespace detail_
941 
942 } // namespace current
943 
/// Determine whether the given context is its device's primary context
/// (declaration only; defined where device_t is complete)
bool is_primary(const context_t& context);
946 
namespace detail_ {

// Identify a context in human-readable terms for error messages, using both
// its raw handle and its device ID
inline ::std::string identify(const context_t& context)
{
	return identify(context.handle(), context.device_id());
}

} // namespace detail_
955 
956 } // namespace context
957 
/// Block the calling host thread until all previously-scheduled work on all
/// streams (= queues) of the given context has concluded
inline void synchronize(const context_t& context)
{
	context::detail_::synchronize(context.device_id(), context.handle());
}
972 
973 } // namespace cuda
974 
975 #endif // CUDA_API_WRAPPERS_CONTEXT_HPP_
void reset_persisting_l2_cache() const
Clear the L2 cache memory which persists between invocations of kernels.
Definition: context.hpp:611
version_t api_version() const
Returns a version number corresponding to the capabilities of this context, which can be used...
Definition: context.hpp:542
size_t stack_size() const
Definition: context.hpp:426
Proxy class for a CUDA stream.
Definition: stream.hpp:246
CUsharedconfig shared_memory_bank_size_t
Choice of the number of bytes in each bank of the shared memory.
Definition: context.hpp:44
Wrapper class for a CUDA context.
Definition: context.hpp:244
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
int priority_t
CUDA streams have a scheduling priority, with lower values meaning higher priority.
Definition: types.hpp:246
detail_::region_helper< memory::region_t > region_t
A child class of the generic region_t with some managed-memory-specific functionality.
Definition: memory.hpp:1960
The full set of possible configuration parameters for launching a kernel on a GPU.
Definition: launch_configuration.hpp:69
CUcontext handle_t
Raw CUDA driver handle for a context; see {context_t}.
Definition: types.hpp:878
Wrapper class for a CUDA event.
Definition: event.hpp:133
A class to create a faux member in a context_t, in lieu of an in-class namespace (which C++ does not ...
Definition: context.hpp:262
A range of priorities supported by a CUDA context; ranges from the higher numeric value to the lower...
Definition: context.hpp:50
context::limit_value_t get_limit(context::limit_t limit_id) const
Get one of the configurable limits for this context (and events, streams, kernels, etc.
Definition: context.hpp:530
context::stream_priority_range_t stream_priority_range() const
Get the range of priority values one can set for streams in this context.
Definition: context.hpp:518
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:850
Wrapper class for a CUDA code module.
Definition: module.hpp:123
size_t amount_free() const
Amount of free global memory on the CUDA device's primary context.
Definition: context.hpp:329
stream::priority_t least
Higher numeric value, lower priority.
Definition: context.hpp:52
CUlimit limit_t
Features of contexts which can be configured individually during a context&#39;s lifetime.
Definition: context.hpp:37
void launch(Kernel &&kernel, launch_configuration_t launch_configuration, KernelParameters &&... parameters)
Variant of enqueue_launch for use with the default stream in the current context. ...
Definition: kernel_launch.hpp:394
void set_limit(context::limit_t limit_id, context::limit_value_t new_value) const
Set one of the configurable limits for this context (and events, streams, kernels, etc.
Definition: context.hpp:653
void set_shared_memory_bank_size(context::shared_memory_bank_size_t bank_size) const
Sets the shared memory bank size, described in this Parallel-for-all blog entry
Definition: context.hpp:632
constexpr bool is_trivial() const
When true, stream prioritization is not supported, i.e.
Definition: context.hpp:61
bool push_if_not_on_top(const context_t &context)
Push a (reference to a) context onto the top of the context stack - unless that context is already at...
Definition: context.hpp:887
void synchronize() const
Avoid executing any additional instructions on this thread until all work on all streams in this cont...
Definition: context.hpp:693
Wrappers for Runtime API functions involving versions - of the CUDA runtime and of the CUDA driver...
size_t amount_total() const
Amount of total global memory on the CUDA device's primary context.
Definition: context.hpp:320
host_thread_sync_scheduling_policy_t
Scheduling policies the CUDA driver may use when the host-side thread it is running in needs to wait ...
Definition: types.hpp:884
context::limit_value_t memory_allocation_heap_size() const
Definition: context.hpp:442
::std::size_t size_t
A size type for use throughout the wrappers library (except when specific API functions limit the siz...
Definition: types.hpp:81
global_memory_type memory() const
Get a wrapper object for this context's associated device-global memory.
Definition: context.hpp:462
context_t pop()
Pop the top off of the context stack.
Definition: context.hpp:910
size_t limit_value_t
Type for the actual values for context (see limit_t for the possible kinds of limits whose value can ...
Definition: context.hpp:41
A (base?) class for exceptions raised by CUDA code; these errors are thrown by essentially all CUDA R...
Definition: error.hpp:271
void synchronize(const context_t &context)
Waits for all previously-scheduled tasks on all streams (= queues) in a CUDA context to conclude...
Definition: context.hpp:968
multiprocessor_cache_preference_t
L1-vs-shared-memory balance option.
Definition: types.hpp:804
void push(const context_t &context)
Push a (reference to a) context onto the top of the context stack.
Definition: context.hpp:899
context::host_thread_sync_scheduling_policy_t sync_scheduling_policy() const
Gets the synchronization policy to be used for threads synchronizing with this CUDA context...
Definition: context.hpp:566
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we've failed - which also ensures no string is constructed unle...
Definition: error.hpp:316
bool operator==(const context_t &lhs, const context_t &rhs) noexcept
Definition: context.hpp:762
multiprocessor_cache_preference_t cache_preference() const
Determines the balance between L1 space and shared memory space set for kernels executing within this...
Definition: context.hpp:419
static version_t from_single_number(combined_version_t combined_version) noexcept
Parse the combined single-number representation, separating it.
Definition: versions.hpp:46
Facilities for exception-based handling of Runtime and Driver API errors, including a basic exception...
A structure representing a CUDA release version.
Definition: versions.hpp:39
Can be shared between processes. Must not be able to record timings.
Definition: constants.hpp:96
Fundamental CUDA-related constants and enumerations, not dependent on any more complex abstractions...
the scheduling priority of a stream created without specifying any other priority value ...
Definition: types.hpp:249
size_t total_memory() const
The amount of total global device memory available to this context, including memory already allocate...
Definition: context.hpp:387
The thread calling event_.synchronize() will enter a busy-wait loop; this (might) minimize delay betw...
Definition: constants.hpp:70
bool is_owning() const noexcept
Definition: context.hpp:381
context::limit_value_t maximum_outstanding_kernel_launches() const
Definition: context.hpp:472
Can only be used by the process which created it.
Definition: constants.hpp:95
size_t free_memory() const
The amount of unallocated global device memory available to this context and not yet allocated...
Definition: context.hpp:399
void set_cache_preference(multiprocessor_cache_preference_t preference) const
Controls the balance between L1 space and shared memory space for kernels executing within this conte...
Definition: context.hpp:645
bool is_primary(const context_t &context)
Definition: context.hpp:51
bool is_current() const
Definition: context.hpp:509
context::limit_value_t maximum_depth_of_child_grid_sync_calls() const
Definition: context.hpp:455
void stack_size(context::limit_value_t new_value) const
Set the limit on the size of the stack a kernel thread can use when running.
Definition: context.hpp:662
context::shared_memory_bank_size_t shared_memory_bank_size() const
Returns the shared memory bank size, as described in this Parallel-for-all blog entry ...
Definition: context.hpp:500
Wrapper class for a CUDA device.
Definition: device.hpp:135
initial_visibility_t
The choices of which categories CUDA devices must a managed memory region be visible to...
Definition: types.hpp:753
Fundamental CUDA-related type definitions.
stream::priority_t greatest
Lower numeric value, higher priority.
Definition: context.hpp:55
context::limit_value_t printf_buffer_size() const
Definition: context.hpp:434