eyalroz/cuda-api-wrappers/context_8hpp_source.html

 #pragma once
 #ifndef CUDA_API_WRAPPERS_CONTEXT_HPP_
 #define CUDA_API_WRAPPERS_CONTEXT_HPP_

 #include "current_context.hpp"
 #include "versions.hpp"
 #include "error.hpp"
 #include "constants.hpp"
 #include "types.hpp"

 #include <string>
 #include <utility>

 namespace cuda {

 // Forward declarations of the wrapper classes which the declarations in
 // this header refer to.
 class device_t;
 class event_t;
 class context_t;
 class stream_t;
 class module_t;

 namespace link {
 // Forward declaration; used as a parameter type by context_t::create_module()
 struct options_t;
 } // namespace link

 namespace context {

 /// Features of contexts which can be configured individually during a
 /// context's lifetime.
 using limit_t = CUlimit;

 /// Type for the actual values of context limits (see @ref limit_t for the
 /// possible kinds of limits).
 using limit_value_t = size_t;

 /// Choice of the number of bytes in each bank of the shared memory.
 using shared_memory_bank_size_t = CUsharedconfig;

 /// A range of stream priorities, as obtained from a CUDA context (see
 /// context_t::stream_priority_range()).
 struct stream_priority_range_t {
     /// The lowest-priority end of the range
     stream::priority_t least;

     /// The highest-priority end of the range
     stream::priority_t greatest;

     /// @return true when both ends of the range equal the default stream
     /// priority, i.e. when there is no actual choice of priorities
     constexpr bool is_trivial() const
     {
         return not (least != stream::default_priority or greatest != stream::default_priority);
     }
 };

 /// Wraps an existing raw context handle in a context_t object.
 ///
 /// @param device_id ID of the device with which the context is associated
 /// @param context_id raw handle of the context to wrap
 /// @param take_ownership when true, the resulting wrapper will destroy
 ///     the context when the wrapper itself is destroyed
 /// @return a context_t wrapping the given handle
 context_t wrap(
     device::id_t       device_id,
     context::handle_t  context_id,
     bool               take_ownership = false) noexcept;

 namespace detail_ {

 /// Produces a textual identification of a context, for use in error
 /// messages; defined further below, once context_t is complete.
 ::std::string identify(const context_t& context);

 /// Obtains the value of a configurable limit. No context handle is passed
 /// to the driver, so callers are expected to have made the relevant context
 /// current beforehand.
 ///
 /// @param limit_id which limit to query
 /// @return the limit's value
 /// @throws an exception (via throw_if_error_lazy) when the driver call fails
 inline limit_value_t get_limit(limit_t limit_id)
 {
     limit_value_t value;
     auto result = cuCtxGetLimit(&value, limit_id);
     throw_if_error_lazy(result, "Failed obtaining CUDA context limit value");
     return value;
 }

 /// Sets the value of a configurable limit. No context handle is passed to
 /// the driver, so callers are expected to have made the relevant context
 /// current beforehand.
 ///
 /// @param limit_id which limit to set
 /// @param new_value the value to set the limit to
 /// @throws an exception (via throw_if_error_lazy) when the driver call fails
 inline void set_limit(limit_t limit_id, limit_value_t new_value)
 {
     auto status = cuCtxSetLimit(limit_id, new_value);
     // Note: this is the setter, so the message must not claim we failed "obtaining" the value
     throw_if_error_lazy(status, "Failed setting CUDA context limit value");
 }

 /// Combines context-creation settings into a single flags value.
 ///
 /// @param sync_scheduling_policy scheduling policy; the enum's value is used
 ///     directly as part of the bitmask
 /// @param keep_larger_local_mem_after_resize when true, the
 ///     CU_CTX_LMEM_RESIZE_TO_MAX bit is set
 /// @return the bitwise-or of the policy and the (optional) resize flag
 inline constexpr flags_t make_flags(
     host_thread_sync_scheduling_policy_t   sync_scheduling_policy,
     bool                                   keep_larger_local_mem_after_resize)
 {
     return
           (keep_larger_local_mem_after_resize ? CU_CTX_LMEM_RESIZE_TO_MAX : 0)
         | sync_scheduling_policy; // this enum value doubles as a valid bitmask
 }

 /// Obtains the ID of the device with which a context is associated.
 ///
 /// The context is pushed onto the context stack for the duration of the
 /// query (unless it is already on top), and popped again afterwards - also
 /// when the query itself throws, so that a failure does not leave the
 /// context stack altered.
 ///
 /// @param context_handle raw handle of the context to query
 /// @return the ID of the device obtained while the context was current
 // consider renaming this: device_id_of
 inline device::id_t get_device_id(handle_t context_handle)
 {
     auto needed_push = current::detail_::push_if_not_on_top(context_handle);
     device::id_t device_id;
     try {
         device_id = current::detail_::get_device_id();
     }
     catch (...) {
         // Undo our push before propagating the failure
         if (needed_push) {
             current::detail_::pop();
         }
         throw;
     }
     if (needed_push) {
         current::detail_::pop();
     }
     return device_id;
 }


 /// Wraps a raw context handle when the associated device's ID is not known
 /// to the caller; defined further below, once context_t is complete.
 ///
 /// @param context_handle raw handle of the context to wrap
 /// @param take_ownership whether the resulting wrapper should own the context
 context_t from_handle(
     context::handle_t  context_handle,
     bool               take_ownership = false);

 /// Queries the driver for the total amount of memory. The handle is not
 /// passed to the driver; it only serves for identifying the context in the
 /// error message, so callers are expected to have made the context current.
 ///
 /// @param handle handle of the context, for error reporting
 /// @return the total amount of memory, in bytes
 inline size_t total_memory(handle_t handle)
 {
     size_t total_bytes;
     auto result = cuMemGetInfo(nullptr, &total_bytes);
     throw_if_error_lazy(result, "Failed determining amount of total memory for " + identify(handle));
     return total_bytes;
 }

 /// Queries the driver for the amount of free memory. The handle is not
 /// passed to the driver; it only serves for identifying the context in the
 /// error message, so callers are expected to have made the context current.
 ///
 /// @param handle handle of the context, for error reporting
 /// @return the amount of free memory, in bytes
 inline size_t free_memory(handle_t handle)
 {
     size_t free_bytes;
     auto result = cuMemGetInfo(&free_bytes, nullptr);
     throw_if_error_lazy(result, "Failed determining amount of free memory for " + identify(handle));
     return free_bytes;
 }

 /// Sets the L1-vs-shared-memory cache preference via cuCtxSetCacheConfig().
 /// The handle is only used for identifying the context in the error message.
 ///
 /// @param handle handle of the context, for error reporting
 /// @param preference the cache preference to apply
 inline void set_cache_preference(handle_t handle, multiprocessor_cache_preference_t preference)
 {
     auto raw_preference = static_cast<CUfunc_cache>(preference);
     auto result = cuCtxSetCacheConfig(raw_preference);
     throw_if_error_lazy(result,
         "Setting the multiprocessor L1/Shared Memory cache distribution preference to " +
         ::std::to_string(static_cast<unsigned>(preference)) + " for " + identify(handle));
 }

 /// Obtains the L1-vs-shared-memory cache preference via
 /// cuCtxGetCacheConfig(). The handle is only used for identifying the
 /// context in the error message.
 ///
 /// @param handle handle of the context, for error reporting
 /// @return the cache preference reported by the driver
 inline multiprocessor_cache_preference_t cache_preference(handle_t handle)
 {
     CUfunc_cache raw_preference;
     auto result = cuCtxGetCacheConfig(&raw_preference);
     throw_if_error_lazy(result,
         "Obtaining the multiprocessor L1/Shared Memory cache distribution preference for " + identify(handle));
     return static_cast<multiprocessor_cache_preference_t>(raw_preference);
 }

 #if CUDA_VERSION < 12030
 /// Obtains the shared memory bank size via cuCtxGetSharedMemConfig(). Only
 /// compiled for CUDA versions earlier than 12.3. The handle is only used
 /// for identifying the context in the error message.
 ///
 /// @param handle handle of the context, for error reporting
 /// @return the bank size configuration reported by the driver
 inline shared_memory_bank_size_t shared_memory_bank_size(handle_t handle)
 {
     CUsharedconfig raw_bank_size;
     auto result = cuCtxGetSharedMemConfig(&raw_bank_size);
     throw_if_error_lazy(result, "Obtaining the multiprocessor shared memory bank size for " + identify(handle));
     return static_cast<shared_memory_bank_size_t>(raw_bank_size);
 }
 #endif // CUDA_VERSION < 12030

 #if CUDA_VERSION < 12030
 /// Sets the shared memory bank size via cuCtxSetSharedMemConfig(). Only
 /// compiled for CUDA versions earlier than 12.3. The handle is only used
 /// for identifying the context in the error message.
 ///
 /// @param handle handle of the context, for error reporting
 /// @param bank_size the bank size configuration to apply
 inline void set_shared_memory_bank_size(handle_t handle, shared_memory_bank_size_t bank_size)
 {
     auto raw_bank_size = static_cast<CUsharedconfig>(bank_size);
     auto result = cuCtxSetSharedMemConfig(raw_bank_size);
     throw_if_error_lazy(result, "Setting the multiprocessor shared memory bank size for " + identify(handle));
 }
 #endif // CUDA_VERSION < 12030


 /// Synchronizes with a context given only its handle: the context is set
 /// for the scope of this call, and the current-context synchronization
 /// function is then invoked.
 ///
 /// @param handle raw handle of the context to synchronize with
 inline void synchronize(context::handle_t handle)
 {
     CAW_SET_SCOPE_CONTEXT(handle);
     current::detail_::synchronize(handle);
 }

 /// Synchronizes with a context given its handle and its device's ID: the
 /// context is set for the scope of this call, and the current-context
 /// synchronization function is then invoked.
 ///
 /// @param device_id ID of the device with which the context is associated
 /// @param handle raw handle of the context to synchronize with
 inline void synchronize(device::id_t device_id, context::handle_t handle)
 {
     CAW_SET_SCOPE_CONTEXT(handle);
     current::detail_::synchronize(device_id, handle);
 }

 /// Destroys a context via cuCtxDestroy().
 ///
 /// @param handle raw handle of the context to destroy
 /// @throws an exception (via throw_if_error_lazy) when the driver call fails
 inline void destroy(handle_t handle)
 {
     auto result = cuCtxDestroy(handle);
     throw_if_error_lazy(result, "Failed destroying " + identify(handle));
 }

 /// Destroys a context via cuCtxDestroy(); the device ID is only used for
 /// a more informative error message.
 ///
 /// @param handle raw handle of the context to destroy
 /// @param device_index ID of the device, for error reporting
 /// @throws an exception (via throw_if_error_lazy) when the driver call fails
 inline void destroy(handle_t handle, device::id_t device_index)
 {
     auto result = cuCtxDestroy(handle);
     throw_if_error_lazy(result, "Failed destroying " + identify(handle, device_index));
 }

 /// Obtains the flags of a context: the context is set for the scope of
 /// this call, and the current context's flags are then queried.
 ///
 /// @param handle raw handle of the context to query
 /// @return the flags reported for the context
 inline context::flags_t get_flags(handle_t handle)
 {
     CAW_SET_SCOPE_CONTEXT(handle);
     return current::detail_::get_flags();
 }

 } // namespace detail_

 } // namespace context

 /// Waits for all previously-scheduled tasks on all streams in a CUDA
 /// context to conclude. Declared here for use by context_t::synchronize();
 /// defined at the end of this file.
 inline void synchronize(const context_t& context);

 /// Wrapper class for a CUDA context.
 ///
 /// An instance holds a raw context handle, the ID of the device with which
 /// the context is associated, and a flag indicating whether the wrapper owns
 /// the context. An owning wrapper destroys the context in its destructor.
 ///
 /// @note The value constructor is protected; instances are obtained through
 /// context::wrap() (a friend). Copies of a wrapper are always non-owning.
 class context_t {
 protected: // types
     using flags_type = context::flags_t;

 public: // types

     // Guard against the driver's and the runtime's shared-memory-configuration
     // enums differing in their underlying types
     static_assert(
         ::std::is_same<::std::underlying_type<CUsharedconfig>::type, ::std::underlying_type<cudaSharedMemConfig>::type>::value,
         "Unexpected difference between enumerators used for the same purpose by the CUDA runtime and the CUDA driver");

 public: // inner classes

     /// A class serving as a faux "memory" member of a context_t, in lieu of
     /// an in-class namespace; obtained via context_t::memory().
     class global_memory_type {
 #if CUDA_VERSION >= 11040
     public: // data types
         using execution_graph_related_attribute_t = CUgraphMem_attribute;
 #endif // CUDA_VERSION >= 11040


     protected: // data members
         const device::id_t device_id_;
         const context::handle_t context_handle_;

     public:
         /// @param device_id ID of the device with which the context is associated
         /// @param context_handle raw handle of the context
         global_memory_type(device::id_t device_id, context::handle_t context_handle)
             : device_id_(device_id), context_handle_(context_handle)
         {}

         /// @return a wrapper for the device; defined outside this file
         device_t associated_device() const;

         /// @return a wrapper for the context; defined outside this file
         context_t associated_context() const;

         /// Allocates a region of memory; defined outside this file.
         ///
         /// @param size_in_bytes the amount of memory to allocate
         memory::region_t allocate(size_t size_in_bytes);

         /// Allocates a region of managed memory; defined outside this file.
         ///
         /// @param size_in_bytes the amount of memory to allocate
         /// @param initial_visibility which devices the region is initially visible to
         memory::region_t allocate_managed(
             size_t size_in_bytes,
             cuda::memory::managed::initial_visibility_t initial_visibility =
             cuda::memory::managed::initial_visibility_t::to_supporters_of_concurrent_managed_access);

         /// @return the total amount of memory, in bytes, as reported by the
         /// driver while this object's context is set for the call's scope
         size_t amount_total() const
         {
             CAW_SET_SCOPE_CONTEXT(context_handle_);
             return context::detail_::total_memory(context_handle_);
         }

         /// @return the amount of free memory, in bytes, as reported by the
         /// driver while this object's context is set for the call's scope
         size_t amount_free() const
         {
             CAW_SET_SCOPE_CONTEXT(context_handle_);
             return context::detail_::free_memory(context_handle_);
         }

 #if CUDA_VERSION >= 11040

         /// Trims the memory used for CUDA execution graphs on the device,
         /// via cuDeviceGraphMemTrim().
         void free_unused_execution_graph_memory() const
         {
             auto status = cuDeviceGraphMemTrim(device_id_);
             throw_if_error_lazy(status,
                 "Trimming memory used for CUDA execution graphs on " + device::detail_::identify(device_id_));
         }

         /// Obtains one of the device's execution-graph-related memory attributes.
         ///
         /// @param attribute which attribute to obtain
         /// @return the attribute's value
         size_t get_execution_graph_related_attribute(execution_graph_related_attribute_t attribute) const
         {
             cuuint64_t result;
             auto status = cuDeviceGetGraphMemAttribute(device_id_, attribute, &result);
             throw_if_error_lazy(status, "Failed obtaining an execution-graph-related memory attribute for "
                                         + device::detail_::identify(device_id_));
             return result;
         }

         /// Sets the device's CU_GRAPH_MEM_ATTR_USED_MEM_HIGH attribute to zero.
         void reset_execution_graph_usage_high_watermark() const
         {
             cuuint64_t value_{0};
             auto status = cuDeviceSetGraphMemAttribute(device_id_, CU_GRAPH_MEM_ATTR_USED_MEM_HIGH, &value_);
             throw_if_error_lazy(status, "Failed setting an execution-graph-related memory attribute for "
                                         + device::detail_::identify(device_id_));
         }
 #endif // CUDA_VERSION >= 11040
     }; // class global_memory_type


 public: // data member non-mutator getters

     /// @return the raw handle of the wrapped context
     context::handle_t handle() const noexcept { return handle_; }

     /// @return the ID of the device with which the context is associated
     device::id_t device_id() const noexcept { return device_id_; }

     /// @return a wrapper for the device; defined outside this file
     device_t device() const;

     /// @return true if this wrapper will destroy the context on destruction
     bool is_owning() const noexcept { return owning_;  }

     /// @return the total amount of memory, in bytes, reported by the driver
     /// while this context is set for the call's scope
     size_t total_memory() const
     {
         CAW_SET_SCOPE_CONTEXT(handle_);
         return context::detail_::total_memory(handle_);
     }

     /// @return the amount of free memory, in bytes, reported by the driver
     /// while this context is set for the call's scope
     size_t free_memory() const
     {
         CAW_SET_SCOPE_CONTEXT(handle_);
         return context::detail_::free_memory(handle_);
     }

 public: // other non-mutator methods

     /// @return a wrapper for a default stream; defined outside this file
     stream_t default_stream() const;

     /// Launches a kernel; defined outside this file.
     ///
     /// @param kernel the kernel to launch
     /// @param launch_configuration the configuration to launch it with
     /// @param parameters the arguments to pass to the kernel
     template <typename Kernel, typename ... KernelParameters>
     void launch(
         Kernel                  kernel,
         launch_configuration_t  launch_configuration,
         KernelParameters...     parameters) const;

     /// @return the L1-vs-shared-memory cache preference set for this context
     multiprocessor_cache_preference_t cache_preference() const
     {
         CAW_SET_SCOPE_CONTEXT(handle_);
         return context::detail_::cache_preference(handle_);
     }

     /// @return the value of this context's CU_LIMIT_STACK_SIZE limit
     size_t stack_size() const
     {
         CAW_SET_SCOPE_CONTEXT(handle_);
         return context::detail_::get_limit(CU_LIMIT_STACK_SIZE);
     }

     /// @return the value of this context's CU_LIMIT_PRINTF_FIFO_SIZE limit
     context::limit_value_t printf_buffer_size() const
     {
         CAW_SET_SCOPE_CONTEXT(handle_);
         return context::detail_::get_limit(CU_LIMIT_PRINTF_FIFO_SIZE);
     }

     /// @return the value of this context's CU_LIMIT_MALLOC_HEAP_SIZE limit
     context::limit_value_t memory_allocation_heap_size() const
     {
         CAW_SET_SCOPE_CONTEXT(handle_);
         return context::detail_::get_limit(CU_LIMIT_MALLOC_HEAP_SIZE);
     }

     /// @return the value of this context's CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH limit
     context::limit_value_t maximum_depth_of_child_grid_sync_calls() const
     {
         CAW_SET_SCOPE_CONTEXT(handle_);
         return context::detail_::get_limit(CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH);
     }

     /// @return a helper object exposing memory-related functionality for
     /// this context and its device
     global_memory_type memory() const
     {
         return { device_id_, handle_ };
     }

     /// @return the value of this context's
     /// CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT limit
     context::limit_value_t maximum_outstanding_kernel_launches() const
     {
         CAW_SET_SCOPE_CONTEXT(handle_);
         return context::detail_::get_limit(CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT);
     }

 #if CUDA_VERSION >= 10000

     /// @return the value of this context's CU_LIMIT_MAX_L2_FETCH_GRANULARITY limit
     context::limit_value_t l2_fetch_granularity() const
     {
         CAW_SET_SCOPE_CONTEXT(handle_);
         return context::detail_::get_limit(CU_LIMIT_MAX_L2_FETCH_GRANULARITY);
     }
 #endif

 #if CUDA_VERSION < 12030

     /// @return the shared memory bank size configured for this context
     context::shared_memory_bank_size_t shared_memory_bank_size() const
     {
         CAW_SET_SCOPE_CONTEXT(handle_);
         return context::detail_::shared_memory_bank_size(handle_);
     }
 #endif // CUDA_VERSION < 12030

     /// @return the result of checking this context's handle against the
     /// current context
     bool is_current() const
     {
         return context::current::detail_::is_(handle_);
     }

     /// Defined outside this file.
     bool is_primary() const;

     /// Get the range of priority values one can set for streams in this context.
     context::stream_priority_range_t stream_priority_range() const
     {
         CAW_SET_SCOPE_CONTEXT(handle_);
         context::stream_priority_range_t result;
         auto status = cuCtxGetStreamPriorityRange(&result.least, &result.greatest);
         throw_if_error_lazy(status, "Obtaining the priority range for streams within " +
             context::detail_::identify(*this));
         return result;
     }

     /// Get one of the configurable limits for this context.
     ///
     /// @param limit_id which limit to query
     /// @return the limit's value
     context::limit_value_t get_limit(context::limit_t limit_id) const
     {
         CAW_SET_SCOPE_CONTEXT(handle_);
         return context::detail_::get_limit(limit_id);
     }

     /// @return the API version which the driver reports for this context,
     /// parsed from its single-number representation
     version_t api_version() const
     {
         unsigned int raw_version;
         auto status = cuCtxGetApiVersion(handle_, &raw_version);
         throw_if_error_lazy(status, "Failed obtaining the API version for " + context::detail_::identify(*this));
         return version_t::from_single_number(static_cast<combined_version_t>(raw_version));
     }

 protected:
     /// @return the raw flags of this context
     context::flags_t flags() const
     {
         return context::detail_::get_flags(handle_);
     }

 public: // flag-based getters, and factory methods for context-associated objects
     /// @return the scheduling policy encoded in this context's flags (the
     /// bits selected by CU_CTX_SCHED_MASK)
     context::host_thread_sync_scheduling_policy_t sync_scheduling_policy() const
     {
         return context::host_thread_sync_scheduling_policy_t(flags() & CU_CTX_SCHED_MASK);
     }

     /// @return true if the CU_CTX_LMEM_RESIZE_TO_MAX bit is set in this
     /// context's flags
     bool keeping_larger_local_mem_after_resize() const
     {
         return flags() & CU_CTX_LMEM_RESIZE_TO_MAX;
     }

     /// Creates a stream; defined outside this file.
     ///
     /// @param will_synchronize_with_default_stream see the definition for semantics
     /// @param priority the priority of the stream to create
     stream_t create_stream(
         bool                will_synchronize_with_default_stream,
         stream::priority_t  priority = cuda::stream::default_priority) const;

     /// Creates an event; defined outside this file.
     ///
     /// @param uses_blocking_sync see the definition for semantics
     /// @param records_timing see the definition for semantics
     /// @param interprocess see the definition for semantics
     event_t create_event(
         bool uses_blocking_sync = event::sync_by_busy_waiting, // Yes, that's the runtime default
         bool records_timing     = event::do_record_timings,
         bool interprocess       = event::not_interprocess) const;

     /// Creates a module from data held in a contiguous container, using
     /// specific link options; defined outside this file.
     template <typename ContiguousContainer,
         cuda::detail_::enable_if_t<detail_::is_kinda_like_contiguous_container<ContiguousContainer>::value, bool> = true>
     module_t create_module(ContiguousContainer module_data, const link::options_t& link_options) const;

     /// Creates a module from data held in a contiguous container; defined
     /// outside this file.
     template <typename ContiguousContainer,
         cuda::detail_::enable_if_t<detail_::is_kinda_like_contiguous_container<ContiguousContainer>::value, bool> = true>
     module_t create_module(ContiguousContainer module_data) const;

 public: // Methods which don't mutate the context, but affect the device itself


     /// Defined outside this file.
     void enable_access_to(const context_t& peer) const;

     /// Defined outside this file.
     void disable_access_to(const context_t& peer) const;

     /// Clear the L2 cache memory which persists between invocations of kernels.
     ///
     /// @throws cuda::runtime_error with status insufficient_driver when
     /// compiled against a CUDA version earlier than 11.0, in which the
     /// relevant API call is unavailable; otherwise, throws only if the
     /// driver call fails.
     void reset_persisting_l2_cache() const
     {
         CAW_SET_SCOPE_CONTEXT(handle_);
 #if (CUDA_VERSION >= 11000)
         auto status = cuCtxResetPersistingL2Cache();
         throw_if_error_lazy(status, "Failed resetting/clearing the persisting L2 cache memory");
 #else
         // Only reached when the API call is unavailable; must not be thrown
         // after a successful reset.
         throw cuda::runtime_error(
             cuda::status::insufficient_driver,
             "Resetting/clearing the persisting L2 cache memory is not supported when compiling CUDA versions lower than 11.0");
 #endif
     }

 public: // other methods which don't mutate this class as a reference, but do mutate the context

 #if CUDA_VERSION < 12030

     /// Sets the shared memory bank size for this context.
     ///
     /// @param bank_size the bank size configuration to apply
     void set_shared_memory_bank_size(context::shared_memory_bank_size_t bank_size) const
     {
         CAW_SET_SCOPE_CONTEXT(handle_);
         context::detail_::set_shared_memory_bank_size(handle_, bank_size);
     }
 #endif // CUDA_VERSION < 12030

     /// Sets the L1-vs-shared-memory cache preference for this context.
     ///
     /// @param preference the cache preference to apply
     void set_cache_preference(multiprocessor_cache_preference_t preference) const
     {
         CAW_SET_SCOPE_CONTEXT(handle_);
         context::detail_::set_cache_preference(handle_, preference);
     }

     /// Set one of the configurable limits for this context.
     ///
     /// @param limit_id which limit to set
     /// @param new_value the value to set the limit to
     void set_limit(context::limit_t limit_id, context::limit_value_t new_value) const
     {
         CAW_SET_SCOPE_CONTEXT(handle_);
         return context::detail_::set_limit(limit_id, new_value);
     }

     /// Sets this context's CU_LIMIT_STACK_SIZE limit.
     void stack_size(context::limit_value_t new_value) const
     {
         return set_limit(CU_LIMIT_STACK_SIZE, new_value);
     }

     /// Sets this context's CU_LIMIT_PRINTF_FIFO_SIZE limit.
     void printf_buffer_size(context::limit_value_t new_value) const
     {
         return set_limit(CU_LIMIT_PRINTF_FIFO_SIZE, new_value);
     }

     /// Sets this context's CU_LIMIT_MALLOC_HEAP_SIZE limit.
     void memory_allocation_heap_size(context::limit_value_t new_value) const
     {
         return set_limit(CU_LIMIT_MALLOC_HEAP_SIZE, new_value);
     }

     /// Sets this context's CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH limit.
     void set_maximum_depth_of_child_grid_sync_calls(context::limit_value_t new_value) const
     {
         return set_limit(CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH, new_value);
     }

     /// Sets this context's CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT limit.
     void set_maximum_outstanding_kernel_launches(context::limit_value_t new_value) const
     {
         return set_limit(CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT, new_value);
     }

     /// Synchronizes with this context, by delegating to the free-standing
     /// cuda::synchronize().
     void synchronize() const
     {
         cuda::synchronize(*this);
     }

 protected: // constructors

     /// @param device_id ID of the device with which the context is associated
     /// @param context_id raw handle of the context to wrap
     /// @param take_ownership whether this wrapper should destroy the context
     ///     on destruction
     context_t(
         device::id_t       device_id,
         context::handle_t  context_id,
         bool               take_ownership) noexcept
         : device_id_(device_id), handle_(context_id), owning_(take_ownership)
     { }

 public: // friendship

     // wrap() is the means for code outside the class to reach the protected constructor
     friend context_t context::wrap(
         device::id_t       device_id,
         context::handle_t  context_id,
         bool               take_ownership) noexcept;

 public: // constructors and destructor

     /// Copies are never owning, whether or not the original is.
     context_t(const context_t& other) :
         context_t(other.device_id_, other.handle_, false)
     { }

     /// Moving transfers ownership (if any) away from the moved-from wrapper.
     context_t(context_t&& other) noexcept:
         context_t(other.device_id_, other.handle_, other.owning_)
     {
         other.owning_ = false;
     }

     /// Destroys the context, if this wrapper owns it.
     ~context_t()
     {
         if (owning_) {
             cuCtxDestroy(handle_);
             // Note: "Swallowing" any potential error to avoid ::std::terminate(); also,
             // because the context cannot possibly exist after this call.
         }
     }

 public: // operators

     context_t& operator=(const context_t&) = delete;

     /// Exchanges the contents of the two wrappers; whatever this wrapper
     /// held (and possibly owned) ends up in `other`.
     context_t& operator=(context_t&& other) noexcept
     {
         ::std::swap(device_id_, other.device_id_);
         ::std::swap(handle_, other.handle_);
         ::std::swap(owning_, other.owning_);
         return *this;
     }

 protected: // data members
     device::id_t       device_id_;
     context::handle_t  handle_;
     bool               owning_;
         // this field is mutable only for enabling move construction; other
         // than in that case it must not be altered

     // TODO: Should we hold a field indicating whether this context is
     // primary or not?
 };

 /// Two context wrappers are equal when they refer to the same context
 /// handle on the same device; ownership is not considered.
 inline bool operator==(const context_t& lhs, const context_t& rhs) noexcept
 {
     // Note: Contexts on different devices cannot have the same context handle,
     // so the device check is redundant, but let's be extra safe:
     if (lhs.device_id() != rhs.device_id()) {
         return false;
     }
     return lhs.handle() == rhs.handle();
 }

 /// The negation of operator== for context wrappers.
 inline bool operator!=(const context_t& lhs, const context_t& rhs) noexcept
 {
     const bool are_equal = (lhs == rhs);
     return not are_equal;
 }

 namespace context {

 /// Wraps an existing raw context handle in a context_t object, using the
 /// class' protected constructor (to which this function has friend access).
 ///
 /// @param device_id ID of the device with which the context is associated
 /// @param context_id raw handle of the context to wrap
 /// @param take_ownership whether the wrapper is to destroy the context on
 ///     its own destruction
 inline context_t wrap(
     device::id_t       device_id,
     handle_t           context_id,
     bool               take_ownership) noexcept
 {
     return context_t{ device_id, context_id, take_ownership };
 }

 namespace detail_ {

 /// Wraps a raw context handle, determining the associated device's ID
 /// first by querying it through get_device_id().
 ///
 /// @param context_handle raw handle of the context to wrap
 /// @param take_ownership whether the wrapper should own the context
 inline context_t from_handle(
     context::handle_t  context_handle,
     bool               take_ownership)
 {
     return wrap(get_device_id(context_handle), context_handle, take_ownership);
 }

 /// Creates a new context on a device via cuCtxCreate(), and returns its
 /// raw handle.
 ///
 /// @param device_id ID of the device on which to create the context
 /// @param sync_scheduling_policy scheduling policy to encode in the flags
 /// @param keep_larger_local_mem_after_resize whether to set the
 ///     CU_CTX_LMEM_RESIZE_TO_MAX flag
 /// @return the raw handle of the newly-created context
 inline handle_t create_and_push(
     device::id_t                           device_id,
     host_thread_sync_scheduling_policy_t   sync_scheduling_policy = automatic,
     bool                                   keep_larger_local_mem_after_resize = false)
 {
     auto creation_flags = context::detail_::make_flags(
         sync_scheduling_policy,
         keep_larger_local_mem_after_resize);
     handle_t new_context_handle;
     auto result = cuCtxCreate(&new_context_handle, creation_flags, device_id);
     throw_if_error_lazy(result, "failed creating a CUDA context associated with "
         + device::detail_::identify(device_id));
     return new_context_handle;
 }

 } // namespace detail_

 /// Creates a new context on a device; defined outside this file.
 ///
 /// @param device the device on which to create the context
 /// @param sync_scheduling_policy scheduling policy for the new context
 /// @param keep_larger_local_mem_after_resize see detail_::make_flags()
 context_t create(
     const device_t&                        device,
     host_thread_sync_scheduling_policy_t   sync_scheduling_policy = heuristic,
     bool                                   keep_larger_local_mem_after_resize = false);

 /// Creates a new context on a device; defined outside this file.
 /// NOTE(review): presumably also leaves the new context on top of the
 /// context stack, going by the name - confirm against the definition.
 ///
 /// @param device the device on which to create the context
 /// @param sync_scheduling_policy scheduling policy for the new context
 /// @param keep_larger_local_mem_after_resize see detail_::make_flags()
 context_t create_and_push(
     const device_t&                        device,
     host_thread_sync_scheduling_policy_t   sync_scheduling_policy = heuristic,
     bool                                   keep_larger_local_mem_after_resize = false);

 namespace current {

 /// Obtains a (non-owning) wrapper for the current context.
 ///
 /// @throws ::std::runtime_error if no context is current
 inline context_t get()
 {
     auto current_handle = detail_::get_handle();
     if (current_handle != context::detail_::none) {
         return context::detail_::from_handle(current_handle);
     }
     throw ::std::runtime_error("Attempt to obtain the current CUDA context when no context is current.");
 }

 /// Sets the current context, by handing the wrapped context's raw handle
 /// to the handle-based implementation.
 inline void set(const context_t& context)
 {
     detail_::set(context.handle());
 }

 /// Push a (reference to a) context onto the top of the context stack -
 /// unless that context is already at the top.
 ///
 /// @return the value returned by the handle-based implementation (used
 /// elsewhere in this file as "a push was needed")
 inline bool push_if_not_on_top(const context_t& context)
 {
     auto handle_to_push = context.handle();
     return context::current::detail_::push_if_not_on_top(handle_to_push);
 }

 /// Push a (reference to a) context onto the top of the context stack.
 inline void push(const context_t& context)
 {
     context::current::detail_::push(context.handle());
 }

 /// Pop the top off of the context stack.
 ///
 /// @return a non-owning wrapper for the popped context
 inline context_t pop()
 {
     // Unfortunately, since we don't store the device IDs of contexts
     // on the stack, this incurs an extra API call beyond just the popping...
     auto popped_handle = context::current::detail_::pop();
     auto popped_device_id = context::detail_::get_device_id(popped_handle);
     return context::wrap(popped_device_id, popped_handle, /* take_ownership = */ false);
 }

 namespace detail_ {

 /// Defined outside this file. NOTE(review): presumably ensures that some
 /// context is current, pushing a default one when none is, and returns the
 /// resulting current context's handle - confirm against the definition.
 handle_t push_default_if_missing();

 /// Obtains a (non-owning) wrapper for the context whose handle is returned
 /// by push_default_if_missing().
 inline context_t get_with_fallback_push()
 {
     return context::detail_::from_handle(push_default_if_missing());
 }


 } // namespace detail_

 } // namespace current

 /// Checks whether a context is a primary context; defined outside this file.
 bool is_primary(const context_t& context);

 namespace detail_ {

 /// Produces a textual identification of a context, by delegating to the
 /// overload taking a raw handle and a device ID.
 inline ::std::string identify(const context_t& context)
 {
     auto raw_handle = context.handle();
     auto device_id = context.device_id();
     return identify(raw_handle, device_id);
 }

 } // namespace detail_

 } // namespace context

 /// Waits for all previously-scheduled tasks on all streams (= queues) in a
 /// CUDA context to conclude.
 ///
 /// @param context the context to synchronize with
 inline void synchronize(const context_t& context)
 {
     auto device_id = context.device_id();
     auto raw_handle = context.handle();
     context::detail_::synchronize(device_id, raw_handle);
 }

 } // namespace cuda

 #endif // CUDA_API_WRAPPERS_CONTEXT_HPP_
cuda::link::handle_t
CUlinkState handle_t
A raw CUDA driver handle for a linking-process.
Definition: link.hpp:40

cuda::context_t::reset_persisting_l2_cache
void reset_persisting_l2_cache() const
Clear the L2 cache memory which persists between invocations of kernels.
Definition: context.hpp:611

cuda::context_t::api_version
version_t api_version() const
Returns a version number corresponding to the capabilities of this context, which can be used...
Definition: context.hpp:542

cuda::link::options_t
A convenience class for holding, setting and inspecting options for a CUDA binary code linking proces...
Definition: link_options.hpp:130

cuda::context_t::stack_size
size_t stack_size() const
Definition: context.hpp:426

cuda::stream_t
Proxy class for a CUDA stream.
Definition: stream.hpp:246

cuda::context::shared_memory_bank_size_t
CUsharedconfig shared_memory_bank_size_t
Choice of the number of bytes in each bank of the shared memory.
Definition: context.hpp:44

cuda::context_t
Wrapper class for a CUDA context.
Definition: context.hpp:244

cuda
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22

cuda::stream::priority_t
int priority_t
CUDA streams have a scheduling priority, with lower values meaning higher priority.
Definition: types.hpp:246

cuda::memory::managed::region_t
detail_::region_helper< memory::region_t > region_t
A child class of the generic region_t with some managed-memory-specific functionality.
Definition: memory.hpp:1960

cuda::launch_configuration_t
The full set of possible configuration parameters for launching a kernel on a GPU.
Definition: launch_configuration.hpp:69

cuda::context::handle_t
CUcontext handle_t
Raw CUDA driver handle for a context; see {context_t}.
Definition: types.hpp:878

cuda::event_t
Wrapper class for a CUDA event.
Definition: event.hpp:133

cuda::context_t::global_memory_type
A class to create a faux member in a context_t, in lieu of an in-class namespace (which C++ does not ...
Definition: context.hpp:262

cuda::context::stream_priority_range_t
A range of priorities supported by a CUDA context; ranges from the higher numeric value to the lower...
Definition: context.hpp:50

cuda::context_t::get_limit
context::limit_value_t get_limit(context::limit_t limit_id) const
Get one of the configurable limits for this context (and events, streams, kernels, etc.
Definition: context.hpp:530

cuda::context_t::stream_priority_range
context::stream_priority_range_t stream_priority_range() const
Get the range of priority values one can set for streams in this context.
Definition: context.hpp:518

cuda::device::id_t
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:850

cuda::module_t
Wrapper class for a CUDA code module.
Definition: module.hpp:123

cuda::context_t::global_memory_type::amount_free
size_t amount_free() const
Amount of free global memory on the CUDA device's primary context.
Definition: context.hpp:329

cuda::context::stream_priority_range_t::least
stream::priority_t least
Higher numeric value, lower priority.
Definition: context.hpp:52

cuda::context::limit_t
CUlimit limit_t
Features of contexts which can be configured individually during a context's lifetime.
Definition: context.hpp:37

cuda::launch
void launch(Kernel &&kernel, launch_configuration_t launch_configuration, KernelParameters &&... parameters)
Variant of enqueue_launch for use with the default stream in the current context. ...
Definition: kernel_launch.hpp:394

cuda::context_t::set_limit
void set_limit(context::limit_t limit_id, context::limit_value_t new_value) const
Set one of the configurable limits for this context (and events, streams, kernels, etc.
Definition: context.hpp:653

cuda::context_t::set_shared_memory_bank_size
void set_shared_memory_bank_size(context::shared_memory_bank_size_t bank_size) const
Sets the shared memory bank size, described in this Parallel-for-all blog entry
Definition: context.hpp:632

cuda::context::stream_priority_range_t::is_trivial
constexpr bool is_trivial() const
When true, stream prioritization is not supported, i.e.
Definition: context.hpp:61

cuda::context::current::push_if_not_on_top
bool push_if_not_on_top(const context_t &context)
Push a (reference to a) context onto the top of the context stack - unless that context is already at...
Definition: context.hpp:887

cuda::context_t::synchronize
void synchronize() const
Avoid executing any additional instructions on this thread until all work on all streams in this cont...
Definition: context.hpp:693

versions.hpp
Wrappers for Runtime API functions involving versions - of the CUDA runtime and of the CUDA driver...

cuda::context_t::global_memory_type::amount_total
size_t amount_total() const
Amount of total global memory on the CUDA device's primary context.
Definition: context.hpp:320

cuda::context::host_thread_sync_scheduling_policy_t
host_thread_sync_scheduling_policy_t
Scheduling policies the CUDA driver may use when the host-side thread it is running in needs to wait ...
Definition: types.hpp:884

cuda::context_t::memory_allocation_heap_size
context::limit_value_t memory_allocation_heap_size() const
Definition: context.hpp:442

current_context.hpp

cuda::size_t
::std::size_t size_t
A size type for use throughout the wrappers library (except when specific API functions limit the siz...
Definition: types.hpp:81

cuda::context_t::memory
global_memory_type memory() const
Get a wrapper object for this context's associated device-global memory.
Definition: context.hpp:462

cuda::context::current::pop
context_t pop()
Pop the top off of the context stack.
Definition: context.hpp:910

cuda::context::limit_value_t
size_t limit_value_t
Type for the actual values for context (see limit_t for the possible kinds of limits whose value can ...
Definition: context.hpp:41

cuda::runtime_error
A (base?) class for exceptions raised by CUDA code; these errors are thrown by essentially all CUDA R...
Definition: error.hpp:271

cuda::synchronize
void synchronize(const context_t &context)
Waits for all previously-scheduled tasks on all streams (= queues) in a CUDA context to conclude...
Definition: context.hpp:968

cuda::multiprocessor_cache_preference_t
multiprocessor_cache_preference_t
L1-vs-shared-memory balance option.
Definition: types.hpp:804

cuda::context::current::push
void push(const context_t &context)
Push a (reference to a) context onto the top of the context stack.
Definition: context.hpp:899

cuda::link::wrap
link_t wrap(device::id_t device_id, context::handle_t context_handle, link::handle_t handle, const link::options_t &options, bool take_ownership=false) noexcept
Wrap an existing CUDA link-process in a link_t wrapper class instance.
Definition: link.hpp:281

cuda::context_t::sync_scheduling_policy
context::host_thread_sync_scheduling_policy_t sync_scheduling_policy() const
Gets the synchronization policy to be used for threads synchronizing with this CUDA context...
Definition: context.hpp:566

throw_if_error_lazy
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we've failed - which also ensures no string is constructed unle...
Definition: error.hpp:316

cuda::operator==
bool operator==(const context_t &lhs, const context_t &rhs) noexcept
Definition: context.hpp:762

cuda::context_t::cache_preference
multiprocessor_cache_preference_t cache_preference() const
Determines the balance between L1 space and shared memory space set for kernels executing within this...
Definition: context.hpp:419

cuda::version_t::from_single_number
static version_t from_single_number(combined_version_t combined_version) noexcept
Parse the combined single-number representation, separating it.
Definition: versions.hpp:46

error.hpp
Facilities for exception-based handling of Runtime and Driver API errors, including a basic exception...

cuda::version_t
A structure representing a CUDA release version.
Definition: versions.hpp:39

cuda::event::interprocess
Can be shared between processes. Must not be able to record timings.
Definition: constants.hpp:96

constants.hpp
Fundamental CUDA-related constants and enumerations, not dependent on any more complex abstractions...

cuda::stream::default_priority
the scheduling priority of a stream created without specifying any other priority value ...
Definition: types.hpp:249

cuda::context_t::total_memory
size_t total_memory() const
The amount of total global device memory available to this context, including memory already allocate...
Definition: context.hpp:387

cuda::event::sync_by_busy_waiting
The thread calling event_.synchronize() will enter a busy-wait loop; this (might) minimize delay betw...
Definition: constants.hpp:70

cuda::context_t::is_owning
bool is_owning() const noexcept
Definition: context.hpp:381

cuda::context_t::maximum_outstanding_kernel_launches
context::limit_value_t maximum_outstanding_kernel_launches() const
Definition: context.hpp:472

cuda::event::not_interprocess
Can only be used by the process which created it.
Definition: constants.hpp:95

cuda::context_t::free_memory
size_t free_memory() const
The amount of unallocated global device memory available to this context and not yet allocated...
Definition: context.hpp:399

cuda::context_t::set_cache_preference
void set_cache_preference(multiprocessor_cache_preference_t preference) const
Controls the balance between L1 space and shared memory space for kernels executing within this conte...
Definition: context.hpp:645

cuda::context::is_primary
bool is_primary(const context_t &context)
Definition: context.hpp:51

cuda::context_t::is_current
bool is_current() const
Definition: context.hpp:509

cuda::context_t::maximum_depth_of_child_grid_sync_calls
context::limit_value_t maximum_depth_of_child_grid_sync_calls() const
Definition: context.hpp:455

cuda::context_t::stack_size
void stack_size(context::limit_value_t new_value) const
Set the limit on the size of the stack a kernel thread can use when running.
Definition: context.hpp:662

cuda::context_t::shared_memory_bank_size
context::shared_memory_bank_size_t shared_memory_bank_size() const
Returns the shared memory bank size, as described in this Parallel-for-all blog entry ...
Definition: context.hpp:500

cuda::device_t
Wrapper class for a CUDA device.
Definition: device.hpp:135

cuda::memory::managed::initial_visibility_t
initial_visibility_t
The choices of which categories CUDA devices must a managed memory region be visible to...
Definition: types.hpp:753

types.hpp
Fundamental CUDA-related type definitions.

cuda::context::stream_priority_range_t::greatest
stream::priority_t greatest
Lower numeric value, higher priority.
Definition: context.hpp:55

cuda::context_t::printf_buffer_size
context::limit_value_t printf_buffer_size() const
Definition: context.hpp:434