cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
context.hpp
Go to the documentation of this file.
1 
10 #pragma once
11 #ifndef MULTI_WRAPPER_IMPLS_CONTEXT_HPP_
12 #define MULTI_WRAPPER_IMPLS_CONTEXT_HPP_
13 
14 #include "../device.hpp"
15 #include "../stream.hpp"
16 #include "../event.hpp"
17 #include "../current_context.hpp"
18 #include "../current_device.hpp"
19 #include "../peer_to_peer.hpp"
20 #include "../memory.hpp"
21 #include "../context.hpp"
22 
23 
24 namespace cuda {
25 
26 namespace context {
27 
28 namespace detail_ {
29 
30 inline handle_t get_primary_for_same_device(handle_t handle, bool increase_refcount)
31 {
32  auto device_id = get_device_id(handle);
33  return device::primary_context::detail_::get_handle(device_id, increase_refcount);
34 }
35 
36 inline bool is_primary_for_device(handle_t handle, device::id_t device_id)
37 {
38  auto context_device_id = context::detail_::get_device_id(handle);
39  if (context_device_id != device_id) {
40  return false;
41  }
42  static constexpr const bool dont_increase_refcount { false };
43  auto pc_handle = device::primary_context::detail_::get_handle(device_id, dont_increase_refcount);
44  return handle == pc_handle;
45 }
46 
47 } // namespace detail_
48 
49 inline bool is_primary(const context_t& context)
50 {
51  return context::detail_::is_primary_for_device(context.handle(), context.device_id());
52 }
53 
54 inline void synchronize(const context_t& context)
55 {
56  return detail_::synchronize(context.device_id(), context.handle());
57 }
58 
59 namespace current {
60 
61 namespace detail_ {
62 
63 inline bool is_primary(handle_t cc_handle, device::id_t current_context_device_id)
64 {
65  // Note we assume current_context_device_id really is the device ID for cc_handle;
66  // otherwise we could just use is_primary_for_device()
67  return cc_handle == device::primary_context::detail_::get_handle(current_context_device_id);
68 }
69 
70 } // namespace detail_
71 
72 inline bool is_primary()
73 {
74  auto current_context = get();
75  return detail_::is_primary(current_context.handle(), current_context.device_id());
76 }
77 
78 namespace detail_ {
79 
// Temporarily make `context_handle` current on this thread; when
// `hold_primary_context_ref_unit` is set, also keep the device's primary
// context alive for the override's duration via a reference-count unit.
inline scoped_override_t::scoped_override_t(bool hold_primary_context_ref_unit, device::id_t device_id, handle_t context_handle)
: hold_primary_context_ref_unit_(hold_primary_context_ref_unit), device_id_or_0_(device_id)
{
 // Take the refcount unit _before_ pushing, so the context cannot be torn
 // down underneath us while it is current.
 if (hold_primary_context_ref_unit) { device::primary_context::detail_::increase_refcount(device_id); }
 push(context_handle);
}
86 
// Undo the override: pop the pushed context and, if we took one, release the
// primary-context reference-count unit. When exceptions are disallowed in
// destructors, the nothrow variants are used and failures are discarded.
inline scoped_override_t::~scoped_override_t() DESTRUCTOR_EXCEPTION_SPEC
{
#if THROW_IN_DESTRUCTORS
 pop();
#else
 pop_and_discard_nothrow();
#endif
 if (hold_primary_context_ref_unit_) {
#if THROW_IN_DESTRUCTORS
 device::primary_context::detail_::decrease_refcount(device_id_or_0_);
#else
 // Best-effort release; any error is swallowed rather than thrown.
 device::primary_context::detail_::decrease_refcount_nothrow(device_id_or_0_);
#endif
 }
}
102 
103 
107 inline handle_t push_default_if_missing()
108 {
109  auto handle = detail_::get_handle();
110  if (handle != context::detail_::none) {
111  return handle;
112  }
113  // TODO: consider using cudaSetDevice here instead
114  auto current_device_id = device::current::detail_::get_id();
115  auto pc_handle = device::primary_context::detail_::obtain_and_increase_refcount(current_device_id);
116  push(pc_handle);
117  return pc_handle;
118 }
119 
// RAII helper guaranteeing that _some_ context is current on this thread for
// its lifetime. If no context is current on construction, the current
// device's primary context is obtained (with a refcount unit) and pushed;
// the destructor then pops it and releases the unit. If a context was
// already current, this object does nothing at all.
class scoped_existence_ensurer_t {
public:
 context::handle_t context_handle;             // the context current while this object lives
 device::id_t device_id_;                      // only meaningful when we pushed a primary context
 bool decrease_pc_refcount_on_destruct_;       // whether the dtor must release a refcount unit

 explicit scoped_existence_ensurer_t(bool avoid_pc_refcount_increase = true)
 {
 auto status_and_handle = get_with_status();
 if (status_and_handle.status == cuda::status::not_yet_initialized) {
 // Driver not initialized yet => certainly no current context.
 context_handle = context::detail_::none;
 initialize_driver(); // and the handle
 }
 else {
 context_handle = status_and_handle.handle;
 }
 if (context_handle == context::detail_::none) {
 // No current context: fall back to the current device's primary context.
 device_id_ = device::current::detail_::get_id();
 context_handle = device::primary_context::detail_::obtain_and_increase_refcount(device_id_);
 context::current::detail_::push(context_handle);
 // When asked to avoid a _lasting_ refcount increase, undo it on destruction.
 decrease_pc_refcount_on_destruct_ = avoid_pc_refcount_increase;
 }
 else {
 // Some compilers fail to detect that device_id is never used
 // unless it's initialized, and thus warns us of maybe-uninitialized
 // use, so...
 device_id_ = 0;
 decrease_pc_refcount_on_destruct_ = false;
 }
 }

 ~scoped_existence_ensurer_t()
 {
 // Only clean up if we were the ones who pushed a primary context.
 if (context_handle != context::detail_::none and decrease_pc_refcount_on_destruct_) {
#if THROW_IN_DESTRUCTORS
 context::current::detail_::pop();
 device::primary_context::detail_::decrease_refcount(device_id_);
#else
 // Nothrow variants: errors during teardown are discarded.
 context::current::detail_::pop_and_discard_nothrow();
 device::primary_context::detail_::decrease_refcount_nothrow(device_id_);
#endif
 }
 }
};
181 
182 } // namespace detail_
183 
// Overriding with a primary context: forward whether the proxy owns a
// reference-count unit, so the override holds one for its duration too.
inline scoped_override_t::scoped_override_t(device::primary_context_t&& primary_context)
 : parent(primary_context.is_owning(), primary_context.device_id(), primary_context.handle()) {}
// Overriding with a regular context: no primary-context refcount involved.
inline scoped_override_t::scoped_override_t(const context_t& context) : parent(context.handle()) {}
inline scoped_override_t::scoped_override_t(context_t&& context) : parent(context.handle()) {}
188 
189 } // namespace current
190 
191 inline context_t create_and_push(
192  const device_t& device,
193  host_thread_sync_scheduling_policy_t sync_scheduling_policy,
194  bool keep_larger_local_mem_after_resize)
195 {
196  auto handle = detail_::create_and_push(device.id(), sync_scheduling_policy, keep_larger_local_mem_after_resize);
197  bool take_ownership = true;
198  return context::wrap(device.id(), handle, take_ownership);
199 }
200 
201 inline context_t create(
202  const device_t& device,
203  host_thread_sync_scheduling_policy_t sync_scheduling_policy,
204  bool keep_larger_local_mem_after_resize)
205 {
206  auto created = create_and_push(device, sync_scheduling_policy, keep_larger_local_mem_after_resize);
207  current::pop();
208  return created;
209 }
210 
211 namespace peer_to_peer {
212 
213 inline bool can_access(context_t accessor, context_t peer)
214 {
215  return device::peer_to_peer::detail_::can_access(accessor.device_id(), peer.device_id());
216 }
217 
218 inline void enable_access(context_t accessor, context_t peer)
219 {
220  detail_::enable_access(accessor.handle(), peer.handle());
221 }
222 
223 inline void disable_access(context_t accessor, context_t peer)
224 {
225  detail_::disable_access(accessor.handle(), peer.handle());
226 }
227 
229 {
230  // Note: What happens when first and second are the same context? Or on the same device?
231  enable_access(first, second);
232  enable_access(second, first );
233 }
234 
236 {
237  // Note: What happens when first and second are the same context? Or on the same device?
238  disable_access(first, second);
239  disable_access(second, first );
240 }
241 
242 
243 } // namespace peer_to_peer
244 
245 namespace current {
246 
247 namespace peer_to_peer {
248 
249 inline void enable_access_to(const context_t &peer_context)
250 {
251  context::peer_to_peer::detail_::enable_access_to(peer_context.handle());
252 }
253 
254 inline void disable_access_to(const context_t &peer_context)
255 {
256  context::peer_to_peer::detail_::disable_access_to(peer_context.handle());
257 }
258 
259 } // namespace peer_to_peer
260 
261 } // namespace current
262 
263 } // namespace context
264 
266 {
267  return memory::device::detail_::allocate(context_handle_, size_in_bytes);
268 }
269 
271  size_t size_in_bytes, memory::managed::initial_visibility_t initial_visibility) const
272 {
273  return memory::managed::detail_::allocate(context_handle_, size_in_bytes, initial_visibility);
274 }
275 
276 
278 {
279  return cuda::device::get(device_id_);
280 }
281 
283 {
284  static constexpr const bool non_owning { false };
285  return cuda::context::wrap(device_id_, context_handle_, non_owning);
286 }
287 
288 inline bool context_t::is_primary() const
289 {
290  return context::current::detail_::is_primary(handle(), device_id());
291 }
292 
293 // Note: The context_t::create_module() member functions are defined in module.hpp,
294 // for better separation of runtime-origination and driver-originating headers; see
295 // issue #320 on the issue tracker.
296 
297 inline void context_t::enable_access_to(const context_t& peer) const
298 {
300 }
301 
302 inline void context_t::disable_access_to(const context_t& peer) const
303 {
305 }
306 
307 inline device_t context_t::device() const
308 {
309  return device::wrap(device_id_);
310 }
311 
313  bool will_synchronize_with_default_stream,
314  stream::priority_t priority) const
315 {
316  return stream::detail_::create(device_id_, handle_, will_synchronize_with_default_stream, priority);
317 }
318 
320  bool uses_blocking_sync,
321  bool records_timing,
322  bool interprocess) const
323 {
324  return cuda::event::detail_::create(
325  device_id_, handle_, do_not_hold_primary_context_refcount_unit,
326  uses_blocking_sync, records_timing, interprocess);
327 }
328 
329 inline stream_t context_t::default_stream() const
330 {
331  return stream::wrap(device_id_, handle_, stream::default_stream_handle, do_not_take_ownership);
332 }
333 
334 template <typename Kernel, typename ... KernelParameters>
335 void context_t::launch(
336  Kernel kernel,
337  launch_configuration_t launch_configuration,
338  KernelParameters... parameters) const
339 {
340  default_stream().enqueue.kernel_launch(kernel, launch_configuration, parameters...);
341 }
342 
343 } // namespace cuda
344 
345 #endif // MULTI_WRAPPER_IMPLS_CONTEXT_HPP_
346 
event_t create_event(bool uses_blocking_sync=event::sync_by_busy_waiting, bool records_timing=event::do_record_timings, bool interprocess=event::not_interprocess) const
Create a new event within this context; see cuda::event::create() for details regarding the parameter...
Definition: context.hpp:319
Proxy class for a CUDA stream.
Definition: stream.hpp:258
Wrapper class for a CUDA context.
Definition: context.hpp:249
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
int priority_t
CUDA streams have a scheduling priority, with lower values meaning higher priority.
Definition: types.hpp:243
detail_::region_helper< memory::region_t > region_t
A child class of the generic region_t with some managed-memory-specific functionality.
Definition: memory.hpp:1974
memory::region_t allocate_managed(size_t size_in_bytes, cuda::memory::managed::initial_visibility_t initial_visibility=cuda::memory::managed::initial_visibility_t::to_supporters_of_concurrent_managed_access) const
Allocates memory on the device whose pointer is also visible on the host, and possibly on other devic...
Definition: context.hpp:270
The full set of possible configuration parameters for launching a kernel on a GPU.
Definition: launch_configuration.hpp:69
CUcontext handle_t
Raw CUDA driver handle for a context; see {context_t}.
Definition: types.hpp:880
Wrapper class for a CUDA event.
Definition: event.hpp:147
A class for holding the primary context of a CUDA device.
Definition: primary_context.hpp:122
void enable_bidirectional_access(context_t first, context_t second)
Enable access both by the first to the second context and the other way around.
Definition: context.hpp:228
void disable_access_to(const context_t &peer) const
Prevent kernels and memory operations within this context from involving memory allocated in a peer c...
Definition: context.hpp:302
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:852
device::id_t id() const noexcept
Return the proxied device's ID.
Definition: device.hpp:594
bool is_primary() const
Definition: context.hpp:288
device_t associated_device() const
Device on which the memory managed with this object is allocated.
Definition: context.hpp:277
host_thread_sync_scheduling_policy_t
Scheduling policies the CUDA driver may use when the host-side thread it is running in needs to wait ...
Definition: types.hpp:886
void initialize_driver()
Obtains the CUDA Runtime version.
Definition: miscellany.hpp:26
void synchronize(const context_t &context)
Waits for all previously-scheduled tasks on all streams (= queues) in a CUDA context to conclude...
Definition: context.hpp:980
stream_t create_stream(bool will_synchronize_with_default_stream, stream::priority_t priority=cuda::stream::default_priority) const
Create a new stream within this context; see cuda::stream::create() for details regarding the paramete...
Definition: context.hpp:312
device_t get(id_t id)
Returns a proxy for the CUDA device with a given id.
Definition: device.hpp:832
memory::region_t allocate(size_t size_in_bytes) const
Allocate a region of memory on the device.
Definition: context.hpp:265
stream_t wrap(device::id_t device_id, context::handle_t context_handle, handle_t stream_handle, bool take_ownership=false, bool hold_pc_refcount_unit=false) noexcept
Wrap an existing stream in a stream_t instance.
Definition: stream.hpp:1020
void enable_access_to(const context_t &peer_context)
Allows subsequently-executed memory operations and kernels to access the memory associated with the s...
Definition: context.hpp:249
CUarray handle_t
Raw CUDA driver handle for arrays (of any dimension)
Definition: array.hpp:34
void disable_access_to(const context_t &peer_context)
Prevents subsequently-executed memory operations and kernels from accessing the memory associated wit...
Definition: context.hpp:254
void enable_access_to(const context_t &peer) const
Allow kernels and memory operations within this context to involve memory allocated in a peer context...
Definition: context.hpp:297
void disable_access(context_t accessor, context_t peer)
Disable access by one CUDA device to the global memory of another.
Definition: context.hpp:223
void enable_access(context_t accessor, context_t peer)
Enable access by one CUDA device to the global memory of another.
Definition: context.hpp:218
Can be shared between processes. Must not be able to record timings.
Definition: constants.hpp:96
device_t wrap(id_t id) NOEXCEPT_IF_NDEBUG
Returns a wrapper for the CUDA device with a given id.
Definition: device.hpp:820
bool can_access(context_t accessor, context_t peer)
Check if a CUDA context can access the global memory of another CUDA context.
Definition: context.hpp:213
bool is_primary(const context_t &context)
Definition: context.hpp:49
const stream::handle_t default_stream_handle
The CUDA runtime provides a default stream on which work is scheduled when no stream is specified; fo...
Definition: constants.hpp:42
context_t associated_context() const
Context in which the memory managed with this object is recognized / usable.
Definition: context.hpp:282
void disable_bidirectional_access(context_t first, context_t second)
Disable access both by the first to the second context and the other way around.
Definition: context.hpp:235
Wrapper class for a CUDA device.
Definition: device.hpp:135
initial_visibility_t
The choices of which categories CUDA devices must a managed memory region be visible to...
Definition: types.hpp:755