11 #ifndef MULTI_WRAPPER_IMPLS_CONTEXT_HPP_    12 #define MULTI_WRAPPER_IMPLS_CONTEXT_HPP_    14 #include "../device.hpp"    15 #include "../stream.hpp"    16 #include "../event.hpp"    17 #include "../kernel.hpp"    18 #include "../virtual_memory.hpp"    19 #include "../current_context.hpp"    20 #include "../current_device.hpp"    21 #include "../peer_to_peer.hpp"    22 #include "../memory.hpp"    23 #include "../context.hpp"    32 inline handle_t get_primary_for_same_device(
handle_t handle, 
bool increase_refcount)
    34     auto device_id = get_device_id(handle);
    35     return device::primary_context::detail_::get_handle(device_id, increase_refcount);
    40     auto context_device_id = context::detail_::get_device_id(handle);
    41     if (context_device_id != device_id) {
    44     static constexpr 
const bool dont_increase_refcount { 
false };
    45     auto pc_handle = device::primary_context::detail_::get_handle(device_id, dont_increase_refcount);
    46     return handle == pc_handle;
    53     return context::detail_::is_primary_for_device(context.handle(), context.device_id());
    58     return detail_::synchronize(context.device_id(), context.handle());
    69     return cc_handle == device::primary_context::detail_::get_handle(current_context_device_id);
    76     auto current_context = 
get();
    77     return detail_::is_primary(current_context.handle(), current_context.device_id());
    82 inline scoped_override_t::scoped_override_t(
bool hold_primary_context_ref_unit, 
device::id_t device_id, 
handle_t context_handle)
    83 : hold_primary_context_ref_unit_(hold_primary_context_ref_unit), device_id_or_0_(device_id)
    85     if (hold_primary_context_ref_unit) { device::primary_context::detail_::increase_refcount(device_id); }
    89 inline scoped_override_t::~scoped_override_t() noexcept(
false)
    91     if (hold_primary_context_ref_unit_) { device::primary_context::detail_::decrease_refcount(device_id_or_0_); }
    99 inline handle_t push_default_if_missing()
   101     auto handle = detail_::get_handle();
   102     if (handle != context::detail_::none) {
   106     auto current_device_id = device::current::detail_::get_id();
   107     auto pc_handle = device::primary_context::detail_::obtain_and_increase_refcount(current_device_id);
   129 class scoped_existence_ensurer_t {
   133     bool decrease_pc_refcount_on_destruct_;
   135     explicit scoped_existence_ensurer_t(
bool avoid_pc_refcount_increase = 
true)
   137         auto status_and_handle = get_with_status();
   138         if (status_and_handle.status == cuda::status::not_yet_initialized) {
   139             context_handle = context::detail_::none;
   143             context_handle = status_and_handle.handle;
   145         if (context_handle == context::detail_::none) {
   146             device_id_ = device::current::detail_::get_id();
   147             context_handle = device::primary_context::detail_::obtain_and_increase_refcount(device_id_);
   148             context::current::detail_::push(context_handle);
   149             decrease_pc_refcount_on_destruct_ = avoid_pc_refcount_increase;
   156             decrease_pc_refcount_on_destruct_ = 
false;
   160     ~scoped_existence_ensurer_t()
   162         if (context_handle != context::detail_::none and decrease_pc_refcount_on_destruct_) {
   163             context::current::detail_::pop();
   164             device::primary_context::detail_::decrease_refcount(device_id_);
   172         : parent(primary_context.is_owning(), primary_context.device_id(), primary_context.handle()) {}
   173 inline scoped_override_t::scoped_override_t(
const context_t& context) : parent(context.handle()) {}
   174 inline scoped_override_t::scoped_override_t(
context_t&& context) : parent(context.handle()) {}
   181     bool                                  keep_larger_local_mem_after_resize)
   183     auto handle = detail_::create_and_push(device.
id(), sync_scheduling_policy, keep_larger_local_mem_after_resize);
   184     bool take_ownership = 
true;
   185     return context::wrap(device.
id(), handle, take_ownership);
   191     bool                                   keep_larger_local_mem_after_resize)
   193     auto created = create_and_push(device, sync_scheduling_policy, keep_larger_local_mem_after_resize);
   198 namespace peer_to_peer {
   234 namespace peer_to_peer {
   238     context::peer_to_peer::detail_::enable_access_to(peer_context.handle());
   243     context::peer_to_peer::detail_::disable_access_to(peer_context.handle());
   254     return memory::device::detail_::allocate(context_handle_, size_in_bytes);
   260     return memory::managed::detail_::allocate(context_handle_, size_in_bytes, initial_visibility);
   271     static constexpr 
const bool non_owning { 
false };
   272     return cuda::context::wrap(device_id_, context_handle_, non_owning);
   277     return context::current::detail_::is_primary(handle(), device_id());
   294 inline device_t context_t::device()
 const   300     bool                will_synchronize_with_default_stream,
   303     return stream::detail_::create(device_id_, handle_, will_synchronize_with_default_stream, priority);
   307     bool uses_blocking_sync,
   311     return cuda::event::detail_::create(
   312         device_id_, handle_, do_not_hold_primary_context_refcount_unit,
   313         uses_blocking_sync, records_timing, interprocess);
   316 inline stream_t context_t::default_stream()
 const   321 template <
typename Kernel, 
typename ... KernelParameters>
   322 void context_t::launch(
   325     KernelParameters...     parameters)
 const   327     default_stream().enqueue.kernel_launch(kernel, launch_configuration, parameters...);
   332 #endif // MULTI_WRAPPER_IMPLS_CONTEXT_HPP_ event_t create_event(bool uses_blocking_sync=event::sync_by_busy_waiting, bool records_timing=event::do_record_timings, bool interprocess=event::not_interprocess) const
Create a new event within this context; see cuda::event::create() for details regarding the parameter...
Definition: context.hpp:306
 
Proxy class for a CUDA stream. 
Definition: stream.hpp:246
 
Wrapper class for a CUDA context. 
Definition: context.hpp:244
 
Definitions and functionality wrapping CUDA APIs. 
Definition: array.hpp:22
 
int priority_t
CUDA streams have a scheduling priority, with lower values meaning higher priority. 
Definition: types.hpp:246
 
detail_::region_helper< memory::region_t > region_t
A child class of the generic region_t with some managed-memory-specific functionality. 
Definition: memory.hpp:1960
 
The full set of possible configuration parameters for launching a kernel on a GPU. 
Definition: launch_configuration.hpp:69
 
CUcontext handle_t
Raw CUDA driver handle for a context; see {context_t}. 
Definition: types.hpp:878
 
Wrapper class for a CUDA event. 
Definition: event.hpp:133
 
A class for holding the primary context of a CUDA device. 
Definition: primary_context.hpp:112
 
void enable_bidirectional_access(context_t first, context_t second)
Enable access both by the first to the second context and the other way around. 
Definition: context.hpp:215
 
void disable_access_to(const context_t &peer) const
Prevent kernels and memory operations within this context from involving memory allocated in a peer c...
Definition: context.hpp:289
 
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API. 
Definition: types.hpp:850
 
device::id_t id() const noexcept
Return the proxied device's ID. 
Definition: device.hpp:594
 
bool is_primary() const
Definition: context.hpp:275
 
device_t associated_device() const
Device on which the memory managed with this object is allocated. 
Definition: context.hpp:264
 
host_thread_sync_scheduling_policy_t
Scheduling policies the CUDA driver may use when the host-side thread it is running in needs to wait ...
Definition: types.hpp:884
 
void initialize_driver()
Initializes the CUDA driver (safe to call repeatedly). 
Definition: miscellany.hpp:26
 
void synchronize(const context_t &context)
Waits for all previously-scheduled tasks on all streams (= queues) in a CUDA context to conclude...
Definition: context.hpp:968
 
stream_t create_stream(bool will_synchronize_with_default_stream, stream::priority_t priority=cuda::stream::default_priority) const
Create a new stream within this context; see cuda::stream::create() for details regarding the paramete...
Definition: context.hpp:299
 
device_t get(id_t id)
Returns a proxy for the CUDA device with a given id. 
Definition: device.hpp:837
 
stream_t wrap(device::id_t device_id, context::handle_t context_handle, handle_t stream_handle, bool take_ownership=false, bool hold_pc_refcount_unit=false) noexcept
Wrap an existing stream in a stream_t instance. 
Definition: stream.hpp:1006
 
void enable_access_to(const context_t &peer_context)
Allows subsequently-executed memory operations and kernels to access the memory associated with the s...
Definition: context.hpp:236
 
CUarray handle_t
Raw CUDA driver handle for arrays (of any dimension) 
Definition: array.hpp:34
 
memory::region_t allocate(size_t size_in_bytes)
Allocate a region of memory on the device. 
Definition: context.hpp:252
 
void disable_access_to(const context_t &peer_context)
Prevents subsequently-executed memory operations and kernels from accessing the memory associated wit...
Definition: context.hpp:241
 
void enable_access_to(const context_t &peer) const
Allow kernels and memory operations within this context to involve memory allocated in a peer context...
Definition: context.hpp:284
 
void disable_access(context_t accessor, context_t peer)
Disable access by one CUDA device to the global memory of another. 
Definition: context.hpp:210
 
memory::region_t allocate_managed(size_t size_in_bytes, cuda::memory::managed::initial_visibility_t initial_visibility=cuda::memory::managed::initial_visibility_t::to_supporters_of_concurrent_managed_access)
Allocates memory on the device whose pointer is also visible on the host, and possibly on other devic...
Definition: context.hpp:257
 
void enable_access(context_t accessor, context_t peer)
Enable access by one CUDA device to the global memory of another. 
Definition: context.hpp:205
 
Can be shared between processes. Must not be able to record timings. 
Definition: constants.hpp:96
 
device_t wrap(id_t id) NOEXCEPT_IF_NDEBUG
Returns a wrapper for the CUDA device with a given id. 
Definition: device.hpp:825
 
bool can_access(context_t accessor, context_t peer)
Check if a CUDA context can access the global memory of another CUDA context. 
Definition: context.hpp:200
 
bool is_primary(const context_t &context)
Definition: context.hpp:51
 
const stream::handle_t default_stream_handle
The CUDA runtime provides a default stream on which work is scheduled when no stream is specified; fo...
Definition: constants.hpp:42
 
context_t associated_context() const
Context in which the memory managed with this object is recognized / usable. 
Definition: context.hpp:269
 
void disable_bidirectional_access(context_t first, context_t second)
Disable access both by the first to the second context and the other way around. 
Definition: context.hpp:222
 
Wrapper class for a CUDA device. 
Definition: device.hpp:135
 
initial_visibility_t
The choices of which categories of CUDA devices a managed memory region must be visible to...
Definition: types.hpp:753