cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
device.hpp
#pragma once
#ifndef MULTI_WRAPPER_IMPLS_DEVICE_HPP_
#define MULTI_WRAPPER_IMPLS_DEVICE_HPP_

#include "../device.hpp"
#include "../event.hpp"
#include "../kernel_launch.hpp"
#include "../stream.hpp"
#include "../primary_context.hpp"
#include "../kernel.hpp"
#include "../apriori_compiled_kernel.hpp"
#include "../current_context.hpp"
#include "../current_device.hpp"
#include "../peer_to_peer.hpp"

namespace cuda {

namespace device {

namespace primary_context {

inline bool is_active(const device_t& device)
{
    return detail_::is_active(device.id());
}

inline void destroy(const device_t& device)
{
    auto status = cuDevicePrimaryCtxReset(device.id());
    throw_if_error(status, "Failed destroying/resetting the primary context of device " + ::std::to_string(device.id()));
}

inline primary_context_t get(const device_t& device)
{
    auto pc_handle = detail_::get_handle(device.id(), true);
    return detail_::wrap(device.id(), pc_handle, true);
}

namespace detail_ {

// Use this when you need a primary context, don't have a device_t to hang it on,
// and don't want it to get deactivated/destroyed right after you use it.
inline primary_context_t leaky_get(cuda::device::id_t device_id)
{
    bool need_to_activate_and_leak = not cuda::device::primary_context::detail_::is_active(device_id);
    auto pc_handle = cuda::device::primary_context::detail_::get_handle(device_id, true);
    return cuda::device::primary_context::detail_::wrap(device_id, pc_handle, not need_to_activate_and_leak);
}

} // namespace detail_

} // namespace primary_context
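
// A usage sketch for the primary-context free functions above (illustrative only, not
// part of this header; cuda::device::get(0) is assumed to return a device_t proxy for
// device 0):
//
//     auto device = cuda::device::get(0);
//     if (not cuda::device::primary_context::is_active(device)) {
//         // get() obtains the device's primary context and keeps it alive
//         // for the lifetime of the returned proxy object
//         auto pc = cuda::device::primary_context::get(device);
//         // ... use pc, e.g. pc.default_stream() ...
//     }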

namespace peer_to_peer {

inline bool can_access(device_t accessor, device_t peer)
{
    return detail_::can_access(accessor.id(), peer.id());
}

inline void enable_access(device_t accessor, device_t peer)
{
    return context::peer_to_peer::enable_access(accessor.primary_context(), peer.primary_context());
}

inline void disable_access(device_t accessor, device_t peer)
{
#ifndef NDEBUG
    if (accessor == peer) {
        throw ::std::invalid_argument("A device cannot be used as its own peer");
    }
#endif
    context::peer_to_peer::disable_access(accessor.primary_context(), peer.primary_context());
}

inline bool can_access_each_other(device_t first, device_t second)
{
    return can_access(first, second) and can_access(second, first);
}

inline void enable_bidirectional_access(device_t first, device_t second)
{
#ifndef NDEBUG
    if (first == second) {
        throw ::std::invalid_argument("A device cannot be used as its own peer");
    }
#endif
    context::peer_to_peer::enable_bidirectional_access(first.primary_context(), second.primary_context());
}

inline void disable_bidirectional_access(device_t first, device_t second)
{
#ifndef NDEBUG
    if (first == second) {
        throw ::std::invalid_argument("A device cannot be used as its own peer");
    }
#endif
    context::peer_to_peer::disable_bidirectional_access(first.primary_context(), second.primary_context());
}

inline attribute_value_t get_attribute(attribute_t attribute, device_t first, device_t second)
{
#ifndef NDEBUG
    if (first == second) {
        throw ::std::invalid_argument("A device cannot be used as its own peer");
    }
#endif
    return detail_::get_attribute(attribute, first.id(), second.id());
}

} // namespace peer_to_peer
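
// A usage sketch for the peer-to-peer functions above (illustrative only;
// cuda::device::get(...) is assumed to return device_t proxies for two distinct devices):
//
//     auto gpu_0 = cuda::device::get(0);
//     auto gpu_1 = cuda::device::get(1);
//     if (cuda::device::peer_to_peer::can_access_each_other(gpu_0, gpu_1)) {
//         cuda::device::peer_to_peer::enable_bidirectional_access(gpu_0, gpu_1);
//     }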

inline stream_t primary_context_t::default_stream() const noexcept
{
    return stream::wrap(device_id_, handle_, stream::default_stream_handle);
}

} // namespace device

// device_t methods

inline stream_t device_t::default_stream(bool hold_primary_context_refcount_unit) const
{
    auto pc = primary_context();
    if (hold_primary_context_refcount_unit) {
        device::primary_context::detail_::increase_refcount(id_);
    }
    return stream::wrap(
        id(), pc.handle(), stream::default_stream_handle,
        do_not_take_ownership, hold_primary_context_refcount_unit);
}

inline stream_t device_t::create_stream(
    bool will_synchronize_with_default_stream,
    stream::priority_t priority) const
{
    return stream::create(*this, will_synchronize_with_default_stream, priority);
}

inline device::primary_context_t device_t::primary_context(bool hold_pc_refcount_unit) const
{
    auto pc_handle = primary_context_handle();
    if (hold_pc_refcount_unit) {
        device::primary_context::detail_::increase_refcount(id_);
        // Q: Why increase the refcount here, when `primary_context_handle()`
        //    ensured this has already happened for this object?
        // A: Because an unscoped primary_context_t needs its own refcount
        //    unit (e.g. in case this object gets destructed but the
        //    primary_context_t is still alive).
    }
    return device::primary_context::detail_::wrap(id_, pc_handle, hold_pc_refcount_unit);
}
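
// A usage sketch for primary_context(bool) above (illustrative only; cuda::device::get(0)
// is an assumed way of obtaining a device_t): requesting an extra refcount unit lets the
// returned proxy outlive the device_t that produced it:
//
//     cuda::device::primary_context_t pc = cuda::device::get(0).primary_context(true);
//     // The temporary device_t is gone, but pc holds its own refcount unit,
//     // keeping the primary context alive until pc itself is destructed.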

inline void synchronize(const device_t& device)
{
    auto pc = device.primary_context();
    context::current::detail_::scoped_override_t set_device_for_this_scope(pc.handle());
    context::current::detail_::synchronize(device.id(), pc.handle());
}

template <typename KernelFunction, typename ... KernelParameters>
void device_t::launch(
    KernelFunction kernel_function, launch_configuration_t launch_configuration,
    KernelParameters ... parameters) const
{
    auto pc = primary_context();
    pc.default_stream().enqueue.kernel_launch(
        kernel_function, launch_configuration, parameters...);
}
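
// A usage sketch for device_t::launch() and cuda::synchronize() above (illustrative only;
// my_kernel, num_blocks, threads_per_block and the arguments are hypothetical, and the
// launch configuration is assumed to come from cuda::make_launch_config or an equivalent
// helper):
//
//     auto device = cuda::device::current::get();
//     auto config = cuda::make_launch_config(num_blocks, threads_per_block);
//     device.launch(my_kernel, config, arg1, arg2); // enqueued on the primary context's default stream
//     cuda::synchronize(device);                    // block until all work on the device completes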

inline context_t device_t::create_context(
    context::host_thread_synch_scheduling_policy_t synch_scheduling_policy,
    bool keep_larger_local_mem_after_resize) const
{
    return context::create(*this, synch_scheduling_policy, keep_larger_local_mem_after_resize);
}

inline event_t device_t::create_event(
    bool uses_blocking_sync,
    bool records_timing,
    bool interprocess)
{
    // The current implementation of event::create is not particularly smart,
    // but it's probably not worth trying to improve it just for this function
    return event::create(*this, uses_blocking_sync, records_timing, interprocess);
}
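
// A usage sketch for the stream- and event-creation methods above (illustrative only;
// the argument values shown are example choices, and any omitted trailing arguments are
// assumed to get defaults from the in-class declarations in device.hpp):
//
//     auto device = cuda::device::current::get();
//     auto stream = device.create_stream(false);             // does not synchronize with the default stream
//     auto event  = device.create_event(false, true, false); // busy-wait sync, record timings, not interprocess
//     stream.enqueue.event(event);                           // record the event on the new stream
//     event.synchronize();                                   // have the host wait for the event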

namespace detail_ {

template<typename Kernel>
device::primary_context_t get_implicit_primary_context(Kernel)
{
    return device::current::get().primary_context();
}

template<>
inline device::primary_context_t get_implicit_primary_context<kernel_t>(kernel_t kernel)
{
    auto context = kernel.context();
    auto device = context.device();
    auto primary_context = device.primary_context();
    if (context != primary_context) {
        throw ::std::logic_error("Attempt to launch a kernel associated with a non-primary context without specifying a stream associated with that context.");
    }
    return primary_context;
}

template<>
inline device::primary_context_t get_implicit_primary_context<apriori_compiled_kernel_t>(apriori_compiled_kernel_t kernel)
{
    const kernel_t& kernel_ = kernel;
    return get_implicit_primary_context(kernel_);
}

} // namespace detail_

} // namespace cuda

#endif // MULTI_WRAPPER_IMPLS_DEVICE_HPP_