11 #ifndef MULTI_WRAPPER_IMPLS_STREAM_HPP_    12 #define MULTI_WRAPPER_IMPLS_STREAM_HPP_    14 #include "../array.hpp"    15 #include "../device.hpp"    16 #include "../event.hpp"    17 #include "../kernel_launch.hpp"    18 #include "../pointer.hpp"    19 #include "../stream.hpp"    20 #include "../primary_context.hpp"    21 #include "../kernel.hpp"    22 #include "../current_context.hpp"    23 #include "../current_device.hpp"    31 inline ::std::string identify(
const stream_t& stream)
    33     return identify(stream.handle(), stream.context().handle(), stream.device().id());
    36 #if CUDA_VERSION >= 9020    39     return context::detail_::get_device_id(context_handle_of(stream_handle));
    41 #endif // CUDA_VERSION >= 9020    43 inline void record_event_in_current_context(
    49     auto status = cuEventRecord(event_handle, stream_handle);
    51         "Failed scheduling " + event::detail_::identify(event_handle)
    52         + 
" on " + stream::detail_::identify(stream_handle, current_context_handle_, current_device_id));
    59     bool             synchronizes_with_default_stream,
    62     auto pc = device.
primary_context(do_not_hold_primary_context_refcount_unit);
    63     device::primary_context::detail_::increase_refcount(device.
id());
    64     return create(pc, synchronizes_with_default_stream, priority, do_hold_primary_context_refcount_unit);
    69     bool              synchronizes_with_default_stream,
    71     bool              hold_pc_refcount_unit)
    73     return detail_::create(
    74         context.device_id(), context.handle(), synchronizes_with_default_stream,
    75         priority, hold_pc_refcount_unit);
    82     CAW_SET_SCOPE_CONTEXT(associated_stream.context_handle_);
    85     static constexpr 
const unsigned int flags = 0;
    87     auto status = cuStreamWaitEvent(associated_stream.handle_, event_.
handle(), flags);
    89         "Failed scheduling a wait for " + event::detail_::identify(event_.
handle())
    90         + 
" on " + stream::detail_::identify(associated_stream));
    96     auto device_id = associated_stream.device_id_;
    97     auto context_handle = associated_stream.context_handle_;
    98     auto stream_context_handle_ = associated_stream.context_handle_;
   100         throw ::std::invalid_argument(
   101             "Attempt to enqueue " + event::detail_::identify(existing_event)
   102             + 
" on a stream in a different context: " + stream::detail_::identify(associated_stream));
   104     context::current::detail_::scoped_ensurer_t ensure_a_context{context_handle};
   105     stream::detail_::record_event_in_current_context(
   106         device_id, context_handle, associated_stream.handle_,existing_event.
handle());
   107     return existing_event;
   111     bool          uses_blocking_sync,
   115     auto context_handle = associated_stream.context_handle_;
   116     CAW_SET_SCOPE_CONTEXT(context_handle);
   119     auto ev = event::detail_::create_in_current_context(
   120         associated_stream.device_id_,
   122         do_not_hold_primary_context_refcount_unit,
   123         uses_blocking_sync, records_timing, interprocess);
   137     static constexpr 
const bool dont_take_ownership { 
false };
   138     return context::wrap(device_id_, context_handle_, dont_take_ownership);
   141 #if CUDA_VERSION >= 11000   147         throw ::std::invalid_argument(
"Attempt to copy attributes between streams on different devices");
   150         throw ::std::invalid_argument(
"Attempt to copy attributes between streams on different contexts");
   154     auto status = cuStreamCopyAttributes(dest.
handle(), src.
handle());
   156         + 
" to " + stream::detail_::identify(src));
   159 #endif // CUDA_VERSION >= 11000   163 #endif // MULTI_WRAPPER_IMPLS_STREAM_HPP_ stream_t create(const device_t &device, bool synchronizes_with_default_stream, priority_t priority)
Create a new stream (= queue) in the primary execution context of a CUDA device. 
Definition: stream.hpp:57
 
context::handle_t context_handle() const noexcept
The raw CUDA handle for the context in which the represented stream is defined. 
Definition: stream.hpp:260
 
Proxy class for a CUDA stream. 
Definition: stream.hpp:246
 
event::handle_t handle() const noexcept
The raw CUDA handle for this event. 
Definition: event.hpp:143
 
stream::handle_t handle() const noexcept
The raw CUDA handle for a stream which this class wraps. 
Definition: stream.hpp:257
 
Wrapper class for a CUDA context. 
Definition: context.hpp:244
 
Definitions and functionality wrapping CUDA APIs. 
Definition: array.hpp:22
 
int priority_t
CUDA streams have a scheduling priority, with lower values meaning higher priority. 
Definition: types.hpp:246
 
Wrapper class for a CUDA event. 
Definition: event.hpp:133
 
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API. 
Definition: types.hpp:850
 
device_t device() const noexcept
The device w.r.t. which the stream is defined. 
Definition: stream.hpp:130
 
context::handle_t context_handle() const noexcept
The raw CUDA handle for the context in which the represented event is defined. 
Definition: event.hpp:140
 
void wait(const event_t &event_) const
Will pause all further activity on the stream until the specified event has occurred (i.e. has fired).
Definition: stream.hpp:80
 
device::id_t id() const noexcept
Return the proxied device's ID. 
Definition: device.hpp:594
 
event_t & event(event_t &existing_event) const
Have an event 'fire', i.e. be marked as having occurred, once the work already scheduled on this stream has completed. 
Definition: stream.hpp:94
 
CUevent handle_t
The CUDA driver's raw handle for events. 
Definition: types.hpp:217
 
device::primary_context_t primary_context(bool hold_pc_refcount_unit=false) const
Produce a proxy for the device's primary context - the one used by runtime API calls. 
Definition: device.hpp:152
 
stream_t wrap(device::id_t device_id, context::handle_t context_handle, handle_t stream_handle, bool take_ownership=false, bool hold_pc_refcount_unit=false) noexcept
Wrap an existing stream in a stream_t instance. 
Definition: stream.hpp:1006
 
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we've failed - which also ensures no string is constructed unless a failure has actually occurred.
Definition: error.hpp:316
 
CUarray handle_t
Raw CUDA driver handle for arrays (of any dimension) 
Definition: array.hpp:34
 
CUstream handle_t
The CUDA driver's raw handle for streams. 
Definition: types.hpp:239
 
Can be shared between processes. Must not be able to record timings. 
Definition: constants.hpp:96
 
device_t wrap(id_t id) NOEXCEPT_IF_NDEBUG
Returns a wrapper for the CUDA device with a given id. 
Definition: device.hpp:825
 
context_t context() const noexcept
The context in which this stream was defined. 
Definition: stream.hpp:135
 
Wrapper class for a CUDA device. 
Definition: device.hpp:135