eyalroz/cuda-api-wrappers/multi__wrapper__impls_2stream_8hpp_source.html

 #pragma once
 #ifndef MULTI_WRAPPER_IMPLS_STREAM_HPP_
 #define MULTI_WRAPPER_IMPLS_STREAM_HPP_

 #include "../array.hpp"
 #include "../device.hpp"
 #include "../event.hpp"
 #include "../kernel_launch.hpp"
 #include "../pointer.hpp"
 #include "../stream.hpp"
 #include "../primary_context.hpp"
 #include "../kernel.hpp"
 #include "../current_context.hpp"
 #include "../current_device.hpp"

 namespace cuda {

 namespace stream {

 namespace detail_ {

 /// Builds an identification string for a stream proxy, by forwarding its raw
 /// stream handle, the handle of its context and the ID of its device to the
 /// handle-based `identify()` overload.
 inline ::std::string identify(const stream_t& stream)
 {
     auto raw_stream_handle = stream.handle();
     auto raw_context_handle = stream.context().handle();
     auto id_of_device = stream.device().id();
     return identify(raw_stream_handle, raw_context_handle, id_of_device);
 }

 #if CUDA_VERSION >= 9020
 /// Determines the ID of the device associated with a stream, given only the
 /// stream's raw handle: the stream's context handle is looked up first, and
 /// the device ID is then obtained from that context.
 inline device::id_t device_id_of(stream::handle_t stream_handle)
 {
     auto handle_of_stream_context = context_handle_of(stream_handle);
     return context::detail_::get_device_id(handle_of_stream_context);
 }
 #endif // CUDA_VERSION >= 9020

 /// Records an event on a stream via `cuEventRecord()`, without setting any
 /// context itself (as the name indicates, the relevant context is expected to
 /// already be current).
 ///
 /// @param current_device_id        used only for composing the error message
 /// @param current_context_handle_  used only for composing the error message
 /// @param stream_handle            raw handle of the stream on which to record
 /// @param event_handle             raw handle of the event to record
 ///
 /// Failure is reported through `throw_if_error_lazy`, so the message string
 /// is only composed when the driver call did not succeed.
 inline void record_event_in_current_context(
     device::id_t       current_device_id,
     context::handle_t  current_context_handle_,
     stream::handle_t   stream_handle,
     event::handle_t    event_handle)
 {
     auto record_status = cuEventRecord(event_handle, stream_handle);
     throw_if_error_lazy(
         record_status,
         "Failed scheduling " + event::detail_::identify(event_handle) + " on "
         + stream::detail_::identify(stream_handle, current_context_handle_, current_device_id));
 }

 } // namespace detail_

 /// Creates a new stream in the primary context of a device.
 ///
 /// @param device                            the device in whose primary context to create the stream
 /// @param synchronizes_with_default_stream  forwarded as-is to the context-based `create()`
 /// @param priority                          forwarded as-is to the context-based `create()`
 /// @return the stream produced by the context-based `create()` overload
 inline stream_t create(
     const device_t&  device,
     bool             synchronizes_with_default_stream,
     priority_t       priority)
 {
     // The primary context proxy obtained here does not hold a refcount unit of
     // its own...
     auto primary_ctx = device.primary_context(do_not_hold_primary_context_refcount_unit);

     // ... instead, the refcount is increased explicitly, and the stream is
     // created with the "do hold" flag.
     // NOTE(review): presumably the stream then owns that unit and releases it
     // when destroyed - confirm against stream_t's destructor.
     device::primary_context::detail_::increase_refcount(device.id());

     return create(
         primary_ctx,
         synchronizes_with_default_stream,
         priority,
         do_hold_primary_context_refcount_unit);
 }

 /// Creates a new stream in a given context, by delegating to the handle-based
 /// `detail_::create()` with the context's device ID and raw handle.
 ///
 /// @param context                           the context in which to create the stream
 /// @param synchronizes_with_default_stream  forwarded as-is to `detail_::create()`
 /// @param priority                          forwarded as-is to `detail_::create()`
 /// @param hold_pc_refcount_unit             forwarded as-is to `detail_::create()`
 inline stream_t create(
     const context_t&  context,
     bool              synchronizes_with_default_stream,
     priority_t        priority,
     bool              hold_pc_refcount_unit)
 {
     auto id_of_device = context.device_id();
     auto raw_context_handle = context.handle();
     return detail_::create(
         id_of_device,
         raw_context_handle,
         synchronizes_with_default_stream,
         priority,
         hold_pc_refcount_unit);
 }

 } // namespace stream

 /// Enqueues, on the associated stream, a wait for an event - using
 /// `cuStreamWaitEvent()`, with the stream's context set for the scope of
 /// this call.
 ///
 /// @param event_ the event to wait on
 ///
 /// Failure is reported through `throw_if_error_lazy`, so the message string
 /// is only composed when the driver call did not succeed.
 inline void stream_t::enqueue_t::wait(const event_t& event_) const
 {
     CAW_SET_SCOPE_CONTEXT(associated_stream.context_handle_);

     // cuStreamWaitEvent() takes a flags argument; no flags are set here.
     constexpr unsigned int no_flags = 0;

     auto wait_status = cuStreamWaitEvent(associated_stream.handle_, event_.handle(), no_flags);
     throw_if_error_lazy(
         wait_status,
         "Failed scheduling a wait for " + event::detail_::identify(event_.handle()) + " on "
         + stream::detail_::identify(associated_stream));
 }

 /// Enqueues the recording of an existing event on the associated stream.
 ///
 /// @param existing_event the event to record; its context handle must be
 /// the same as that of the associated stream
 /// @return a reference to `existing_event`
 /// @throws ::std::invalid_argument if the event's context handle differs
 /// from the stream's
 inline event_t& stream_t::enqueue_t::event(event_t& existing_event) const
 {
     auto stream_context_handle = associated_stream.context_handle_;

     const bool contexts_match = (existing_event.context_handle() == stream_context_handle);
     if (!contexts_match) {
         throw ::std::invalid_argument(
             "Attempt to enqueue " + event::detail_::identify(existing_event)
             + " on a stream in a different context: " + stream::detail_::identify(associated_stream));
     }

     // Ensure the stream's context for the remainder of this scope, before
     // making the "in current context" call below.
     context::current::detail_::scoped_ensurer_t ensure_a_context{stream_context_handle};
     stream::detail_::record_event_in_current_context(
         associated_stream.device_id_,
         stream_context_handle,
         associated_stream.handle_,
         existing_event.handle());
     return existing_event;
 }

 /// Creates a new event in the associated stream's context, and enqueues its
 /// recording on the stream.
 ///
 /// @param uses_blocking_sync  forwarded as-is to the event creation function
 /// @param records_timing      forwarded as-is to the event creation function
 /// @param interprocess        forwarded as-is to the event creation function
 /// @return the newly-created event, after its recording has been enqueued
 inline event_t stream_t::enqueue_t::event(
     bool          uses_blocking_sync,
     bool          records_timing,
     bool          interprocess) const
 {
     auto stream_context_handle = associated_stream.context_handle_;
     CAW_SET_SCOPE_CONTEXT(stream_context_handle);

     // Note that even if this stream is in the primary context, the created
     // event will not extend the context's life. If the user wants that
     // extension, they should have the _stream_ hold a reference to the
     // primary context.
     auto new_event = event::detail_::create_in_current_context(
         associated_stream.device_id_,
         stream_context_handle,
         do_not_hold_primary_context_refcount_unit,
         uses_blocking_sync,
         records_timing,
         interprocess);

     // Delegate the actual recording to the overload for existing events
     this->event(new_event);
     return new_event;
 }

 /// @return a device proxy wrapping the device ID stored in this stream proxy
 inline device_t stream_t::device() const noexcept
 {
     auto id_of_device = device_id_;
     return cuda::device::wrap(id_of_device);
 }

 /// @return a context proxy wrapping the device ID and context handle stored
 /// in this stream proxy; the wrapper is created without taking ownership
 inline context_t stream_t::context() const noexcept
 {
     constexpr bool take_ownership = false;
     return context::wrap(device_id_, context_handle_, take_ownership);
 }

 #if CUDA_VERSION >= 11000

 /// Copies the attributes of one stream onto another, using
 /// `cuStreamCopyAttributes()`, with the destination stream's context set for
 /// the scope of the call.
 ///
 /// @param dest the stream whose attributes are to be set
 /// @param src  the stream whose attributes are to be copied
 ///
 /// @throws ::std::invalid_argument in builds without NDEBUG defined, if the
 /// two streams compare unequal in their device or in their context
 ///
 /// A failure of the driver call is reported through `throw_if_error_lazy`,
 /// so the message string is only composed when the call did not succeed.
 inline void copy_attributes(const stream_t &dest, const stream_t &src)
 {
 #ifndef NDEBUG
     if (dest.device() != src.device()) {
         throw ::std::invalid_argument("Attempt to copy attributes between streams on different devices");
     }
     if (dest.context() != src.context()) {
         throw ::std::invalid_argument("Attempt to copy attributes between streams on different contexts");
     }
 #endif
     CAW_SET_SCOPE_CONTEXT(dest.context_handle());
     auto status = cuStreamCopyAttributes(dest.handle(), src.handle());
     // The message must name both streams: the source first, then the destination
     throw_if_error_lazy(status, "Copying attributes from " + stream::detail_::identify(src)
         + " to " + stream::detail_::identify(dest));
 }

 #endif // CUDA_VERSION >= 11000

 } // namespace cuda

 #endif // MULTI_WRAPPER_IMPLS_STREAM_HPP_

cuda::stream::create
stream_t create(const device_t &device, bool synchronizes_with_default_stream, priority_t priority)
Create a new stream (= queue) in the primary execution context of a CUDA device.
Definition: stream.hpp:57

cuda::stream_t::context_handle
context::handle_t context_handle() const noexcept
The raw CUDA handle for the context in which the represented stream is defined.
Definition: stream.hpp:260

cuda::stream_t
Proxy class for a CUDA stream.
Definition: stream.hpp:246

cuda::event_t::handle
event::handle_t handle() const noexcept
The raw CUDA handle for this event.
Definition: event.hpp:143

cuda::stream_t::handle
stream::handle_t handle() const noexcept
The raw CUDA handle for a stream which this class wraps.
Definition: stream.hpp:257

cuda::context_t
Wrapper class for a CUDA context.
Definition: context.hpp:244

cuda
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22

cuda::stream::priority_t
int priority_t
CUDA streams have a scheduling priority, with lower values meaning higher priority.
Definition: types.hpp:246

cuda::event_t
Wrapper class for a CUDA event.
Definition: event.hpp:133

cuda::device::id_t
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:850

cuda::stream_t::device
device_t device() const noexcept
The device w.r.t. which the stream is defined.
Definition: stream.hpp:130

cuda::event_t::context_handle
context::handle_t context_handle() const noexcept
The raw CUDA handle for the context in which the represented event is defined.
Definition: event.hpp:140

cuda::stream_t::enqueue_t::wait
void wait(const event_t &event_) const
Will pause all further activity on the stream until the specified event has occurred (i...
Definition: stream.hpp:80

cuda::device_t::id
device::id_t id() const noexcept
Return the proxied device's ID.
Definition: device.hpp:594

cuda::stream_t::enqueue_t::event
event_t & event(event_t &existing_event) const
Have an event 'fire', i.e.
Definition: stream.hpp:94

cuda::event::handle_t
CUevent handle_t
The CUDA driver's raw handle for events.
Definition: types.hpp:217

cuda::device_t::primary_context
device::primary_context_t primary_context(bool hold_pc_refcount_unit=false) const
Produce a proxy for the device's primary context - the one used by runtime API calls.
Definition: device.hpp:152

cuda::stream::wrap
stream_t wrap(device::id_t device_id, context::handle_t context_handle, handle_t stream_handle, bool take_ownership=false, bool hold_pc_refcount_unit=false) noexcept
Wrap an existing stream in a stream_t instance.
Definition: stream.hpp:1006

throw_if_error_lazy
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we've failed - which also ensures no string is constructed unle...
Definition: error.hpp:316

cuda::array::handle_t
CUarray handle_t
Raw CUDA driver handle for arrays (of any dimension)
Definition: array.hpp:34

cuda::stream::handle_t
CUstream handle_t
The CUDA driver's raw handle for streams.
Definition: types.hpp:239

cuda::event::interprocess
Can be shared between processes. Must not be able to record timings.
Definition: constants.hpp:96

cuda::device::wrap
device_t wrap(id_t id) NOEXCEPT_IF_NDEBUG
Returns a wrapper for the CUDA device with a given id.
Definition: device.hpp:825

cuda::stream_t::context
context_t context() const noexcept
The context in which this stream was defined.
Definition: stream.hpp:135

cuda::device_t
Wrapper class for a CUDA device.
Definition: device.hpp:135