cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
stream.hpp
Go to the documentation of this file.
1 
10 #pragma once
11 #ifndef MULTI_WRAPPER_IMPLS_STREAM_HPP_
12 #define MULTI_WRAPPER_IMPLS_STREAM_HPP_
13 
14 #include "../array.hpp"
15 #include "../device.hpp"
16 #include "../event.hpp"
17 #include "../kernel_launch.hpp"
18 #include "../pointer.hpp"
19 #include "../stream.hpp"
20 #include "../primary_context.hpp"
21 #include "../kernel.hpp"
22 #include "../current_context.hpp"
23 #include "../current_device.hpp"
24 
25 namespace cuda {
26 
27 namespace stream {
28 
29 namespace detail_ {
30 
31 inline ::std::string identify(const stream_t& stream)
32 {
33  return identify(stream.handle(), stream.context().handle(), stream.device().id());
34 }
35 
36 #if CUDA_VERSION >= 9020
37 inline device::id_t device_id_of(stream::handle_t stream_handle)
38 {
39  return context::detail_::get_device_id(context_handle_of(stream_handle));
40 }
41 #endif // CUDA_VERSION >= 9020
42 
43 inline void record_event_in_current_context(
44  device::id_t current_device_id,
45  context::handle_t current_context_handle_,
46  stream::handle_t stream_handle,
47  event::handle_t event_handle)
48 {
49  auto status = cuEventRecord(event_handle, stream_handle);
50  throw_if_error_lazy(status,
51  "Failed scheduling " + event::detail_::identify(event_handle)
52  + " on " + stream::detail_::identify(stream_handle, current_context_handle_, current_device_id));
53 }
54 
55 } // namespace detail_
56 
58  const device_t& device,
59  bool synchronizes_with_default_stream,
60  priority_t priority)
61 {
62  auto pc = device.primary_context(do_not_hold_primary_context_refcount_unit);
63  device::primary_context::detail_::increase_refcount(device.id());
64  return create(pc, synchronizes_with_default_stream, priority, do_hold_primary_context_refcount_unit);
65 }
66 
68  const context_t& context,
69  bool synchronizes_with_default_stream,
70  priority_t priority,
71  bool hold_pc_refcount_unit)
72 {
73  return detail_::create(
74  context.device_id(), context.handle(), synchronizes_with_default_stream,
75  priority, hold_pc_refcount_unit);
76 }
77 
78 } // namespace stream
79 
80 inline void stream_t::enqueue_t::wait(const event_t& event_) const
81 {
82  CAW_SET_SCOPE_CONTEXT(associated_stream.context_handle_);
83 
84  // Required by the CUDA runtime API; the flags value is currently unused
85  static constexpr const unsigned int flags = 0;
86 
87  auto status = cuStreamWaitEvent(associated_stream.handle_, event_.handle(), flags);
88  throw_if_error_lazy(status,
89  "Failed scheduling a wait for " + event::detail_::identify(event_.handle())
90  + " on " + stream::detail_::identify(associated_stream));
91 
92 }
93 
94 inline event_t& stream_t::enqueue_t::event(event_t& existing_event) const
95 {
96  auto device_id = associated_stream.device_id_;
97  auto context_handle = associated_stream.context_handle_;
98  auto stream_context_handle_ = associated_stream.context_handle_;
99  if (existing_event.context_handle() != stream_context_handle_) {
100  throw ::std::invalid_argument(
101  "Attempt to enqueue " + event::detail_::identify(existing_event)
102  + " on a stream in a different context: " + stream::detail_::identify(associated_stream));
103  }
104  context::current::detail_::scoped_ensurer_t ensure_a_context{context_handle};
105  stream::detail_::record_event_in_current_context(
106  device_id, context_handle, associated_stream.handle_,existing_event.handle());
107  return existing_event;
108 }
109 
111  bool uses_blocking_sync,
112  bool records_timing,
113  bool interprocess) const
114 {
115  auto context_handle = associated_stream.context_handle_;
116  CAW_SET_SCOPE_CONTEXT(context_handle);
117 
118  // Note that even if this stream is in the primary context, the created event
119  auto ev = event::detail_::create_in_current_context(
120  associated_stream.device_id_,
121  context_handle,
122  do_not_hold_primary_context_refcount_unit,
123  uses_blocking_sync, records_timing, interprocess);
124  // will not extend the context's life. If the user wants that extension, they
125  // should have the _stream_ hold a reference to the primary context.
126  this->event(ev);
127  return ev;
128 }
129 
130 inline device_t stream_t::device() const noexcept
131 {
132  return cuda::device::wrap(device_id_);
133 }
134 
135 inline context_t stream_t::context() const noexcept
136 {
137  static constexpr const bool dont_take_ownership { false };
138  return context::wrap(device_id_, context_handle_, dont_take_ownership);
139 }
140 
141 #if CUDA_VERSION >= 11000
142 
143 inline void copy_attributes(const stream_t &dest, const stream_t &src)
144 {
145 #ifndef NDEBUG
146  if (dest.device() != src.device()) {
147  throw ::std::invalid_argument("Attempt to copy attributes between streams on different devices");
148  }
149  if (dest.context() != src.context()) {
150  throw ::std::invalid_argument("Attempt to copy attributes between streams on different contexts");
151  }
152 #endif
153  CAW_SET_SCOPE_CONTEXT(dest.context_handle());
154  auto status = cuStreamCopyAttributes(dest.handle(), src.handle());
155  throw_if_error_lazy(status, "Copying attributes from " + stream::detail_::identify(src)
156  + " to " + stream::detail_::identify(src));
157 }
158 
159 #endif // CUDA_VERSION >= 11000
160 
161 } // namespace cuda
162 
163 #endif // MULTI_WRAPPER_IMPLS_STREAM_HPP_
164 
stream_t create(const device_t &device, bool synchronizes_with_default_stream, priority_t priority)
Create a new stream (= queue) in the primary execution context of a CUDA device.
Definition: stream.hpp:57
context::handle_t context_handle() const noexcept
The raw CUDA handle for the context in which the represented stream is defined.
Definition: stream.hpp:260
Proxy class for a CUDA stream.
Definition: stream.hpp:246
event::handle_t handle() const noexcept
The raw CUDA handle for this event.
Definition: event.hpp:143
stream::handle_t handle() const noexcept
The raw CUDA handle for a stream which this class wraps.
Definition: stream.hpp:257
Wrapper class for a CUDA context.
Definition: context.hpp:244
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
int priority_t
CUDA streams have a scheduling priority, with lower values meaning higher priority.
Definition: types.hpp:246
Wrapper class for a CUDA event.
Definition: event.hpp:133
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:850
device_t device() const noexcept
The device w.r.t. which the stream is defined.
Definition: stream.hpp:130
context::handle_t context_handle() const noexcept
The raw CUDA handle for the context in which the represented event is defined.
Definition: event.hpp:140
void wait(const event_t &event_) const
Will pause all further activity on the stream until the specified event has occurred (i...
Definition: stream.hpp:80
device::id_t id() const noexcept
Return the proxied device's ID.
Definition: device.hpp:594
event_t & event(event_t &existing_event) const
Have an event 'fire', i.e.
Definition: stream.hpp:94
CUevent handle_t
The CUDA driver's raw handle for events.
Definition: types.hpp:217
device::primary_context_t primary_context(bool hold_pc_refcount_unit=false) const
Produce a proxy for the device's primary context - the one used by runtime API calls.
Definition: device.hpp:152
stream_t wrap(device::id_t device_id, context::handle_t context_handle, handle_t stream_handle, bool take_ownership=false, bool hold_pc_refcount_unit=false) noexcept
Wrap an existing stream in a stream_t instance.
Definition: stream.hpp:1006
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we've failed - which also ensures no string is constructed unle...
Definition: error.hpp:316
CUarray handle_t
Raw CUDA driver handle for arrays (of any dimension)
Definition: array.hpp:34
CUstream handle_t
The CUDA driver's raw handle for streams.
Definition: types.hpp:239
Can be shared between processes. Must not be able to record timings.
Definition: constants.hpp:96
device_t wrap(id_t id) NOEXCEPT_IF_NDEBUG
Returns a wrapper for the CUDA device with a given id.
Definition: device.hpp:825
context_t context() const noexcept
The context in which this stream was defined.
Definition: stream.hpp:135
Wrapper class for a CUDA device.
Definition: device.hpp:135