cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
event.hpp
Go to the documentation of this file.
1 
8 #pragma once
9 #ifndef CUDA_API_WRAPPERS_EVENT_HPP_
10 #define CUDA_API_WRAPPERS_EVENT_HPP_
11 
12 #include <cuda/api/constants.hpp>
14 #include <cuda/api/error.hpp>
15 #include <cuda/api/ipc.hpp>
16 #include <cuda/common/types.hpp>
17 
18 #include <cuda_runtime_api.h>
19 
20 #include <chrono> // for duration types
21 
22 namespace cuda {
23 
25 class device_t;
26 class stream_t;
28 
29 namespace event {
30 
31 namespace detail_ {
32 
40 inline void enqueue(stream::id_t stream_id, id_t event_id) {
41  auto status = cudaEventRecord(event_id, stream_id);
42  cuda::throw_if_error(status,
43  "Failed recording event " + cuda::detail_::ptr_as_hex(event_id)
44  + " on stream " + cuda::detail_::ptr_as_hex(stream_id));
45 }
46 
47 constexpr unsigned inline make_flags(bool uses_blocking_sync, bool records_timing, bool interprocess)
48 {
49  return
50  ( uses_blocking_sync ? cudaEventBlockingSync : 0 )
51  | ( records_timing ? 0 : cudaEventDisableTiming )
52  | ( interprocess ? cudaEventInterprocess : 0 );
53 }
54 
55 } // namespace detail_
56 
57 } // namespace event
58 
60 class event_t;
62 
63 namespace event {
64 
65 namespace detail_ {
79 event_t wrap(
80  device::id_t device_id,
81  id_t event_id,
82  bool take_ownership = false) noexcept;
83 
84 } // namespace detail_
85 
86 } // namespace event
87 
88 inline void synchronize(const event_t& event);
89 
104 class event_t {
105 public: // data member non-mutator getters
109  event::id_t id() const noexcept{ return id_; }
114  device::id_t device_id() const noexcept { return device_id_; }
115  device_t device() const noexcept;
120  bool is_owning() const noexcept { return owning; }
121 
122 public: // other non-mutator methods
123 
135  bool has_occurred() const
136  {
137  auto status = cudaEventQuery(id_);
138  if (status == cuda::status::success) return true;
139  if (status == cuda::status::not_ready) return false;
140  throw cuda::runtime_error(status,
141  "Could not determine whether event " + detail_::ptr_as_hex(id_)
142  + "has already occurred or not.");
143  }
144 
149  bool query() const { return has_occurred(); }
150 
151 
152 public: // other mutator methods
153 
154 
161  void record()
162  {
163  event::detail_::enqueue(stream::default_stream_id, id_);
164  }
165 
172  void record(const stream_t& stream);
173 
180  void fire(const stream_t& stream);
181 
186  void synchronize()
187  {
188  return cuda::synchronize(*this);
189  }
190 
191 protected: // constructors
192 
193  event_t(device::id_t device_id, event::id_t event_id, bool take_ownership) noexcept
194  : device_id_(device_id), id_(event_id), owning(take_ownership) { }
195 
196 public: // friendship
197 
198  friend event_t event::detail_::wrap(device::id_t device_id, event::id_t event_id, bool take_ownership) noexcept;
199 
200 public: // constructors and destructor
201 
202  event_t(const event_t& other) noexcept : event_t(other.device_id_, other.id_, false) { }
203 
204  event_t(event_t&& other) noexcept :
205  event_t(other.device_id_, other.id_, other.owning)
206  {
207  other.owning = false;
208  };
209 
210  ~event_t()
211  {
212  if (owning) { cudaEventDestroy(id_); }
213  }
214 
215 public: // operators
216 
217  event_t& operator=(const event_t& other) = delete;
218  event_t& operator=(event_t&& other) = delete;
219 
220 protected: // data members
221  const device::id_t device_id_;
222  const event::id_t id_;
223  bool owning;
224  // this field is mutable only for enabling move construction; other
225  // than in that case it must not be altered
226 };
227 
228 namespace event {
229 
234 using duration_t = ::std::chrono::duration<float, ::std::milli>;
235 
246 inline duration_t time_elapsed_between(const event_t& start, const event_t& end)
247 {
248  float elapsed_milliseconds;
249  auto status = cudaEventElapsedTime(&elapsed_milliseconds, start.id(), end.id());
250  cuda::throw_if_error(status, "determining the time elapsed between events");
251  return duration_t { elapsed_milliseconds };
252 }
253 
254 namespace detail_ {
255 
271 inline event_t wrap(
272  device::id_t device_id,
273  id_t event_id,
274  bool take_ownership) noexcept
275 {
276  return event_t(device_id, event_id, take_ownership);
277 }
278 
279 // Note: For now, event_t's need their device's ID - even if it's the current device;
280 // that explains the requirement in this function's interface
281 inline event_t create_on_current_device(
282  device::id_t current_device_id,
283  bool uses_blocking_sync,
284  bool records_timing,
285  bool interprocess)
286 {
287  auto flags = make_flags(uses_blocking_sync, records_timing, interprocess);
288  cuda::event::id_t new_event_id;
289  auto status = cudaEventCreateWithFlags(&new_event_id, flags);
290  cuda::throw_if_error(status, "failed creating a CUDA event associated with the current device");
291  // Note: We're trusting CUDA to actually have succeeded if it reports success,
292  // so we're not checking the newly-created event id - which is really just
293  // a pointer - for nullness
294  return wrap(current_device_id, new_event_id, do_take_ownership);
295 }
296 
301 inline event_t create(
302  device::id_t device_id,
303  bool uses_blocking_sync,
304  bool records_timing,
305  bool interprocess)
306 {
307  device::current::detail_::scoped_override_t
308  set_device_for_this_scope(device_id);
309  return detail_::create_on_current_device(device_id, uses_blocking_sync, records_timing, interprocess);
310 }
311 
312 } // namespace detail_
313 
325 inline event_t create(
326  device_t& device,
327  bool uses_blocking_sync = sync_by_busy_waiting, // Yes, that's the runtime default
328  bool records_timing = do_record_timings,
330 
331 } // namespace event
332 
343 inline void synchronize(const event_t& event)
344 {
345  auto device_id = event.device_id();
346  auto event_id = event.id();
347  device::current::detail_::scoped_override_t device_for_this_scope(device_id);
348  auto status = cudaEventSynchronize(event_id);
349  throw_if_error(status, "Failed synchronizing the event with id "
350  + cuda::detail_::ptr_as_hex(event_id) + " on " + ::std::to_string(device_id));
351 }
352 
353 } // namespace cuda
354 
355 #endif // CUDA_API_WRAPPERS_EVENT_HPP_
::std::chrono::duration< float, ::std::milli > duration_t
The type used by the CUDA Runtime API to represent the time difference between pairs of events...
Definition: event.hpp:234
wrappers for CUDA's facilities for sharing on-device memory addresses and CUDA events between host pr...
Proxy class for a CUDA stream.
Definition: stream.hpp:138
bool is_owning() const noexcept
Is this wrapper responsible for having the CUDA Runtime API destroy the event when it destructs...
Definition: event.hpp:120
All definitions and functionality wrapping the CUDA Runtime API.
Definition: array.hpp:17
device::id_t device_id() const noexcept
The device with which this event is associated (i.e.
Definition: event.hpp:114
Proxy class for a CUDA event.
Definition: event.hpp:104
const stream::id_t default_stream_id
The CUDA runtime provides a default stream on which work is scheduled when no stream is specified; fo...
Definition: constants.hpp:42
bool has_occurred() const
Has this event already occurred, or is it still pending on a stream?
Definition: event.hpp:135
void throw_if_error(cuda::status_t status, const ::std::string &message) noexcept(false)
Do nothing...
Definition: error.hpp:216
event::id_t id() const noexcept
The CUDA runtime API ID this object is wrapping.
Definition: event.hpp:109
Fundamental CUDA-related type definitions.
Can be shared between processes. Must not be able to record timings.
Definition: constants.hpp:96
cudaStream_t id_t
The CUDA Runtime API's numeric handle for streams.
Definition: types.hpp:218
bool query() const
An alias for {event_t::has_occurred()} - to conform to how the CUDA runtime API names this functional...
Definition: event.hpp:149
void synchronize(device_t &device)
Suspends execution until all previously-scheduled tasks on the specified device (all contexts...
Definition: device.hpp:797
A (base?) class for exceptions raised by CUDA code; these errors are thrown by essentially all CUDA R...
Definition: error.hpp:177
event_t create(device_t &device, bool uses_blocking_sync=sync_by_busy_waiting, bool records_timing=do_record_timings, bool interprocess=not_interprocess)
Creates a new event on a device.
Definition: multi_wrapper_impls.hpp:51
void synchronize()
Have the calling thread wait - either busy-waiting or blocking - and return only after this event has...
Definition: event.hpp:186
duration_t time_elapsed_between(const event_t &start, const event_t &end)
Determine (inaccurately) the elapsed time between two events.
Definition: event.hpp:246
void record()
Schedule a specified event to occur (= to fire) when all activities already scheduled on the event's ...
Definition: event.hpp:161
Wrappers for getting and setting CUDA's choice of which device is 'current'.
Facilities for exception-based handling of Runtime API errors, including a basic exception class wrap...
Can only be used by the process which created it.
Definition: constants.hpp:95
Fundamental CUDA-related constants and enumerations, not dependent on any more complex abstractions...
The thread calling event_.synchronize() will enter a busy-wait loop; this (might) minimize delay betw...
Definition: constants.hpp:70
Proxy class for a CUDA device.
Definition: device.hpp:148
int id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:467
cudaEvent_t id_t
The CUDA Runtime API's numeric handle for events.
Definition: types.hpp:205