cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
event.hpp
Go to the documentation of this file.
1 
8 #pragma once
9 #ifndef CUDA_API_WRAPPERS_EVENT_HPP_
10 #define CUDA_API_WRAPPERS_EVENT_HPP_
11 
12 #include "types.hpp"
13 
14 #include <chrono> // for duration types
15 #include "constants.hpp"
16 #include "current_device.hpp"
17 #include "error.hpp"
18 #include "ipc.hpp"
19 
20 namespace cuda {
21 
23 class device_t;
24 class stream_t;
26 
27 namespace event {
28 
29 namespace detail_ {
30 
31 inline void destroy(
32  handle_t handle,
33  device::id_t device_id,
34  context::handle_t context_handle);
35 
36 inline void enqueue_in_current_context(stream::handle_t stream_handle, handle_t event_handle)
37 {
38  auto status = cuEventRecord(event_handle, stream_handle);
39  throw_if_error_lazy(status,
40  "Failed recording " + event::detail_::identify(event_handle)
41  + " on " + stream::detail_::identify(stream_handle));
42 }
43 
51 inline void enqueue(context::handle_t context_handle, stream::handle_t stream_handle, handle_t event_handle) {
52  context::current::detail_::scoped_ensurer_t { context_handle };
53  enqueue_in_current_context(stream_handle, event_handle);
54 }
55 
56 using flags_t = unsigned int;
57 
58 constexpr flags_t inline make_flags(bool uses_blocking_sync, bool records_timing, bool interprocess)
59 {
60  return
61  ( uses_blocking_sync ? CU_EVENT_BLOCKING_SYNC : 0 )
62  | ( records_timing ? 0 : CU_EVENT_DISABLE_TIMING )
63  | ( interprocess ? CU_EVENT_INTERPROCESS : 0 );
64 }
65 
66 } // namespace detail_
67 
68 } // namespace event
69 
71 class event_t;
73 
74 namespace event {
75 
95 event_t wrap(
96  device::id_t device_id,
97  context::handle_t context_handle,
98  handle_t event_handle,
99  bool take_ownership = false,
100  bool hold_pc_refcount_unit = false) noexcept;
101 
102 ::std::string identify(const event_t& event);
103 
104 } // namespace event
105 
120 inline void wait(const event_t& event);
121 
133 class event_t {
134 
135 public: // data member non-mutator getters
137  device::id_t device_id() const noexcept { return device_id_; };
138 
140  context::handle_t context_handle() const noexcept { return context_handle_; }
141 
143  event::handle_t handle() const noexcept { return handle_; }
144 
146  bool is_owning() const noexcept { return owning_; }
147 
149  bool holds_primary_context_reference() const noexcept { return holds_pc_refcount_unit_; }
150 
152  device_t device() const;
153 
155  context_t context() const;
156 
157 
158 
159 public: // other non-mutator methods
160 
172  bool has_occurred() const
173  {
174  auto status = cuEventQuery(handle_);
175  if (status == cuda::status::success) return true;
176  if (status == cuda::status::async_operations_not_yet_completed) return false;
177  throw cuda::runtime_error(status,
178  "Could not determine whether " + event::detail_::identify(handle_)
179  + "has already occurred or not");
180  }
181 
186  bool query() const { return has_occurred(); }
187 
188 public: // other mutator methods
189 
196  void record() const
197  {
198  event::detail_::enqueue(context_handle_, stream::default_stream_handle, handle_);
199  }
200 
207  void record(const stream_t& stream) const;
208 
215  void fire(const stream_t& stream) const;
216 
220  void synchronize() const
221  {
222  return cuda::wait(*this);
223  }
224 
225 protected: // constructors
226 
227  event_t(
228  device::id_t device_id,
229  context::handle_t context_handle,
230  event::handle_t event_handle,
231  bool take_ownership,
232  bool hold_pc_refcount_unit) noexcept
233  :
234  device_id_(device_id),
235  context_handle_(context_handle),
236  handle_(event_handle),
237  owning_(take_ownership),
238  holds_pc_refcount_unit_(hold_pc_refcount_unit) { }
239 
240 public: // friendship
241 
242  friend event_t event::wrap(
243  device::id_t device,
244  context::handle_t context_handle,
245  event::handle_t event_handle,
246  bool take_ownership,
247  bool hold_pc_refcount_unit) noexcept;
248 
249 public: // constructors and destructor
250 
251  // Events cannot be copied, despite our allowing non-owning class instances.
252  // The reason is that we might inadvertently copy an owning instance, creating
253  // a non-owning instance and letting the original owning instance go out of scope -
254  // thus destructing the C++ object, and destroying the underlying CUDA object.
255  // Essentially, that is like passing a reference to a local variable - which we
256  // may not do.
257  event_t(const event_t& other) = delete;
258 
259  event_t(event_t&& other) noexcept : event_t(
260  other.device_id_, other.context_handle_, other.handle_, other.owning_, other.holds_pc_refcount_unit_)
261  {
262  other.owning_ = false;
263  other.holds_pc_refcount_unit_ = false;
264  };
265 
266  ~event_t() noexcept(false)
267  {
268  if (owning_) {
269 #ifdef NDEBUG
270  cuEventDestroy(handle_);
271  // Note: "Swallowing" any potential error to avoid ::std::terminate(); also,
272  // because the event cannot possibly exist after this call.
273 #else
274  event::detail_::destroy(handle_, device_id_, context_handle_);
275 #endif
276  }
277  // TODO: DRY
278  if (holds_pc_refcount_unit_) {
279 #ifdef NDEBUG
280  device::primary_context::detail_::decrease_refcount_nothrow(device_id_);
281  // Note: "Swallowing" any potential error to avoid ::std::terminate(); also,
282  // because a failure probably means the primary context is inactive already
283 #else
284  device::primary_context::detail_::decrease_refcount(device_id_);
285 #endif
286  }
287  }
288 
289 public: // operators
290 
291  event_t& operator=(const event_t&) = delete;
292  event_t& operator=(event_t&& other) noexcept
293  {
294  ::std::swap(device_id_, other.device_id_);
295  ::std::swap(context_handle_, other.context_handle_);
296  ::std::swap(handle_, other.handle_);
297  ::std::swap(owning_, other.owning_);
298  ::std::swap(holds_pc_refcount_unit_, holds_pc_refcount_unit_);
299  return *this;
300  }
301 
302 protected: // data members
303  device::id_t device_id_;
304  context::handle_t context_handle_;
305  event::handle_t handle_;
306  bool owning_;
307  // this field is mutable only for enabling move construction; other
308  // than in that case it must not be altered
309  bool holds_pc_refcount_unit_;
310  // When context_handle_ is the handle of a primary context, this event may
311  // be "keeping that context alive" through the refcount - in which case
312  // it must release its refcount unit on destruction
313 };
314 
315 namespace event {
316 
321 using duration_t = ::std::chrono::duration<float, ::std::milli>;
322 
334 {
335  float elapsed_milliseconds;
336  auto status = cuEventElapsedTime(&elapsed_milliseconds, start.handle(), end.handle());
337  throw_if_error_lazy(status, "determining the time elapsed between events");
338  return duration_t { elapsed_milliseconds };
339 }
340 
341 inline duration_t time_elapsed_between(const ::std::pair<const event_t&, const event_t&>& event_pair)
342 {
343  return time_elapsed_between(event_pair.first, event_pair.second);
344 }
345 
346 inline event_t wrap(
347  device::id_t device_id,
348  context::handle_t context_handle,
349  handle_t event_handle,
350  bool take_ownership,
351  bool hold_pc_refcount_unit) noexcept
352 {
353  return { device_id, context_handle, event_handle, take_ownership, hold_pc_refcount_unit };
354 }
355 
356 namespace detail_ {
357 
358 inline ::std::string identify(const event_t& event)
359 {
360  return identify(event.handle(), event.context_handle(), event.device_id());
361 }
362 
363 inline handle_t create_raw_in_current_context(flags_t flags = 0u)
364 {
365  cuda::event::handle_t new_event_handle;
366  auto status = cuEventCreate(&new_event_handle, flags);
367  throw_if_error_lazy(status, "Failed creating a CUDA event");
368  return new_event_handle;
369 }
370 
371 // Notes:
372 // * For now, event_t's need their device's ID - even if it's the current device;
373 // that explains the requirement in this function's interface.
374 // * Similarly, this function does not know whether the context is primary or
375 // not, and it is up to the caller to know that and decide whether the event
376 // proxy should decrease the primary context refcount on destruction
377 inline event_t create_in_current_context(
378  device::id_t current_device_id,
379  context::handle_t current_context_handle,
380  bool hold_pc_refcount_unit,
381  bool uses_blocking_sync,
382  bool records_timing,
383  bool interprocess)
384 {
385  auto flags = make_flags(uses_blocking_sync, records_timing, interprocess);
386  auto new_event_handle = create_raw_in_current_context(flags);
387  return wrap(current_device_id, current_context_handle, new_event_handle, do_take_ownership, hold_pc_refcount_unit);
388 }
389 
390 inline void destroy_in_current_context(
391  handle_t handle,
392  device::id_t current_device_id,
393  context::handle_t current_context_handle)
394 {
395  auto status = cuEventDestroy(handle);
396  throw_if_error_lazy(status, "Failed destroying " +
397  identify(handle, current_context_handle, current_device_id));
398 }
399 
404 inline event_t create(
405  device::id_t device_id,
406  context::handle_t context_handle,
407  bool hold_pc_refcount_unit,
408  bool uses_blocking_sync,
409  bool records_timing,
410  bool interprocess)
411 {
412  CAW_SET_SCOPE_CONTEXT(context_handle);
413 
414  return detail_::create_in_current_context(
415  device_id, context_handle,
416  hold_pc_refcount_unit,
417  uses_blocking_sync, records_timing, interprocess);
418 }
419 
420 inline void destroy(
421  handle_t handle,
422  device::id_t device_id,
423  context::handle_t context_handle)
424 {
425  CAW_SET_SCOPE_CONTEXT(context_handle);
426  destroy_in_current_context(handle, device_id, context_handle);
427 }
428 
429 } // namespace detail_
430 
443  const device_t& device,
444  bool uses_blocking_sync = sync_by_busy_waiting, // Yes, that's the runtime default
445  bool records_timing = do_record_timings,
447 
459 inline event_t create(
460  const context_t& context,
461  bool uses_blocking_sync = sync_by_busy_waiting,
462  bool records_timing = do_record_timings,
464 
465 } // namespace event
466 
467 inline void wait(const event_t& event)
468 {
469  auto context_handle = event.context_handle();
470  auto event_handle = event.handle();
471  context::current::detail_::scoped_override_t context_for_this_scope(context_handle);
472  auto status = cuEventSynchronize(event_handle);
473  throw_if_error_lazy(status, "Failed synchronizing " + event::detail_::identify(event));
474 }
475 
476 inline void synchronize(const event_t& event)
477 {
478  return wait(event);
479 }
480 
481 } // namespace cuda
482 
483 #endif // CUDA_API_WRAPPERS_EVENT_HPP_
::std::chrono::duration< float, ::std::milli > duration_t
The type used by the CUDA Runtime API to represent the time difference between pairs of events...
Definition: event.hpp:321
Wrappers for CUDA's facilities for sharing on-device memory addresses and CUDA events between host pr...
Proxy class for a CUDA stream.
Definition: stream.hpp:246
event::handle_t handle() const noexcept
The raw CUDA handle for this event.
Definition: event.hpp:143
bool is_owning() const noexcept
True if this wrapper is responsible for telling CUDA to destroy the event upon the wrapper's own dest...
Definition: event.hpp:146
void synchronize() const
See.
Definition: event.hpp:220
Wrapper class for a CUDA context.
Definition: context.hpp:244
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
device::id_t device_id() const noexcept
The raw CUDA ID for the device w.r.t. which the event is defined.
Definition: event.hpp:137
CUcontext handle_t
Raw CUDA driver handle for a context; see {context_t}.
Definition: types.hpp:878
Wrapper class for a CUDA event.
Definition: event.hpp:133
bool has_occurred() const
Has this event already occurred, or is it still pending on a stream?
Definition: event.hpp:172
STL namespace.
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:850
void wait(const event_t &event)
Have the calling thread wait - either busy-waiting or blocking - and return only after this event has...
Definition: event.hpp:467
context::handle_t context_handle() const noexcept
The raw CUDA handle for the context in which the represented event is defined.
Definition: event.hpp:140
CUevent handle_t
The CUDA driver's raw handle for events.
Definition: types.hpp:217
bool query() const
An alias for {event_t::has_occurred()} - to conform to how the CUDA runtime API names this functional...
Definition: event.hpp:186
void start()
Start CUDA profiling for the current process.
Definition: profiling.hpp:229
event_t wrap(device::id_t device_id, context::handle_t context_handle, handle_t event_handle, bool take_ownership=false, bool hold_pc_refcount_unit=false) noexcept
Wrap an existing CUDA event in an event_t instance.
Definition: event.hpp:346
A (base?) class for exceptions raised by CUDA code; these errors are thrown by essentially all CUDA R...
Definition: error.hpp:271
void synchronize(const context_t &context)
Waits for all previously-scheduled tasks on all streams (= queues) in a CUDA context to conclude...
Definition: context.hpp:968
event_t create(const device_t &device, bool uses_blocking_sync=sync_by_busy_waiting, bool records_timing=do_record_timings, bool interprocess=not_interprocess)
creates a new event on (the primary execution context of) a device.
Definition: event.hpp:45
void record() const
Schedule a specified event to occur (= to fire) when all activities already scheduled on the event's ...
Definition: event.hpp:196
duration_t time_elapsed_between(const event_t &start, const event_t &end)
Determine (inaccurately) the elapsed time between two events.
Definition: event.hpp:333
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we've failed - which also ensures no string is constructed unle...
Definition: error.hpp:316
CUarray handle_t
Raw CUDA driver handle for arrays (of any dimension)
Definition: array.hpp:34
Wrappers for getting and setting CUDA's choice of which device is 'current'.
Facilities for exception-based handling of Runtime and Driver API errors, including a basic exception...
CUstream handle_t
The CUDA driver's raw handle for streams.
Definition: types.hpp:239
Can be shared between processes. Must not be able to record timings.
Definition: constants.hpp:96
Fundamental CUDA-related constants and enumerations, not dependent on any more complex abstractions...
bool holds_primary_context_reference() const noexcept
True if this wrapper has been associated with an increase of the device's primary context's reference...
Definition: event.hpp:149
The thread calling event_.synchronize() will enter a busy-wait loop; this (might) minimize delay betw...
Definition: constants.hpp:70
Can only be used by the process which created it.
Definition: constants.hpp:95
const stream::handle_t default_stream_handle
The CUDA runtime provides a default stream on which work is scheduled when no stream is specified; fo...
Definition: constants.hpp:42
Wrapper class for a CUDA device.
Definition: device.hpp:135
Fundamental CUDA-related type definitions.