cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
event.hpp
Go to the documentation of this file.
1 
8 #pragma once
9 #ifndef CUDA_API_WRAPPERS_EVENT_HPP_
10 #define CUDA_API_WRAPPERS_EVENT_HPP_
11 
12 #include "types.hpp"
13 
14 #include <chrono> // for duration types
15 #include "constants.hpp"
16 #include "current_device.hpp"
17 #include "error.hpp"
18 #include "ipc.hpp"
19 
20 namespace cuda {
21 
23 class device_t;
24 class stream_t;
26 
27 namespace event {
28 
29 namespace detail_ {
30 
31 inline void destroy_in_current_context(
32  handle_t handle,
33  context::handle_t current_context_handle,
34  device::id_t current_device_id) noexcept(false)
35 {
36  auto status = cuEventDestroy(handle);
37  throw_if_error_lazy(status, "Failed destroying " +
38  identify(handle, current_context_handle, current_device_id));
39 }
40 
41 inline void destroy(
42  handle_t handle,
43  context::handle_t context_handle,
44  device::id_t device_id) noexcept(false)
45 {
46  CAW_SET_SCOPE_CONTEXT(context_handle);
47  destroy_in_current_context(handle, context_handle, device_id);
48 }
49 
50 inline void enqueue_in_current_context(stream::handle_t stream_handle, handle_t event_handle)
51 {
52  auto status = cuEventRecord(event_handle, stream_handle);
53  throw_if_error_lazy(status,
54  "Failed recording " + event::detail_::identify(event_handle)
55  + " on " + stream::detail_::identify(stream_handle));
56 }
57 
65 inline void enqueue(context::handle_t context_handle, stream::handle_t stream_handle, handle_t event_handle) {
66  context::current::detail_::scoped_ensurer_t { context_handle };
67  enqueue_in_current_context(stream_handle, event_handle);
68 }
69 
70 using flags_t = unsigned int;
71 
72 constexpr flags_t inline make_flags(bool uses_blocking_sync, bool records_timing, bool interprocess)
73 {
74  return
75  ( uses_blocking_sync ? CU_EVENT_BLOCKING_SYNC : 0 )
76  | ( records_timing ? 0 : CU_EVENT_DISABLE_TIMING )
77  | ( interprocess ? CU_EVENT_INTERPROCESS : 0 );
78 }
79 
80 } // namespace detail_
81 
82 } // namespace event
83 
85 class event_t;
87 
88 namespace event {
89 
109 event_t wrap(
110  device::id_t device_id,
111  context::handle_t context_handle,
112  handle_t event_handle,
113  bool take_ownership = false,
114  bool hold_pc_refcount_unit = false) noexcept;
115 
116 ::std::string identify(const event_t& event);
117 
118 } // namespace event
119 
134 inline void wait(const event_t& event);
135 
147 class event_t {
148 
149 public: // data member non-mutator getters
151  device::id_t device_id() const noexcept { return device_id_; };
152 
154  context::handle_t context_handle() const noexcept { return context_handle_; }
155 
157  event::handle_t handle() const noexcept { return handle_; }
158 
160  bool is_owning() const noexcept { return owning_; }
161 
163  bool holds_primary_context_reference() const noexcept { return holds_pc_refcount_unit_; }
164 
166  device_t device() const;
167 
169  context_t context() const;
170 
171 
172 
173 public: // other non-mutator methods
174 
186  bool has_occurred() const
187  {
188  auto status = cuEventQuery(handle_);
189  if (status == cuda::status::success) return true;
190  if (status == cuda::status::async_operations_not_yet_completed) return false;
191  throw cuda::runtime_error(status,
192  "Could not determine whether " + event::detail_::identify(handle_)
193  + "has already occurred or not");
194  }
195 
200  bool query() const { return has_occurred(); }
201 
202 public: // other mutator methods
203 
210  void record() const
211  {
212  event::detail_::enqueue(context_handle_, stream::default_stream_handle, handle_);
213  }
214 
221  void record(const stream_t& stream) const;
222 
229  void fire(const stream_t& stream) const;
230 
234  void synchronize() const
235  {
236  return cuda::wait(*this);
237  }
238 
239 protected: // constructors
240 
241  event_t(
242  device::id_t device_id,
243  context::handle_t context_handle,
244  event::handle_t event_handle,
245  bool take_ownership,
246  bool hold_pc_refcount_unit) noexcept
247  :
248  device_id_(device_id),
249  context_handle_(context_handle),
250  handle_(event_handle),
251  owning_(take_ownership),
252  holds_pc_refcount_unit_(hold_pc_refcount_unit) { }
253 
254 public: // friendship
255 
256  friend event_t event::wrap(
257  device::id_t device,
258  context::handle_t context_handle,
259  event::handle_t event_handle,
260  bool take_ownership,
261  bool hold_pc_refcount_unit) noexcept;
262 
263 public: // constructors and destructor
264 
265  // Events cannot be copied, despite our allowing non-owning class instances.
266  // The reason is that we might inadvertently copy an owning instance, creating
267  // a non-owning instance and letting the original owning instance go out of scope -
268  // thus destructing the C++ object, and destroying the underlying CUDA object.
269  // Essentially, that is like passing a reference to a local variable - which we
270  // may not do.
271  event_t(const event_t& other) = delete;
272 
273  event_t(event_t&& other) noexcept : event_t(
274  other.device_id_, other.context_handle_, other.handle_, other.owning_, other.holds_pc_refcount_unit_)
275  {
276  other.owning_ = false;
277  other.holds_pc_refcount_unit_ = false;
278  };
279 
280  ~event_t() DESTRUCTOR_EXCEPTION_SPEC
281  {
282  if (owning_) {
283 #ifndef THROW_IN_DESTRUCTORS
284  try
285 #endif
286  {
287  event::detail_::destroy(handle_, context_handle_, device_id_);
288  }
289 #ifndef THROW_IN_DESTRUCTORS
290  catch (...) {}
291 #endif
292  }
293  if (holds_pc_refcount_unit_) {
294  device::primary_context::detail_::decrease_refcount_in_dtor(device_id_);
295  }
296  }
297 
298 public: // operators
299 
300  event_t& operator=(const event_t&) = delete;
301  event_t& operator=(event_t&& other) noexcept
302  {
303  ::std::swap(device_id_, other.device_id_);
304  ::std::swap(context_handle_, other.context_handle_);
305  ::std::swap(handle_, other.handle_);
306  ::std::swap(owning_, other.owning_);
307  ::std::swap(holds_pc_refcount_unit_, holds_pc_refcount_unit_);
308  return *this;
309  }
310 
311 protected: // data members
312  device::id_t device_id_;
313  context::handle_t context_handle_;
314  event::handle_t handle_;
315  bool owning_;
316  // this field is mutable only for enabling move construction; other
317  // than in that case it must not be altered
318  bool holds_pc_refcount_unit_;
319  // When context_handle_ is the handle of a primary context, this event may
320  // be "keeping that context alive" through the refcount - in which case
321  // it must release its refcount unit on destruction
322 };
323 
324 namespace event {
325 
330 using duration_t = ::std::chrono::duration<float, ::std::milli>;
331 
343 {
344  float elapsed_milliseconds;
345  auto status = cuEventElapsedTime(&elapsed_milliseconds, start.handle(), end.handle());
346  throw_if_error_lazy(status, "determining the time elapsed between events");
347  return duration_t { elapsed_milliseconds };
348 }
349 
350 inline duration_t time_elapsed_between(const ::std::pair<const event_t&, const event_t&>& event_pair)
351 {
352  return time_elapsed_between(event_pair.first, event_pair.second);
353 }
354 
355 inline event_t wrap(
356  device::id_t device_id,
357  context::handle_t context_handle,
358  handle_t event_handle,
359  bool take_ownership,
360  bool hold_pc_refcount_unit) noexcept
361 {
362  return { device_id, context_handle, event_handle, take_ownership, hold_pc_refcount_unit };
363 }
364 
365 namespace detail_ {
366 
367 inline ::std::string identify(const event_t& event)
368 {
369  return identify(event.handle(), event.context_handle(), event.device_id());
370 }
371 
372 inline handle_t create_raw_in_current_context(flags_t flags = 0u)
373 {
374  cuda::event::handle_t new_event_handle;
375  auto status = cuEventCreate(&new_event_handle, flags);
376  throw_if_error_lazy(status, "Failed creating a CUDA event");
377  return new_event_handle;
378 }
379 
380 // Notes:
381 // * For now, event_t's need their device's ID - even if it's the current device;
382 // that explains the requirement in this function's interface.
383 // * Similarly, this function does not know whether the context is primary or
384 // not, and it is up to the caller to know that and decide whether the event
385 // proxy should decrease the primary context refcount on destruction
386 inline event_t create_in_current_context(
387  device::id_t current_device_id,
388  context::handle_t current_context_handle,
389  bool hold_pc_refcount_unit,
390  bool uses_blocking_sync,
391  bool records_timing,
392  bool interprocess)
393 {
394  auto flags = make_flags(uses_blocking_sync, records_timing, interprocess);
395  auto new_event_handle = create_raw_in_current_context(flags);
396  return wrap(current_device_id, current_context_handle, new_event_handle, do_take_ownership, hold_pc_refcount_unit);
397 }
398 
403 inline event_t create(
404  device::id_t device_id,
405  context::handle_t context_handle,
406  bool hold_pc_refcount_unit,
407  bool uses_blocking_sync,
408  bool records_timing,
409  bool interprocess)
410 {
411  CAW_SET_SCOPE_CONTEXT(context_handle);
412 
413  return detail_::create_in_current_context(
414  device_id, context_handle,
415  hold_pc_refcount_unit,
416  uses_blocking_sync, records_timing, interprocess);
417 }
418 
419 } // namespace detail_
420 
433  const device_t& device,
434  bool uses_blocking_sync = sync_by_busy_waiting, // Yes, that's the runtime default
435  bool records_timing = do_record_timings,
437 
449 inline event_t create(
450  const context_t& context,
451  bool uses_blocking_sync = sync_by_busy_waiting,
452  bool records_timing = do_record_timings,
454 
455 } // namespace event
456 
457 inline void wait(const event_t& event)
458 {
459  auto context_handle = event.context_handle();
460  auto event_handle = event.handle();
461  context::current::detail_::scoped_override_t context_for_this_scope(context_handle);
462  auto status = cuEventSynchronize(event_handle);
463  throw_if_error_lazy(status, "Failed synchronizing " + event::detail_::identify(event));
464 }
465 
466 inline void synchronize(const event_t& event)
467 {
468  return wait(event);
469 }
470 
471 } // namespace cuda
472 
473 #endif // CUDA_API_WRAPPERS_EVENT_HPP_
::std::chrono::duration< float, ::std::milli > duration_t
The type used by the CUDA Runtime API to represent the time difference between pairs of events...
Definition: event.hpp:330
wrappers for CUDA's facilities for sharing on-device memory addresses and CUDA events between host pr...
Proxy class for a CUDA stream.
Definition: stream.hpp:258
event::handle_t handle() const noexcept
The raw CUDA handle for this event.
Definition: event.hpp:157
bool is_owning() const noexcept
True if this wrapper is responsible for telling CUDA to destroy the event upon the wrapper's own dest...
Definition: event.hpp:160
void synchronize() const
See.
Definition: event.hpp:234
Wrapper class for a CUDA context.
Definition: context.hpp:249
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
device::id_t device_id() const noexcept
The raw CUDA ID for the device w.r.t. which the event is defined.
Definition: event.hpp:151
CUcontext handle_t
Raw CUDA driver handle for a context; see {context_t}.
Definition: types.hpp:880
Wrapper class for a CUDA event.
Definition: event.hpp:147
bool has_occurred() const
Has this event already occurred, or is it still pending on a stream?
Definition: event.hpp:186
STL namespace.
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:852
void wait(const event_t &event)
Have the calling thread wait - either busy-waiting or blocking - and return only after this event has...
Definition: event.hpp:457
context::handle_t context_handle() const noexcept
The raw CUDA handle for the context in which the represented event is defined.
Definition: event.hpp:154
CUevent handle_t
The CUDA driver's raw handle for events.
Definition: types.hpp:214
bool query() const
An alias for {event_t::has_occurred()} - to conform to how the CUDA runtime API names this functional...
Definition: event.hpp:200
void start()
Start CUDA profiling for the current process.
Definition: profiling.hpp:229
event_t wrap(device::id_t device_id, context::handle_t context_handle, handle_t event_handle, bool take_ownership=false, bool hold_pc_refcount_unit=false) noexcept
Wrap an existing CUDA event in an event_t instance.
Definition: event.hpp:355
A (base?) class for exceptions raised by CUDA code; these errors are thrown by essentially all CUDA R...
Definition: error.hpp:282
void synchronize(const context_t &context)
Waits for all previously-scheduled tasks on all streams (= queues) in a CUDA context to conclude...
Definition: context.hpp:980
event_t create(const device_t &device, bool uses_blocking_sync=sync_by_busy_waiting, bool records_timing=do_record_timings, bool interprocess=not_interprocess)
creates a new event on (the primary execution context of) a device.
Definition: event.hpp:45
void record() const
Schedule a specified event to occur (= to fire) when all activities already scheduled on the event's ...
Definition: event.hpp:210
duration_t time_elapsed_between(const event_t &start, const event_t &end)
Determine (inaccurately) the elapsed time between two events.
Definition: event.hpp:342
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we've failed - which also ensures no string is constructed unle...
Definition: error.hpp:327
CUarray handle_t
Raw CUDA driver handle for arrays (of any dimension)
Definition: array.hpp:34
Wrappers for getting and setting CUDA's choice of which device is 'current'.
Facilities for exception-based handling of Runtime and Driver API errors, including a basic exception...
CUstream handle_t
The CUDA driver's raw handle for streams.
Definition: types.hpp:236
Can be shared between processes. Must not be able to record timings.
Definition: constants.hpp:96
Fundamental CUDA-related constants and enumerations, not dependent on any more complex abstractions...
bool holds_primary_context_reference() const noexcept
True if this wrapper has been associated with an increase of the device's primary context's reference...
Definition: event.hpp:163
The thread calling event_.synchronize() will enter a busy-wait loop; this (might) minimize delay betw...
Definition: constants.hpp:70
Can only be used by the process which created it.
Definition: constants.hpp:95
const stream::handle_t default_stream_handle
The CUDA runtime provides a default stream on which work is scheduled when no stream is specified; fo...
Definition: constants.hpp:42
Wrapper class for a CUDA device.
Definition: device.hpp:135
Fundamental CUDA-related type definitions.