9 #ifndef CUDA_API_WRAPPERS_EVENT_HPP_ 10 #define CUDA_API_WRAPPERS_EVENT_HPP_ 38 auto status = cuEventRecord(event_handle, stream_handle);
40 "Failed recording " + event::detail_::identify(event_handle)
41 +
" on " + stream::detail_::identify(stream_handle));
52 context::current::detail_::scoped_ensurer_t { context_handle };
53 enqueue_in_current_context(stream_handle, event_handle);
56 using flags_t =
unsigned int;
// Combines three boolean event options into one flags_t value by OR-ing
// together the corresponding CU_EVENT_* bits (each option contributes either
// its bit or 0).
58 constexpr flags_t
inline make_flags(
bool uses_blocking_sync,
bool records_timing,
bool interprocess)
61 ( uses_blocking_sync ? CU_EVENT_BLOCKING_SYNC : 0 )
// Note the inverted sense: the available bit is a "disable" bit, so it is
// set when timing is NOT requested.
62 | ( records_timing ? 0 : CU_EVENT_DISABLE_TIMING )
63 | ( interprocess ? CU_EVENT_INTERPROCESS : 0 );
99 bool take_ownership =
false,
100 bool hold_pc_refcount_unit =
false) noexcept;
102 ::
std::
string identify(const event_t& event);
120 inline
void wait(const event_t& event);
// Query the event's status: success yields true, "async operations not yet
// completed" yields false; any other status falls through to the error
// message assembled below.
174 auto status = cuEventQuery(handle_);
175 if (status == cuda::status::success)
return true;
176 if (status == cuda::status::async_operations_not_yet_completed)
return false;
// The text following the event's identification needs its own leading space
// (compare the " on " separator used after identify() elsewhere in this
// file); without it the identification runs straight into "has".
178 "Could not determine whether " + event::detail_::identify(handle_)
179 +
" has already occurred or not");
186 bool query()
const {
return has_occurred(); }
207 void record(
const stream_t& stream)
const;
215 void fire(
const stream_t& stream)
const;
232 bool hold_pc_refcount_unit) noexcept
234 device_id_(device_id),
235 context_handle_(context_handle),
236 handle_(event_handle),
237 owning_(take_ownership),
238 holds_pc_refcount_unit_(hold_pc_refcount_unit) { }
247 bool hold_pc_refcount_unit) noexcept;
257 event_t(
const event_t& other) =
delete;
// Move constructor: delegates to the field-wise constructor using `other`'s
// values, then clears `other`'s owning_ and holds_pc_refcount_unit_ flags.
259 event_t(event_t&& other) noexcept : event_t(
260 other.device_id_, other.context_handle_, other.handle_, other.owning_, other.holds_pc_refcount_unit_)
// After this point only the newly constructed object carries these flags.
262 other.owning_ =
false;
263 other.holds_pc_refcount_unit_ =
false;
266 ~event_t() noexcept(
false)
270 cuEventDestroy(handle_);
274 event::detail_::destroy(handle_, device_id_, context_handle_);
278 if (holds_pc_refcount_unit_) {
280 device::primary_context::detail_::decrease_refcount_nothrow(device_id_);
284 device::primary_context::detail_::decrease_refcount(device_id_);
291 event_t& operator=(
const event_t&) =
delete;
// Move assignment: exchanges every field of this object with the
// corresponding field of `other`, so `other` ends up carrying whatever this
// object held before the assignment.
292 event_t& operator=(event_t&& other) noexcept
294 ::std::swap(device_id_, other.device_id_);
295 ::std::swap(context_handle_, other.context_handle_);
296 ::std::swap(handle_, other.handle_);
297 ::std::swap(owning_, other.owning_);
// This flag must be exchanged with other's copy like the rest; swapping the
// member with itself does nothing and would leave the flag detached from the
// handle/device it belongs to.
298 ::std::swap(holds_pc_refcount_unit_, other.holds_pc_refcount_unit_);
309 bool holds_pc_refcount_unit_;
321 using duration_t = ::std::chrono::duration<float, ::std::milli>;
335 float elapsed_milliseconds;
336 auto status = cuEventElapsedTime(&elapsed_milliseconds, start.
handle(), end.
handle());
351 bool hold_pc_refcount_unit) noexcept
353 return { device_id, context_handle, event_handle, take_ownership, hold_pc_refcount_unit };
358 inline ::std::string identify(
const event_t& event)
360 return identify(event.
handle(),
event.context_handle(),
event.device_id());
363 inline handle_t create_raw_in_current_context(flags_t flags = 0u)
366 auto status = cuEventCreate(&new_event_handle, flags);
368 return new_event_handle;
377 inline event_t create_in_current_context(
380 bool hold_pc_refcount_unit,
381 bool uses_blocking_sync,
385 auto flags = make_flags(uses_blocking_sync, records_timing, interprocess);
386 auto new_event_handle = create_raw_in_current_context(flags);
387 return wrap(current_device_id, current_context_handle, new_event_handle, do_take_ownership, hold_pc_refcount_unit);
390 inline void destroy_in_current_context(
395 auto status = cuEventDestroy(handle);
397 identify(handle, current_context_handle, current_device_id));
407 bool hold_pc_refcount_unit,
408 bool uses_blocking_sync,
412 CAW_SET_SCOPE_CONTEXT(context_handle);
414 return detail_::create_in_current_context(
415 device_id, context_handle,
416 hold_pc_refcount_unit,
417 uses_blocking_sync, records_timing, interprocess);
425 CAW_SET_SCOPE_CONTEXT(context_handle);
426 destroy_in_current_context(handle, device_id, context_handle);
445 bool records_timing = do_record_timings,
462 bool records_timing = do_record_timings,
// Reads the event's context handle and raw event handle, sets up a scoped
// context object for that context, and then calls cuEventSynchronize on the
// event handle.
469 auto context_handle =
event.context_handle();
470 auto event_handle =
event.handle();
// NOTE(review): going by its name, this presumably makes the event's context
// current for the remainder of the scope and restores the previous one
// afterwards - confirm against scoped_override_t's definition.
471 context::current::detail_::scoped_override_t context_for_this_scope(context_handle);
472 auto status = cuEventSynchronize(event_handle);
483 #endif // CUDA_API_WRAPPERS_EVENT_HPP_ ::std::chrono::duration< float, ::std::milli > duration_t
The type used by the CUDA Runtime API to represent the time difference between pairs of events...
Definition: event.hpp:321
wrappers for CUDA's facilities for sharing on-device memory addresses and CUDA events between host pr...
Proxy class for a CUDA stream.
Definition: stream.hpp:246
event::handle_t handle() const noexcept
The raw CUDA handle for this event.
Definition: event.hpp:143
bool is_owning() const noexcept
True if this wrapper is responsible for telling CUDA to destroy the event upon the wrapper's own dest...
Definition: event.hpp:146
void synchronize() const
See.
Definition: event.hpp:220
Wrapper class for a CUDA context.
Definition: context.hpp:244
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
device::id_t device_id() const noexcept
The raw CUDA ID for the device w.r.t. which the event is defined.
Definition: event.hpp:137
CUcontext handle_t
Raw CUDA driver handle for a context; see {context_t}.
Definition: types.hpp:878
Wrapper class for a CUDA event.
Definition: event.hpp:133
bool has_occurred() const
Has this event already occurred, or is it still pending on a stream?
Definition: event.hpp:172
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:850
void wait(const event_t &event)
Have the calling thread wait - either busy-waiting or blocking - and return only after this event has...
Definition: event.hpp:467
context::handle_t context_handle() const noexcept
The raw CUDA handle for the context in which the represented event is defined.
Definition: event.hpp:140
CUevent handle_t
The CUDA driver's raw handle for events.
Definition: types.hpp:217
bool query() const
An alias for {event_t::has_occurred()} - to conform to how the CUDA runtime API names this functional...
Definition: event.hpp:186
void start()
Start CUDA profiling for the current process.
Definition: profiling.hpp:229
event_t wrap(device::id_t device_id, context::handle_t context_handle, handle_t event_handle, bool take_ownership=false, bool hold_pc_refcount_unit=false) noexcept
Wrap an existing CUDA event in an event_t instance.
Definition: event.hpp:346
A (base?) class for exceptions raised by CUDA code; these errors are thrown by essentially all CUDA R...
Definition: error.hpp:271
void synchronize(const context_t &context)
Waits for all previously-scheduled tasks on all streams (= queues) in a CUDA context to conclude...
Definition: context.hpp:968
event_t create(const device_t &device, bool uses_blocking_sync=sync_by_busy_waiting, bool records_timing=do_record_timings, bool interprocess=not_interprocess)
Creates a new event on (the primary execution context of) a device.
Definition: event.hpp:45
void record() const
Schedule a specified event to occur (= to fire) when all activities already scheduled on the event's ...
Definition: event.hpp:196
duration_t time_elapsed_between(const event_t &start, const event_t &end)
Determine (inaccurately) the elapsed time between two events.
Definition: event.hpp:333
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we've failed - which also ensures no string is constructed unle...
Definition: error.hpp:316
CUarray handle_t
Raw CUDA driver handle for arrays (of any dimension)
Definition: array.hpp:34
Wrappers for getting and setting CUDA's choice of which device is 'current'.
Facilities for exception-based handling of Runtime and Driver API errors, including a basic exception...
CUstream handle_t
The CUDA driver's raw handle for streams.
Definition: types.hpp:239
Can be shared between processes. Must not be able to record timings.
Definition: constants.hpp:96
Fundamental CUDA-related constants and enumerations, not dependent on any more complex abstractions...
bool holds_primary_context_reference() const noexcept
True if this wrapper has been associated with an increase of the device's primary context's reference...
Definition: event.hpp:149
The thread calling event_.synchronize() will enter a busy-wait loop; this (might) minimize delay betw...
Definition: constants.hpp:70
Can only be used by the process which created it.
Definition: constants.hpp:95
const stream::handle_t default_stream_handle
The CUDA runtime provides a default stream on which work is scheduled when no stream is specified; fo...
Definition: constants.hpp:42
Wrapper class for a CUDA device.
Definition: device.hpp:135
Fundamental CUDA-related type definitions.