eyalroz/cuda-api-wrappers/event_8hpp_source.html

 #pragma once
 #ifndef CUDA_API_WRAPPERS_EVENT_HPP_
 #define CUDA_API_WRAPPERS_EVENT_HPP_

 #include "types.hpp"

 #include <chrono> // for duration types
 #include "constants.hpp"
 #include "current_device.hpp"
 #include "error.hpp"
 #include "ipc.hpp"

 namespace cuda {

 class device_t;
 class stream_t;

 namespace event {

 namespace detail_ {

 // Forward declaration of the event-destruction helper; its definition appears
 // further down in this file. It is declared this early because the event_t
 // destructor calls it (when NDEBUG is not defined) before that definition.
 inline void destroy(
     handle_t           handle,
     device::id_t       device_id,
     context::handle_t  context_handle);

 // Records the event on the given stream with cuEventRecord(), throwing (via
 // throw_if_error_lazy) if the driver reports a failure. This function does not
 // touch the current context itself - presumably the caller has made the
 // appropriate context current beforehand.
 inline void enqueue_in_current_context(stream::handle_t stream_handle, handle_t event_handle)
 {
     auto record_status = cuEventRecord(event_handle, stream_handle);
     throw_if_error_lazy(
         record_status,
         "Failed recording " + event::detail_::identify(event_handle)
             + " on " + stream::detail_::identify(stream_handle));
 }

 // Records the event on the given stream, with a context guard for
 // `context_handle` held for the duration of the recording.
 //
 // Note: The guard must be a _named_ object. An unnamed temporary (as in
 // `scoped_ensurer_t { context_handle };`) is destroyed at the end of that very
 // statement, i.e. before the event is recorded.
 inline void enqueue(context::handle_t context_handle, stream::handle_t stream_handle, handle_t event_handle) {
     context::current::detail_::scoped_ensurer_t context_for_this_scope { context_handle };
     enqueue_in_current_context(stream_handle, event_handle);
 }

 using flags_t = unsigned int;

 // Combines the three boolean event-creation options into a single flags value
 // for the CUDA driver. Note the inverted sense of `records_timing`: the driver
 // flag is set when timing is to be _disabled_.
 //
 // Kept as a single return statement, so the function body stays within the
 // restrictions on constexpr functions in older C++ standards.
 inline constexpr flags_t make_flags(bool uses_blocking_sync, bool records_timing, bool interprocess)
 {
     return
           ( interprocess        ? CU_EVENT_INTERPROCESS : 0  )
         | ( records_timing      ? 0 : CU_EVENT_DISABLE_TIMING )
         | ( uses_blocking_sync  ? CU_EVENT_BLOCKING_SYNC : 0  );
 }

 } // namespace detail_

 } // namespace event

 class event_t;

 namespace event {

 // Wrap an existing CUDA event in an event_t instance (declaration; the
 // definition appears after the event_t class).
 //
 // device_id              ID of the device stored in the wrapper
 // context_handle         handle of the context stored in the wrapper
 // event_handle           the raw handle of the event being wrapped
 // take_ownership         if true, the wrapper destroys the event on its own destruction
 // hold_pc_refcount_unit  if true, the wrapper decreases the device's primary
 //                        context refcount on its own destruction
 event_t wrap(
     device::id_t       device_id,
     context::handle_t  context_handle,
     handle_t           event_handle,
     bool               take_ownership = false,
     bool               hold_pc_refcount_unit = false) noexcept;

 // Presumably produces a textual identification of the event for use in
 // messages. NOTE(review): the definition visible later in this file is in
 // namespace event::detail_, not event - confirm where this one is defined.
 ::std::string identify(const event_t& event);

 } // namespace event

 // Forward declaration, so that event_t::synchronize() can call this function;
 // it is defined at the end of this file.
 inline void wait(const event_t& event);

 /**
  * Wrapper class for a CUDA event.
  *
  * An instance carries the raw handle of an event together with the handle of
  * a context and the ID of a device. It may or may not own the event (see
  * @ref is_owning), and may or may not hold a reference-count unit of the
  * device's primary context (see @ref holds_primary_context_reference).
  *
  * Instances are movable, but not copyable; they are created through
  * @ref event::wrap (a friend of this class), as the constructor is protected.
  */
 class event_t {

 public: // data member non-mutator getters
     /// The raw CUDA ID for the device w.r.t. which the event is defined
     device::id_t      device_id()       const noexcept { return device_id_; }

     /// The raw CUDA handle for the context in which the represented event is defined
     context::handle_t context_handle()  const noexcept { return context_handle_; }

     /// The raw CUDA handle for this event
     event::handle_t   handle()          const noexcept { return handle_; }

     /// True if this wrapper is responsible for telling CUDA to destroy the event
     /// upon the wrapper's own destruction
     bool              is_owning()       const noexcept { return owning_; }

     /// True if this wrapper will decrease the device's primary context's
     /// reference count upon its own destruction
     bool              holds_primary_context_reference() const noexcept { return holds_pc_refcount_unit_; }

     /// A wrapper for the device of this event (defined elsewhere)
     device_t          device()          const;

     /// A wrapper for the context of this event (defined elsewhere)
     context_t         context()         const;


 public: // other non-mutator methods

     /**
      * Has this event already occurred, or is it still pending on a stream?
      *
      * @return true if cuEventQuery() reports success, false if it reports
      * that asynchronous operations have not yet completed
      *
      * @throws cuda::runtime_error for any other status returned by cuEventQuery()
      */
     bool has_occurred() const
     {
         auto status = cuEventQuery(handle_);
         if (status == cuda::status::success) return true;
         if (status == cuda::status::async_operations_not_yet_completed) return false;
         throw cuda::runtime_error(status,
             "Could not determine whether " + event::detail_::identify(handle_)
             + " has already occurred or not");
     }

     /// An alias for @ref has_occurred
     bool query() const { return has_occurred(); }

 public: // other mutator methods

     /// Records this event on the default stream (stream::default_stream_handle),
     /// using this event's context handle
     void record() const
     {
         event::detail_::enqueue(context_handle_, stream::default_stream_handle, handle_);
     }

     /// Records this event on the specified stream (defined elsewhere)
     void record(const stream_t& stream) const;

     /// Defined elsewhere; presumably records this event on the stream and
     /// flushes it - TODO confirm against the definition
     void fire(const stream_t& stream) const;

     /// Has the calling thread wait for this event; simply calls @ref cuda::wait
     void synchronize() const
     {
         return cuda::wait(*this);
     }

 protected: // constructors

     // Plain member-wise construction; use event::wrap() to obtain instances
     event_t(
         device::id_t device_id,
         context::handle_t context_handle,
         event::handle_t event_handle,
         bool take_ownership,
         bool hold_pc_refcount_unit) noexcept
     :
         device_id_(device_id),
         context_handle_(context_handle),
         handle_(event_handle),
         owning_(take_ownership),
         holds_pc_refcount_unit_(hold_pc_refcount_unit) { }

 public: // friendship

     friend event_t event::wrap(
         device::id_t       device,
         context::handle_t  context_handle,
         event::handle_t    event_handle,
         bool               take_ownership,
         bool               hold_pc_refcount_unit) noexcept;

 public: // constructors and destructor

     // Events cannot be copied, despite our allowing non-owning class instances.
     // The reason is that we might inadvertently copy an owning instance, creating
     // a non-owning instance and letting the original owning instance go out of scope -
     // thus destructing the C++ object, and destroying the underlying CUDA object.
     // Essentially, that is like passing a reference to a local variable - which we
     // may not do.
     event_t(const event_t& other) = delete;

     // Move construction: The new instance takes over the handles and both
     // responsibilities (event ownership, primary context refcount unit) of the
     // moved-from instance, which is left with neither - so its destructor will
     // not destroy or release anything.
     event_t(event_t&& other) noexcept : event_t(
         other.device_id_, other.context_handle_, other.handle_, other.owning_, other.holds_pc_refcount_unit_)
     {
         other.owning_ = false;
         other.holds_pc_refcount_unit_ = false;
     }

     // Destroys the event if this instance owns it, and releases the primary
     // context refcount unit if this instance holds one.
     //
     // When NDEBUG is defined, failures are ignored; otherwise, the throwing
     // variants of the destruction/release functions are used - which is why
     // this destructor is declared noexcept(false).
     ~event_t() noexcept(false)
     {
         if (owning_) {
 #ifdef NDEBUG
             cuEventDestroy(handle_);
                 // Note: "Swallowing" any potential error to avoid ::std::terminate(); also,
                 // because the event cannot possibly exist after this call.
 #else
             event::detail_::destroy(handle_, device_id_, context_handle_);
 #endif
         }
         // TODO: DRY
         if (holds_pc_refcount_unit_) {
 #ifdef NDEBUG
             device::primary_context::detail_::decrease_refcount_nothrow(device_id_);
                 // Note: "Swallowing" any potential error to avoid ::std::terminate(); also,
                 // because a failure probably means the primary context is inactive already
 #else
             device::primary_context::detail_::decrease_refcount(device_id_);
 #endif
         }
     }

 public: // operators

     event_t& operator=(const event_t&) = delete;

     // Move assignment is implemented as a swap of _all_ fields: this instance
     // takes over everything `other` had, while `other` receives this instance's
     // previous state - and will release it, as necessary, on its own destruction.
     event_t& operator=(event_t&& other) noexcept
     {
         ::std::swap(device_id_, other.device_id_);
         ::std::swap(context_handle_, other.context_handle_);
         ::std::swap(handle_, other.handle_);
         ::std::swap(owning_, other.owning_);
         // The refcount-unit flag must travel together with device_id_, since the
         // destructor releases the unit w.r.t. the device whose ID it holds
         ::std::swap(holds_pc_refcount_unit_, other.holds_pc_refcount_unit_);
         return *this;
     }

 protected: // data members
     device::id_t       device_id_;
     context::handle_t  context_handle_;
     event::handle_t    handle_;
     bool               owning_;
         // this field is mutable only for enabling move construction; other
         // than in that case it must not be altered
     bool               holds_pc_refcount_unit_;
         // When context_handle_ is the handle of a primary context, this event may
         // be "keeping that context alive" through the refcount - in which case
         // it must release its refcount unit on destruction
 };

 namespace event {

 using duration_t = ::std::chrono::duration<float, ::std::milli>;

 // Determines the time elapsed between two events, by passing their raw handles
 // to cuEventElapsedTime(); the result is returned as a (floating-point)
 // duration in milliseconds. Throws, via throw_if_error_lazy, on failure.
 inline duration_t time_elapsed_between(const event_t& start, const event_t& end)
 {
     float milliseconds_between_events;
     auto elapsed_time_status = cuEventElapsedTime(
         &milliseconds_between_events, start.handle(), end.handle());
     throw_if_error_lazy(elapsed_time_status, "determining the time elapsed between events");
     return duration_t { milliseconds_between_events };
 }

 // Convenience overload: the elapsed time between the two events of a pair, with
 // the pair's first element taken as the start and its second as the end.
 inline duration_t time_elapsed_between(const ::std::pair<const event_t&, const event_t&>& event_pair)
 {
     return time_elapsed_between(event_pair.first, event_pair.second);
 }

 // Wrap an existing CUDA event in an event_t instance. All arguments are simply
 // passed on to event_t's (protected) constructor, to which this function has
 // access as a friend of the class; no CUDA API call is made here.
 inline event_t wrap(
     device::id_t       device_id,
     context::handle_t  context_handle,
     handle_t           event_handle,
     bool               take_ownership,
     bool               hold_pc_refcount_unit) noexcept
 {
     return { device_id, context_handle, event_handle, take_ownership, hold_pc_refcount_unit };
 }

 namespace detail_ {

 // Builds the identification string for an event wrapper, by handing its raw
 // event handle, context handle and device ID to the three-argument identify()
 // overload (which is defined elsewhere).
 inline ::std::string identify(const event_t& event)
 {
     auto event_handle = event.handle();
     auto owning_context_handle = event.context_handle();
     auto owning_device_id = event.device_id();
     return identify(event_handle, owning_context_handle, owning_device_id);
 }

 // Creates a new event with cuEventCreate(), using the given creation flags, and
 // returns its raw handle. No context is set here; going by the function's name,
 // the event is created in whichever context is current. Throws, via
 // throw_if_error_lazy, on failure.
 inline handle_t create_raw_in_current_context(flags_t flags = 0u)
 {
     cuda::event::handle_t raw_event_handle;
     auto creation_status = cuEventCreate(&raw_event_handle, flags);
     throw_if_error_lazy(creation_status, "Failed creating a CUDA event");
     return raw_event_handle;
 }

 // Creates a new event - without setting any context - and returns an owning
 // wrapper for it.
 //
 // Notes:
 // * For the time being, an event_t must know the ID of its device, even when
 //   that is the current device; hence the device ID parameter.
 // * Likewise, this function cannot tell whether the context is a primary
 //   context; the caller must know this, and decide (through
 //   hold_pc_refcount_unit) whether the event proxy is to decrease the
 //   primary context refcount when it is destroyed.
 inline event_t create_in_current_context(
     device::id_t       current_device_id,
     context::handle_t  current_context_handle,
     bool               hold_pc_refcount_unit,
     bool               uses_blocking_sync,
     bool               records_timing,
     bool               interprocess)
 {
     auto new_event_handle = create_raw_in_current_context(
         make_flags(uses_blocking_sync, records_timing, interprocess));
     return wrap(
         current_device_id,
         current_context_handle,
         new_event_handle,
         do_take_ownership,
         hold_pc_refcount_unit);
 }

 // Destroys the event with cuEventDestroy(), without setting any context. The
 // device ID and context handle are only used for composing the error message
 // in case of failure (in which case throw_if_error_lazy throws).
 inline void destroy_in_current_context(
     handle_t           handle,
     device::id_t       current_device_id,
     context::handle_t  current_context_handle)
 {
     auto destruction_status = cuEventDestroy(handle);
     throw_if_error_lazy(
         destruction_status,
         "Failed destroying " + identify(handle, current_context_handle, current_device_id));
 }

 // Creates a new event for the context with the given handle, returning a
 // wrapper for it; the actual work is done by create_in_current_context().
 //
 // hold_pc_refcount_unit  whether the new wrapper should decrease the device's
 //                        primary context refcount on destruction
 // uses_blocking_sync, records_timing, interprocess
 //                        event creation options, see make_flags()
 inline event_t create(
     device::id_t       device_id,
     context::handle_t  context_handle,
     bool               hold_pc_refcount_unit,
     bool               uses_blocking_sync,
     bool               records_timing,
     bool               interprocess)
 {
     // The macro is defined elsewhere; presumably it makes the context current
     // for the remainder of this scope
     CAW_SET_SCOPE_CONTEXT(context_handle);

     return detail_::create_in_current_context(
         device_id, context_handle,
         hold_pc_refcount_unit,
         uses_blocking_sync, records_timing, interprocess);
 }

 // Destroys the event with the given handle; the actual work is done by
 // destroy_in_current_context(), which throws on failure.
 inline void destroy(
     handle_t           handle,
     device::id_t       device_id,
     context::handle_t  context_handle)
 {
     // The macro is defined elsewhere; presumably it makes the context current
     // for the remainder of this scope
     CAW_SET_SCOPE_CONTEXT(context_handle);
     destroy_in_current_context(handle, device_id, context_handle);
 }

 } // namespace detail_

 // Creates a new event on (the primary execution context of) a device.
 // Declaration only; the definition is not in this part of the file.
 //
 // uses_blocking_sync  passed on as an event creation option; defaults to
 //                     busy-waiting
 // records_timing      whether the event should record timing information
 // interprocess        whether the event can be shared between processes
 event_t create(
     const device_t&  device,
     bool             uses_blocking_sync = sync_by_busy_waiting, // Yes, that's the runtime default
     bool             records_timing     = do_record_timings,
     bool             interprocess       = not_interprocess);

 // Same as above, but taking a context wrapper rather than a device wrapper.
 // Declaration only; the definition is not in this part of the file.
 inline event_t create(
     const context_t&  context,
     bool              uses_blocking_sync = sync_by_busy_waiting,
     bool              records_timing     = do_record_timings,
     bool              interprocess       = not_interprocess);

 } // namespace event

 // Has the calling thread wait until the event has occurred, by calling
 // cuEventSynchronize() on its handle - with a scoped context override, for the
 // event's context, in place during the call. Throws, via throw_if_error_lazy,
 // on failure.
 inline void wait(const event_t& event)
 {
     context::current::detail_::scoped_override_t context_for_this_scope(event.context_handle());
     auto synchronization_status = cuEventSynchronize(event.handle());
     throw_if_error_lazy(
         synchronization_status,
         "Failed synchronizing " + event::detail_::identify(event));
 }

 // An alias for wait(), under the name used for the other synchronize()
 // overloads (e.g. for contexts).
 inline void synchronize(const event_t& event)
 {
     return wait(event);
 }

 } // namespace cuda

 #endif // CUDA_API_WRAPPERS_EVENT_HPP_
cuda::event::duration_t
::std::chrono::duration< float, ::std::milli > duration_t
The type used by the CUDA Runtime API to represent the time difference between pairs of events...
Definition: event.hpp:321

ipc.hpp
wrappers for CUDA's facilities for sharing on-device memory addresses and CUDA events between host pr...

cuda::stream_t
Proxy class for a CUDA stream.
Definition: stream.hpp:246

cuda::event_t::handle
event::handle_t handle() const noexcept
The raw CUDA handle for this event.
Definition: event.hpp:143

cuda::event_t::is_owning
bool is_owning() const noexcept
True if this wrapper is responsible for telling CUDA to destroy the event upon the wrapper's own dest...
Definition: event.hpp:146

cuda::event_t::synchronize
void synchronize() const
See.
Definition: event.hpp:220

cuda::context_t
Wrapper class for a CUDA context.
Definition: context.hpp:244

cuda
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22

cuda::event_t::device_id
device::id_t device_id() const noexcept
The raw CUDA ID for the device w.r.t. which the event is defined.
Definition: event.hpp:137

cuda::context::handle_t
CUcontext handle_t
Raw CUDA driver handle for a context; see {context_t}.
Definition: types.hpp:878

cuda::event_t
Wrapper class for a CUDA event.
Definition: event.hpp:133

cuda::event_t::has_occurred
bool has_occurred() const
Has this event already occurred, or is it still pending on a stream?
Definition: event.hpp:172

std
STL namespace.

cuda::device::id_t
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:850

cuda::wait
void wait(const event_t &event)
Have the calling thread wait - either busy-waiting or blocking - and return only after this event has...
Definition: event.hpp:467

cuda::event_t::context_handle
context::handle_t context_handle() const noexcept
The raw CUDA handle for the context in which the represented event is defined.
Definition: event.hpp:140

cuda::event::handle_t
CUevent handle_t
The CUDA driver's raw handle for events.
Definition: types.hpp:217

cuda::event_t::query
bool query() const
An alias for {event_t::has_occurred()} - to conform to how the CUDA runtime API names this functional...
Definition: event.hpp:186

cuda::profiling::start
void start()
Start CUDA profiling for the current process.
Definition: profiling.hpp:229

cuda::event::wrap
event_t wrap(device::id_t device_id, context::handle_t context_handle, handle_t event_handle, bool take_ownership=false, bool hold_pc_refcount_unit=false) noexcept
Wrap an existing CUDA event in an event_t instance.
Definition: event.hpp:346

cuda::runtime_error
A (base?) class for exceptions raised by CUDA code; these errors are thrown by essentially all CUDA R...
Definition: error.hpp:271

cuda::synchronize
void synchronize(const context_t &context)
Waits for all previously-scheduled tasks on all streams (= queues) in a CUDA context to conclude...
Definition: context.hpp:968

cuda::event::create
event_t create(const device_t &device, bool uses_blocking_sync=sync_by_busy_waiting, bool records_timing=do_record_timings, bool interprocess=not_interprocess)
creates a new event on (the primary execution context of) a device.
Definition: event.hpp:45

cuda::event_t::record
void record() const
Schedule a specified event to occur (= to fire) when all activities already scheduled on the event's ...
Definition: event.hpp:196

cuda::event::time_elapsed_between
duration_t time_elapsed_between(const event_t &start, const event_t &end)
Determine (inaccurately) the elapsed time between two events.
Definition: event.hpp:333

throw_if_error_lazy
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we've failed - which also ensures no string is constructed unle...
Definition: error.hpp:316

cuda::array::handle_t
CUarray handle_t
Raw CUDA driver handle for arrays (of any dimension)
Definition: array.hpp:34

current_device.hpp
Wrappers for getting and setting CUDA's choice of which device is 'current'.

error.hpp
Facilities for exception-based handling of Runtime and Driver API errors, including a basic exception...

cuda::stream::handle_t
CUstream handle_t
The CUDA driver's raw handle for streams.
Definition: types.hpp:239

cuda::event::interprocess
Can be shared between processes. Must not be able to record timings.
Definition: constants.hpp:96

constants.hpp
Fundamental CUDA-related constants and enumerations, not dependent on any more complex abstractions...

cuda::event_t::holds_primary_context_reference
bool holds_primary_context_reference() const noexcept
True if this wrapper has been associated with an increase of the device's primary context's reference...
Definition: event.hpp:149

cuda::event::sync_by_busy_waiting
The thread calling event_.synchronize() will enter a busy-wait loop; this (might) minimize delay betw...
Definition: constants.hpp:70

cuda::event::not_interprocess
Can only be used by the process which created it.
Definition: constants.hpp:95

cuda::stream::default_stream_handle
const stream::handle_t default_stream_handle
The CUDA runtime provides a default stream on which work is scheduled when no stream is specified; fo...
Definition: constants.hpp:42

cuda::device_t
Wrapper class for a CUDA device.
Definition: device.hpp:135

types.hpp
Fundamental CUDA-related type definitions.