10 #ifndef CUDA_API_WRAPPERS_PROFILING_HPP_ 11 #define CUDA_API_WRAPPERS_PROFILING_HPP_ 13 #include "../api/types.hpp" 14 #include "../api/error.hpp" 15 #include "../api/current_context.hpp" 16 #include "../api/stream.hpp" 17 #include "../api/event.hpp" 18 #include "../api/device.hpp" 19 #include "../api/multi_wrapper_impls/context.hpp" 21 #include <cudaProfiler.h> 23 #if CUDA_VERSION >= 10000 || defined(_WIN32) 24 #include <nvtx3/nvToolsExt.h> 25 #include <nvtx3/nvToolsExtCuda.h> 27 #include <nvToolsExt.h> 28 #include <nvToolsExtCuda.h> 32 #include <processthreadsapi.h> 35 #ifdef CUDA_API_WRAPPERS_USE_PTHREADS 38 #ifdef CUDA_API_WRAPPERS_USE_WIN32_THREADS 39 #include <processthreadsapi.h> 61 inline void set_message(nvtxEventAttributes_t &attrs,
const char *c_str) noexcept
63 attrs.messageType = NVTX_MESSAGE_TYPE_ASCII;
64 attrs.message.ascii = c_str;
/// Makes a wide-character string the message of an NVTX event-attributes structure.
///
/// @param attrs  the attributes structure to modify
/// @param wc_str the wide-character message; only the pointer is stored in `attrs`,
///               nothing here copies the characters
67 inline void set_message(nvtxEventAttributes_t &attrs,
const wchar_t *wc_str) noexcept
// The message type tag and the message field are set as a pair.
69 attrs.messageType = NVTX_MESSAGE_TYPE_UNICODE;
70 attrs.message.unicode = wc_str;
/// Makes a registered-string handle the message of an NVTX event-attributes structure.
///
/// @param attrs the attributes structure to modify
/// @param rsh   the handle to store as the message
73 inline void set_message(nvtxEventAttributes_t &attrs, nvtxStringHandle_t rsh) noexcept
// The message type tag and the message field are set as a pair.
75 attrs.messageType = NVTX_MESSAGE_TYPE_REGISTERED;
76 attrs.message.registered = rsh;
/// The types of profiled ranges recognized here; `unspecified` is the first enumerator.
90 enum class type_t { unspecified, kernel, pci_express_transfer };
129 static_cast<underlying_type>(red) << 16 |
131 static_cast<underlying_type>(blue) << 0;
/// Some basic colors, for convenience. Each function passes a fixed
/// hexadecimal literal to from_hex() and returns the result.
///
/// The literals read as 0x00RRGGBB (e.g. FullRed has 0xFF in bits 16..23,
/// FullBlue has 0xFF in bits 0..7).
/// NOTE(review): the leading byte is 0x00 in every literal; if that byte is the
/// alpha channel, confirm that a zero value is what the profiler expects.
139 static constexpr
color_t Black() noexcept {
return from_hex(0x00000000); }
140 static constexpr
color_t White() noexcept {
return from_hex(0x00FFFFFF); }
// Fully-saturated colors: each used channel is at 0xFF.
141 static constexpr
color_t FullRed() noexcept {
return from_hex(0x00FF0000); }
142 static constexpr
color_t FullGreen() noexcept {
return from_hex(0x0000FF00); }
143 static constexpr
color_t FullBlue() noexcept {
return from_hex(0x000000FF); }
144 static constexpr
color_t FullYellow() noexcept {
return from_hex(0x00FFFF00); }
// Light variants: the dominant channel(s) at 0xFF, the remaining ones at 0xDD.
145 static constexpr
color_t LightRed() noexcept {
return from_hex(0x00FFDDDD); }
146 static constexpr
color_t LightGreen() noexcept {
return from_hex(0x00DDFFDD); }
147 static constexpr
color_t LightBlue() noexcept {
return from_hex(0x00DDDDFF); }
148 static constexpr
color_t LightYellow() noexcept {
return from_hex(0x00FFFFDD); }
// Dark variants: the dominant channel(s) at 0x88, the remaining ones at 0x00.
149 static constexpr
color_t DarkRed() noexcept {
return from_hex(0x00880000); }
150 static constexpr
color_t DarkGreen() noexcept {
return from_hex(0x00008800); }
151 static constexpr
color_t DarkBlue() noexcept {
return from_hex(0x00000088); }
152 static constexpr
color_t DarkYellow() noexcept {
return from_hex(0x00888800); }
/// Returns a reference to a single function-local static mutex.
///
/// @return the mutex object, which is the same one on every call
162 inline ::std::mutex& get_mutex() noexcept
164 static ::std::mutex profiler_mutex;
165 return profiler_mutex;
/// Builds an NVTX event-attributes structure carrying a color and a message.
///
/// @tparam CharT      character type of the description string
/// @param description the message text, forwarded to set_message()
/// @param color       the color to store in the structure
///
/// NOTE(review): the statement returning `eventAttrib` is not visible in this listing.
168 template <
typename CharT>
169 nvtxEventAttributes_t create_attributes(
const CharT* description,
color_t color)
// Start from an all-zero structure, then fill in only the fields used here.
171 nvtxEventAttributes_t eventAttrib = {0};
172 eventAttrib.version = NVTX_VERSION;
173 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
174 eventAttrib.colorType = NVTX_COLOR_ARGB;
// Relies on color_t converting to the type of the `color` field; that
// conversion is defined outside the lines visible here.
175 eventAttrib.color = color;
// Overload resolution on the pointer type picks the matching set_message().
176 profiling::detail_::set_message(eventAttrib,description);
/// Mark a single point on the profiler timeline, giving it a color and some
/// descriptive text.
///
/// NOTE(review): the signature line and the call that actually emits the marker
/// are not visible in this listing; only the two statements below are.
184 template <
typename CharT>
// The attributes are built before the mutex is acquired.
187 auto attrs = detail_::create_attributes(description, color);
// Held until the end of the enclosing scope.
188 ::std::lock_guard<::std::mutex> guard{ detail_::get_mutex() };
/// Mark the beginning of a range on the profiler timeline, giving it a color
/// and some descriptive text.
///
/// @tparam CharT      character type of the description string
/// @param description text to attach to the range
/// @param color       color of the range; defaults to color_t::LightRed()
///
/// NOTE(review): the function-name line, one further parameter line and the
/// return statement are not visible in this listing.
202 template <
typename CharT>
204 const CharT* description,
206 color_t color = color_t::LightRed())
// Here the mutex is acquired first, and the attributes are built under it.
209 ::std::lock_guard<::std::mutex> guard{ detail_::get_mutex() };
210 auto attrs = detail_::create_attributes(description, color);
211 nvtxRangeId_t range_handle = nvtxRangeStartEx(&attrs);
// Compile-time check that the wrapper's handle type is exactly NVTX's.
212 static_assert(::std::is_same<range::handle_t, nvtxRangeId_t>::value,
213 "cuda::profiling::range::handle_t must be the same type as nvtxRangeId_t - but isn't.");
/// Mark the end of a range, using the handle obtained when its beginning was marked.
///
/// NOTE(review): the signature line is not visible in this listing.
// Compile-time check that the wrapper's handle type is exactly NVTX's.
221 static_assert(::std::is_same<range::handle_t, nvtxRangeId_t>::value,
222 "cuda::profiling::range::handle_t must be the same type as nvtxRangeId_t - but isn't.");
223 nvtxRangeEnd(range_handle);
// start(): calls cuProfilerStart() and keeps its result in `status`.
// NOTE(review): whatever checks `status` is on lines not visible in this listing.
231 auto status = cuProfilerStart();
// stop(): calls cuProfilerStop() and keeps its result in `status`.
// NOTE(review): whatever checks `status` is on lines not visible in this listing.
238 auto status = cuProfilerStop();
247 namespace profiling {
259 template <
typename CharT>
261 const CharT* description,
286 context::current::detail_::scoped_existence_ensurer_t context_existence_ensurer;
/// profile_this_scope(): expands to the declaration of a
/// `::cuda::profiling::scope` object named `cuda_profiling_scope_`.
///
/// name_host_thread (declared right after the macro): takes a raw 32-bit thread
/// ID and a name string. Only a declaration is given for the generic template.
///
/// @tparam CharT        character type of the name string
/// @param raw_thread_id the numeric thread ID
/// @param name          the name string
289 #define profile_this_scope() ::cuda::profiling::scope cuda_profiling_scope_{}; 293 template <
typename CharT>
294 void name_host_thread(uint32_t raw_thread_id,
const CharT*
name);
/// Narrow-character version of name_host_thread: passes the thread ID and the
/// name to nvtxNameOsThreadA().
///
/// @param raw_thread_id the numeric thread ID
/// @param name          the name, as a narrow-character string
297 inline void name_host_thread<char>(uint32_t raw_thread_id,
const char*
name)
299 nvtxNameOsThreadA(raw_thread_id, name);
/// Wide-character version of name_host_thread: passes the thread ID and the
/// name to nvtxNameOsThreadW().
///
/// @param raw_thread_id the numeric thread ID
/// @param name          the name, as a wide-character string
303 inline void name_host_thread<wchar_t>(uint32_t raw_thread_id,
const wchar_t*
name)
305 nvtxNameOsThreadW(raw_thread_id, name);
308 template <
typename CharT>
314 nvtxNameCuStreamA(stream_handle, name);
320 nvtxNameCuStreamW(stream_handle, name);
/// Declaration of the event-naming helper, taking a raw event handle and a name.
/// Only a declaration is given for the generic template.
/// NOTE(review): the nvtxNameCuEventA / nvtxNameCuEventW calls nearby in this
/// listing presumably belong to its char / wchar_t specializations — confirm.
///
/// @tparam CharT       character type of the name string
/// @param event_handle raw handle of the event
/// @param name         the name string
323 template <
typename CharT>
324 inline void name_event(
event::handle_t event_handle,
const CharT* name);
329 nvtxNameCuEventA(event_handle, name);
335 nvtxNameCuEventW(event_handle, name);
/// Declaration of the device-naming helper, taking a device ID and a name.
/// Only a declaration is given for the generic template.
///
/// @tparam CharT    character type of the name string
/// @param device_id numeric ID of the device
/// @param name      the name string
338 template <
typename CharT>
339 void name_device(
device::id_t device_id,
const CharT* name);
344 nvtxNameCuDeviceA(device_id, name);
/// Wide-character version of name_device: passes the device ID and the name
/// to nvtxNameCuDeviceW().
///
/// @param device_id numeric ID of the device
/// @param name      the name, as a wide-character string
348 inline void name_device<wchar_t>(
device::id_t device_id,
const wchar_t*
name)
350 nvtxNameCuDeviceW(device_id, name);
/// Names a host thread, identified by a ::std::thread::id, by deriving a 32-bit
/// thread ID from it and passing that to name_host_thread().
///
/// @param host_thread_id the standard-library ID of the thread
/// @param name           the name, as a narrow-character string
/// @throws ::std::runtime_error if the native handle value is greater than or
///         equal to the maximum uint32_t value (on the code path with that check)
///
/// NOTE(review): `thread_id` is declared twice below; the two declarations
/// presumably sit in alternative preprocessor branches (one using GetThreadId(),
/// the other a range-checked cast), but the directives are not visible in this
/// listing — confirm against the original file.
353 inline void name(::std::thread::id host_thread_id,
const char* name)
// Reads the bytes of the thread::id object as if they were a native handle.
// NOTE(review): this depends on the standard library's representation of
// ::std::thread::id, which the C++ standard does not specify.
355 auto native_handle = *(
reinterpret_cast<const ::std::thread::native_handle_type*
>(&host_thread_id));
357 uint32_t thread_id = GetThreadId(native_handle);
// The comparison is `>=`, so a handle exactly equal to the maximum is rejected too.
359 if (native_handle >= ::std::numeric_limits<uint32_t>::max()) {
360 throw ::std::runtime_error(
"Native thread ID " + ::std::to_string(native_handle) +
361 " exceeds maximum representable thread ID " + ::std::to_string(::std::numeric_limits<uint32_t>::max()));
363 auto thread_id =
static_cast<uint32_t
>(native_handle);
365 name_host_thread(thread_id, name);}
/// Have the profiler refer to a given host thread using a specified string
/// identifier. Only a declaration appears here.
///
/// @tparam CharT      character type of the name string
/// @param host_thread the thread to name
/// @param name        the name string
376 template <
typename CharT>
377 void name(const ::std::thread& host_thread,
const CharT*
name);
386 template <
typename CharT>
393 template <
typename CharT>
396 context::current::detail_::scoped_override_t context_setter{stream.
context_handle()};
401 template <
typename CharT>
404 context::current::detail_::scoped_override_t context_setter{
event.context_handle()};
409 template <
typename CharT>
412 detail_::name_stream(device.
id(),
name);
418 #endif // CUDA_API_WRAPPERS_PROFILING_HPP_ range::handle_t range_start(const CharT *description, range::type_t type=range::type_t::unspecified, color_t color=color_t::LightRed())
Mark the beginning of a range on the profiler timeline, also giving it a color and some descriptive text.
Definition: profiling.hpp:203
A RAII/CADRe class whose scope of existence is reflected as a range in the profiler.
Definition: profiling.hpp:257
context::handle_t context_handle() const noexcept
The raw CUDA handle for the context in which the represented stream is defined.
Definition: stream.hpp:260
Proxy class for a CUDA stream.
Definition: stream.hpp:246
event::handle_t handle() const noexcept
The raw CUDA handle for this event.
Definition: event.hpp:143
stream::handle_t handle() const noexcept
The raw CUDA handle for a stream which this class wraps.
Definition: stream.hpp:257
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
void stop()
Stop CUDA profiling for the current process.
Definition: profiling.hpp:236
nvtxRangeId_t handle_t
The raw handle of a CUDA profiling range.
Definition: profiling.hpp:93
Wrapper class for a CUDA event.
Definition: event.hpp:133
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:850
void range_end(range::handle_t range_handle)
Mark the end of a range, using the handle obtained when previously marking its beginning.
Definition: profiling.hpp:219
type_t
Types of profiled ranges we recognize.
Definition: profiling.hpp:90
device::id_t id() const noexcept
Return the proxied device's ID.
Definition: device.hpp:594
CUevent handle_t
The CUDA driver's raw handle for events.
Definition: types.hpp:217
static constexpr color_t from_hex(underlying_type raw_argb) noexcept
Construct a profiler color value from a numeric value (typically, an 8-hex-digit literal) ...
Definition: profiling.hpp:115
void point(const CharT *description, color_t color=color_t::Black())
Mark a single point on the profiler timeline, giving it also a color and some descriptive text...
Definition: profiling.hpp:185
void start()
Start CUDA profiling for the current process.
Definition: profiling.hpp:229
void name_this_thread(const CharT *name)
Have the profiler refer to the current thread using a specified string identifier (rather than its nu...
Definition: profiling.hpp:387
channel_value alpha
A profiler color is made up of three color channels and a transparency or "alpha" channel...
Definition: profiling.hpp:111
A class to instantiate in the part of your application which does any work you intend to use the CUDA...
Definition: profiling.hpp:281
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we've failed - which also ensures no string is constructed unle...
Definition: error.hpp:316
::std::uint32_t underlying_type
A profiler color corresponds to a 32-bit value.
Definition: profiling.hpp:104
::std::uint8_t channel_value
Each color channel is an 8-bit value.
Definition: profiling.hpp:107
CUstream handle_t
The CUDA driver's raw handle for streams.
Definition: types.hpp:239
static constexpr color_t Black() noexcept
Some basic colors, for convenience.
Definition: profiling.hpp:139
void name(const ::std::thread &host_thread, const CharT *name)
Have the profiler refer to a given host thread, using a specified string identifier (rather than its ...
Wrapper class for a CUDA device.
Definition: device.hpp:135
void name(const device_t &device, const CharT *name)
Have the profiler assign a name to a certain CUDA device.
Definition: profiling.hpp:410
underlying_type as_hex() const noexcept
Definition: profiling.hpp:125
An RGB colorspace color value, with potential transparency, which may be used to color elements in ti...
Definition: profiling.hpp:102