cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
typed_node.hpp
Go to the documentation of this file.
1 
6 #pragma once
7 #ifndef CUDA_API_WRAPPERS_TYPED_NODE_HPP
8 #define CUDA_API_WRAPPERS_TYPED_NODE_HPP
9 
10 #if CUDA_VERSION >= 10000
11 
12 #include "node.hpp"
13 #include "../detail/for_each_argument.hpp"
14 #include "../error.hpp"
15 #include "../device.hpp"
16 #include "../event.hpp"
17 #include "../kernel.hpp"
18 #include "../launch_configuration.hpp"
19 #include "../memory_pool.hpp"
20 
#include <climits>
#include <cstring>
#include <iostream>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
25 
26 
27 namespace cuda {
28 
29 namespace graph {
30 
/// Collect the addresses of a sequence of kernel arguments into a vector
/// of type-erased (non-const) pointers, as expected by the graph API's
/// kernel-node parameters.
///
/// @note Unlike the traditional kernel launch API, the graph API does not
/// expect an extra terminating nullptr element in this sequence.
template<typename... Ts>
::std::vector<void*> make_kernel_argument_pointers(const Ts&... kernel_arguments)
{
	// Each argument's address is stripped of its type and const-ness; the
	// braced pack expansion hands the vector an ::std::initializer_list<void*>
	::std::vector<void*> argument_ptrs {
		const_cast<void*>(static_cast<const void*>(&kernel_arguments))...
	};
	return argument_ptrs;
}
46 
47 namespace node {
48 
/// The kinds of nodes a CUDA (execution) graph template may contain; the
/// values are those of the driver's CUgraphNodeType, with convenience aliases
/// added for several of the kinds.
enum class kind_t : ::std::underlying_type<CUgraphNodeType>::type {
	kernel_launch = CU_GRAPH_NODE_TYPE_KERNEL,
	memory_copy = CU_GRAPH_NODE_TYPE_MEMCPY, memcpy = memory_copy,
	memory_set = CU_GRAPH_NODE_TYPE_MEMSET, memset = memory_set,
	host_function_call = CU_GRAPH_NODE_TYPE_HOST,
	child_graph = CU_GRAPH_NODE_TYPE_GRAPH,
	empty = CU_GRAPH_NODE_TYPE_EMPTY,
#if CUDA_VERSION >= 11010
	wait = CU_GRAPH_NODE_TYPE_WAIT_EVENT, wait_on_event = wait,
	event = CU_GRAPH_NODE_TYPE_EVENT_RECORD, record_event = event,
#endif // CUDA_VERSION >= 11010
// Semaphores are not yet supported by the CUDA API wrapper library,
// neither in graphs nor outside of them
//
//	signal_semaphore = CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL,
//	wait_on_semaphore = CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT,
#if CUDA_VERSION >= 11040
	memory_allocation = CU_GRAPH_NODE_TYPE_MEM_ALLOC, malloc = memory_allocation,
	memory_free = CU_GRAPH_NODE_TYPE_MEM_FREE, memfree = memory_free,
#endif // CUDA_VERSION >= 11040
#if CUDA_VERSION >= 11070
	// Note: We support just one batch op, but the kind value is for any batch op
	memory_barrier = CU_GRAPH_NODE_TYPE_BATCH_MEM_OP,
#endif
#if CUDA_VERSION >= 12030
	conditional = CU_GRAPH_NODE_TYPE_CONDITIONAL
#endif // CUDA_VERSION >= 12030
};
77 
#if CUDA_VERSION >= 12030
/// Definitions relating to conditionally-executed graph nodes (CUDA 12.3 and later)
namespace conditional {

/// The driver's raw handle for the conditional value a conditional node examines
using handle_t = CUgraphConditionalHandle;
/// The kind of conditional node (the driver's raw conditional-node-type enum)
using kind_t = CUgraphConditionalNodeType;
/// The type of the value a conditional handle can be (re-)initialized to
using default_value_t = unsigned;

namespace detail_ {

using flags_t = unsigned;

/// Create a conditional-value handle within the given graph template and
/// context; the handle is created with the CU_GRAPH_COND_ASSIGN_DEFAULT
/// flag, i.e. it is assigned @p default_value on every graph launch.
///
/// @throws cuda::runtime_error on failure of the driver call
inline handle_t create(
	template_::handle_t graph_template,
	context::handle_t context_handle,
	default_value_t default_value)
{
	handle_t result;
	auto status = cuGraphConditionalHandleCreate(
		&result, graph_template, context_handle, default_value, CU_GRAPH_COND_ASSIGN_DEFAULT);
	throw_if_error_lazy(status, "Failed creating a graph conditional");
	return result;
}

} // namespace detail_

/// Parameters for creating a conditional node: either a pre-created @ref handle_t
/// is supplied via `handle`, or all of the fields required for creating one.
struct parameters_t {
	kind_t kind;
	optional<handle_t> handle;
	optional<context::handle_t> context_handle; // Note this must match the context used to create the graph template handle
	optional<template_::handle_t> graph_template_handle;
	optional<default_value_t> default_value; // Ignored if the handle has already been created
};

} // namespace conditional
#endif // CUDA_VERSION >= 12030
113 
114 namespace detail_ {
115 
// Note: Eventually, once we can drop support for CUDA graphs in versions
// before 12.3 - that would let us drop most of these structures and
// just use cuGraphAddNode, with marshalling of parameters into a
// CUgraphNodeParams.
//
// Per-node-kind traits: the kind's display name, its wrapper-level and raw
// (driver-level) parameter types, the driver functions for inserting a node
// and getting/setting its parameters, and a marshal() function converting
// wrapper-level parameters into the raw form.
template<kind_t Kind>
struct kind_traits;
122 
/// Traits of "empty" (no-op) nodes, which carry no parameters
template<>
struct kind_traits<kind_t::empty> {
	static constexpr const auto name = "empty";
#if CUDA_VERSION >= 12030
	using raw_parameters_type = CUgraphNodeParams;
	static constexpr const bool inserter_takes_context = false; // The context is passed within the parameter struct
	static constexpr const bool inserter_takes_params_by_ptr = true;
	// Empty nodes have no parameters of their own
	using parameters_type = cuda::nullopt_t;

	static constexpr const auto inserter = cuGraphAddNode;
	static constexpr const auto setter = cuGraphNodeSetParams;
	static constexpr const auto getter = nullptr;

	/// Produce a zeroed-out raw parameters struct with only the node type set
	static raw_parameters_type marshal(const parameters_type&) {
		raw_parameters_type raw_params;
		::std::memset(&raw_params, 0, sizeof(raw_parameters_type));
		raw_params.type = static_cast<CUgraphNodeType>(kind_t::empty);
		return raw_params;
	}
#endif // CUDA_VERSION >= 12030
};
144 
/// Traits of nodes which execute another graph (template) when reached
template<>
struct kind_traits<kind_t::child_graph> {
	static constexpr const auto name = "child graph";
	using raw_parameters_type = template_::handle_t;
	static constexpr const bool inserter_takes_params_by_ptr = false;
	static constexpr const bool inserter_takes_context = false;
	using parameters_type = template_t;
	static constexpr const auto inserter = cuGraphAddChildGraphNode; // 1 extra param
	// no param setter!
	static constexpr const auto getter = cuGraphChildGraphNodeGetGraph;
#if CUDA_VERSION >= 11010
	static constexpr const auto instance_setter = cuGraphExecChildGraphNodeSetParams;
#endif

	// Declared only; defined elsewhere (not in this header section)
	static raw_parameters_type marshal(const parameters_type& params);
};
161 
162 #if CUDA_VERSION >= 11010
/// Traits of nodes which record an event when reached during graph execution
template<>
struct kind_traits<kind_t::record_event> {
	static constexpr const auto name = "record event";
	using raw_parameters_type = event::handle_t;
	static constexpr const bool inserter_takes_context = false;
	static constexpr const bool inserter_takes_params_by_ptr = false;
	using parameters_type = event_t;
	static constexpr const auto inserter = cuGraphAddEventRecordNode; // 1 extra param
	static constexpr const auto setter = cuGraphEventRecordNodeSetEvent;
	static constexpr const auto getter = cuGraphEventRecordNodeGetEvent;
	static constexpr const auto instance_setter = cuGraphExecEventRecordNodeSetEvent;

	/// The raw parameter is simply the event's raw driver handle
	static raw_parameters_type marshal(const parameters_type& params)
	{
		return params.handle();
	}
};
180 
/// Traits of nodes which wait on an event during graph execution
template<>
struct kind_traits<kind_t::wait_on_event> {
	static constexpr const auto name = "wait on event";
	using raw_parameters_type = event::handle_t;
	static constexpr const bool inserter_takes_context = false;
	static constexpr const bool inserter_takes_params_by_ptr = false;
	using parameters_type = event_t;
	static constexpr const auto inserter = cuGraphAddEventWaitNode; // 1 extra param
	static constexpr const auto setter = cuGraphEventWaitNodeSetEvent;
	static constexpr const auto getter = cuGraphEventWaitNodeGetEvent;
	static constexpr const auto instance_setter = cuGraphExecEventWaitNodeSetEvent;

	/// The raw parameter is simply the event's raw driver handle
	static raw_parameters_type marshal(const parameters_type& params)
	{
		return params.handle();
	}
};
199 #endif // CUDA_VERSION >= 11010
200 
/// Traits of nodes invoking a host-side (CPU) function during graph execution
template<>
struct kind_traits<kind_t::host_function_call> {
	static constexpr const auto name = "host function call";
	using raw_parameters_type = CUDA_HOST_NODE_PARAMS;
	static constexpr const bool inserter_takes_context = false;
	static constexpr const bool inserter_takes_params_by_ptr = true;
	struct parameters_type {
		stream::callback_t function_ptr; // the host function to invoke
		void* user_data; // opaque user data, forwarded into the raw params alongside the function
	};
	static constexpr const auto inserter = cuGraphAddHostNode; // 1 extra param
	static constexpr const auto setter = cuGraphHostNodeSetParams;
	static constexpr const auto getter = cuGraphHostNodeGetParams;
	static constexpr const auto instance_setter = cuGraphExecHostNodeSetParams;

	static raw_parameters_type marshal(const parameters_type& params)
	{
		return { params.function_ptr, params.user_data };
	}
	/// Convenience overload accepting the function-and-data as an ::std::pair
	static raw_parameters_type marshal(::std::pair<stream::callback_t, void*> param_pair)
	{
		return { param_pair.first, param_pair.second };
	}
};
225 
/// Traits of kernel-launch nodes: a kernel, its launch configuration,
/// and its already-marshalled argument pointers
template<>
struct kind_traits<kind_t::kernel_launch> {
	static constexpr const auto name = "kernel launch";
	using raw_parameters_type = CUDA_KERNEL_NODE_PARAMS;
	static constexpr const bool inserter_takes_context = false;
	static constexpr const bool inserter_takes_params_by_ptr = true;
	struct parameters_type {
		kernel_t kernel;
		launch_configuration_t launch_config;
		::std::vector<void*> marshalled_arguments; // Does _not_ need a nullptr "argument" terminator
	};
	static constexpr const auto inserter = cuGraphAddKernelNode; // 1 extra param
	static constexpr const auto setter = cuGraphKernelNodeSetParams;
	static constexpr const auto getter = cuGraphKernelNodeGetParams;
	static constexpr const auto instance_setter = cuGraphExecKernelNodeSetParams;

	/// Spread the kernel handle, launch configuration dimensions and argument
	/// pointers into the driver's flat raw parameters struct
	static raw_parameters_type marshal(const parameters_type& params)
	{
		// With C++20, use designated initializers
		raw_parameters_type raw_params;
		// Note: With CUDA versions earlier than 11.0, this next instruction
		// only supports non-apriori-compiled kernels, i.e. ones you compile
		// dynamically or load from a module. To change this, we would need
		// to distinguish the inserter, raw parameters and marshal members
		// between apriori and non-apriori kernels
		raw_params.func = params.kernel.handle();

		raw_params.gridDimX = params.launch_config.dimensions.grid.x;
		raw_params.gridDimY = params.launch_config.dimensions.grid.y;
		raw_params.gridDimZ = params.launch_config.dimensions.grid.z;
		raw_params.blockDimX = params.launch_config.dimensions.block.x;
		raw_params.blockDimY = params.launch_config.dimensions.block.y;
		raw_params.blockDimZ = params.launch_config.dimensions.block.z;
		raw_params.sharedMemBytes = params.launch_config.dynamic_shared_memory_size;
		raw_params.kernelParams = const_cast<decltype(raw_params.kernelParams)>(params.marshalled_arguments.data());
		raw_params.extra = nullptr; // we don't use "extra"
		return raw_params;
	}
};
265 
266 #if CUDA_VERSION >= 11040
/// Traits of nodes allocating device memory during graph execution (CUDA 11.4+)
template<>
struct kind_traits<kind_t::memory_allocation> {
	static constexpr const auto name = "memory allocation";
	using raw_parameters_type = CUDA_MEM_ALLOC_NODE_PARAMS;
	static constexpr const bool inserter_takes_context = false;
	static constexpr const bool inserter_takes_params_by_ptr = true;
	using parameters_type = ::std::pair<device_t, size_t>; // for now, assuming no peer access
	// no setter
	static constexpr const auto inserter = cuGraphAddMemAllocNode; // 1 extra param
	// static constexpr const auto setter;
	static constexpr const auto getter = cuGraphMemAllocNodeGetParams;

	/// Build raw allocation parameters: a non-exportable pool on the given
	/// device, of the given byte size, with no peer-access descriptors
	static raw_parameters_type marshal(const parameters_type& params)
	{
		static constexpr const auto no_export_handle_kind = memory::pool::shared_handle_kind_t::no_export;
		raw_parameters_type raw_params;
		raw_params.poolProps = memory::pool::detail_::create_raw_properties<no_export_handle_kind>(params.first.id());
		// TODO: Do we need to specify the allocation location in an access descriptor?
		raw_params.accessDescs = nullptr; // for now, assuming no peer access
		raw_params.accessDescCount = 0; // for now, assuming no peer access
		raw_params.bytesize = params.second;
		// TODO: Is the following really necessary to set to 0? Not sure, so let's
		// do it as a precaution
		raw_params.dptr = memory::device::address(nullptr);
		return raw_params;
	}
};
294 #endif // CUDA_VERSION >= 11040
295 
296 template<>
297 struct kind_traits<kind_t::memory_set> {
298  // TODO: Consider support for pitched memory; we would probably need mdspans for that...
299  static constexpr const auto name = "memory set";
300  using raw_parameters_type = CUDA_MEMSET_NODE_PARAMS;
301  static constexpr const bool inserter_takes_context = true;
302  static constexpr const bool inserter_takes_params_by_ptr = true;
303  struct parameters_type {
304  memory::region_t region;
305  size_t width_in_bytes; // cannot exceed 4
306  unsigned value; // must fit inside `width_in_bytes`, i.e. not exceed 2^(width*8) -1
307  };
308  static constexpr const auto inserter = cuGraphAddMemsetNode; // 2 extra params incl context
309  // no setter
310  static constexpr const auto getter = cuGraphMemsetNodeGetParams;
311 
312  static raw_parameters_type marshal(const parameters_type& params)
313  {
314  static constexpr const size_t max_width = sizeof(parameters_type::value);
315  if (params.width_in_bytes > max_width) {
316  throw ::std::invalid_argument("Unsupported memset value width (maximum is " + ::std::to_string(max_width));
317  }
318  const unsigned long min_overwide_value = 1lu << (params.width_in_bytes * CHAR_BIT);
319  if (static_cast<unsigned long>(params.value) >= min_overwide_value) {
320  throw ::std::invalid_argument("Memset value exceeds specified width");
321  }
322  CUDA_MEMSET_NODE_PARAMS raw_params;
323  raw_params.dst = memory::device::address(params.region.data());
324  // Not using raw_params.pitch since height is 1
325  raw_params.height = 1u;
326  raw_params.value = params.value;
327  raw_params.elementSize = static_cast<decltype(raw_params.elementSize)>(params.width_in_bytes);
328  raw_params.width = params.region.size() / params.width_in_bytes;
329  return raw_params;
330  }
331 };
332 
333 #if CUDA_VERSION >= 11040
/// Traits of nodes freeing graph-allocated device memory (CUDA 11.4+)
template<>
struct kind_traits<kind_t::memory_free> {
	static constexpr const auto name = "memory free";
	using raw_parameters_type = CUdeviceptr;
	static constexpr const bool inserter_takes_context = false;
	static constexpr const bool inserter_takes_params_by_ptr = false; // the void* _is_ the parameter
	using parameters_type = void*;
	static constexpr const auto inserter = cuGraphAddMemFreeNode; // 1 extra param
	// no setter
	// static constexpr const auto setter = ;
	static constexpr const auto getter = cuGraphMemFreeNodeGetParams;

	/// Convert the plain pointer into the driver's numeric device-address type
	static raw_parameters_type marshal(const parameters_type& params)
	{
		return memory::device::address(params);
	}
};
351 #endif // CUDA_VERSION >= 11040
352 
353 #if CUDA_VERSION >= 11070
354 template<>
355 struct kind_traits<kind_t::memory_barrier> {
356  static constexpr const auto name = "memory barrier";
357  using raw_parameters_type = CUDA_BATCH_MEM_OP_NODE_PARAMS;
358  static constexpr const bool inserter_takes_context = false;
359  static constexpr const bool inserter_takes_params_by_ptr = true;
360  using parameters_type = ::std::pair<context_t, cuda::memory::barrier_scope_t>;
361  static constexpr const auto inserter = cuGraphAddBatchMemOpNode; // 1 extra param
362  static constexpr const auto setter = cuGraphBatchMemOpNodeSetParams;
363  static constexpr const auto getter = cuGraphBatchMemOpNodeGetParams;
364 
365  static raw_parameters_type marshal(const parameters_type& params)
366  {
367  auto const & context = params.first;
368  raw_parameters_type raw_params;
369  raw_params.count = 1;
370  raw_params.ctx = context.handle();
371  raw_params.flags = 0;
372  CUstreamBatchMemOpParams memory_barrier_op;
373  memory_barrier_op.operation = CU_STREAM_MEM_OP_BARRIER;
374  memory_barrier_op.memoryBarrier.operation = CU_STREAM_MEM_OP_BARRIER;
375  auto const & scope = params.second;
376  memory_barrier_op.memoryBarrier.flags = static_cast<unsigned>(scope);
377  raw_params.paramArray = &memory_barrier_op;
378  return raw_params;
379  }
380 };
381 #endif // CUDA_VERSION >= 11070
382 
/// Traits of (3-dimensional) memory-copy nodes
template<>
struct kind_traits<kind_t::memory_copy> {
	static constexpr const auto name = "memory copy";
	using raw_parameters_type = CUDA_MEMCPY3D;
	static constexpr const bool inserter_takes_context = true;
	static constexpr const bool inserter_takes_params_by_ptr = true;
	static constexpr const dimensionality_t num_dimensions { 3 };
	using parameters_type = memory::copy_parameters_t<3>;

	static constexpr const auto inserter = cuGraphAddMemcpyNode; // 2 extra params incl. context
	static constexpr const auto setter = cuGraphMemcpyNodeSetParams;
	static constexpr const auto getter = cuGraphMemcpyNodeGetParams;

	static raw_parameters_type marshal(const parameters_type& params) {
		auto& params_ptr = const_cast<parameters_type&>(params);
		// This is quite a nasty bit of voodoo. The thing is, CUDA_MEMCPY3D_PEER is
		// the plain struct serving as the base class of copy_parameters_t<3>; and
		// CUDA_MEMCPY3D and CUDA_MEMCPY3D_PEER are "punnable", i.e. they have the
		// same size and field layout, with the former having some "reserved" fields
		// which are not to be used, that in the latter are used for contexts.
		// NOTE(review): this type-punning via reinterpret_cast relies entirely on
		// the two driver structs remaining layout-compatible - re-confirm against
		// the driver headers whenever the supported CUDA version range changes.
		return reinterpret_cast<CUDA_MEMCPY3D&>(params_ptr);
	}
};
406 
#if CUDA_VERSION >= 12040
/// Traits of conditionally-executed (conditional) nodes
///
/// NOTE(review): the kind_t enumerator `conditional` is guarded by
/// CUDA_VERSION >= 12030, while this specialization requires >= 12040 -
/// confirm the discrepancy is intentional; otherwise, 12.3 builds have the
/// node kind but no traits for it.
template<>
struct kind_traits<kind_t::conditional> {
	static constexpr const auto name = "conditional";
	using raw_parameters_type = CUgraphNodeParams; // within which, CUDA_CONDITIONAL_NODE_PARAMS will be used
	static constexpr const bool inserter_takes_context = false; // The context is passed within the parameter struct
	static constexpr const bool inserter_takes_params_by_ptr = true;
	using parameters_type = conditional::parameters_t;

	static constexpr const auto inserter = cuGraphAddNode;
	static constexpr const auto setter = cuGraphNodeSetParams;
	static constexpr const auto getter = nullptr;

	/// Build the raw driver-level parameter struct, creating a new conditional
	/// handle if a pre-existing one was not provided
	///
	/// @throws ::std::invalid_argument if neither a handle, nor all the fields
	/// required for creating one, were provided
	static raw_parameters_type marshal(const parameters_type& params) {
		raw_parameters_type raw_params;
		::std::memset(&raw_params, 0, sizeof(raw_parameters_type));
		raw_params.type = static_cast<CUgraphNodeType>(kind_t::conditional);
		raw_params.conditional.type = params.kind;
		// Fall back on the current context when none was specified
		raw_params.conditional.ctx = params.context_handle ?
			params.context_handle.value() : context::current::detail_::get_handle();
		if (not params.handle and
			not (params.graph_template_handle and params.context_handle and params.default_value)) {
			throw ::std::invalid_argument(
				"Conditional node creation parameters specify neither a pre-existing conditional handle, "
				"nor the arguments required for its creation");
		}
		raw_params.conditional.handle = params.handle ? params.handle.value() :
			conditional::detail_::create(
				params.graph_template_handle.value(), params.context_handle.value(), params.default_value.value());
		raw_params.conditional.size = 1;
		return raw_params;
	}
};
#endif // CUDA_VERSION >= 12040
441 
// For node kinds whose inserter takes the raw parameters by pointer: yield the
// address of the given raw parameters (const-ness is cast away, as the returned
// pointer type is non-const)
template <kind_t Kind, typename = typename ::std::enable_if<kind_traits<Kind>::inserter_takes_params_by_ptr>::type>
typename kind_traits<Kind>::raw_parameters_type *
maybe_add_ptr(const typename kind_traits<Kind>::raw_parameters_type& raw_params)
{
	return const_cast<typename kind_traits<Kind>::raw_parameters_type *>(&raw_params);
}

// For node kinds whose inserter takes the raw parameters by value: pass the
// raw parameters through unchanged
template <kind_t Kind, typename = typename ::std::enable_if<not kind_traits<Kind>::inserter_takes_params_by_ptr>::type>
const typename kind_traits<Kind>::raw_parameters_type&
maybe_add_ptr(const typename kind_traits<Kind>::raw_parameters_type& raw_params) { return raw_params; }
452 
453 } // namespace detail_
454 
/// The wrapper-level parameters type corresponding to each node kind
template <kind_t Kind> using parameters_t = typename detail_::kind_traits<Kind>::parameters_type;

template <kind_t Kind>
class typed_node_t;

/// Construct a typed_node_t from its raw constituents (defined further down)
template <kind_t Kind>
typed_node_t<Kind> wrap(template_::handle_t graph_handle, handle_t handle, parameters_t<Kind> parameters) noexcept;
462 
463 template <kind_t Kind>
464 class typed_node_t : public node_t {
465  using parameters_type = parameters_t<Kind>;
466 protected:
467  using traits = detail_::kind_traits<Kind>;
468  using raw_parameters_type = typename traits::raw_parameters_type;
469  static constexpr char const * const name = traits::name;
470 
471 public:
475  const parameters_type& parameters() const noexcept
476  {
477  static_assert(Kind != kind_t::empty, "Empty CUDA graph nodes don't have parameters");
478  return params_;
479  }
480 
481  parameters_type requery_parameters() const
482  {
483  typename traits::raw_parameters_type raw_params;
484  if (traits::param_getter == nullptr) {
485  throw cuda::runtime_error(status::named_t::not_supported,
486  "Querying parameters is not supported for this kind of node: " + node::detail_::identify(*this));
487  }
488  auto status = traits::param_getter(handle(), &raw_params);
489  throw_if_error_lazy(status, "setting parameters for " + node::detail_::identify(*this));
490  params_ = traits::unmarshal(raw_params);
491  return params_;
492  }
493 
494  void set_parameters(parameters_t<Kind> parameters)
495  {
496  static_assert(Kind != kind_t::empty, "Empty CUDA graph nodes don't have parameters");
497  auto marshalled_params = traits::marshal(parameters);
498  auto status = traits::param_setter(handle(), &marshalled_params);
499  throw_if_error_lazy(status, "setting parameters for " + node::detail_::identify(*this));
500  }
501 
502 public: // friendship
503  friend typed_node_t node::wrap<Kind>(template_::handle_t graph_handle, node::handle_t handle, parameters_t<Kind> parameters) noexcept;
504 
505 protected: // constructors and destructors
506  typed_node_t(template_::handle_t graph_template_handle, handle_type handle, parameters_type parameters) noexcept
507  : node_t(graph_template_handle, handle), params_(::std::move(parameters)) { }
508 
509 public: // constructors and destructors
510  typed_node_t(const typed_node_t<Kind>&) = default; // It's a reference type, so copying is not a problem
511  typed_node_t(typed_node_t<Kind>&&) noexcept = default; // It's a reference type, so copying is not a problem
512 
513  typed_node_t<Kind>& operator=(typed_node_t<Kind> other) noexcept
514  {
515  node_t::operator=(other);
516  params_ = other.params_;
517  }
518 
519 protected: // data members
520  mutable parameters_t<Kind> params_;
521 };
522 
/// Construct a typed_node_t proxy around existing raw graph-template and node
/// handles; the given parameters are stored as the local cache, unverified
template <kind_t Kind>
typed_node_t<Kind> wrap(template_::handle_t graph_handle, handle_t handle, parameters_t<Kind> parameters) noexcept
{
	return typed_node_t<Kind>{ graph_handle, handle, ::std::move(parameters) };
}
528 
529 } // namespace node
530 
531 inline node::parameters_t<node::kind_t::kernel_launch>
532 make_launch_primed_kernel(
533  kernel_t kernel,
534  launch_configuration_t launch_config,
535  const ::std::vector<void*>& argument_pointers)
536 {
537  return { ::std::move(kernel), ::std::move(launch_config), ::std::move(argument_pointers) };
538 }
539 
540 template <typename... KernelParameters>
541 node::parameters_t<node::kind_t::kernel_launch>
542 make_launch_primed_kernel(
543  kernel_t kernel,
544  launch_configuration_t launch_config,
545  const KernelParameters&... kernel_arguments)
546 {
547  return {
548  ::std::move(kernel),
549  ::std::move(launch_config),
550  make_kernel_arg_ptrs(kernel_arguments...)
551  };
552 }
553 
554 } // namespace graph
555 
556 } // namespace cuda
557 
558 #endif // CUDA_VERSION >= 10000
559 
560 #endif //CUDA_API_WRAPPERS_TYPED_NODE_HPP
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
detail_::region_helper< memory::region_t > region_t
A child class of the generic region_t with some managed-memory-specific functionality.
Definition: memory.hpp:1960
void wait(const event_t &event)
Have the calling thread wait - either busy-waiting or blocking - and return only after this event has...
Definition: event.hpp:467
CUevent handle_t
The CUDA driver's raw handle for events.
Definition: types.hpp:217
size_t dimensionality_t
The index or number of dimensions of an entity (as opposed to the extent in any dimension) - typicall...
Definition: types.hpp:85
A (base?) class for exceptions raised by CUDA code; these errors are thrown by essentially all CUDA R...
Definition: error.hpp:271
CUstreamCallback callback_t
The CUDA driver's raw handle for a host-side callback function.
Definition: types.hpp:257
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we&#39;ve failed - which also ensures no string is constructed unle...
Definition: error.hpp:316
CUarray handle_t
Raw CUDA driver handle for arrays (of any dimension)
Definition: array.hpp:34
array_t< T, NumDimensions > wrap(device::id_t device_id, context::handle_t context_handle, handle_t handle, dimensions_t< NumDimensions > dimensions) noexcept
Wrap an existing CUDA array in an array_t instance.
Definition: array.hpp:264
address_t address(const void *device_ptr) noexcept
Definition: types.hpp:682
Graph template node proxy (base-)class base-class node_t and supporting code.