cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
node_builder.hpp
#pragma once
#ifndef CUDA_API_WRAPPERS_NODE_BUILDER_HPP
#define CUDA_API_WRAPPERS_NODE_BUILDER_HPP

#if CUDA_VERSION >= 10000

#include "typed_node.hpp"

namespace cuda {

namespace graph {

namespace node {

namespace detail_ {

inline ::std::logic_error make_unspec_error(const char *node_type, const char *missing_arg_name)
{
    // Yes, returning it, not throwing it. This is an exception builder function
    return ::std::logic_error(
        ::std::string("Attempt to build a CUDA execution graph node of type ") + node_type +
        " without specifying its " + missing_arg_name + " argument");
}

} // namespace detail_

template <kind_t Kind>
class typed_builder_t;

class builder_t
{
public:
    template <kind_t Kind>
    typed_builder_t<Kind> kind() { return typed_builder_t<Kind>{}; }
};
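// Usage sketch: builder_t is just the entry point; kind() dispatches to the
// typed builder for the requested node kind, e.g. (hypothetical variable name):
//
//     auto kernel_node_builder = builder_t{}.kind<kind_t::kernel_launch>();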

// Note: Can't build empty vertices for now

template <>
class typed_builder_t<kind_t::child_graph> {
public:
    static constexpr const kind_t kind = kind_t::child_graph;
    using this_type = typed_builder_t<kind>;
    using built_type = typed_node_t<kind>;
    using traits = cuda::graph::node::detail_::kind_traits<kind>;
    using params_type = traits::parameters_type;

protected:
    params_type params_;

    struct {
        bool template_ { false };
    } was_set; // Yes, this is an ugly alternative to using optionals

    // This wrapper method ensures the builder-ish behavior, i.e. always returning the builder
    // for further work via method invocation.
    template <typename F> this_type& do_(F f) { f(); return *this; }

public:
    params_type& params() noexcept { return params_; }

    this_type& template_(template_t subgraph) {
        return do_([&] {
            params_ = ::std::move(subgraph);
            was_set.template_ = true;
        });
    }

    CAW_MAYBE_UNUSED built_type build_within(const cuda::graph::template_t& graph_template)
    {
        if (not was_set.template_) {
            throw detail_::make_unspec_error("child graph", "child graph template");
        }
        return graph_template.insert.node<kind>(::std::move(params_));
    }
}; // typed_builder_t<kind_t::child_graph>
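// Usage sketch (hypothetical: assumes an existing graph template `graph_template`
// and a previously-constructed sub-graph template `subgraph`):
//
//     auto child = builder_t{}.kind<kind_t::child_graph>()
//         .template_(subgraph)
//         .build_within(graph_template);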

#if CUDA_VERSION >= 11010

template <>
class typed_builder_t<kind_t::record_event> {
public:
    static constexpr const kind_t kind = kind_t::record_event;
    using this_type = typed_builder_t<kind>;
    using built_type = typed_node_t<kind>;
    using traits = cuda::graph::node::detail_::kind_traits<kind>;
    using params_type = traits::parameters_type;

protected:
    params_type params_;

    struct {
        bool event { false };
    } was_set; // Yes, this is an ugly alternative to using optionals

    // This wrapper method ensures the builder-ish behavior, i.e. always returning the builder
    // for further work via method invocation.
    template <typename F> this_type& do_(F f) { f(); return *this; }

public:
    params_type& params() noexcept { return params_; }

    this_type& event(event_t event) {
        return do_([&] {
            params_ = ::std::move(event);
            was_set.event = true;
        });
    }

    CAW_MAYBE_UNUSED built_type build_within(const cuda::graph::template_t& graph_template)
    {
        if (not was_set.event) {
            throw detail_::make_unspec_error("record event", "event");
        }
        return graph_template.insert.node<kind>(::std::move(params_));
    }
}; // typed_builder_t<kind_t::record_event>

template <>
class typed_builder_t<kind_t::wait_on_event> {
public:
    static constexpr const kind_t kind = kind_t::wait_on_event;
    using this_type = typed_builder_t<kind>;
    using built_type = typed_node_t<kind>;
    using traits = cuda::graph::node::detail_::kind_traits<kind>;
    using params_type = traits::parameters_type;

protected:
    params_type params_;

    struct {
        bool event { false };
    } was_set; // Yes, this is an ugly alternative to using optionals

    // This wrapper method ensures the builder-ish behavior, i.e. always returning the builder
    // for further work via method invocation.
    template <typename F> this_type& do_(F f) { f(); return *this; }

public:
    params_type& params() noexcept { return params_; }

    this_type& event(event_t event) {
        return do_([&] {
            params_ = ::std::move(event);
            was_set.event = true;
        });
    }

    CAW_MAYBE_UNUSED built_type build_within(const cuda::graph::template_t& graph_template)
    {
        if (not was_set.event) {
            throw detail_::make_unspec_error("wait on event", "event");
        }
        return graph_template.insert.node<kind>(::std::move(params_));
    }
}; // typed_builder_t<kind_t::wait_on_event>
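// Usage sketch (hypothetical: assumes an event_t `ev`):
//
//     auto rec  = builder_t{}.kind<kind_t::record_event>().event(ev).build_within(graph_template);
//     auto wait = builder_t{}.kind<kind_t::wait_on_event>().event(ev).build_within(graph_template);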

#endif // CUDA_VERSION >= 11010

template <>
class typed_builder_t<kind_t::host_function_call> {
public:
    static constexpr const kind_t kind = kind_t::host_function_call;
    using this_type = typed_builder_t<kind>;
    using built_type = typed_node_t<kind>;
    using traits = cuda::graph::node::detail_::kind_traits<kind>;
    using params_type = traits::parameters_type;

protected:
    params_type params_;

    struct {
        bool function_ptr_set { false };
        bool user_argument_set { false };
    } was_set; // Yes, this is an ugly alternative to using optionals

    // This wrapper method ensures the builder-ish behavior, i.e. always returning the builder
    // for further work via method invocation.
    template <typename F> this_type& do_(F f) { f(); return *this; }

public:
    params_type& params() noexcept { return params_; }

    this_type& function(stream::callback_t host_callback_function)
    {
        return do_([&] {
            params_.function_ptr = host_callback_function;
            was_set.function_ptr_set = true;
        });
    }

    this_type& argument(void* callback_argument)
    {
        return do_([&] {
            params_.user_data = callback_argument;
            was_set.user_argument_set = true;
        });
    }

    CAW_MAYBE_UNUSED built_type build_within(const cuda::graph::template_t& graph_template)
    {
        if (not was_set.function_ptr_set) {
            throw detail_::make_unspec_error("host function call", "host callback function pointer");
        }
        if (not was_set.user_argument_set) {
            throw detail_::make_unspec_error("host function call", "user-specified callback function argument");
        }
        return graph_template.insert.node<kind>(params_);
    }
}; // typed_builder_t<kind_t::host_function_call>
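// Usage sketch (hypothetical callback and state; the callback's signature must
// match stream::callback_t):
//
//     auto node = builder_t{}.kind<kind_t::host_function_call>()
//         .function(my_callback)
//         .argument(&my_callback_state)
//         .build_within(graph_template);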

template <>
class typed_builder_t<kind_t::kernel_launch> {
public:
    static constexpr const kind_t kind = kind_t::kernel_launch;
    using this_type = typed_builder_t<kind>;
    using built_type = typed_node_t<kind>;
    using traits = cuda::graph::node::detail_::kind_traits<kind>;
    using params_type = traits::parameters_type;

protected:
    params_type params_ {
        kernel_t { kernel::wrap(cuda::device::id_t(0), nullptr, nullptr) },
        { 0, 0 },
        { }
    }; // An ugly way of constructing with invalid junk; see `was_set` below. We could
       // have used some kind of optional instead.

    struct {
        bool kernel { false };
        bool launch_config { false };
        bool marshalled_arguments { false };
    } was_set; // Yes, this is an ugly alternative to using optionals; but - have
               // you ever looked at the implementation of optional?...

    // This wrapper method ensures the builder-ish behavior, i.e. always returning the builder
    // for further work via method invocation.
    template <typename F> this_type& do_(F f) { f(); return *this; }

public:
    params_type& params() noexcept { return params_; }

    this_type& kernel(const kernel_t& kernel)
    {
        return do_([&] {
            // we can't just make an assignment to the `kernel` field; we have to reassign
            // the whole structure...
            params_ = { kernel, params_.launch_config, ::std::move(params_.marshalled_arguments) };
            was_set.kernel = true;
        });
    }

    // Note: There is _no_ member for passing an a-priori-compiled kernel
    // function and a device, since that would either mean leaking a primary context ref unit,
    // or actually holding on to one in this class, which doesn't make sense. The graph template
    // can't hold a ref unit...

    this_type& launch_configuration(launch_configuration_t launch_config)
    {
        return do_([&] {
            params_.launch_config = launch_config;
            was_set.launch_config = true;
        });
    }

    this_type& marshalled_arguments(::std::vector<void*> argument_ptrs)
    {
        return do_([&] {
            params_.marshalled_arguments = ::std::move(argument_ptrs);
            was_set.marshalled_arguments = true;
        });
    }

    template <typename... Ts>
    this_type& arguments(Ts&&... args)
    {
        return marshalled_arguments(make_kernel_argument_pointers(::std::forward<Ts>(args)...));
    }

    CAW_MAYBE_UNUSED built_type build_within(const cuda::graph::template_t& graph_template)
    {
        if (not was_set.kernel) {
            throw detail_::make_unspec_error("kernel launch", "kernel");
        }
        if (not was_set.launch_config) {
            throw detail_::make_unspec_error("kernel launch", "launch configuration");
        }
        if (not was_set.marshalled_arguments) {
            throw detail_::make_unspec_error("kernel launch", "launch arguments");
        }
        return graph_template.insert.node<kind>(params_);
    }
}; // typed_builder_t<kind_t::kernel_launch>
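// Usage sketch (hypothetical: assumes a kernel_t `kern`, a launch_configuration_t
// `config`, and kernel arguments `x` and `y`):
//
//     auto launch = builder_t{}.kind<kind_t::kernel_launch>()
//         .kernel(kern)
//         .launch_configuration(config)
//         .arguments(x, y)
//         .build_within(graph_template);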

#if CUDA_VERSION >= 11040

template <>
class typed_builder_t<kind_t::memory_allocation> {
public:
    static constexpr const kind_t kind = kind_t::memory_allocation;
    using this_type = typed_builder_t<kind>;
    using built_type = typed_node_t<kind>;
    using traits = cuda::graph::node::detail_::kind_traits<kind>;
    using params_type = traits::parameters_type;

protected:
    params_type params_;

    struct {
        bool device { false };
        bool size_in_bytes { false };
    } was_set; // Yes, this is an ugly alternative to using optionals

    // This wrapper method ensures the builder-ish behavior, i.e. always returning the builder
    // for further work via method invocation.
    template <typename F> this_type& do_(F f) { f(); return *this; }

public:
    params_type& params() { return params_; }

    CAW_MAYBE_UNUSED built_type build_within(const cuda::graph::template_t& graph_template)
    {
        if (not was_set.device) {
            throw detail_::make_unspec_error("memory allocation", "device");
        }
        if (not was_set.size_in_bytes) {
            throw detail_::make_unspec_error("memory allocation", "allocation size in bytes");
        }
        return graph_template.insert.node<kind>(params_);
    }

    this_type& device(const device_t& device) {
        return do_([&]{ params_.first = device; was_set.device = true; });
    }
    this_type& size(size_t size) {
        return do_([&]{ params_.second = size; was_set.size_in_bytes = true; });
    }
}; // typed_builder_t<kind_t::memory_allocation>
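// Usage sketch (hypothetical: assumes a device_t `dev` and an allocation size
// `num_bytes`):
//
//     auto alloc = builder_t{}.kind<kind_t::memory_allocation>()
//         .device(dev)
//         .size(num_bytes)
//         .build_within(graph_template);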

#endif // CUDA_VERSION >= 11040

template <>
class typed_builder_t<kind_t::memory_copy> {
public:
    static constexpr const kind_t kind = kind_t::memory_copy;
    using this_type = typed_builder_t<kind>;
    using built_type = typed_node_t<kind>;
    using traits = cuda::graph::node::detail_::kind_traits<kind>;
    using params_type = traits::parameters_type;
    using dimensions_type = params_type::dimensions_type;
//  static constexpr const dimensionality_t num_dimensions = traits::num_dimensions;

protected:
    params_type params_;

    // This wrapper method ensures the builder-ish behavior, i.e. always returning the builder
    // for further work via method invocation.
    template <typename F> this_type& do_(F f) { f(); return *this; }

public:
    params_type& params() { return params_; }
//  built_type build();

    CAW_MAYBE_UNUSED built_type build_within(const cuda::graph::template_t& graph_template)
    {
        // TODO: What about the extent?
        return graph_template.insert.node<kind>(params_);
    }

//  this_type& context(endpoint_t endpoint, const context_t& context) noexcept
//  {
//      do_([&] { params_.set_context(endpoint, context); } );
//  }
//
//  this_type& single_context(const context_t& context) noexcept
//  {
//      do_([&] { params_.set_single_context(context); } );
//  }

    // Note: The next, commented-out, variadic method should not be necessary, given
    // the forwarding overload right after it; and yet, if we keep only the forwarding
    // overload, we get errors.
//  template <typename... Ts>
//  this_type& source(const Ts&... args) {
//      return do_([&]{ params_.set_source(args...); });
//  }

    template <typename... Ts>
    this_type& source(Ts&&... args) {
        return do_([&]{ params_.set_source(::std::forward<Ts>(args)...); });
    }
//
//  template <typename... Ts>
//  this_type& destination(const Ts&... args) {
//      return do_([&]{ params_.set_destination(args...); });
//  }

    template <typename... Ts>
    this_type& destination(Ts&&... args) {
        return do_([&]{ params_.set_destination(::std::forward<Ts>(args)...); });
    }

    template <typename... Ts>
    this_type& endpoint(endpoint_t endpoint, Ts&&... args) {
        return do_([&]{ params_.set_endpoint(endpoint, ::std::forward<Ts>(args)...); });
    }

//  this_type& source_untyped(context::handle_t context_handle, void *ptr, dimensions_type dimensions) noexcept
//  {
//      return do_([&] { params_.set_endpoint_untyped(endpoint_t::source, context_handle, ptr, dimensions); } );
//  }
//
//  this_type& destination_untyped(context::handle_t context_handle, void *ptr, dimensions_type dimensions) noexcept
//  {
//      return do_([&] { params_.set_destination_untyped(context_handle, ptr, dimensions); } );
//  }
//
//  this_type& endpoint_untyped(endpoint_t endpoint, context::handle_t context_handle, void *ptr, dimensions_type dimensions) noexcept
//  {
//      return do_([&] { params_.set_endpoint_untyped(endpoint_t::source, context_handle, ptr, dimensions); } );
//  }

    // TODO: We need a proper builder for the copy parameters; otherwise we'll have to implement one here,
    // when it's already half-implemented there... It will need to:
    // 1. Sort out context matters (already done in the copy parameters class, but requires explicit setting
    //    at the moment)
    // 2. Deduce the extent when none is specified
    // 3. Prevent direct manipulation of the parameters (which is currently allowed), so that we can apply
    //    logic such as "has the extent been set?" etc.
    // 4. Set defaults when relevant, e.g. w.r.t. pitches and such
}; // typed_builder_t<kind_t::memory_copy>
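// Usage sketch (hypothetical: the argument forms accepted by source() and
// destination() are whatever the copy-parameters class's setters accept; here,
// source and destination endpoint values `src` and `dst` are assumed):
//
//     auto copy = builder_t{}.kind<kind_t::memory_copy>()
//         .source(src)
//         .destination(dst)
//         .build_within(graph_template);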

template <>
class typed_builder_t<kind_t::memory_set> {
    // Note: Unlike memory_copy, whose underlying parameter type, CUDA_MEMCPY3D_PEER, is also used
    // in non-graph contexts - here, the only builder functionality is for graph vertex construction;
    // so we don't do any forwarding to a rich parameters class or its own builder.
public:
    static constexpr const kind_t kind = kind_t::memory_set;
    using this_type = typed_builder_t<kind>;
    using built_type = typed_node_t<kind>;
    using traits = cuda::graph::node::detail_::kind_traits<kind>;
    using params_type = traits::parameters_type;

protected:
    params_type params_;
    struct {
        bool region { false };
        bool value_and_width { false };
    } was_set;

    // This wrapper method ensures the builder-ish behavior, i.e. always returning the builder
    // for further work via method invocation.
    template <typename F> this_type& do_(F f) { f(); return *this; }

    template <typename T>
    void set_width() {
    }

public:
    const params_type& params() { return params_; }

    this_type& region(memory::region_t region) noexcept
    {
        return do_([&] { params_.region = region; was_set.region = true; });
    }

    template <typename T>
    this_type& value(uint32_t v) noexcept
    {
        static_assert(sizeof(T) <= 4, "Type of value to set is too wide; the maximum size is 4 bytes");
        static_assert(sizeof(T) != 3, "Size of the type to set is not a power of 2");
        static_assert(::std::is_trivially_copy_constructible<T>::value,
            "Only a trivially copy-constructible value can be used for memset'ing");
        return do_([&] {
            params_.width_in_bytes = sizeof(T);
            switch(sizeof(T)) {
            // TODO: Maybe we should use a uint_t<N> template? Maybe use if constexpr with C++17?
            case 1: params_.value = reinterpret_cast<uint8_t&>(v); break;
            case 2: params_.value = reinterpret_cast<uint16_t&>(v); break;
            case 4:
            default: params_.value = reinterpret_cast<uint32_t&>(v); break;
            }
            was_set.value_and_width = true;
        });
    }

    CAW_MAYBE_UNUSED built_type build_within(const cuda::graph::template_t& graph_template)
    {
        if (not was_set.region) {
            throw detail_::make_unspec_error("memory set", "memory region");
        }
        if (not was_set.value_and_width) {
            throw detail_::make_unspec_error("memory set", "value to set");
        }
        return graph_template.insert.node<kind>(params_);
    }
}; // typed_builder_t<kind_t::memory_set>
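// Usage sketch: the template argument to value() determines the width of the set
// value; e.g. with a (hypothetical) region `reg`, this fills it with 16-bit values:
//
//     auto memset_node = builder_t{}.kind<kind_t::memory_set>()
//         .region(reg)
//         .value<uint16_t>(0xABCD)
//         .build_within(graph_template);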

#if CUDA_VERSION >= 11040
template <>
class typed_builder_t<kind_t::memory_free> {
public:
    static constexpr const kind_t kind = kind_t::memory_free;
    using this_type = typed_builder_t<kind>;
    using built_type = typed_node_t<kind>;
    using traits = cuda::graph::node::detail_::kind_traits<kind>;
    using params_type = traits::parameters_type;

protected:
    params_type params_;
    struct {
        bool address { false };
    } was_set;

    // This wrapper method ensures the builder-ish behavior, i.e. always returning the builder
    // for further work via method invocation.
    template <typename F> this_type& do_(F f) { f(); return *this; }

public:
    const params_type& params() { return params_; }

    this_type& region(void* address) noexcept { return do_([&] { params_ = address; was_set.address = true; }); }
    this_type& region(memory::region_t allocated_region) noexcept { return this->region(allocated_region.data()); }

    CAW_MAYBE_UNUSED built_type build_within(const cuda::graph::template_t& graph_template)
    {
        if (not was_set.address) {
            throw detail_::make_unspec_error("memory free", "allocated region starting address");
        }
        return graph_template.insert.node<kind>(params_);
    }
}; // typed_builder_t<kind_t::memory_free>
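// Usage sketch (hypothetical: `allocated_region` is the region produced by a
// memory_allocation node in the same graph):
//
//     auto free_node = builder_t{}.kind<kind_t::memory_free>()
//         .region(allocated_region)
//         .build_within(graph_template);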

#endif // CUDA_VERSION >= 11040

#if CUDA_VERSION >= 11070
template <>
class typed_builder_t<kind_t::memory_barrier> {
public:
    static constexpr const kind_t kind = kind_t::memory_barrier;
    using this_type = typed_builder_t<kind>;
    using built_type = typed_node_t<kind>;
    using traits = cuda::graph::node::detail_::kind_traits<kind>;
    using params_type = traits::parameters_type;

protected:
    params_type params_;
    struct {
        bool context { false };
        bool barrier_scope { false };
    } was_set;

    // This wrapper method ensures the builder-ish behavior, i.e. always returning the builder
    // for further work via method invocation.
    template <typename F> this_type& do_(F f) { f(); return *this; }

public:
    const params_type& params() { return params_; }

    this_type& context(context_t context) noexcept
    {
        return do_([&] {
            params_.first = ::std::move(context);
            was_set.context = true;
        });
    }

    this_type& barrier_scope(memory::barrier_scope_t barrier_scope) noexcept
    {
        return do_([&] { params_.second = barrier_scope; was_set.barrier_scope = true; });
    }

    CAW_MAYBE_UNUSED built_type build_within(const cuda::graph::template_t& graph_template)
    {
        if (not was_set.context) {
            throw detail_::make_unspec_error("memory barrier", "CUDA context");
        }
        if (not was_set.barrier_scope) {
            throw detail_::make_unspec_error("memory barrier", "barrier scope");
        }
        return graph_template.insert.node<kind>(params_);
    }
}; // typed_builder_t<kind_t::memory_barrier>
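// Usage sketch (hypothetical: `ctx` is a context_t and `scope` is a
// memory::barrier_scope_t value):
//
//     auto barrier = builder_t{}.kind<kind_t::memory_barrier>()
//         .context(ctx)
//         .barrier_scope(scope)
//         .build_within(graph_template);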

#endif // CUDA_VERSION >= 11070

} // namespace node

} // namespace graph

} // namespace cuda

#endif // CUDA_VERSION >= 10000

#endif // CUDA_API_WRAPPERS_NODE_BUILDER_HPP