7 #ifndef CUDA_API_WRAPPERS_NODE_BUILDER_HPP 8 #define CUDA_API_WRAPPERS_NODE_BUILDER_HPP 10 #if CUDA_VERSION >= 10000 22 inline ::std::logic_error make_unspec_error(
const char *node_type,
const char *missing_arg_name)
25 return ::std::logic_error(
26 ::std::string(
"Attempt to build a CUDA execution graph node of type ") + node_type +
27 " without specifying its " + missing_arg_name +
" argument");
32 template <kind_t Kind>
33 class typed_builder_t;
38 template <kind_t Kind>
39 typed_builder_t<Kind> kind() {
return typed_builder_t<Kind>{}; }
45 class typed_builder_t<kind_t::child_graph> {
47 static constexpr
const kind_t kind = kind_t::child_graph;
48 using this_type = typed_builder_t<kind>;
49 using built_type = typed_node_t<kind>;
50 using traits = cuda::graph::node::detail_::kind_traits<kind>;
51 using params_type = traits::parameters_type;
57 bool template_ {
false };
62 template <
typename F> this_type& do_(F f) { f();
return *
this; }
65 params_type& params() noexcept {
return params_; }
67 this_type& template_(template_t subgraph) {
69 params_ = ::std::move(subgraph);
70 was_set.template_ =
true;
74 CAW_MAYBE_UNUSED built_type build_within(
const cuda::graph::template_t& graph_template)
76 if (not was_set.template_) {
77 throw detail_::make_unspec_error(
"child graph",
"child graph template");
79 return graph_template.insert.node<kind>(std::move(params_));
83 #if CUDA_VERSION >= 11010 86 class typed_builder_t<kind_t::record_event> {
88 static constexpr
const kind_t kind = kind_t::record_event;
89 using this_type = typed_builder_t<kind>;
90 using built_type = typed_node_t<kind>;
91 using traits = cuda::graph::node::detail_::kind_traits<kind>;
92 using params_type = traits::parameters_type;
103 template <
typename F> this_type& do_(F f) { f();
return *
this; }
106 params_type& params() noexcept {
return params_; }
108 this_type& event(event_t event) {
110 params_ = ::std::move(event);
111 was_set.event =
true;
115 CAW_MAYBE_UNUSED built_type build_within(
const cuda::graph::template_t& graph_template)
117 if (not was_set.event) {
118 throw detail_::make_unspec_error(
"record event",
"event");
120 return graph_template.insert.node<kind>(std::move(params_));
125 class typed_builder_t<kind_t::wait_on_event> {
127 static constexpr
const kind_t kind = kind_t::wait_on_event;
128 using this_type = typed_builder_t<kind>;
129 using built_type = typed_node_t<kind>;
130 using traits = cuda::graph::node::detail_::kind_traits<kind>;
131 using params_type = traits::parameters_type;
137 bool event {
false };
142 template <
typename F> this_type& do_(F f) { f();
return *
this; }
145 params_type& params() noexcept {
return params_; }
147 this_type& event(event_t event) {
149 params_ = ::std::move(event);
150 was_set.event =
true;
154 CAW_MAYBE_UNUSED built_type build_within(
const cuda::graph::template_t& graph_template)
156 if (not was_set.event) {
157 throw detail_::make_unspec_error(
"wait on event",
"event");
159 return graph_template.insert.node<kind>(std::move(params_));
// Builder for host-function-call graph nodes: configures a node which invokes
// a host-side callback (params_.function_ptr) with one user-chosen argument
// (params_.user_data), then inserts it into a graph template.
//
// NOTE(review): this block is extraction-mangled — stray original line
// numbers are fused into the text, and the class' `template <>` prefix,
// access specifiers, the was_set struct wrapper, the signature of the
// function-pointer setter, plus various braces and `return` statements are
// missing. Restore from the upstream header rather than compiling as-is.
#endif // CUDA_VERSION >= 11010 166 class typed_builder_t<kind_t::host_function_call> {
168 static constexpr
const kind_t kind = kind_t::host_function_call;
169 using this_type = typed_builder_t<kind>;
170 using built_type = typed_node_t<kind>;
171 using traits = cuda::graph::node::detail_::kind_traits<kind>;
172 using params_type = traits::parameters_type;
// Flags recording which of the two mandatory arguments have been provided
// (the enclosing `struct { ... } was_set;` wrapper is not visible here).
178 bool function_ptr_set {
false };
179 bool user_argument_set {
false };
// do_: run the given callable, then return the builder to allow chaining.
184 template <
typename F> this_type& do_(F f) { f();
return *
this; }
187 params_type& params() noexcept {
return params_; }
// Setter for the host callback function pointer; its signature (taking a
// parameter named `host_callback_function`) was lost in the extraction.
192 params_.function_ptr = host_callback_function;
193 was_set.function_ptr_set =
true;
// Set the opaque argument which will be passed to the host callback.
197 this_type argument(
void* callback_argument)
200 params_.user_data = callback_argument;
201 was_set.user_argument_set =
true;
// Verify both mandatory arguments were set, then insert the configured node
// into the given graph template; throws ::std::logic_error otherwise.
205 CAW_MAYBE_UNUSED built_type build_within(
const cuda::graph::template_t& graph_template)
207 if (not was_set.function_ptr_set) {
// NOTE(review): the node-type string below says "kernel_launch", but this is
// the host-function-call builder — looks like a copy-paste slip, so the
// error message misreports the node kind. Same for the second throw below.
208 throw detail_::make_unspec_error(
"kernel_launch",
"host callback function pointer");
210 if (not was_set.user_argument_set) {
211 throw detail_::make_unspec_error(
"kernel_launch",
"user-specified callback function argument");
213 return graph_template.insert.node<kind>(params_);
// Builder for kernel-launch graph nodes: collects a kernel, a launch
// configuration and marshalled argument pointers, then inserts the node.
//
// NOTE(review): extraction-mangled — stray original line numbers are fused
// into the text; the `template <>` prefix, access specifiers, the was_set
// struct wrapper, the contents of the params_ brace-initializer and the
// setters' `return` statements / closing braces are missing.
218 class typed_builder_t<kind_t::kernel_launch> {
220 static constexpr
const kind_t kind = kind_t::kernel_launch;
221 using this_type = typed_builder_t<kind>;
222 using built_type = typed_node_t<kind>;
223 using traits = cuda::graph::node::detail_::kind_traits<kind>;
224 using params_type = traits::parameters_type;
// params_ member; its brace-initializer continues on lines missing from this
// extraction (original lines 228-234) — confirm contents against upstream.
227 params_type params_ {
// Flags recording which of the three mandatory arguments have been provided.
235 bool kernel {
false };
236 bool launch_config {
false };
237 bool marshalled_arguments {
false };
// do_: run the given callable, then return the builder to allow chaining.
243 template <
typename F> this_type& do_(F f) { f();
return *
this; }
246 params_type& params() noexcept {
return params_; }
// Set the kernel to launch (mandatory). Rebuilds params_ wholesale, keeping
// the previously-set launch configuration and moving the marshalled
// arguments across.
248 this_type kernel(
const kernel_t& kernel)
253 params_ = { kernel, params_.launch_config, ::std::move(params_.marshalled_arguments) };
254 was_set.kernel =
true;
// Set the launch configuration (grid/block dimensions etc.; mandatory).
263 this_type launch_configuration(launch_configuration_t launch_config)
266 params_.launch_config = launch_config;
267 was_set.launch_config =
true;
// Set the already-marshalled kernel argument pointers (mandatory; taken by
// value and moved into params_).
271 this_type marshalled_arguments(::std::vector<void*> argument_ptrs)
274 params_.marshalled_arguments = ::std::move(argument_ptrs);
275 was_set.marshalled_arguments =
true;
// Convenience: marshal the given arguments into pointers, then delegate to
// marshalled_arguments().
279 template <
typename... Ts>
280 this_type arguments(Ts&&... args)
282 return marshalled_arguments(make_kernel_argument_pointers(::std::forward<Ts>(args)...));
// Verify all three mandatory arguments were set, then insert the configured
// node into the given graph template; throws ::std::logic_error otherwise.
285 CAW_MAYBE_UNUSED built_type build_within(
const cuda::graph::template_t& graph_template)
287 if (not was_set.kernel) {
288 throw detail_::make_unspec_error(
"kernel_launch",
"kernel");
290 if (not was_set.launch_config) {
291 throw detail_::make_unspec_error(
"kernel_launch",
"launch configuration");
293 if (not was_set.marshalled_arguments) {
294 throw detail_::make_unspec_error(
"kernel_launch",
"launch arguments");
296 return graph_template.insert.node<kind>(params_);
300 #if CUDA_VERSION >= 11040 303 class typed_builder_t<kind_t::memory_allocation> {
305 static constexpr
const kind_t kind = kind_t::memory_allocation;
306 using this_type = typed_builder_t<kind>;
307 using built_type = typed_node_t<kind>;
308 using traits = cuda::graph::node::detail_::kind_traits<kind>;
309 using params_type = traits::parameters_type;
316 bool device {
false };
317 bool size_in_bytes {
false };
320 template <
typename F>
328 params_type& params() {
return params_; }
330 CAW_MAYBE_UNUSED built_type build_within(
const cuda::graph::template_t& graph_template)
332 if (not was_set.device) {
333 throw detail_::make_unspec_error(
"memory allocation",
"device");
335 if (not was_set.size_in_bytes) {
336 throw detail_::make_unspec_error(
"memory allocation",
"allocation size in bytes");
338 return graph_template.insert.node<kind>(params_);
341 this_type& device(
const device_t& device) {
342 return do_([&]{ params_.first = device; was_set.device =
true; });
344 this_type& size(
const size_t size) {
345 return do_([&]{ params_.second = size; was_set.size_in_bytes =
true; });
// Builder for memory-copy graph nodes: the source, destination and (generic)
// endpoint setters forward their arguments to the corresponding setters of
// the copy-parameters structure.
//
// NOTE(review): extraction-mangled, with large gaps — original lines 360-374
// (likely params_ and the do_ body), 384-403 (likely the pre-C++17 variant of
// build_within and the `#endif` matching the `#if __cplusplus` below) and
// 422-448 are absent. Restore from the upstream header rather than compiling
// as-is.
349 #endif // CUDA_VERSION >= 11040 352 class typed_builder_t<kind_t::memory_copy> {
354 static constexpr
const kind_t kind = kind_t::memory_copy;
355 using this_type = typed_builder_t<kind>;
356 using built_type = typed_node_t<kind>;
357 using traits = cuda::graph::node::detail_::kind_traits<kind>;
358 using params_type = traits::parameters_type;
359 using dimensions_type = params_type::dimensions_type;
// do_ helper; its body (run the callable, return the builder) is on lines
// missing from this extraction.
367 template <
typename F>
375 params_type& params() {
return params_; }
// build_within: inserts the node directly — no was_set validation is visible
// for this builder. The #if __cplusplus guard's matching #endif is missing
// from this extraction.
377 #if __cplusplus >= 201703L 380 built_type build_within(
const cuda::graph::template_t& graph_template)
383 return graph_template.insert.node<kind>(params_);
// Forward the given arguments to the copy-parameters' set_source().
404 template <
typename... Ts>
405 this_type& source(Ts&&... args) {
406 return do_([&]{ params_.set_source(std::forward<Ts>(args)...); });
// Forward the given arguments to the copy-parameters' set_destination().
414 template <
typename... Ts>
415 this_type& destination(Ts&&... args) {
416 return do_([&]{ params_.set_destination(std::forward<Ts>(args)...); });
// Generic form: set either endpoint (source or destination) of the copy.
419 template <
typename... Ts>
420 this_type& endpoint(
endpoint_t endpoint, Ts&&... args) {
421 return do_([&]{ params_.set_endpoint(endpoint, ::std::forward<Ts>(args)...); });
// Builder for memory-set graph nodes: configures a target region and a value
// (1, 2 or 4 bytes wide) with which to fill it.
//
// NOTE(review): extraction-mangled — stray original line numbers are fused
// into the text; the `template <>` prefix, access specifiers, the was_set
// struct wrapper, the region setter's signature (before line "480" below)
// and the `switch` header of value<T>() (before line "493") are missing.
449 class typed_builder_t<kind_t::memory_set> {
454 static constexpr
const kind_t kind = kind_t::memory_set;
455 using this_type = typed_builder_t<kind>;
456 using built_type = typed_node_t<kind>;
457 using traits = cuda::graph::node::detail_::kind_traits<kind>;
458 using params_type = traits::parameters_type;
// Flags recording which of the two mandatory arguments have been provided.
463 bool region {
false };
464 bool value_and_width {
false };
// do_: run the given callable, then return the builder to allow chaining.
469 template <
typename F> this_type& do_(F f) { f();
return *
this; }
471 template <
typename T>
476 const params_type& params() {
return params_; }
// Body of the region setter; its signature was lost in the extraction.
480 return do_([&] { params_.region = region; was_set.region =
true;});
// Set the fill value; T determines the write width (1, 2 or 4 bytes).
483 template <
typename T>
484 this_type value(uint32_t v) noexcept
486 static_assert(
sizeof(T) <= 4,
"Type of value to set is too wide; maximum size is 4");
487 static_assert(
sizeof(T) != 3,
"Size of type to set is not a power of 2");
488 static_assert(std::is_trivially_copy_constructible<T>::value,
"Only a trivially-constructible value can be used for memset'ing");
490 params_.width_in_bytes =
sizeof(T);
// Dispatch on the width (the `switch` header is missing from this
// extraction). NOTE(review): reinterpret_cast<uint8_t&>(v) reads the first
// byte of v's object representation — that is the low-order byte only on a
// little-endian host; presumably intentional, but confirm portability.
493 case 1: params_.value =
reinterpret_cast<uint8_t&
>(v);
break;
494 case 2: params_.value =
reinterpret_cast<uint16_t&
>(v);
break;
496 default: params_.value =
reinterpret_cast<uint32_t&
>(v);
break;
498 was_set.value_and_width =
true;
// Verify both mandatory arguments were set, then insert the configured node
// into the given graph template; throws ::std::logic_error otherwise.
502 CAW_MAYBE_UNUSED built_type build_within(
const cuda::graph::template_t& graph_template)
504 if (not was_set.region) {
505 throw detail_::make_unspec_error(
"memory set",
"memory region");
507 if (not was_set.value_and_width) {
508 throw detail_::make_unspec_error(
"memory set",
"value to set");
510 return graph_template.insert.node<kind>(params_);
514 #if CUDA_VERSION >= 11040 516 class typed_builder_t<kind_t::memory_free> {
518 static constexpr
const kind_t kind = kind_t::memory_free;
519 using this_type = typed_builder_t<kind>;
520 using built_type = typed_node_t<kind>;
521 using traits = cuda::graph::node::detail_::kind_traits<kind>;
522 using params_type = traits::parameters_type;
532 template <
typename F> this_type& do_(F f) { f();
return *
this; }
535 const params_type& params() {
return params_; }
537 this_type region(
void*
address) noexcept {
return do_([&] { params_ =
address; was_set.address =
true;}); }
538 this_type region(
memory::region_t allocated_region) noexcept {
return this->region(allocated_region.data()); }
540 CAW_MAYBE_UNUSED built_type build_within(
const cuda::graph::template_t& graph_template)
542 if (not was_set.address) {
543 throw detail_::make_unspec_error(
"memory free",
"allocated region starting address");
545 return graph_template.insert.node<kind>(params_);
549 #endif // CUDA_VERSION >= 11040 551 #if CUDA_VERSION >= 11070 553 class typed_builder_t<kind_t::memory_barrier> {
555 static constexpr
const kind_t kind = kind_t::memory_barrier;
556 using this_type = typed_builder_t<kind>;
557 using built_type = typed_node_t<kind>;
558 using traits = cuda::graph::node::detail_::kind_traits<kind>;
559 using params_type = traits::parameters_type;
564 bool context {
false };
565 bool barrier_socpe {
false };
570 template <
typename F> this_type& do_(F f) { f();
return *
this; }
573 const params_type& params() {
return params_; }
575 this_type context(context_t context) noexcept
578 params_.first = ::std::move(context);
579 was_set.context =
true;});
582 this_type context(memory::barrier_scope_t barrier_socpe) noexcept
584 return do_([&] { params_.second = barrier_socpe; was_set.barrier_socpe =
true;});
587 CAW_MAYBE_UNUSED built_type build_within(
const cuda::graph::template_t& graph_template)
589 if (not was_set.context) {
590 throw detail_::make_unspec_error(
"memory barrier",
"CUDA context");
592 if (not was_set.barrier_socpe) {
593 throw detail_::make_unspec_error(
"memory barrier",
"barrier scope");
595 return graph_template.insert.node<kind>(params_);
599 #endif // CUDA_VERSION >= 11070 607 #endif // CUDA_VERSION >= 10000 609 #endif //CUDA_API_WRAPPERS_NODE_BUILDER_HPP endpoint_t
Type for choosing between endpoints of copy operations.
Definition: copy_parameters.hpp:19
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
detail_::region_helper< memory::region_t > region_t
A child class of the generic region_t with some managed-memory-specific functionality.
Definition: memory.hpp:1960
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:850
CUstreamCallback callback_t
The CUDA driver's raw handle for a host-side callback function.
Definition: types.hpp:257
array_t< T, NumDimensions > wrap(device::id_t device_id, context::handle_t context_handle, handle_t handle, dimensions_t< NumDimensions > dimensions) noexcept
Wrap an existing CUDA array in an array_t instance.
Definition: array.hpp:264
address_t address(const void *device_ptr) noexcept
Definition: types.hpp:682