11 #ifndef CUDA_API_WRAPPERS_GRAPH_TEMPLATE_HPP 12 #define CUDA_API_WRAPPERS_GRAPH_TEMPLATE_HPP 14 #if CUDA_VERSION >= 10000 45 using edge_t = ::std::pair<node_t, node_t>;
47 inline ::std::string identify(
const edge_t &edge)
49 return ::std::string(
"edge from " + node::detail_::identify(edge.first)
50 +
" to " + node::detail_::identify(edge.second));
53 template <
typename NodeOrHandle>
54 handle_t as_handle(
const NodeOrHandle& node_or_handle) noexcept
56 return node_or_handle.handle();
59 template <>
inline handle_t as_handle(
const handle_t& handle) noexcept {
return handle; }
61 template <
template <
typename>
class Container,
typename NodeOrHandle>
62 struct as_handles_partial_specialization_helper{
63 static typename ::std::conditional<
64 ::std::is_same<NodeOrHandle, handle_t>::value,
66 ::std::vector<handle_t>
68 as_handles(Container<NodeOrHandle>&& nodes_or_handles)
71 ::std::is_same<typename ::std::remove_const<NodeOrHandle>::type,
node::handle_t>::value or
72 ::std::is_same<typename ::std::remove_const<NodeOrHandle>::type,
node::handle_t>::value,
73 "Unsupported graph node dependency specifier type. Use either cuda::graph::node_t or cuda::graph::node::handle_t");
74 ::std::vector<handle_t> handles;
75 handles.reserve(nodes_or_handles.size());
77 nodes_or_handles.begin(),
78 nodes_or_handles.end(),
79 ::std::back_inserter(handles),
80 as_handle<NodeOrHandle> );
85 template <
template <
typename>
class Container>
86 struct as_handles_partial_specialization_helper<Container,
handle_t> {
87 Container<handle_t> as_handles(Container<handle_t>&& node_handles)
93 template <
template <
typename>
class Container,
typename NodeOrHandle>
94 static typename ::std::conditional<
95 ::std::is_same<NodeOrHandle, handle_t>::value,
97 ::std::vector<handle_t>
99 as_handles(Container<NodeOrHandle>&& nodes_or_handles)
101 return as_handles_partial_specialization_helper<Container, handle_t>::as_handles(
102 ::std::forward<Container<NodeOrHandle>>(nodes_or_handles));
110 namespace template_ {
112 template_t
wrap(
handle_t handle,
bool take_ownership =
false) noexcept;
116 ::std::string identify(
const template_t& template_);
120 span<const node::handle_t> edge_source_handles,
121 span<const node::handle_t> edge_destination_handles)
123 auto num_edges = edge_source_handles.size();
124 assert(edge_source_handles.size() == num_edges &&
"Mismatched sizes of sources and destinations");
126 auto result = cuGraphRemoveDependencies(
127 template_handle,edge_source_handles.data(), edge_destination_handles.data(), num_edges);
133 span<const node_t> edge_sources,
134 span<const node_t> edge_destinations)
136 auto num_edges = edge_sources.size();
137 assert(edge_destinations.size() == num_edges &&
"Mismatched sizes of sources and destinations");
140 auto handles_buffer = ::std::vector<node::handle_t>{num_edges * 2};
142 auto handles_iter = handles_buffer;
143 ::std::transform(edge_sources.begin(), edge_sources.end(), handles_buffer.data(),
144 [](
const node_t &node) {
return node.handle(); });
145 ::std::transform(edge_destinations.begin(), edge_destinations.end(), handles_buffer.data() + num_edges,
146 [](
const node_t &node) {
return node.handle(); });
148 span<const node::handle_t> edge_source_handles { handles_buffer.data(), num_edges };
149 span<const node::handle_t> edge_destination_handles { handles_buffer.data() + num_edges, num_edges };
150 return delete_edges(template_handle, edge_source_handles, edge_destination_handles);
156 span<const node_t> edge_sources,
157 span<const node_t> edge_destinations)
159 auto num_edges = edge_sources.size();
160 assert(edge_destinations.size() == num_edges &&
"Mismatched sizes of sources and destinations");
163 auto handles_buffer = ::std::vector<node::handle_t>{num_edges * 2};
165 auto handles_iter = handles_buffer;
166 ::std::transform(edge_sources.begin(), edge_sources.end(), handles_buffer.data(),
167 [](
const node_t &node) {
return node.handle(); });
168 ::std::transform(edge_destinations.begin(), edge_destinations.end(), handles_buffer.data() + num_edges,
169 [](
const node_t &node) {
return node.handle(); });
172 const node::handle_t* destinations_handles = handles_buffer.data() + num_edges;
173 auto result = cuGraphAddDependencies(
174 template_handle,sources_handles, destinations_handles, edge_sources.size());
180 span<const node::detail_::edge_t> edges)
183 auto handles_buffer = ::std::vector<node::handle_t>{edges.size() * 2};
184 auto sources_iterator = handles_buffer.begin();
185 auto destinations_iterator = handles_buffer.begin() + edges.size();
186 for(
const auto& edge : edges) {
187 *(sources_iterator++) = edge.first.handle();
188 *(destinations_iterator++) = edge.second.handle();
191 const node::handle_t* destinations_handles = handles_buffer.data() + edges.size();
192 auto result = cuGraphRemoveDependencies(
193 template_handle,sources_handles, destinations_handles, edges.size());
200 span<const node::detail_::edge_t> edges)
203 auto handles_buffer = ::std::vector<node::handle_t>{edges.size() * 2};
204 auto sources_iterator = handles_buffer.begin();
205 auto destinations_iterator = handles_buffer.begin() + edges.size();
206 for(
const auto& edge : edges) {
207 *(sources_iterator++) = edge.first.handle();
208 *(destinations_iterator++) = edge.second.handle();
211 const node::handle_t* destinations_handles = handles_buffer.data() + edges.size();
212 auto result = cuGraphAddDependencies(
213 template_handle,sources_handles, destinations_handles, edges.size());
217 template <node::kind_t Kind>
218 status_t invoke_inserter_possibly_with_context(
219 cuda::detail_::bool_constant<false>,
222 CUgraphNode* dependency_handles,
223 size_t num_dependency_handles,
224 typename node::detail_::kind_traits<Kind>::raw_parameters_type&
228 auto raw_params_maybe_ptr = node::detail_::maybe_add_ptr<Kind>(raw_params);
229 return node::detail_::kind_traits<Kind>::inserter(
231 graph_template_handle,
233 num_dependency_handles,
234 raw_params_maybe_ptr);
237 template <node::kind_t Kind>
238 status_t invoke_inserter_possibly_with_context(
239 cuda::detail_::bool_constant<true>,
242 CUgraphNode* dependency_handles,
243 size_t num_dependency_handles,
244 typename node::detail_::kind_traits<Kind>::raw_parameters_type&
248 auto raw_params_maybe_ptr = node::detail_::maybe_add_ptr<Kind>(raw_params);
249 return node::detail_::kind_traits<Kind>::inserter(
251 graph_template_handle,
253 num_dependency_handles,
254 raw_params_maybe_ptr,
258 template <node::kind_t Kind>
262 typename node::detail_::kind_traits<Kind>::raw_parameters_type raw_params)
264 using traits_type =
typename node::detail_::kind_traits<Kind>;
267 const bool context_needed_but_missing =
268 traits_type::inserter_takes_context and context_handle == context::detail_::none;
269 if (context_needed_but_missing) {
270 throw ::std::invalid_argument(
271 "Attempt to insert a CUDA graph template " + ::std::string(traits_type::name)
272 +
" node without specifying an execution context");
276 auto no_dependency_handles =
nullptr;
277 size_t no_dependencies_size = 0;
278 auto status = invoke_inserter_possibly_with_context<Kind>(
279 cuda::detail_::bool_constant<traits_type::inserter_takes_context>{},
281 graph_template_handle,
282 no_dependency_handles,
283 no_dependencies_size,
286 throw_if_error_lazy(status,
"Inserting a " + ::std::string(traits_type::name) +
" node into " 287 + template_::detail_::identify(graph_template_handle));
288 return new_node_handle;
291 template <node::kind_t Kind,
typename... Ts>
292 node::typed_node_t<Kind> build_params_and_insert_node(
295 Ts&&... params_ctor_args)
298 using traits_type =
typename node::detail_::kind_traits<Kind>;
299 using parameters_t =
typename traits_type::parameters_type;
305 parameters_t params { ::std::forward<Ts>(params_ctor_args)... };
306 typename traits_type::raw_parameters_type raw_params = traits_type::marshal(params);
307 auto node_handle = insert_node<Kind>(graph_template_handle, context_handle, raw_params);
308 return node::wrap<Kind>(graph_template_handle, node_handle, ::std::move(params));
311 template <node::kind_t Kind,
typename... Ts>
312 node::typed_node_t<Kind> get_context_handle_build_params_and_insert_node(
313 cuda::detail_::true_type,
315 const context_t& context,
316 Ts&&... params_ctor_args)
318 return build_params_and_insert_node<Kind>(graph_template_handle, context.handle(), ::std::forward<Ts>(params_ctor_args)...);
321 template <node::kind_t Kind,
typename... Ts>
322 node::typed_node_t<Kind> get_context_handle_build_params_and_insert_node(
323 cuda::detail_::false_type,
325 Ts&&... params_ctor_args)
327 auto current_context_handle = context::current::detail_::get_handle();
329 return build_params_and_insert_node<Kind>(
330 graph_template_handle, current_context_handle, ::std::forward<Ts>(params_ctor_args)...);
334 template <node::kind_t Kind,
typename... Ts>
335 node::typed_node_t<Kind> build_params_and_insert_node_wrapper(
336 cuda::detail_::false_type ,
338 Ts&&... params_ctor_args)
340 return build_params_and_insert_node<Kind>(graph_template_handle, context::detail_::none, ::std::forward<Ts>(params_ctor_args)...);
343 template <node::kind_t Kind,
typename T,
typename... Ts>
344 node::typed_node_t<Kind> build_params_and_insert_node_wrapper(
345 cuda::detail_::true_type,
348 Ts&&... params_ctor_args)
350 static constexpr
const bool first_arg_is_a_context =
351 ::std::is_same<typename cuda::detail_::remove_reference_t<T>,
cuda::context_t>::value;
352 return get_context_handle_build_params_and_insert_node<Kind>(
354 cuda::detail_::bool_constant<first_arg_is_a_context>{},
355 graph_template_handle, ::std::forward<T>(first_arg), ::std::forward<Ts>(params_ctor_args)...);
374 using node_ref_type = node_t;
378 using edge_type = ::std::pair<node_ref_type, node_ref_type>;
380 using node_ref_container_type = ::std::vector<node_t>;
384 using edge_container_type = ::std::vector<edge_type>;
389 handle_type handle() const noexcept {
return handle_; }
392 bool is_owning() const noexcept {
return owning_; }
396 #if CUDA_VERSION >= 11030 397 struct dot_printing_options_t {
399 bool use_runtime_types;
416 } external_semaphore;
418 bool kernel_node_attributes;
419 bool node_and_kernel_function_handles;
421 unsigned compose()
const {
423 | (debug_data ? CU_GRAPH_DEBUG_DOT_FLAGS_VERBOSE : 0)
424 | (use_runtime_types ? CU_GRAPH_DEBUG_DOT_FLAGS_RUNTIME_TYPES : 0)
425 | (node_params.launch.kernel ? CU_GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_PARAMS : 0)
426 | (node_params.launch.host_function ? CU_GRAPH_DEBUG_DOT_FLAGS_HOST_NODE_PARAMS : 0)
427 #if CUDA_VERSION >= 11040 428 | (node_params.memory_ops.allocate ? CU_GRAPH_DEBUG_DOT_FLAGS_MEM_ALLOC_NODE_PARAMS : 0)
429 | (node_params.memory_ops.free ? CU_GRAPH_DEBUG_DOT_FLAGS_MEM_FREE_NODE_PARAMS : 0)
430 #endif // CUDA_VERSION >= 11040 431 | (node_params.memory_ops.copy ? CU_GRAPH_DEBUG_DOT_FLAGS_MEMCPY_NODE_PARAMS : 0)
432 | (node_params.memory_ops.set ? CU_GRAPH_DEBUG_DOT_FLAGS_MEMSET_NODE_PARAMS : 0)
433 | (node_params.event ? CU_GRAPH_DEBUG_DOT_FLAGS_EVENT_NODE_PARAMS : 0)
434 | (node_params.external_semaphore.signal ? CU_GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_SIGNAL_NODE_PARAMS : 0)
435 | (node_params.external_semaphore.wait ? CU_GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_WAIT_NODE_PARAMS : 0)
436 | (kernel_node_attributes ? CU_GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_ATTRIBUTES : 0)
437 | (node_and_kernel_function_handles ? CU_GRAPH_DEBUG_DOT_FLAGS_HANDLES : 0)
442 #endif // CUDA_VERSION >= 11030 451 template_t clone()
const 453 handle_type clone_handle;
454 auto status = cuGraphClone(&clone_handle, handle_);
456 return template_t{ clone_handle, do_take_ownership };
459 #if CUDA_VERSION >= 11030 468 void print_dot(
const char* dot_filename, dot_printing_options_t printing_options = {})
const 470 auto status = cuGraphDebugDotPrint(handle_, dot_filename, printing_options.compose());
471 throw_if_error_lazy(status,
"Printing " + template_::detail_::identify(*
this) +
" to file " + dot_filename);
473 #endif // CUDA_VERSION >= 11030 477 size_type num_nodes()
const 480 auto status = cuGraphGetNodes(handle_,
nullptr, &num_nodes_);
481 throw_if_error_lazy(status,
"Obtaining the number of nodes in " + template_::detail_::identify(*
this));
492 node_ref_container_type nodes()
const 494 size_type num_nodes_ { num_nodes() } ;
495 ::std::vector<node::handle_t> node_handles {num_nodes_ };
496 auto status = cuGraphGetNodes(handle_, node_handles.data(), &num_nodes_);
497 throw_if_error_lazy(status,
"Obtaining the set of nodes of " + template_::detail_::identify(*
this));
498 node_ref_container_type node_refs;
499 for (
const auto& node_handle : node_handles) {
500 node_refs.emplace_back(
node::wrap(handle_, node_handle));
509 size_type num_roots()
const 513 auto status = cuGraphGetRootNodes(handle_,
nullptr, &num_roots_);
514 throw_if_error_lazy(status,
"Obtaining the number of root nodes in " + template_::detail_::identify(*
this));
522 node_ref_container_type roots()
const 525 size_type num_roots_ {num_roots() } ;
526 ::std::vector<node::handle_t> root_node_handles {num_roots_ };
527 auto status = cuGraphGetRootNodes(handle_, root_node_handles.data(), &num_roots_);
528 throw_if_error_lazy(status,
"Obtaining the set of root nodes of " + template_::detail_::identify(*
this));
529 node_ref_container_type root_node_refs;
530 for (
const auto& node_handle : root_node_handles) {
531 root_node_refs.emplace_back(
node::wrap(handle_, node_handle));
533 return root_node_refs;
537 size_type num_edges()
const 540 auto status = cuGraphGetEdges(handle_,
nullptr,
nullptr, &num_edges);
541 throw_if_error_lazy(status,
"Obtaining the number of edges in " + template_::detail_::identify(*
this));
545 edge_container_type edges()
const 547 size_type num_edges_ { num_edges() } ;
548 ::std::vector<node::handle_t> from_node_handles { num_edges_ };
549 ::std::vector<node::handle_t> to_node_handles { num_edges_ };
550 auto status = cuGraphGetEdges(handle_, from_node_handles.data(), to_node_handles.data(), &num_edges_);
551 throw_if_error_lazy(status,
"Obtaining the set of edges in " + template_::detail_::identify(*
this));
552 edge_container_type edges;
555 auto from_iter = from_node_handles.cbegin();
556 auto to_iter = from_node_handles.cbegin();
557 for (; from_iter != from_node_handles.cend(); from_iter++, to_iter++) {
558 assert(to_iter != to_node_handles.cend());
559 auto from_node_ref =
node::wrap(handle_, *from_iter);
560 auto to_node_ref =
node::wrap(handle_, *to_iter);
561 edges.emplace_back(from_node_ref, to_node_ref);
579 const template_t& associated_template;
584 insert_t(
const template_t& template_) : associated_template(template_) {}
586 void edge(node_ref_type source, node_ref_type dest)
const 591 } handles { source.handle(), dest.handle() };
592 static constexpr
const size_t remove_just_one = 1;
593 auto status = cuGraphAddDependencies(
594 handle(), &handles.source, &handles.dest, remove_just_one);
596 throw_if_error_lazy(status,
"Inserting " + node::detail_::identify(edge_type{source, dest})
597 +
" into " + template_::detail_::identify(associated_template));
600 void edge(edge_type edge_)
const 602 return edge(edge_.first, edge_.second);
605 void edges(span<const node_ref_type> sources, span<const node_ref_type> destinations)
const 607 if (sources.size() != destinations.size()) {
608 throw ::std::invalid_argument(
609 "Differing number of source nodes and destination nodes (" 610 + ::std::to_string(sources.size()) +
" != " + ::std::to_string(destinations.size())
611 +
" in a request to insert edges into " + template_::detail_::identify(associated_template) );
613 auto status = template_::detail_::insert_edges(handle(), sources, destinations);
615 throw_if_error_lazy(status,
"Destroying " + ::std::to_string(sources.size()) +
" edges in " 616 + template_::detail_::identify(associated_template));
619 void edges(span<const edge_type> edges)
const 621 auto status = template_::detail_::insert_edges(handle(), edges);
624 + template_::detail_::identify(associated_template));
627 template <node::kind_t Kind,
typename T,
typename... Ts>
628 typename node::typed_node_t<Kind> node(
629 T&& arg, Ts&&... node_params_ctor_arguments)
const 634 static constexpr
const bool inserter_takes_context = node::detail_::kind_traits<Kind>::inserter_takes_context;
635 return template_::detail_::build_params_and_insert_node_wrapper<Kind>(
636 cuda::detail_::bool_constant<inserter_takes_context>{}, handle(),
637 ::std::forward<T>(arg), ::std::forward<Ts>(node_params_ctor_arguments)...);
649 const template_t &associated_template;
650 handle_type handle() const noexcept {
return associated_template.handle(); }
653 delete_t(
const template_t &template_) : associated_template(template_) {}
655 void node(node_ref_type node)
const 657 auto status = cuGraphDestroyNode(node.handle());
659 +
" in " + template_::detail_::identify(associated_template));
662 void edge(edge_type
const& edge_)
const 667 } handles { edge_.first.handle(), edge_.second.handle() };
668 static constexpr
const size_t remove_just_one = 1;
669 auto status = cuGraphRemoveDependencies(
670 handle(), &handles.source, &handles.dest, remove_just_one);
673 +
" in " + template_::detail_::identify(associated_template));
676 void edges(span<const node_ref_type> sources, span<const node_ref_type> destinations)
const 678 if (sources.size() != destinations.size()) {
679 throw ::std::invalid_argument(
680 "Differing number of source nodes and destination nodes (" 681 + ::std::to_string(sources.size()) +
" != " + ::std::to_string(destinations.size())
682 +
" in a request to insert edges into " + template_::detail_::identify(associated_template) );
684 auto status = template_::detail_::delete_edges(handle(), sources, destinations);
686 throw_if_error_lazy(status,
"Destroying " + ::std::to_string(sources.size()) +
" edges in " 687 + template_::detail_::identify(associated_template));
690 void edges(span<edge_type> edges)
const 692 auto status = template_::detail_::delete_edges(handle(), edges);
695 + template_::detail_::identify(associated_template));
701 friend template_t
template_::wrap(handle_type handle,
bool take_ownership) noexcept;
704 template_t(handle_type handle,
bool owning) noexcept
705 : handle_(handle), owning_(owning)
709 template_t(
const template_t& other) noexcept =
delete;
710 template_t(template_t&& other) noexcept : template_t(other.handle_, other.owning_)
712 other.owning_ =
false;
715 ~template_t() noexcept(false)
718 auto status = cuGraphDestroy(handle_);
724 template_t& operator=(
const template_t&) =
delete;
725 template_t& operator=(template_t&& other) noexcept
727 ::std::swap(handle_, other.handle_);
728 ::std::swap(owning_, other.owning_);
733 instance_t instantiate(
734 #
if CUDA_VERSION >= 11040
735 bool free_previous_allocations_before_relaunch =
false 737 #
if CUDA_VERSION >= 11700
738 ,
bool use_per_node_priorities =
false 740 #
if CUDA_VERSION >= 12000
741 ,
bool upload_on_instantiation =
false 742 ,
bool make_device_launchable =
false 747 const insert_t insert { *
this };
748 const delete_t delete_ { *
this };
755 namespace template_ {
757 inline template_t
wrap(
handle_t handle,
bool take_ownership) noexcept
759 return { handle, take_ownership };
762 inline template_t create()
764 constexpr
const unsigned flags { 0 };
766 auto status = cuGraphCreate(&handle, flags);
768 return wrap(handle, do_take_ownership);
771 inline ::std::string identify(
const template_t& template_)
773 return "CUDA execution graph template at " + cuda::detail_::ptr_as_hex(template_.handle());
776 constexpr const ::std::initializer_list<node_t> no_dependencies {};
778 template <node::kind_t Kind,
template <
typename>
class Container,
typename NodeOrHandle,
typename... NodeParametersCtorParams>
779 node::typed_node_t<Kind> insert_node(
780 const template_t& graph,
781 Container<NodeOrHandle> dependencies,
782 NodeParametersCtorParams... node_parameters_ctor_params)
784 using traits_type =
typename node::detail_::kind_traits<Kind>;
785 node::parameters_t<Kind> params { ::std::forward<NodeParametersCtorParams>(node_parameters_ctor_params)... };
786 auto raw_params = traits_type::marshal(params);
787 auto untyped_node = template_::detail_::insert_node(graph.handle(), raw_params, dependencies);
788 return node::wrap<Kind>(untyped_node.containing_graph(), untyped_node.handle(), params);
795 inline template_t create()
797 return template_::create();
815 inline optional<node_t> find_in_clone(node_t node,
const template_t& cloned_graph)
820 auto status = cuGraphNodeFindInClone(&search_result, node.handle(), cloned_graph.handle());
821 if (status == cuda::status::invalid_value and search_result !=
nullptr) {
824 throw_if_error_lazy(status,
"Searching for a copy of " + node::detail_::identify(node) +
" in " + template_::detail_::identify(cloned_graph));
825 return node::wrap(cloned_graph.handle(), search_result);
832 #endif // CUDA_VERSION >= 10000 834 #endif // CUDA_API_WRAPPERS_GRAPH_TEMPLATE_HPP Wrapper class for a CUDA context.
Definition: context.hpp:244
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
void wait(const event_t &event)
Have the calling thread wait - either busy-waiting or blocking - and return only after this event has...
Definition: event.hpp:467
void free(void *ptr)
Free a region of device-side memory (regardless of how it was allocated)
Definition: memory.hpp:130
void launch(Kernel &&kernel, launch_configuration_t launch_configuration, KernelParameters &&... parameters)
Variant of enqueue_launch for use with the default stream in the current context. ...
Definition: kernel_launch.hpp:394
::std::size_t size_t
A size type for use throughout the wrappers library (except when specific API functions limit the siz...
Definition: types.hpp:81
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we've failed - which also ensures no string is constructed unle...
Definition: error.hpp:316
CUarray handle_t
Raw CUDA driver handle for arrays (of any dimension)
Definition: array.hpp:34
array_t< T, NumDimensions > wrap(device::id_t device_id, context::handle_t context_handle, handle_t handle, dimensions_t< NumDimensions > dimensions) noexcept
Wrap an existing CUDA array in an array_t instance.
Definition: array.hpp:264
Facilities for exception-based handling of Runtime and Driver API errors, including a basic exception...
Graph template node proxy (base-)class base-class node_t and supporting code.
Fundamental CUDA-related type definitions.
CUresult status_t
Indicates either the result (success or error index) of a CUDA Runtime or Driver API call...
Definition: types.hpp:77