cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
template.hpp
Go to the documentation of this file.
1 
10 #pragma once
11 #ifndef CUDA_API_WRAPPERS_GRAPH_TEMPLATE_HPP
12 #define CUDA_API_WRAPPERS_GRAPH_TEMPLATE_HPP
13 
14 #if CUDA_VERSION >= 10000
15 
17 #include <cuda/api/graph/node.hpp>
18 #include <cuda/api/types.hpp>
19 #include <cuda/api/error.hpp>
21 
22 #include <vector>
23 #include <cassert>
24 #include <algorithm>
25 
26 namespace cuda {
27 
29 class device_t;
30 class stream_t;
32 
33 namespace graph {
34 
36 class template_t;
37 class instance_t;
39 
40 namespace node {
41 
42 namespace detail_ {
43 
44 // I'm not so sure about this...
45 using edge_t = ::std::pair<node_t, node_t>;
46 
47 inline ::std::string identify(const edge_t &edge)
48 {
49  return ::std::string("edge from " + node::detail_::identify(edge.first)
50  + " to " + node::detail_::identify(edge.second));
51 }
52 
53 template <typename NodeOrHandle>
54 handle_t as_handle(const NodeOrHandle& node_or_handle) noexcept
55 {
56  return node_or_handle.handle();
57 }
58 
59 template <> inline handle_t as_handle(const handle_t& handle) noexcept { return handle; }
60 
61 template <template <typename> class Container, typename NodeOrHandle>
62 struct as_handles_partial_specialization_helper{
63  static typename ::std::conditional<
64  ::std::is_same<NodeOrHandle, handle_t>::value,
65  span<handle_t>,
66  ::std::vector<handle_t>
67  >::type
68  as_handles(Container<NodeOrHandle>&& nodes_or_handles)
69  {
70  static_assert(
71  ::std::is_same<typename ::std::remove_const<NodeOrHandle>::type, node::handle_t>::value or
72  ::std::is_same<typename ::std::remove_const<NodeOrHandle>::type, node::handle_t>::value,
73  "Unsupported graph node dependency specifier type. Use either cuda::graph::node_t or cuda::graph::node::handle_t");
74  ::std::vector<handle_t> handles;
75  handles.reserve(nodes_or_handles.size());
76  ::std::transform(
77  nodes_or_handles.begin(),
78  nodes_or_handles.end(),
79  ::std::back_inserter(handles),
80  as_handle<NodeOrHandle> );
81  return handles;
82  }
83 };
84 
85 template <template <typename> class Container>
86 struct as_handles_partial_specialization_helper<Container, handle_t> {
87  Container<handle_t> as_handles(Container<handle_t>&& node_handles)
88  {
89  return node_handles;
90  }
91 };
92 
93 template <template <typename> class Container, typename NodeOrHandle>
94 static typename ::std::conditional<
95  ::std::is_same<NodeOrHandle, handle_t>::value,
96  span<handle_t>,
97  ::std::vector<handle_t>
98 >::type
99 as_handles(Container<NodeOrHandle>&& nodes_or_handles)
100 {
101  return as_handles_partial_specialization_helper<Container, handle_t>::as_handles(
102  ::std::forward<Container<NodeOrHandle>>(nodes_or_handles));
103 }
104 
105 } // namespace detail_
106 
107 } // namespace node
108 
109 namespace template_ {
110 
111 template_t wrap(handle_t handle, bool take_ownership = false) noexcept;
112 
113 namespace detail_ {
114 
115 ::std::string identify(const template_t& template_);
116 
117 #if CUDA_VERSION >= 13010
118 inline id_t get_id(handle_t handle)
119 {
120  id_t id;
121  auto status = cuGraphGetId(handle, &id);
122  throw_if_error_lazy(status, "Getting the local (DOT-printing) ID of " + identify(handle));
123  return id;
124 }
125 #endif // CUDA_VERSION >= 13010
126 
127 inline CUresult delete_edges(
128  handle_t template_handle,
129  node::handle_t const* source_handles,
130  node::handle_t const* destination_handles,
131  size_t num_edges)
132 {
133 #if CUDA_VERSION >= 13000
134  static constexpr const auto no_edge_data = nullptr;
135  return cuGraphRemoveDependencies(template_handle, source_handles, destination_handles, no_edge_data, num_edges);
136 #else
137  return cuGraphRemoveDependencies(template_handle, source_handles, destination_handles, num_edges);
138 #endif
139 }
140 
141 inline status_t delete_edges(
142  template_::handle_t template_handle,
143  span<const node::handle_t> edge_source_handles,
144  span<const node::handle_t> edge_destination_handles)
145 {
146  auto num_edges = edge_source_handles.size();
147  assert(edge_source_handles.size() == num_edges && "Mismatched sizes of sources and destinations");
148  return delete_edges(
149  template_handle,edge_source_handles.data(), edge_destination_handles.data(), num_edges);
150 }
151 
152 inline status_t delete_edges(
153  template_::handle_t template_handle,
154  span<const node_t> edge_sources,
155  span<const node_t> edge_destinations)
156 {
157  auto num_edges = edge_sources.size();
158  assert(edge_destinations.size() == num_edges && "Mismatched sizes of sources and destinations");
159 
160  // TODO: With C++14, consider make_unique here and no container
161  auto handles_buffer = ::std::vector<node::handle_t>{num_edges * 2};
162  {
163  auto handles_iter = handles_buffer;
164  ::std::transform(edge_sources.begin(), edge_sources.end(), handles_buffer.data(),
165  [](const node_t &node) { return node.handle(); });
166  ::std::transform(edge_destinations.begin(), edge_destinations.end(), handles_buffer.data() + num_edges,
167  [](const node_t &node) { return node.handle(); });
168  }
169  span<const node::handle_t> edge_source_handles { handles_buffer.data(), num_edges };
170  span<const node::handle_t> edge_destination_handles { handles_buffer.data() + num_edges, num_edges };
171  return delete_edges(template_handle, edge_source_handles, edge_destination_handles);
172 }
173 
174 inline status_t insert_edges(
175  handle_t template_handle,
176  node::handle_t const * source_handles,
177  node::handle_t const * destination_handles,
178  size_t num_edges)
179 {
180 #if CUDA_VERSION >= 13000
181  static constexpr const auto no_edge_data = nullptr;
182  return cuGraphAddDependencies(template_handle, source_handles, destination_handles, no_edge_data, num_edges);
183 #else
184  return cuGraphAddDependencies(template_handle, source_handles, destination_handles, num_edges);
185 #endif
186 }
187 
188 // Note: duplication of code with delete_edges
189 inline status_t insert_edges(
190  template_::handle_t template_handle,
191  span<const node_t> edge_sources,
192  span<const node_t> edge_destinations)
193 {
194  auto num_edges = edge_sources.size();
195  assert(edge_destinations.size() == num_edges && "Mismatched sizes of sources and destinations");
196 
197  // TODO: With C++14, consider make_unique here and no container
198  auto handles_buffer = ::std::vector<node::handle_t>{num_edges * 2};
199  {
200  auto handles_iter = handles_buffer;
201  ::std::transform(edge_sources.begin(), edge_sources.end(), handles_buffer.data(),
202  [](const node_t &node) { return node.handle(); });
203  ::std::transform(edge_destinations.begin(), edge_destinations.end(), handles_buffer.data() + num_edges,
204  [](const node_t &node) { return node.handle(); });
205  }
206  const node::handle_t* sources_handles = handles_buffer.data();
207  const node::handle_t* destinations_handles = handles_buffer.data() + num_edges;
208  auto result = insert_edges(
209  template_handle,sources_handles, destinations_handles, edge_sources.size());
210  return result;
211 }
212 
213 inline status_t delete_edges(
214  template_::handle_t template_handle,
215  span<const node::detail_::edge_t> edges)
216 {
217  // TODO: With C++14, consider make_unique here
218  auto handles_buffer = ::std::vector<node::handle_t>{edges.size() * 2};
219  auto sources_iterator = handles_buffer.begin();
220  auto destinations_iterator = handles_buffer.begin() + edges.size();
221  for(const auto& edge : edges) {
222  *(sources_iterator++) = edge.first.handle();
223  *(destinations_iterator++) = edge.second.handle();
224  }
225  const node::handle_t* sources_handles = handles_buffer.data();
226  const node::handle_t* destinations_handles = handles_buffer.data() + edges.size();
227  auto result = delete_edges(
228  template_handle,sources_handles, destinations_handles, edges.size());
229  return result;
230 }
231 
232 // Note: duplication of code with delete_edges
233 inline status_t insert_edges(
234  template_::handle_t template_handle,
235  span<const node::detail_::edge_t> edges)
236 {
237  // TODO: With C++14, consider make_unique here
238  auto handles_buffer = ::std::vector<node::handle_t>{edges.size() * 2};
239  auto sources_iterator = handles_buffer.begin();
240  auto destinations_iterator = handles_buffer.begin() + edges.size();
241  for(const auto& edge : edges) {
242  *(sources_iterator++) = edge.first.handle();
243  *(destinations_iterator++) = edge.second.handle();
244  }
245  const node::handle_t* sources_handles = handles_buffer.data();
246  const node::handle_t* destinations_handles = handles_buffer.data() + edges.size();
247  auto result = insert_edges(
248  template_handle,sources_handles, destinations_handles, edges.size());
249  return result;
250 }
251 
252 template <node::kind_t Kind>
253 status_t invoke_inserter_possibly_with_context(
254  cuda::detail_::bool_constant<false>,
255  node::handle_t& new_node_handle,
256  template_::handle_t graph_template_handle,
257  CUgraphNode* dependency_handles,
258  size_t num_dependency_handles,
259  typename node::detail_::kind_traits<Kind>::raw_parameters_type&
260  raw_params,
262 {
263  auto raw_params_maybe_ptr = node::detail_::maybe_add_ptr<Kind>(raw_params);
264  return node::detail_::kind_traits<Kind>::inserter(
265  &new_node_handle,
266  graph_template_handle,
267  dependency_handles,
268  num_dependency_handles,
269  raw_params_maybe_ptr);
270 }
271 
272 template <node::kind_t Kind>
273 status_t invoke_inserter_possibly_with_context(
274  cuda::detail_::bool_constant<true>,
275  node::handle_t& new_node_handle,
276  template_::handle_t graph_template_handle,
277  CUgraphNode* dependency_handles,
278  size_t num_dependency_handles,
279  typename node::detail_::kind_traits<Kind>::raw_parameters_type&
280  raw_params,
281  context::handle_t context_handle)
282 {
283  auto raw_params_maybe_ptr = node::detail_::maybe_add_ptr<Kind>(raw_params);
284  return node::detail_::kind_traits<Kind>::inserter(
285  &new_node_handle,
286  graph_template_handle,
287  dependency_handles,
288  num_dependency_handles,
289  raw_params_maybe_ptr,
290  context_handle);
291 }
292 
293 template <node::kind_t Kind>
294 node::handle_t insert_node(
295  template_::handle_t graph_template_handle,
296  context::handle_t context_handle,
297  typename node::detail_::kind_traits<Kind>::raw_parameters_type raw_params)
298 {
299  using traits_type = typename node::detail_::kind_traits<Kind>;
300 
301  // Defining a useless bool here to circumvent gratuitous warnings from MSVC
302  const bool context_needed_but_missing =
303  traits_type::inserter_takes_context and context_handle == context::detail_::none;
304  if (context_needed_but_missing) {
305  throw ::std::invalid_argument(
306  "Attempt to insert a CUDA graph template " + ::std::string(traits_type::name)
307  + " node without specifying an execution context");
308  }
309 
310  node::handle_t new_node_handle;
311  auto no_dependency_handles = nullptr;
312  size_t no_dependencies_size = 0;
313  auto status = invoke_inserter_possibly_with_context<Kind>(
314  cuda::detail_::bool_constant<traits_type::inserter_takes_context>{},
315  new_node_handle,
316  graph_template_handle,
317  no_dependency_handles,
318  no_dependencies_size,
319  raw_params,
320  context_handle);
321  throw_if_error_lazy(status, "Inserting a " + ::std::string(traits_type::name) + " node into "
322  + template_::detail_::identify(graph_template_handle));
323  return new_node_handle;
324 }
325 
326 template <node::kind_t Kind, typename... Ts>
327 node::typed_node_t<Kind> build_params_and_insert_node(
328  template_::handle_t graph_template_handle,
329  context::handle_t context_handle,
330  Ts&&... params_ctor_args)
331 {
332 
333  using traits_type = typename node::detail_::kind_traits<Kind>;
334  using parameters_t = typename traits_type::parameters_type;
335 
336  // TODO: Why won't this work?
337  // static_assert(::std::is_constructible<parameters_t, Ts...>::value,
338  // "Node parameters are not constructible from the arguments passed");
339 
340  parameters_t params { ::std::forward<Ts>(params_ctor_args)... };
341  typename traits_type::raw_parameters_type raw_params = traits_type::marshal(params);
342  auto node_handle = insert_node<Kind>(graph_template_handle, context_handle, raw_params);
343  return node::wrap<Kind>(graph_template_handle, node_handle, ::std::move(params));
344 }
345 
346 template <node::kind_t Kind, typename... Ts>
347 node::typed_node_t<Kind> get_context_handle_build_params_and_insert_node(
348  cuda::detail_::true_type, // we've been given a context
349  template_::handle_t graph_template_handle,
350  const context_t& context,
351  Ts&&... params_ctor_args)
352 {
353  return build_params_and_insert_node<Kind>(graph_template_handle, context.handle(), ::std::forward<Ts>(params_ctor_args)...);
354 }
355 
356 template <node::kind_t Kind, typename... Ts>
357 node::typed_node_t<Kind> get_context_handle_build_params_and_insert_node(
358  cuda::detail_::false_type, // We've not been given a context
359  template_::handle_t graph_template_handle,
360  Ts&&... params_ctor_args)
361 {
362  auto current_context_handle = context::current::detail_::get_handle();
363  // TODO: Consider handling the case of no current context, e.g. by using the default device' primary context
364  return build_params_and_insert_node<Kind>(
365  graph_template_handle, current_context_handle, ::std::forward<Ts>(params_ctor_args)...);
366 }
367 
368 
369 template <node::kind_t Kind, typename... Ts>
370 node::typed_node_t<Kind> build_params_and_insert_node_wrapper(
371  cuda::detail_::false_type , // inserter doesn't takes a context
372  template_::handle_t graph_template_handle,
373  Ts&&... params_ctor_args)
374 {
375  return build_params_and_insert_node<Kind>(graph_template_handle, context::detail_::none, ::std::forward<Ts>(params_ctor_args)...);
376 }
377 
378 template <node::kind_t Kind, typename T, typename... Ts>
379 node::typed_node_t<Kind> build_params_and_insert_node_wrapper(
380  cuda::detail_::true_type, // inserter takes a context
381  template_::handle_t graph_template_handle,
382  T&& first_arg, // still don't know of T is a context or something else
383  Ts&&... params_ctor_args)
384 {
385  static constexpr const bool first_arg_is_a_context =
386  ::std::is_same<typename cuda::detail_::remove_reference_t<T>, cuda::context_t>::value;
387  return get_context_handle_build_params_and_insert_node<Kind>(
388 //return blah<Kind>(
389  cuda::detail_::bool_constant<first_arg_is_a_context>{},
390  graph_template_handle, ::std::forward<T>(first_arg), ::std::forward<Ts>(params_ctor_args)...);
391 }
392 
393 inline status_t get_edges(
394  handle_t template_handle,
395  node::handle_t * __restrict__ source_handles,
396  node::handle_t * __restrict__ destination_handles,
397  ::std::size_t * __restrict__ num_nodes)
398 {
399 #if CUDA_VERSION >= 13000
400  static constexpr auto no_edge_data { nullptr };
401  return cuGraphGetEdges(template_handle, source_handles, destination_handles, no_edge_data, num_nodes);
402 #else
403  return cuGraphGetEdges(template_handle, source_handles, destination_handles, num_nodes);
404 #endif
405 }
406 
407 } // namespace detail_
408 
409 } // namespace template_
410 
418 class template_t {
419 public: // type definitions
420  using size_type = size_t;
421  using handle_type = template_::handle_t;
422 
423  using node_ref_type = node_t;
424 
427  using edge_type = ::std::pair<node_ref_type, node_ref_type>;
428 
429  using node_ref_container_type = ::std::vector<node_t>;
430 
433  using edge_container_type = ::std::vector<edge_type>;
434 
435 public: // getters
436 
438  handle_type handle() const noexcept { return handle_; }
439 
441  bool is_owning() const noexcept { return owning_; }
442 
443 public: // non-mutators
444 
445 #if CUDA_VERSION >= 11030
446  struct dot_printing_options_t {
447  bool debug_data;
448  bool use_runtime_types;
449  // TODO: Consider having a map/array mapping of kind_t's to bools
450  struct {
451  struct {
452  bool kernel;
453  bool host_function;
454  } launch;
455  struct {
456  bool allocate;
457  bool free;
458  bool copy;
459  bool set;
460  } memory_ops;
461  bool event;
462  struct {
463  bool signal;
464  bool wait;
465  } external_semaphore;
466  } node_params;
467  bool kernel_node_attributes;
468  bool node_and_kernel_function_handles;
469 
470  unsigned compose() const {
471  return 0u
472  | (debug_data ? CU_GRAPH_DEBUG_DOT_FLAGS_VERBOSE : 0)
473  | (use_runtime_types ? CU_GRAPH_DEBUG_DOT_FLAGS_RUNTIME_TYPES : 0)
474  | (node_params.launch.kernel ? CU_GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_PARAMS : 0)
475  | (node_params.launch.host_function ? CU_GRAPH_DEBUG_DOT_FLAGS_HOST_NODE_PARAMS : 0)
476 #if CUDA_VERSION >= 11040
477  | (node_params.memory_ops.allocate ? CU_GRAPH_DEBUG_DOT_FLAGS_MEM_ALLOC_NODE_PARAMS : 0)
478  | (node_params.memory_ops.free ? CU_GRAPH_DEBUG_DOT_FLAGS_MEM_FREE_NODE_PARAMS : 0)
479 #endif // CUDA_VERSION >= 11040
480  | (node_params.memory_ops.copy ? CU_GRAPH_DEBUG_DOT_FLAGS_MEMCPY_NODE_PARAMS : 0)
481  | (node_params.memory_ops.set ? CU_GRAPH_DEBUG_DOT_FLAGS_MEMSET_NODE_PARAMS : 0)
482  | (node_params.event ? CU_GRAPH_DEBUG_DOT_FLAGS_EVENT_NODE_PARAMS : 0)
483  | (node_params.external_semaphore.signal ? CU_GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_SIGNAL_NODE_PARAMS : 0)
484  | (node_params.external_semaphore.wait ? CU_GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_WAIT_NODE_PARAMS : 0)
485  | (kernel_node_attributes ? CU_GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_ATTRIBUTES : 0)
486  | (node_and_kernel_function_handles ? CU_GRAPH_DEBUG_DOT_FLAGS_HANDLES : 0)
487  ;
488  }
489  // TODO: Consider initializing all fields to false in the default constructor
490  };
491 #endif // CUDA_VERSION >= 11030
492 
500  template_t clone() const
501  {
502  handle_type clone_handle;
503  auto status = cuGraphClone(&clone_handle, handle_);
504  throw_if_error_lazy(status, "Cloning " + template_::detail_::identify(*this));
505  return template_t{ clone_handle, do_take_ownership };
506  }
507 
508 #if CUDA_VERSION >= 11030
509 
517  void print_dot(const char* dot_filename, dot_printing_options_t printing_options = {}) const
518  {
519  auto status = cuGraphDebugDotPrint(handle_, dot_filename, printing_options.compose());
520  throw_if_error_lazy(status, "Printing " + template_::detail_::identify(*this) + " to file " + dot_filename);
521  }
522 #endif // CUDA_VERSION >= 11030
523 
526  size_type num_nodes() const
527  {
528  ::std::size_t num_nodes_;
529  auto status = cuGraphGetNodes(handle_, nullptr, &num_nodes_);
530  throw_if_error_lazy(status, "Obtaining the number of nodes in " + template_::detail_::identify(*this));
531  return num_nodes_;
532  }
533 
541  node_ref_container_type nodes() const
542  {
543  size_type num_nodes_ { num_nodes() } ;
544  ::std::vector<node::handle_t> node_handles { num_nodes_ };
545  auto status = cuGraphGetNodes(handle_, node_handles.data(), &num_nodes_);
546  throw_if_error_lazy(status, "Obtaining the set of nodes of " + template_::detail_::identify(*this));
547  node_ref_container_type node_refs;
548  for (const auto& node_handle : node_handles) {
549  node_refs.emplace_back(node::wrap(handle_, node_handle));
550  }
551  return node_refs;
552  }
553 
558  size_type num_roots() const
559  {
560  // Note: Code duplication with num_nodes()
561  ::std::size_t num_roots_;
562  auto status = cuGraphGetRootNodes(handle_, nullptr, &num_roots_);
563  throw_if_error_lazy(status, "Obtaining the number of root nodes in " + template_::detail_::identify(*this));
564  return num_roots_;
565  }
566 
571  node_ref_container_type roots() const
572  {
573  // Note: Code duplication wiuth nodes()
574  size_type num_roots_ {num_roots() } ;
575  ::std::vector<node::handle_t> root_node_handles {num_roots_ };
576  auto status = cuGraphGetRootNodes(handle_, root_node_handles.data(), &num_roots_);
577  throw_if_error_lazy(status, "Obtaining the set of root nodes of " + template_::detail_::identify(*this));
578  node_ref_container_type root_node_refs;
579  for (const auto& node_handle : root_node_handles) {
580  root_node_refs.emplace_back(node::wrap(handle_, node_handle));
581  }
582  return root_node_refs;
583  }
584 
586  size_type num_edges() const
587  {
588  ::std::size_t num_edges;
589  auto status = template_::detail_::get_edges(handle_, nullptr, nullptr, &num_edges);
590  throw_if_error_lazy(status, "Obtaining the number of edges in " + template_::detail_::identify(*this));
591  return num_edges;
592  }
593 
594  edge_container_type edges() const
595  {
596  size_type num_edges_ { num_edges() } ;
597  ::std::vector<node::handle_t> from_node_handles { num_edges_ };
598  ::std::vector<node::handle_t> to_node_handles { num_edges_ };
599  auto status = template_::detail_::get_edges(
600  handle_, from_node_handles.data(), to_node_handles.data(), &num_edges_);
601  throw_if_error_lazy(status, "Obtaining the set of edges in " + template_::detail_::identify(*this));
602  edge_container_type edges;
603  // TODO: Use container/range zipping, and a ranged-for loop
604  {
605  auto from_iter = from_node_handles.cbegin();
606  auto to_iter = from_node_handles.cbegin();
607  for (; from_iter != from_node_handles.cend(); from_iter++, to_iter++) {
608  assert(to_iter != to_node_handles.cend());
609  auto from_node_ref = node::wrap(handle_, *from_iter);
610  auto to_node_ref = node::wrap(handle_, *to_iter);
611  edges.emplace_back(from_node_ref, to_node_ref);
612  }
613  }
614  return edges;
615  }
616 
627  class insert_t {
628  protected:
629  const template_t& associated_template;
630 
631  template_::handle_t handle() const noexcept { return associated_template.handle(); }
632 
633  public:
634  insert_t(const template_t& template_) : associated_template(template_) {}
635 
636  void edge(node_ref_type source, node_ref_type dest) const
637  {
638  struct {
639  const node::handle_t source;
640  const node::handle_t dest;
641  } handles { source.handle(), dest.handle() };
642  static constexpr const size_t remove_just_one = 1;
643  auto status = template_::detail_::insert_edges(
644  handle(), &handles.source, &handles.dest, remove_just_one);
645  throw_if_error_lazy(status, "Inserting " + node::detail_::identify(edge_type{source, dest})
646  + " into " + template_::detail_::identify(associated_template));
647  }
648 
649  void edge(edge_type edge_) const
650  {
651  return edge(edge_.first, edge_.second);
652  }
653 
654  void edges(span<const node_ref_type> sources, span<const node_ref_type> destinations) const
655  {
656  if (sources.size() != destinations.size()) {
657  throw ::std::invalid_argument(
658  "Differing number of source nodes and destination nodes ("
659  + ::std::to_string(sources.size()) + " != " + ::std::to_string(destinations.size())
660  + " in a request to insert edges into " + template_::detail_::identify(associated_template) );
661  }
662  auto status = template_::detail_::insert_edges(handle(), sources, destinations);
663 
664  throw_if_error_lazy(status, "Destroying " + ::std::to_string(sources.size()) + " edges in "
665  + template_::detail_::identify(associated_template));
666  }
667 
668  void edges(span<const edge_type> edges) const
669  {
670  auto status = template_::detail_::insert_edges(handle(), edges);
671 
672  throw_if_error_lazy(status, "Inserting " + ::std::to_string(edges.size()) + " edges into "
673  + template_::detail_::identify(associated_template));
674  }
675 
676  template <node::kind_t Kind, typename T, typename... Ts>
677  typename node::typed_node_t<Kind> node(
678  T&& arg, Ts&&... node_params_ctor_arguments) const
679  {
680  // Note: arg may be either the first parameters constructor argument, or a context passed
681  // before the constructor arguments; due to the lack of C++17's if constexpr, we can only act
682  // on this knowledge in another function.
683  static constexpr const bool inserter_takes_context = node::detail_::kind_traits<Kind>::inserter_takes_context;
684  return template_::detail_::build_params_and_insert_node_wrapper<Kind>(
685  cuda::detail_::bool_constant<inserter_takes_context>{}, handle(),
686  ::std::forward<T>(arg), ::std::forward<Ts>(node_params_ctor_arguments)...);
687  }
688  }; // insert_t
689 
696  class delete_t {
697  protected:
698  const template_t &associated_template;
699  handle_type handle() const noexcept { return associated_template.handle(); }
700 
701  public:
702  delete_t(const template_t &template_) : associated_template(template_) {}
703 
704  void node(node_ref_type node) const
705  {
706  auto status = cuGraphDestroyNode(node.handle());
707  throw_if_error_lazy(status, "Deleting " + node::detail_::identify(node)
708  + " in " + template_::detail_::identify(associated_template));
709  }
710 
711  void edge(edge_type const& edge_) const
712  {
713  // TODO: Perhaps factor this out into an independent function under template_::detail?
714  struct {
715  const node::handle_t source;
716  const node::handle_t dest;
717  } handles { edge_.first.handle(), edge_.second.handle() };
718  static constexpr size_t remove_single_edge { 1 };
719  auto status = template_::detail_::delete_edges(
720  handle(), &handles.source, &handles.dest, remove_single_edge);
721 
722  throw_if_error_lazy(status, "Destroying " + node::detail_::identify(edge_)
723  + " in " + template_::detail_::identify(associated_template));
724  }
725 
726  void edges(span<const node_ref_type> sources, span<const node_ref_type> destinations) const
727  {
728  if (sources.size() != destinations.size()) {
729  throw ::std::invalid_argument(
730  "Differing number of source nodes and destination nodes ("
731  + ::std::to_string(sources.size()) + " != " + ::std::to_string(destinations.size())
732  + " in a request to insert edges into " + template_::detail_::identify(associated_template) );
733  }
734  auto status = template_::detail_::delete_edges(handle(), sources, destinations);
735 
736  throw_if_error_lazy(status, "Destroying " + ::std::to_string(sources.size()) + " edges in "
737  + template_::detail_::identify(associated_template));
738  }
739 
740  void edges(span<edge_type> edges) const
741  {
742  auto status = template_::detail_::delete_edges(handle(), edges);
743 
744  throw_if_error_lazy(status, "Destroying " + ::std::to_string(edges.size()) + " edges in "
745  + template_::detail_::identify(associated_template));
746  }
747  }; // delete_t
748 
749 public: // friendship
750 
751  friend template_t template_::wrap(handle_type handle, bool take_ownership) noexcept;
752 
753 protected: // constructors
754  template_t(handle_type handle, bool owning) noexcept
755  : handle_(handle), owning_(owning)
756  { }
757 
758 public: // ctors & dtor
759  template_t(const template_t& other) noexcept = delete;
760  template_t(template_t&& other) noexcept : template_t(other.handle_, other.owning_)
761  {
762  other.owning_ = false;
763  }
764 
765  ~template_t() DESTRUCTOR_EXCEPTION_SPEC
766  {
767  if (owning_) {
768  auto status = cuGraphDestroy(handle_);
769 #ifdef THROW_IN_DESTRUCTORS
770  throw_if_error_lazy(status, "Destroying " + template_::detail_::identify(*this));
771 #else
772  (void) status;
773 #endif
774  }
775  }
776 
777 public: // operators
778  template_t& operator=(const template_t&) = delete;
779  template_t& operator=(template_t&& other) noexcept
780  {
781  ::std::swap(handle_, other.handle_);
782  ::std::swap(owning_, other.owning_);
783  return *this;
784  }
785 
786 public: // non-mutators
787  instance_t instantiate(
788 #if CUDA_VERSION >= 11040
789  bool free_previous_allocations_before_relaunch = false
790 #endif
791 #if CUDA_VERSION >= 11700
792  , bool use_per_node_priorities = false
793 #endif
794 #if CUDA_VERSION >= 12000
795  , bool upload_on_instantiation = false
796  , bool make_device_launchable = false
797 #endif
798  );
799 
800 #if CUDA_VERSION >= 13010
801  id_t get_id() const
804  {
805  return template_::detail_::get_id(handle_);
806  }
807 #endif // CUDA_VERSION >= 13010
808 
809 public: // data members
810  const insert_t insert { *this };
811  const delete_t delete_ { *this };
812 private: // data members
813  // Note: A CUDA graph template is not specific to a context, nor a device!
814  template_::handle_t handle_;
815  bool owning_;
816 }; // class template_t
817 
818 namespace template_ {
819 
820 inline template_t wrap(handle_t handle, bool take_ownership) noexcept
821 {
822  return { handle, take_ownership };
823 }
824 
825 inline template_t create()
826 {
827  constexpr const unsigned flags { 0 };
828  handle_t handle;
829  auto status = cuGraphCreate(&handle, flags);
830  throw_if_error_lazy(status, "Creating a CUDA graph");
831  return wrap(handle, do_take_ownership);
832 }
833 
834 inline ::std::string identify(const template_t& template_)
835 {
836  return "CUDA execution graph template at " + cuda::detail_::ptr_as_hex(template_.handle());
837 }
838 
839 constexpr const ::std::initializer_list<node_t> no_dependencies {};
840 
841 template <node::kind_t Kind, template <typename> class Container, typename NodeOrHandle, typename... NodeParametersCtorParams>
842 node::typed_node_t<Kind> insert_node(
843  const template_t& graph,
844  Container<NodeOrHandle> dependencies,
845  NodeParametersCtorParams... node_parameters_ctor_params)
846 {
847  using traits_type = typename node::detail_::kind_traits<Kind>;
848  node::parameters_t<Kind> params { ::std::forward<NodeParametersCtorParams>(node_parameters_ctor_params)... };
849  auto raw_params = traits_type::marshal(params);
850  auto untyped_node = template_::detail_::insert_node(graph.handle(), raw_params, dependencies);
851  return node::wrap<Kind>(untyped_node.containing_graph(), untyped_node.handle(), params);
852  // Remember: untyped_node is not an owning object, so nothing is released (nor
853  // is ownership passed in the returned typed_node
854 }
855 
856 } // namespace template_
857 
858 inline template_t create()
859 {
860  return template_::create();
861 }
862 
863 
878 inline optional<node_t> find_in_clone(node_t node, const template_t& cloned_graph)
879 {
880  // The find function sets the result to 0 (nullptr) if the input
881  // parameters were valid, but the node was not found
882  auto search_result = reinterpret_cast<node::handle_t>(0x1);
883  auto status = cuGraphNodeFindInClone(&search_result, node.handle(), cloned_graph.handle());
884  if (status == cuda::status::invalid_value and search_result != nullptr) {
885  return nullopt;
886  }
887  throw_if_error_lazy(status, "Searching for a copy of " + node::detail_::identify(node) + " in " + template_::detail_::identify(cloned_graph));
888  return node::wrap(cloned_graph.handle(), search_result);
889 }
890 
891 } // namespace graph
892 
893 } // namespace cuda
894 
895 #endif // CUDA_VERSION >= 10000
896 
897 #endif // CUDA_API_WRAPPERS_GRAPH_TEMPLATE_HPP
Wrapper class for a CUDA context.
Definition: context.hpp:249
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:852
void wait(const event_t &event)
Have the calling thread wait - either busy-waiting or blocking - and return only after this event has...
Definition: event.hpp:457
void free(void *ptr)
Free a region of device-side memory (regardless of how it was allocated)
Definition: memory.hpp:126
void launch(Kernel &&kernel, launch_configuration_t launch_configuration, KernelParameters &&... parameters)
Variant of enqueue_launch for use with the default stream in the current context. ...
Definition: kernel_launch.hpp:396
::std::size_t size_t
A size type for use throughout the wrappers library (except when specific API functions limit the siz...
Definition: types.hpp:78
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we've failed - which also ensures no string is constructed unle...
Definition: error.hpp:327
CUarray handle_t
Raw CUDA driver handle for arrays (of any dimension)
Definition: array.hpp:34
array_t< T, NumDimensions > wrap(device::id_t device_id, context::handle_t context_handle, handle_t handle, dimensions_t< NumDimensions > dimensions) noexcept
Wrap an existing CUDA array in an array_t instance.
Definition: array.hpp:271
Facilities for exception-based handling of Runtime and Driver API errors, including a basic exception...
Graph template node proxy (base-)class base-class node_t and supporting code.
Fundamental CUDA-related type definitions.
CUresult status_t
Indicates either the result (success or error index) of a CUDA Runtime or Driver API call...
Definition: types.hpp:74