cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
template.hpp
Go to the documentation of this file.
1 
10 #pragma once
11 #ifndef CUDA_API_WRAPPERS_GRAPH_TEMPLATE_HPP
12 #define CUDA_API_WRAPPERS_GRAPH_TEMPLATE_HPP
13 
14 #if CUDA_VERSION >= 10000
15 
17 #include <cuda/api/graph/node.hpp>
18 #include <cuda/api/types.hpp>
19 #include <cuda/api/error.hpp>
21 
22 #include <vector>
23 #include <cassert>
24 #include <algorithm>
25 
26 namespace cuda {
27 
29 class device_t;
30 class stream_t;
32 
33 namespace graph {
34 
36 class template_t;
37 class instance_t;
39 
40 namespace node {
41 
42 namespace detail_ {
43 
44 // I'm not so sure about this...
45 using edge_t = ::std::pair<node_t, node_t>;
46 
47 inline ::std::string identify(const edge_t &edge)
48 {
49  return ::std::string("edge from " + node::detail_::identify(edge.first)
50  + " to " + node::detail_::identify(edge.second));
51 }
52 
53 template <typename NodeOrHandle>
54 handle_t as_handle(const NodeOrHandle& node_or_handle) noexcept
55 {
56  return node_or_handle.handle();
57 }
58 
59 template <> inline handle_t as_handle(const handle_t& handle) noexcept { return handle; }
60 
61 template <template <typename> class Container, typename NodeOrHandle>
62 struct as_handles_partial_specialization_helper{
63  static typename ::std::conditional<
64  ::std::is_same<NodeOrHandle, handle_t>::value,
65  span<handle_t>,
66  ::std::vector<handle_t>
67  >::type
68  as_handles(Container<NodeOrHandle>&& nodes_or_handles)
69  {
70  static_assert(
71  ::std::is_same<typename ::std::remove_const<NodeOrHandle>::type, node::handle_t>::value or
72  ::std::is_same<typename ::std::remove_const<NodeOrHandle>::type, node::handle_t>::value,
73  "Unsupported graph node dependency specifier type. Use either cuda::graph::node_t or cuda::graph::node::handle_t");
74  ::std::vector<handle_t> handles;
75  handles.reserve(nodes_or_handles.size());
76  ::std::transform(
77  nodes_or_handles.begin(),
78  nodes_or_handles.end(),
79  ::std::back_inserter(handles),
80  as_handle<NodeOrHandle> );
81  return handles;
82  }
83 };
84 
85 template <template <typename> class Container>
86 struct as_handles_partial_specialization_helper<Container, handle_t> {
87  Container<handle_t> as_handles(Container<handle_t>&& node_handles)
88  {
89  return node_handles;
90  }
91 };
92 
93 template <template <typename> class Container, typename NodeOrHandle>
94 static typename ::std::conditional<
95  ::std::is_same<NodeOrHandle, handle_t>::value,
96  span<handle_t>,
97  ::std::vector<handle_t>
98 >::type
99 as_handles(Container<NodeOrHandle>&& nodes_or_handles)
100 {
101  return as_handles_partial_specialization_helper<Container, handle_t>::as_handles(
102  ::std::forward<Container<NodeOrHandle>>(nodes_or_handles));
103 }
104 
105 
106 } // namespace detail_
107 
108 } // namespace node
109 
110 namespace template_ {
111 
112 template_t wrap(handle_t handle, bool take_ownership = false) noexcept;
113 
114 namespace detail_ {
115 
116 ::std::string identify(const template_t& template_);
117 
118 inline status_t delete_edges(
119  template_::handle_t template_handle,
120  span<const node::handle_t> edge_source_handles,
121  span<const node::handle_t> edge_destination_handles)
122 {
123  auto num_edges = edge_source_handles.size();
124  assert(edge_source_handles.size() == num_edges && "Mismatched sizes of sources and destinations");
125 
126  auto result = cuGraphRemoveDependencies(
127  template_handle,edge_source_handles.data(), edge_destination_handles.data(), num_edges);
128  return result;
129 }
130 
131 inline status_t delete_edges(
132  template_::handle_t template_handle,
133  span<const node_t> edge_sources,
134  span<const node_t> edge_destinations)
135 {
136  auto num_edges = edge_sources.size();
137  assert(edge_destinations.size() == num_edges && "Mismatched sizes of sources and destinations");
138 
139  // TODO: With C++14, consider make_unique here and no container
140  auto handles_buffer = ::std::vector<node::handle_t>{num_edges * 2};
141  {
142  auto handles_iter = handles_buffer;
143  ::std::transform(edge_sources.begin(), edge_sources.end(), handles_buffer.data(),
144  [](const node_t &node) { return node.handle(); });
145  ::std::transform(edge_destinations.begin(), edge_destinations.end(), handles_buffer.data() + num_edges,
146  [](const node_t &node) { return node.handle(); });
147  }
148  span<const node::handle_t> edge_source_handles { handles_buffer.data(), num_edges };
149  span<const node::handle_t> edge_destination_handles { handles_buffer.data() + num_edges, num_edges };
150  return delete_edges(template_handle, edge_source_handles, edge_destination_handles);
151 }
152 
153 // Note: duplication of code with delete_edges
154 inline status_t insert_edges(
155  template_::handle_t template_handle,
156  span<const node_t> edge_sources,
157  span<const node_t> edge_destinations)
158 {
159  auto num_edges = edge_sources.size();
160  assert(edge_destinations.size() == num_edges && "Mismatched sizes of sources and destinations");
161 
162  // TODO: With C++14, consider make_unique here and no container
163  auto handles_buffer = ::std::vector<node::handle_t>{num_edges * 2};
164  {
165  auto handles_iter = handles_buffer;
166  ::std::transform(edge_sources.begin(), edge_sources.end(), handles_buffer.data(),
167  [](const node_t &node) { return node.handle(); });
168  ::std::transform(edge_destinations.begin(), edge_destinations.end(), handles_buffer.data() + num_edges,
169  [](const node_t &node) { return node.handle(); });
170  }
171  const node::handle_t* sources_handles = handles_buffer.data();
172  const node::handle_t* destinations_handles = handles_buffer.data() + num_edges;
173  auto result = cuGraphAddDependencies(
174  template_handle,sources_handles, destinations_handles, edge_sources.size());
175  return result;
176 }
177 
178 inline status_t delete_edges(
179  template_::handle_t template_handle,
180  span<const node::detail_::edge_t> edges)
181 {
182  // TODO: With C++14, consider make_unique here
183  auto handles_buffer = ::std::vector<node::handle_t>{edges.size() * 2};
184  auto sources_iterator = handles_buffer.begin();
185  auto destinations_iterator = handles_buffer.begin() + edges.size();
186  for(const auto& edge : edges) {
187  *(sources_iterator++) = edge.first.handle();
188  *(destinations_iterator++) = edge.second.handle();
189  }
190  const node::handle_t* sources_handles = handles_buffer.data();
191  const node::handle_t* destinations_handles = handles_buffer.data() + edges.size();
192  auto result = cuGraphRemoveDependencies(
193  template_handle,sources_handles, destinations_handles, edges.size());
194  return result;
195 }
196 
197 // Note: duplication of code with delete_edges
198 inline status_t insert_edges(
199  template_::handle_t template_handle,
200  span<const node::detail_::edge_t> edges)
201 {
202  // TODO: With C++14, consider make_unique here
203  auto handles_buffer = ::std::vector<node::handle_t>{edges.size() * 2};
204  auto sources_iterator = handles_buffer.begin();
205  auto destinations_iterator = handles_buffer.begin() + edges.size();
206  for(const auto& edge : edges) {
207  *(sources_iterator++) = edge.first.handle();
208  *(destinations_iterator++) = edge.second.handle();
209  }
210  const node::handle_t* sources_handles = handles_buffer.data();
211  const node::handle_t* destinations_handles = handles_buffer.data() + edges.size();
212  auto result = cuGraphAddDependencies(
213  template_handle,sources_handles, destinations_handles, edges.size());
214  return result;
215 }
216 
217 template <node::kind_t Kind>
218 status_t invoke_inserter_possibly_with_context(
219  cuda::detail_::bool_constant<false>,
220  node::handle_t& new_node_handle,
221  template_::handle_t graph_template_handle,
222  CUgraphNode* dependency_handles,
223  size_t num_dependency_handles,
224  typename node::detail_::kind_traits<Kind>::raw_parameters_type&
225  raw_params,
227 {
228  auto raw_params_maybe_ptr = node::detail_::maybe_add_ptr<Kind>(raw_params);
229  return node::detail_::kind_traits<Kind>::inserter(
230  &new_node_handle,
231  graph_template_handle,
232  dependency_handles,
233  num_dependency_handles,
234  raw_params_maybe_ptr);
235 }
236 
237 template <node::kind_t Kind>
238 status_t invoke_inserter_possibly_with_context(
239  cuda::detail_::bool_constant<true>,
240  node::handle_t& new_node_handle,
241  template_::handle_t graph_template_handle,
242  CUgraphNode* dependency_handles,
243  size_t num_dependency_handles,
244  typename node::detail_::kind_traits<Kind>::raw_parameters_type&
245  raw_params,
246  context::handle_t context_handle)
247 {
248  auto raw_params_maybe_ptr = node::detail_::maybe_add_ptr<Kind>(raw_params);
249  return node::detail_::kind_traits<Kind>::inserter(
250  &new_node_handle,
251  graph_template_handle,
252  dependency_handles,
253  num_dependency_handles,
254  raw_params_maybe_ptr,
255  context_handle);
256 }
257 
258 template <node::kind_t Kind>
259 node::handle_t insert_node(
260  template_::handle_t graph_template_handle,
261  context::handle_t context_handle,
262  typename node::detail_::kind_traits<Kind>::raw_parameters_type raw_params)
263 {
264  using traits_type = typename node::detail_::kind_traits<Kind>;
265 
266  // Defining a useless bool here to circumvent gratuitous warnings from MSVC
267  const bool context_needed_but_missing =
268  traits_type::inserter_takes_context and context_handle == context::detail_::none;
269  if (context_needed_but_missing) {
270  throw ::std::invalid_argument(
271  "Attempt to insert a CUDA graph template " + ::std::string(traits_type::name)
272  + " node without specifying an execution context");
273  }
274 
275  node::handle_t new_node_handle;
276  auto no_dependency_handles = nullptr;
277  size_t no_dependencies_size = 0;
278  auto status = invoke_inserter_possibly_with_context<Kind>(
279  cuda::detail_::bool_constant<traits_type::inserter_takes_context>{},
280  new_node_handle,
281  graph_template_handle,
282  no_dependency_handles,
283  no_dependencies_size,
284  raw_params,
285  context_handle);
286  throw_if_error_lazy(status, "Inserting a " + ::std::string(traits_type::name) + " node into "
287  + template_::detail_::identify(graph_template_handle));
288  return new_node_handle;
289 }
290 
291 template <node::kind_t Kind, typename... Ts>
292 node::typed_node_t<Kind> build_params_and_insert_node(
293  template_::handle_t graph_template_handle,
294  context::handle_t context_handle,
295  Ts&&... params_ctor_args)
296 {
297 
298  using traits_type = typename node::detail_::kind_traits<Kind>;
299  using parameters_t = typename traits_type::parameters_type;
300 
301  // TODO: Why won't this work?
302  // static_assert(::std::is_constructible<parameters_t, Ts...>::value,
303  // "Node parameters are not constructible from the arguments passed");
304 
305  parameters_t params { ::std::forward<Ts>(params_ctor_args)... };
306  typename traits_type::raw_parameters_type raw_params = traits_type::marshal(params);
307  auto node_handle = insert_node<Kind>(graph_template_handle, context_handle, raw_params);
308  return node::wrap<Kind>(graph_template_handle, node_handle, ::std::move(params));
309 }
310 
311 template <node::kind_t Kind, typename... Ts>
312 node::typed_node_t<Kind> get_context_handle_build_params_and_insert_node(
313  cuda::detail_::true_type, // we've been given a context
314  template_::handle_t graph_template_handle,
315  const context_t& context,
316  Ts&&... params_ctor_args)
317 {
318  return build_params_and_insert_node<Kind>(graph_template_handle, context.handle(), ::std::forward<Ts>(params_ctor_args)...);
319 }
320 
321 template <node::kind_t Kind, typename... Ts>
322 node::typed_node_t<Kind> get_context_handle_build_params_and_insert_node(
323  cuda::detail_::false_type, // We've not been given a context
324  template_::handle_t graph_template_handle,
325  Ts&&... params_ctor_args)
326 {
327  auto current_context_handle = context::current::detail_::get_handle();
328  // TODO: Consider handling the case of no current context, e.g. by using the default device' primary context
329  return build_params_and_insert_node<Kind>(
330  graph_template_handle, current_context_handle, ::std::forward<Ts>(params_ctor_args)...);
331 }
332 
333 
334 template <node::kind_t Kind, typename... Ts>
335 node::typed_node_t<Kind> build_params_and_insert_node_wrapper(
336  cuda::detail_::false_type , // inserter doesn't takes a context
337  template_::handle_t graph_template_handle,
338  Ts&&... params_ctor_args)
339 {
340  return build_params_and_insert_node<Kind>(graph_template_handle, context::detail_::none, ::std::forward<Ts>(params_ctor_args)...);
341 }
342 
343 template <node::kind_t Kind, typename T, typename... Ts>
344 node::typed_node_t<Kind> build_params_and_insert_node_wrapper(
345  cuda::detail_::true_type, // inserter takes a context
346  template_::handle_t graph_template_handle,
347  T&& first_arg, // still don't know of T is a context or something else
348  Ts&&... params_ctor_args)
349 {
350  static constexpr const bool first_arg_is_a_context =
351  ::std::is_same<typename cuda::detail_::remove_reference_t<T>, cuda::context_t>::value;
352  return get_context_handle_build_params_and_insert_node<Kind>(
353 //return blah<Kind>(
354  cuda::detail_::bool_constant<first_arg_is_a_context>{},
355  graph_template_handle, ::std::forward<T>(first_arg), ::std::forward<Ts>(params_ctor_args)...);
356 }
357 
358 } // namespace detail_
359 
360 } // namespace template_
361 
369 class template_t {
370 public: // type definitions
371  using size_type = size_t;
372  using handle_type = template_::handle_t;
373 
374  using node_ref_type = node_t;
375 
378  using edge_type = ::std::pair<node_ref_type, node_ref_type>;
379 
380  using node_ref_container_type = ::std::vector<node_t>;
381 
384  using edge_container_type = ::std::vector<edge_type>;
385 
386 public: // getters
387 
389  handle_type handle() const noexcept { return handle_; }
390 
392  bool is_owning() const noexcept { return owning_; }
393 
394 public: // non-mutators
395 
396 #if CUDA_VERSION >= 11030
397  struct dot_printing_options_t {
398  bool debug_data;
399  bool use_runtime_types;
400  // TODO: Consider having a map/array mapping of kind_t's to bools
401  struct {
402  struct {
403  bool kernel;
404  bool host_function;
405  } launch;
406  struct {
407  bool allocate;
408  bool free;
409  bool copy;
410  bool set;
411  } memory_ops;
412  bool event;
413  struct {
414  bool signal;
415  bool wait;
416  } external_semaphore;
417  } node_params;
418  bool kernel_node_attributes;
419  bool node_and_kernel_function_handles;
420 
421  unsigned compose() const {
422  return 0u
423  | (debug_data ? CU_GRAPH_DEBUG_DOT_FLAGS_VERBOSE : 0)
424  | (use_runtime_types ? CU_GRAPH_DEBUG_DOT_FLAGS_RUNTIME_TYPES : 0)
425  | (node_params.launch.kernel ? CU_GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_PARAMS : 0)
426  | (node_params.launch.host_function ? CU_GRAPH_DEBUG_DOT_FLAGS_HOST_NODE_PARAMS : 0)
427 #if CUDA_VERSION >= 11040
428  | (node_params.memory_ops.allocate ? CU_GRAPH_DEBUG_DOT_FLAGS_MEM_ALLOC_NODE_PARAMS : 0)
429  | (node_params.memory_ops.free ? CU_GRAPH_DEBUG_DOT_FLAGS_MEM_FREE_NODE_PARAMS : 0)
430 #endif // CUDA_VERSION >= 11040
431  | (node_params.memory_ops.copy ? CU_GRAPH_DEBUG_DOT_FLAGS_MEMCPY_NODE_PARAMS : 0)
432  | (node_params.memory_ops.set ? CU_GRAPH_DEBUG_DOT_FLAGS_MEMSET_NODE_PARAMS : 0)
433  | (node_params.event ? CU_GRAPH_DEBUG_DOT_FLAGS_EVENT_NODE_PARAMS : 0)
434  | (node_params.external_semaphore.signal ? CU_GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_SIGNAL_NODE_PARAMS : 0)
435  | (node_params.external_semaphore.wait ? CU_GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_WAIT_NODE_PARAMS : 0)
436  | (kernel_node_attributes ? CU_GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_ATTRIBUTES : 0)
437  | (node_and_kernel_function_handles ? CU_GRAPH_DEBUG_DOT_FLAGS_HANDLES : 0)
438  ;
439  }
440  // TODO: Consider initializing all fields to false in the default constructor
441  };
442 #endif // CUDA_VERSION >= 11030
443 
451  template_t clone() const
452  {
453  handle_type clone_handle;
454  auto status = cuGraphClone(&clone_handle, handle_);
455  throw_if_error_lazy(status, "Cloning " + template_::detail_::identify(*this));
456  return template_t{ clone_handle, do_take_ownership };
457  }
458 
459 #if CUDA_VERSION >= 11030
460 
468  void print_dot(const char* dot_filename, dot_printing_options_t printing_options = {}) const
469  {
470  auto status = cuGraphDebugDotPrint(handle_, dot_filename, printing_options.compose());
471  throw_if_error_lazy(status, "Printing " + template_::detail_::identify(*this) + " to file " + dot_filename);
472  }
473 #endif // CUDA_VERSION >= 11030
474 
477  size_type num_nodes() const
478  {
479  ::std::size_t num_nodes_;
480  auto status = cuGraphGetNodes(handle_, nullptr, &num_nodes_);
481  throw_if_error_lazy(status, "Obtaining the number of nodes in " + template_::detail_::identify(*this));
482  return num_nodes_;
483  }
484 
492  node_ref_container_type nodes() const
493  {
494  size_type num_nodes_ { num_nodes() } ;
495  ::std::vector<node::handle_t> node_handles {num_nodes_ };
496  auto status = cuGraphGetNodes(handle_, node_handles.data(), &num_nodes_);
497  throw_if_error_lazy(status, "Obtaining the set of nodes of " + template_::detail_::identify(*this));
498  node_ref_container_type node_refs;
499  for (const auto& node_handle : node_handles) {
500  node_refs.emplace_back(node::wrap(handle_, node_handle));
501  }
502  return node_refs;
503  }
504 
509  size_type num_roots() const
510  {
511  // Note: Code duplication with num_nodes()
512  ::std::size_t num_roots_;
513  auto status = cuGraphGetRootNodes(handle_, nullptr, &num_roots_);
514  throw_if_error_lazy(status, "Obtaining the number of root nodes in " + template_::detail_::identify(*this));
515  return num_roots_;
516  }
517 
522  node_ref_container_type roots() const
523  {
524  // Note: Code duplication wiuth nodes()
525  size_type num_roots_ {num_roots() } ;
526  ::std::vector<node::handle_t> root_node_handles {num_roots_ };
527  auto status = cuGraphGetRootNodes(handle_, root_node_handles.data(), &num_roots_);
528  throw_if_error_lazy(status, "Obtaining the set of root nodes of " + template_::detail_::identify(*this));
529  node_ref_container_type root_node_refs;
530  for (const auto& node_handle : root_node_handles) {
531  root_node_refs.emplace_back(node::wrap(handle_, node_handle));
532  }
533  return root_node_refs;
534  }
535 
537  size_type num_edges() const
538  {
539  size_type num_edges;
540  auto status = cuGraphGetEdges(handle_, nullptr, nullptr, &num_edges);
541  throw_if_error_lazy(status, "Obtaining the number of edges in " + template_::detail_::identify(*this));
542  return num_edges;
543  }
544 
545  edge_container_type edges() const
546  {
547  size_type num_edges_ { num_edges() } ;
548  ::std::vector<node::handle_t> from_node_handles { num_edges_ };
549  ::std::vector<node::handle_t> to_node_handles { num_edges_ };
550  auto status = cuGraphGetEdges(handle_, from_node_handles.data(), to_node_handles.data(), &num_edges_);
551  throw_if_error_lazy(status, "Obtaining the set of edges in " + template_::detail_::identify(*this));
552  edge_container_type edges;
553  // TODO: Use container/range zipping, and a ranged-for loop
554  {
555  auto from_iter = from_node_handles.cbegin();
556  auto to_iter = from_node_handles.cbegin();
557  for (; from_iter != from_node_handles.cend(); from_iter++, to_iter++) {
558  assert(to_iter != to_node_handles.cend());
559  auto from_node_ref = node::wrap(handle_, *from_iter);
560  auto to_node_ref = node::wrap(handle_, *to_iter);
561  edges.emplace_back(from_node_ref, to_node_ref);
562  }
563  }
564  return edges;
565  }
566 
577  class insert_t {
578  protected:
579  const template_t& associated_template;
580 
581  template_::handle_t handle() const noexcept { return associated_template.handle(); }
582 
583  public:
584  insert_t(const template_t& template_) : associated_template(template_) {}
585 
586  void edge(node_ref_type source, node_ref_type dest) const
587  {
588  struct {
589  const node::handle_t source;
590  const node::handle_t dest;
591  } handles { source.handle(), dest.handle() };
592  static constexpr const size_t remove_just_one = 1;
593  auto status = cuGraphAddDependencies(
594  handle(), &handles.source, &handles.dest, remove_just_one);
595 
596  throw_if_error_lazy(status, "Inserting " + node::detail_::identify(edge_type{source, dest})
597  + " into " + template_::detail_::identify(associated_template));
598  }
599 
600  void edge(edge_type edge_) const
601  {
602  return edge(edge_.first, edge_.second);
603  }
604 
605  void edges(span<const node_ref_type> sources, span<const node_ref_type> destinations) const
606  {
607  if (sources.size() != destinations.size()) {
608  throw ::std::invalid_argument(
609  "Differing number of source nodes and destination nodes ("
610  + ::std::to_string(sources.size()) + " != " + ::std::to_string(destinations.size())
611  + " in a request to insert edges into " + template_::detail_::identify(associated_template) );
612  }
613  auto status = template_::detail_::insert_edges(handle(), sources, destinations);
614 
615  throw_if_error_lazy(status, "Destroying " + ::std::to_string(sources.size()) + " edges in "
616  + template_::detail_::identify(associated_template));
617  }
618 
619  void edges(span<const edge_type> edges) const
620  {
621  auto status = template_::detail_::insert_edges(handle(), edges);
622 
623  throw_if_error_lazy(status, "Inserting " + ::std::to_string(edges.size()) + " edges into "
624  + template_::detail_::identify(associated_template));
625  }
626 
627  template <node::kind_t Kind, typename T, typename... Ts>
628  typename node::typed_node_t<Kind> node(
629  T&& arg, Ts&&... node_params_ctor_arguments) const
630  {
631  // Note: arg may be either the first parameters constructor argument, or a context passed
632  // before the constructor arguments; due to the lack of C++17's if constexpr, we can only act
633  // on this knowledge in another function.
634  static constexpr const bool inserter_takes_context = node::detail_::kind_traits<Kind>::inserter_takes_context;
635  return template_::detail_::build_params_and_insert_node_wrapper<Kind>(
636  cuda::detail_::bool_constant<inserter_takes_context>{}, handle(),
637  ::std::forward<T>(arg), ::std::forward<Ts>(node_params_ctor_arguments)...);
638  }
639  }; // insert_t
640 
647  class delete_t {
648  protected:
649  const template_t &associated_template;
650  handle_type handle() const noexcept { return associated_template.handle(); }
651 
652  public:
653  delete_t(const template_t &template_) : associated_template(template_) {}
654 
655  void node(node_ref_type node) const
656  {
657  auto status = cuGraphDestroyNode(node.handle());
658  throw_if_error_lazy(status, "Deleting " + node::detail_::identify(node)
659  + " in " + template_::detail_::identify(associated_template));
660  }
661 
662  void edge(edge_type const& edge_) const
663  {
664  struct {
665  const node::handle_t source;
666  const node::handle_t dest;
667  } handles { edge_.first.handle(), edge_.second.handle() };
668  static constexpr const size_t remove_just_one = 1;
669  auto status = cuGraphRemoveDependencies(
670  handle(), &handles.source, &handles.dest, remove_just_one);
671 
672  throw_if_error_lazy(status, "Destroying " + node::detail_::identify(edge_)
673  + " in " + template_::detail_::identify(associated_template));
674  }
675 
676  void edges(span<const node_ref_type> sources, span<const node_ref_type> destinations) const
677  {
678  if (sources.size() != destinations.size()) {
679  throw ::std::invalid_argument(
680  "Differing number of source nodes and destination nodes ("
681  + ::std::to_string(sources.size()) + " != " + ::std::to_string(destinations.size())
682  + " in a request to insert edges into " + template_::detail_::identify(associated_template) );
683  }
684  auto status = template_::detail_::delete_edges(handle(), sources, destinations);
685 
686  throw_if_error_lazy(status, "Destroying " + ::std::to_string(sources.size()) + " edges in "
687  + template_::detail_::identify(associated_template));
688  }
689 
690  void edges(span<edge_type> edges) const
691  {
692  auto status = template_::detail_::delete_edges(handle(), edges);
693 
694  throw_if_error_lazy(status, "Destroying " + ::std::to_string(edges.size()) + " edges in "
695  + template_::detail_::identify(associated_template));
696  }
697  }; // delete_t
698 
699 public: // friendship
700 
701  friend template_t template_::wrap(handle_type handle, bool take_ownership) noexcept;
702 
703 protected: // constructors
704  template_t(handle_type handle, bool owning) noexcept
705  : handle_(handle), owning_(owning)
706  { }
707 
708 public: // ctors & dtor
709  template_t(const template_t& other) noexcept = delete;
710  template_t(template_t&& other) noexcept : template_t(other.handle_, other.owning_)
711  {
712  other.owning_ = false;
713  }
714 
715  ~template_t() noexcept(false)
716  {
717  if (owning_) {
718  auto status = cuGraphDestroy(handle_);
719  throw_if_error_lazy(status, "Destroying " + template_::detail_::identify(*this));
720  }
721  }
722 
723 public: // operators
724  template_t& operator=(const template_t&) = delete;
725  template_t& operator=(template_t&& other) noexcept
726  {
727  ::std::swap(handle_, other.handle_);
728  ::std::swap(owning_, other.owning_);
729  return *this;
730  }
731 
732 public: // non-mutators
733  instance_t instantiate(
734 #if CUDA_VERSION >= 11040
735  bool free_previous_allocations_before_relaunch = false
736 #endif
737 #if CUDA_VERSION >= 11700
738  , bool use_per_node_priorities = false
739 #endif
740 #if CUDA_VERSION >= 12000
741  , bool upload_on_instantiation = false
742  , bool make_device_launchable = false
743 #endif
744  );
745 
746 public: // data members
747  const insert_t insert { *this };
748  const delete_t delete_ { *this };
749 private: // data members
750  // Note: A CUDA graph template is not specific to a context, nor a device!
751  template_::handle_t handle_;
752  bool owning_;
753 }; // class template_t
754 
755 namespace template_ {
756 
757 inline template_t wrap(handle_t handle, bool take_ownership) noexcept
758 {
759  return { handle, take_ownership };
760 }
761 
762 inline template_t create()
763 {
764  constexpr const unsigned flags { 0 };
765  handle_t handle;
766  auto status = cuGraphCreate(&handle, flags);
767  throw_if_error_lazy(status, "Creating a CUDA graph");
768  return wrap(handle, do_take_ownership);
769 }
770 
771 inline ::std::string identify(const template_t& template_)
772 {
773  return "CUDA execution graph template at " + cuda::detail_::ptr_as_hex(template_.handle());
774 }
775 
776 constexpr const ::std::initializer_list<node_t> no_dependencies {};
777 
778 template <node::kind_t Kind, template <typename> class Container, typename NodeOrHandle, typename... NodeParametersCtorParams>
779 node::typed_node_t<Kind> insert_node(
780  const template_t& graph,
781  Container<NodeOrHandle> dependencies,
782  NodeParametersCtorParams... node_parameters_ctor_params)
783 {
784  using traits_type = typename node::detail_::kind_traits<Kind>;
785  node::parameters_t<Kind> params { ::std::forward<NodeParametersCtorParams>(node_parameters_ctor_params)... };
786  auto raw_params = traits_type::marshal(params);
787  auto untyped_node = template_::detail_::insert_node(graph.handle(), raw_params, dependencies);
788  return node::wrap<Kind>(untyped_node.containing_graph(), untyped_node.handle(), params);
789  // Remember: untyped_node is not an owning object, so nothing is released (nor
790  // is ownership passed in the returned typed_node
791 }
792 
793 } // namespace template_
794 
795 inline template_t create()
796 {
797  return template_::create();
798 }
799 
800 
815 inline optional<node_t> find_in_clone(node_t node, const template_t& cloned_graph)
816 {
817  // The find function sets the result to 0 (nullptr) if the input
818  // parameters were valid, but the node was not found
819  auto search_result = reinterpret_cast<node::handle_t>(0x1);
820  auto status = cuGraphNodeFindInClone(&search_result, node.handle(), cloned_graph.handle());
821  if (status == cuda::status::invalid_value and search_result != nullptr) {
822  return nullopt;
823  }
824  throw_if_error_lazy(status, "Searching for a copy of " + node::detail_::identify(node) + " in " + template_::detail_::identify(cloned_graph));
825  return node::wrap(cloned_graph.handle(), search_result);
826 }
827 
828 } // namespace graph
829 
830 } // namespace cuda
831 
832 #endif // CUDA_VERSION >= 10000
833 
834 #endif // CUDA_API_WRAPPERS_GRAPH_TEMPLATE_HPP
Wrapper class for a CUDA context.
Definition: context.hpp:244
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
void wait(const event_t &event)
Have the calling thread wait - either busy-waiting or blocking - and return only after this event has...
Definition: event.hpp:467
void free(void *ptr)
Free a region of device-side memory (regardless of how it was allocated)
Definition: memory.hpp:130
void launch(Kernel &&kernel, launch_configuration_t launch_configuration, KernelParameters &&... parameters)
Variant of enqueue_launch for use with the default stream in the current context. ...
Definition: kernel_launch.hpp:394
::std::size_t size_t
A size type for use throughout the wrappers library (except when specific API functions limit the siz...
Definition: types.hpp:81
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we've failed - which also ensures no string is constructed unle...
Definition: error.hpp:316
CUarray handle_t
Raw CUDA driver handle for arrays (of any dimension)
Definition: array.hpp:34
array_t< T, NumDimensions > wrap(device::id_t device_id, context::handle_t context_handle, handle_t handle, dimensions_t< NumDimensions > dimensions) noexcept
Wrap an existing CUDA array in an array_t instance.
Definition: array.hpp:264
Facilities for exception-based handling of Runtime and Driver API errors, including a basic exception...
Graph template node proxy (base-)class base-class node_t and supporting code.
Fundamental CUDA-related type definitions.
CUresult status_t
Indicates either the result (success or error index) of a CUDA Runtime or Driver API call...
Definition: types.hpp:77