cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
instance.hpp
Go to the documentation of this file.
1 
7 #pragma once
8 #ifndef CUDA_API_WRAPPERS_INSTANCE_HPP
9 #define CUDA_API_WRAPPERS_INSTANCE_HPP
10 
11 #if CUDA_VERSION >= 10000
12 
13 #include "node.hpp"
14 #include "identify.hpp"
15 #include "../types.hpp"
16 
17 namespace cuda {
18 
19 class stream_t;
20 
21 namespace graph {
22 
23 class instance_t;
24 
26 class template_t;
28 
29 namespace instance {
30 
31 using update_status_t = CUgraphExecUpdateResult;
32 
33 namespace update_status {
34 
/// Named aliases for the raw CUDA execution-graph-instance update result codes
/// (@ref update_status_t, i.e. CUgraphExecUpdateResult)
enum named_t : ::std::underlying_type<update_status_t>::type {
    success = CU_GRAPH_EXEC_UPDATE_SUCCESS,
    failure_for_unexpected_reason = CU_GRAPH_EXEC_UPDATE_ERROR,
    topology_has_changed = CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED,
    node_type_has_changed = CU_GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED,
    kernel_node_function_has_changed = CU_GRAPH_EXEC_UPDATE_ERROR_FUNCTION_CHANGED,
    unsupported_kind_of_parameter_change = CU_GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED,
    unsupported_aspect_of_node = CU_GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED,
    // Statuses only defined (and reported) by newer CUDA versions
#if CUDA_VERSION >= 11020
    unsupported_kind_of_kernel_node_function_change = CU_GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE,
#if CUDA_VERSION >= 11060
    unsupported_kind_of_node_attributes_change = CU_GRAPH_EXEC_UPDATE_ERROR_ATTRIBUTES_CHANGED,
#endif // CUDA_VERSION >= 11060
#endif // CUDA_VERSION >= 11020
};
50 
51 constexpr inline bool operator==(const update_status_t &lhs, const named_t &rhs) noexcept { return lhs == static_cast<update_status_t>(rhs); }
52 constexpr inline bool operator!=(const update_status_t &lhs, const named_t &rhs) noexcept { return lhs != static_cast<update_status_t>(rhs); }
53 constexpr inline bool operator==(const named_t &lhs, const update_status_t &rhs) noexcept { return static_cast<update_status_t>(lhs) == rhs; }
54 constexpr inline bool operator!=(const named_t &lhs, const update_status_t &rhs) noexcept { return static_cast<update_status_t>(lhs) != rhs; }
55 
56 namespace detail_ {
57 
// Human-readable descriptions of execution-graph-instance update statuses,
// indexed by the numeric value of update_status_t (CUgraphExecUpdateResult).
// NOTE(review): the last two entries correspond to statuses only present from
// CUDA 11.2 / 11.6 onwards; keeping them unconditionally is harmless, since
// older drivers never report those status values.
constexpr const char * const descriptions[] = {
    "success",
    "failure for an unexpected reason described in the return value of the function",
    "topology has changed",
    "node type has changed",
    "kernel node function has changed",
    "parameters changed in an unsupported way",
    "something about the node is not supported",
    "unsupported kind of kernel node function change",
    "unsupported kind of node attributes change"
};
69 
70 inline bool is_node_specific(update_status_t update_status)
71 {
72  return
73  update_status != success and
74  update_status != failure_for_unexpected_reason and
75  update_status != topology_has_changed and
76  update_status != unsupported_kind_of_parameter_change;
77 }
78 
79 } // namespace detail_
80 
81 } // namespace update_status
82 
83 namespace detail_ {
84 
85 using flags_t = cuuint64_t;
86 
87 inline const char *describe(instance::update_status_t update_status)
88 {
89  return instance::update_status::detail_::descriptions[update_status];
90 }
91 
92 inline ::std::string describe(
93  instance::update_status_t update_status,
94  node::handle_t node_handle,
95  template_::handle_t graph_template_handle);
96 
#if CUDA_VERSION >= 13010
/// Obtain the local (DOT-printing) identifier of an execution graph instance
inline id_t get_id(handle_t handle)
{
    id_t result;
    auto status = cuGraphExecGetId(handle, &result);
    throw_if_error_lazy(status, "Getting the local (DOT-printing) ID of " + identify(handle));
    return result;
}
#endif // CUDA_VERSION >= 13010
106 
107 } // namespace detail_
108 
109 } // namespace instance
110 
111 
132  void launch(const instance_t& instance, const stream_t& stream);
133 
134 } // namespace graph
135 
139 inline ::std::string describe(graph::instance::update_status_t status)
140 {
141  return graph::instance::detail_::describe(status);
142 }
143 
144 ::std::string describe(graph::instance::update_status_t update_status, optional<graph::node_t> node);
145 
149 inline constexpr bool is_success(graph::instance::update_status_t status)
150 {
151  return status == graph::instance::update_status::success;
152 }
153 
157 constexpr bool is_failure(graph::instance::update_status_t status) { return not is_success(status); }
158 
159 namespace graph {
160 
161 namespace instance {
162 
163 instance_t wrap(template_::handle_t template_handle, handle_t handle, bool is_owning) noexcept;
164 
165 namespace detail_ {
166 
167 ::std::string identify(const instance_t &instance);
168 
169 } // namespace detail_
170 
171 // TODO: Add support for reporting errors involving edges
172 class update_failure : public ::std::runtime_error {
173 public:
174  using parent = ::std::runtime_error;
175 
176  update_failure(
177  update_status_t kind,
178  optional<node_t>&& impermissible_node,
179  ::std::string&& what_arg) noexcept
180  :
181  parent((what_arg.empty() ? "" : what_arg + ": ") + describe(kind, impermissible_node)),
182  kind_(kind),
183  impermissible_node_(::std::move(impermissible_node))
184  {
185  // TODO: Ensure the kind needs a node handle IFF a node handle has been provided
186  }
187 
188  update_failure(update_status_t kind, node_t impermissible_node) noexcept
189  : update_failure(kind, optional<node_t>(::std::move(impermissible_node)), "")
190  { }
191 
192  update_status_t kind() const noexcept { return kind_; }
193  node_t impermissible_node() const { return impermissible_node_.value(); }
194 
195 private:
196  update_status_t kind_;
197  optional<node_t> impermissible_node_;
198 };
199 
209 void update(const instance_t& destination, const template_t& source);
210 
211 namespace detail_ {
212 
213 template <node::kind_t Kind>
214 status_t set_node_parameters_nothrow(
215  const instance::handle_t instance_handle,
216  const node::handle_t node_handle,
217  const typename node::detail_::kind_traits<Kind>::raw_parameters_type raw_params)
218 {
219  auto raw_params_maybe_ptr = node::detail_::maybe_add_ptr<Kind>(raw_params);
220  return node::detail_::kind_traits<Kind>::instance_setter(instance_handle, node_handle, raw_params_maybe_ptr);
221 }
222 
223 } // namespace detail_
224 
225 
226 template <node::kind_t Kind>
227 void set_node_parameters(
228  const instance_t& instance,
229  const node_t& node,
230  const node::parameters_t<Kind> parameters);
231 
232 } // namespace instance
233 
234 namespace detail_ {
235 
236 inline void launch_graph_in_current_context(stream::handle_t stream_handle, instance::handle_t graph_instance_handle)
237 {
238  auto status = cuGraphLaunch(graph_instance_handle, stream_handle);
239  throw_if_error_lazy(status, "Trying to launch "
240  + instance::detail_::identify(graph_instance_handle) + " on " + stream::detail_::identify(stream_handle));
241 }
242 
243 inline void launch(context::handle_t context_handle, stream::handle_t stream_handle, instance::handle_t graph_instance_handle)
244 {
245  context::current::detail_::scoped_override_t set_context_for_this_scope(context_handle);
246  launch_graph_in_current_context(stream_handle, graph_instance_handle);
247 }
248 
249 } // namespace detail_
250 
/// A proxy class for an instantiated ("executable") CUDA graph - a CUgraphExec
/// created from a graph template, which may be launched on streams.
///
/// @note this proxy may or may not own the underlying instance; when owning,
/// the instance is destroyed together with the proxy.
class instance_t {
public: // data types
    using handle_type = instance::handle_t;

public: // getters
    /// Handle of the graph template from which this instance was created
    template_::handle_t template_handle() const noexcept { return template_handle_; }
    /// Raw CUDA driver handle of this execution graph instance
    handle_type handle() const noexcept { return handle_; }
    /// True if destroying this proxy will destroy the underlying instance
    bool is_owning() const noexcept { return owning_; }

protected: // constructors
    // Raw-handle construction is protected; use instance::wrap() instead
    instance_t(template_::handle_t template_handle, handle_type handle, bool owning) noexcept
    : template_handle_(template_handle), handle_(handle), owning_(owning)
    { }

public: // constructors & destructor
    // Non-copyable: at most one proxy owns a given instance
    instance_t(const instance_t& other) noexcept = delete;

    // Move-construction transfers ownership (if any) from the moved-from proxy
    instance_t(instance_t&& other) noexcept : instance_t(other.template_handle_, other.handle_, other.owning_)
    {
        other.owning_ = false;
    }
    ~instance_t() DESTRUCTOR_EXCEPTION_SPEC
    {
        if (owning_) {
            auto status = cuGraphExecDestroy(handle_);
#if THROW_IN_DESTRUCTORS
            throw_if_error_lazy(status, "Destroying " + instance::detail_::identify(*this));
#else
            // Cannot throw from a destructor in this configuration; drop the status
            (void) status;
#endif
        }
    }

public: // operators
    instance_t& operator=(const instance_t&) = delete;
    // Swap-based move assignment: the moved-from proxy releases our previous
    // instance (if we owned one) when it is itself destroyed
    instance_t& operator=(instance_t&& other) noexcept
    {
        ::std::swap(template_handle_, other.template_handle_);
        ::std::swap(handle_, other.handle_);
        ::std::swap(owning_, other.owning_);
        return *this;
    }


public: // friends
    friend instance_t instance::wrap(template_::handle_t template_handle, handle_type handle, bool is_owning) noexcept;

public: // non-mutators
    /// Update this instance from a (topologically-compatible) graph template
    void update(const template_t& update_source) const
    {
        instance::update(*this, update_source);
    }

    /// Enqueue a launch of this instance on the given stream
    void launch(const stream_t& stream) const
    {
        graph::launch(*this, stream);
    }
#if CUDA_VERSION >= 11010
    /// Upload this instance to the device ahead of an actual launch
    void upload(const stream_t& stream) const;
#endif // CUDA_VERSION >= 11010

#if CUDA_VERSION >= 12000
    /// True if this instance was created with the auto-free-on-launch flag
    bool frees_allocations_before_relaunch() const
    {
        instance::detail_::flags_t flags;
        auto status = cuGraphExecGetFlags (handle_, &flags);
        throw_if_error_lazy(status, "Obtaining execution graph instance flags");
        return flags & CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH;
    }

    /// True if this instance was created with the per-node-priority flag
    bool uses_node_priorities() const
    {
        instance::detail_::flags_t flags;
        auto status = cuGraphExecGetFlags (handle_, &flags);
        throw_if_error_lazy(status, "Obtaining execution graph instance flags");
        return flags & CUDA_GRAPH_INSTANTIATE_FLAG_USE_NODE_PRIORITY;
    }

#endif

#if CUDA_VERSION >= 13010
    /// The local (DOT-printing) identifier of this instance
    id_t get_id() const
    {
        return instance::detail_::get_id(handle_);
    }
#endif // CUDA_VERSION >= 13010

    /// Set the parameters of one of this instance's nodes
    template <node::kind_t Kind>
    void set_node_parameters(const node_t& node, node::parameters_t<Kind> new_parameters)
    {
        instance::set_node_parameters<Kind>(*this, node, ::std::move(new_parameters));
    }

    /// Set the parameters of one of this instance's nodes, taking them from
    /// a typed node proxy
    template <node::kind_t Kind>
    void set_node_parameters(const node::typed_node_t<Kind>& node)
    {
        instance::set_node_parameters<Kind>(*this, node);
    }

protected:
    template_::handle_t template_handle_;
    handle_type handle_;
    bool owning_;
};
357 
370 void upload(const instance_t& instance, const stream_t& stream);
371 
372 namespace instance {
373 
374 inline instance_t wrap(template_::handle_t template_handle, handle_t handle, bool is_owning) noexcept
375 {
376  return instance_t{template_handle, handle, is_owning};
377 }
378 
/// Readable boolean aliases for the instantiation-time options of an execution
/// graph instance (see @ref instantiate and @ref detail_::build_flags)
enum : bool {
    do_free_previous_allocations_before_relaunch = true,
    auto_free = true,
    dont_free_previous_allocations_before_relaunch = false,
    no_auto_free = false,
#if CUDA_VERSION >= 12000

    do_upload_on_instantiation = true,
    dont_upload_on_instantiation = false,
    auto_upload = true,
    no_auto_upload = false,
    manual_upload = false,

    make_launchable_from_device_code = true,
    // Fixed: this negative alias was erroneously set to true, contradicting
    // its name and its sibling dont_make_device_launchable
    dont_make_launchable_from_device_code = false,
    do_make_device_launchable = true,
    dont_make_device_launchable = false,
#endif // CUDA_VERSION >= 12000
#if CUDA_VERSION >= 11700

    do_use_per_node_priorities = true,
    do_use_per_node_priority = true,
    dont_use_per_node_priorities = false,
    // Fixed: this negative alias was erroneously set to true, contradicting
    // its name and its sibling dont_use_per_node_priorities
    dont_use_per_node_priority = false,
    use_stream_priority = false
#endif // CUDA_VERSION >= 11700
};
406 
407 namespace detail_ {
408 
#if CUDA_VERSION >= 11040
/// Combine individual boolean instantiation options into the raw flags value
/// which the CUDA driver's graph-instantiation API expects.
///
/// @note the parameter list varies with the CUDA version this header is
/// compiled against, matching the flags that version supports.
inline flags_t build_flags(
    bool free_previous_allocations_before_relaunch
#if CUDA_VERSION >= 12000
    , bool upload_on_instantiation
    , bool make_device_launchable
#endif // CUDA_VERSION >= 12000
#if CUDA_VERSION >= 11700
    , bool use_per_node_priorities
#endif
    )
{
    return
        (free_previous_allocations_before_relaunch ? CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH : 0)
#if CUDA_VERSION >= 12000
        | (upload_on_instantiation ? CUDA_GRAPH_INSTANTIATE_FLAG_UPLOAD : 0)
        | (make_device_launchable ? CUDA_GRAPH_INSTANTIATE_FLAG_DEVICE_LAUNCH : 0)
#endif
#if CUDA_VERSION >= 11700
        | (use_per_node_priorities ? CUDA_GRAPH_INSTANTIATE_FLAG_USE_NODE_PRIORITY : 0)
#endif
    ;
}
#endif // CUDA_VERSION >= 11040
433 
434 inline ::std::string identify(const instance_t& instance)
435 {
436  return identify(instance.handle()) + " instantiated from "
437  + template_::detail_::identify(instance.template_handle());
438 }
439 
440 inline ::std::string identify(const instance_t& instance, const template_t& template_)
441 {
442  return identify(instance.handle()) + " instantiated from "
443  + template_::detail_::identify(template_);
444 }
445 
446 } // namespace detail_
447 
448 template <node::kind_t Kind>
449 void set_node_parameters(
450  const instance_t& instance,
451  const node_t& node,
452  node::parameters_t<Kind> parameters)
453 {
454  auto status = detail_::set_node_parameters_nothrow<Kind>(
455  instance.handle(), node.handle(), node::detail_::kind_traits<Kind>::marshal(parameters));
456  throw_if_error_lazy(status, "Setting parameters of " + node::detail_::identify(node)
457  + " in " + instance::detail_::identify(instance));
458 }
459 
460 
461 template <node::kind_t Kind>
462 void set_node_parameters(
463  const instance_t& instance,
464  const node::typed_node_t<Kind>& node_with_new_params)
465 {
466  return set_node_parameters<Kind>(
467  instance, static_cast<node_t&>(node_with_new_params), node_with_new_params.parameters());
468 }
469 
470 
471 } // namespace instance
472 
/// Create an executable instance of a CUDA graph template, which may then be
/// launched on streams.
///
/// @param template_ the graph template to instantiate
/// @param free_previous_allocations_before_relaunch when true, allocations
///     made by the graph are freed before each re-launch (CUDA 11.4 and later)
/// @param upload_on_instantiation when true, the instance is uploaded to the
///     device as part of instantiation (CUDA 12 and later)
/// @param make_device_launchable when true, the instance may be launched from
///     device-side code (CUDA 12 and later)
/// @param use_per_node_priorities when true, node priorities take precedence
///     over the launch stream's priority (CUDA 11.7 and later)
///
/// @return an owning proxy for the newly-created execution graph instance
inline instance_t instantiate(
    const template_t& template_
#if CUDA_VERSION >= 11040
    , bool free_previous_allocations_before_relaunch = false
#endif
#if CUDA_VERSION >= 12000
    , bool upload_on_instantiation = false
    , bool make_device_launchable = false
#endif
#if CUDA_VERSION >= 11700
    , bool use_per_node_priorities = false
#endif
)
{
#if CUDA_VERSION >= 11040
    instance::detail_::flags_t flags = instance::detail_::build_flags(
        free_previous_allocations_before_relaunch
#if CUDA_VERSION >= 12000
        , upload_on_instantiation, make_device_launchable
#endif
#if CUDA_VERSION >= 11700
        , use_per_node_priorities
#endif
        );
#endif // CUDA_VERSION >= 11040
    instance::handle_t instance_handle;
#if CUDA_VERSION >= 11040
    auto status = cuGraphInstantiateWithFlags(&instance_handle, template_.handle(), flags);
    throw_if_error_lazy(status, "Instantiating " + template_::detail_::identify(template_) );
#else
    // Older driver API: instantiation reports a problematic node and fills an
    // error-log buffer, both of which we include in any thrown exception
    static constexpr const size_t log_buffer_size { 2048 };
    auto log_buffer = make_unique_span<char>(log_buffer_size);
    node::handle_t error_node;
    auto status = cuGraphInstantiate(&instance_handle, template_.handle(), &error_node, log_buffer.data(), log_buffer_size);
    throw_if_error_lazy(status, "Instantiating " + template_::detail_::identify(template_) + ": error at "
        + node::detail_::identify(error_node) + " ; log buffer contents:\n" + log_buffer.data());
#endif // CUDA_VERSION >= 11040
    static constexpr const bool is_owning { true };
    return instance::wrap(template_.handle(), instance_handle, is_owning);
}
513 
514 void launch(const cuda::stream_t& stream, const instance_t& instance);
515 
516 } // namespace graph
517 
518 } // namespace cuda
519 
520 #endif // CUDA_VERSION >= 10000
521 
522 #endif //CUDA_API_WRAPPERS_INSTANCE_HPP
Proxy class for a CUDA stream.
Definition: stream.hpp:258
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
constexpr bool is_failure(status_t status)
Determine whether the API call returning the specified status had failed.
Definition: error.hpp:220
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:852
void launch(Kernel &&kernel, launch_configuration_t launch_configuration, KernelParameters &&... parameters)
Variant of enqueue_launch for use with the default stream in the current context. ...
Definition: kernel_launch.hpp:396
named_t
Aliases for CUDA status codes.
Definition: error.hpp:36
inline ::std::string describe(status_t status)
Obtain a brief textual explanation for a specified kind of CUDA Runtime API status or error code...
Definition: error.hpp:226
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we've failed - which also ensures no string is constructed unless needed...
Definition: error.hpp:327
bool operator==(const context_t &lhs, const context_t &rhs) noexcept
Definition: context.hpp:768
CUarray handle_t
Raw CUDA driver handle for arrays (of any dimension)
Definition: array.hpp:34
array_t< T, NumDimensions > wrap(device::id_t device_id, context::handle_t context_handle, handle_t handle, dimensions_t< NumDimensions > dimensions) noexcept
Wrap an existing CUDA array in an array_t instance.
Definition: array.hpp:271
CUstream handle_t
The CUDA driver's raw handle for streams.
Definition: types.hpp:236
Graph template node proxy (base-)class base-class node_t and supporting code.
constexpr bool is_success(status_t status)
Determine whether the API call returning the specified status had succeeded.
Definition: error.hpp:214
CUresult status_t
Indicates either the result (success or error index) of a CUDA Runtime or Driver API call...
Definition: types.hpp:74