cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
instance.hpp
Go to the documentation of this file.
1 
7 #pragma once
8 #ifndef CUDA_API_WRAPPERS_INSTANCE_HPP
9 #define CUDA_API_WRAPPERS_INSTANCE_HPP
10 
11 #if CUDA_VERSION >= 10000
12 
13 #include "node.hpp"
14 #include "identify.hpp"
15 #include "../types.hpp"
16 
17 namespace cuda {
18 
19 class stream_t;
20 
21 namespace graph {
22 
23 class instance_t;
24 
26 class template_t;
28 
29 namespace instance {
30 
31 using update_status_t = CUgraphExecUpdateResult;
32 
33 namespace update_status {
34 
35 enum named_t : ::std::underlying_type<update_status_t>::type {
36  success = CU_GRAPH_EXEC_UPDATE_SUCCESS,
37  failure_for_unexpected_reason = CU_GRAPH_EXEC_UPDATE_ERROR,
38  topology_has_changed = CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED,
39  node_type_has_changed = CU_GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED,
40  kernel_node_function_has_changed = CU_GRAPH_EXEC_UPDATE_ERROR_FUNCTION_CHANGED,
41  unsupported_kind_of_parameter_change = CU_GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED,
42  unsupported_aspect_of_node = CU_GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED,
43 #if CUDA_VERSION >= 11020
44  unsupported_kind_of_kernel_node_function_change = CU_GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE,
45 #if CUDA_VERSION >= 11060
46  unsupported_kind_of_node_attributes_change = CU_GRAPH_EXEC_UPDATE_ERROR_ATTRIBUTES_CHANGED,
47 #endif // CUDA_VERSION >= 11060
48 #endif // CUDA_VERSION >= 11020
49 };
50 
51 constexpr inline bool operator==(const update_status_t &lhs, const named_t &rhs) noexcept { return lhs == static_cast<update_status_t>(rhs); }
52 constexpr inline bool operator!=(const update_status_t &lhs, const named_t &rhs) noexcept { return lhs != static_cast<update_status_t>(rhs); }
53 constexpr inline bool operator==(const named_t &lhs, const update_status_t &rhs) noexcept { return static_cast<update_status_t>(lhs) == rhs; }
54 constexpr inline bool operator!=(const named_t &lhs, const update_status_t &rhs) noexcept { return static_cast<update_status_t>(lhs) != rhs; }
55 
56 namespace detail_ {
57 
/// Human-readable descriptions of update statuses. The table is looked up
/// using the numeric value of an update status as the index, so the order of
/// the entries must not be changed.
const char *const descriptions[] = {
    /* 0 */ "success",
    /* 1 */ "failure for an unexpected reason described in the return value of the function",
    /* 2 */ "topology has changed",
    /* 3 */ "node type has changed",
    /* 4 */ "kernel node function has changed",
    /* 5 */ "parameters changed in an unsupported way",
    /* 6 */ "something about the node is not supported",
    /* 7 */ "unsupported kind of kernel node function change",
    /* 8 */ "unsupported kind of node attributes change"
};
69 
70 inline bool is_node_specific(update_status_t update_status)
71 {
72  return
73  (update_status != success) and
74  (update_status != failure_for_unexpected_reason) and
75  (update_status != topology_has_changed) and
76  (update_status != unsupported_kind_of_parameter_change);
77 }
78 
79 } // namespace detail_
80 
81 } // namespace update_status
82 
83 namespace detail_ {
84 
85 using flags_t = cuuint64_t;
86 
87 inline const char *describe(instance::update_status_t update_status)
88 {
89  return instance::update_status::detail_::descriptions[update_status];
90 }
91 
92 inline ::std::string describe(
93  instance::update_status_t update_status,
94  node::handle_t node_handle,
95  template_::handle_t graph_template_handle);
96 
97 } // namespace detail_
98 
99 } // namespace instance
100 
101 
122  void launch(const instance_t& instance, const stream_t& stream);
123 
124 } // namespace graph
125 
129 inline ::std::string describe(graph::instance::update_status_t status)
130 {
131  return graph::instance::detail_::describe(status);
132 }
133 
134 ::std::string describe(graph::instance::update_status_t update_status, optional<graph::node_t> node);
135 
139 inline constexpr bool is_success(graph::instance::update_status_t status)
140 {
141  return status == graph::instance::update_status::success;
142 }
143 
147 constexpr bool is_failure(graph::instance::update_status_t status) { return not is_success(status); }
148 
149 namespace graph {
150 
151 namespace instance {
152 
153 instance_t wrap(template_::handle_t template_handle, handle_t handle, bool is_owning) noexcept;
154 
155 namespace detail_ {
156 
157 ::std::string identify(const instance_t &instance);
158 
159 } // namespace detail_
160 
161 // TODO: Add support for reporting errors involving edges
162 class update_failure : public ::std::runtime_error {
163 public:
164  using parent = ::std::runtime_error;
165 
166  update_failure(
167  update_status_t kind,
168  optional<node_t>&& impermissible_node,
169  ::std::string&& what_arg) noexcept
170  :
171  parent((what_arg.empty() ? "" : what_arg + ": ") + describe(kind, impermissible_node)),
172  kind_(kind),
173  impermissible_node_(std::move(impermissible_node))
174  {
175  // TODO: Ensure the kind needs a node handle IFF a node handle has been provided
176  }
177 
178  update_failure(update_status_t kind, node_t impermissible_node) noexcept
179  : update_failure(kind, optional<node_t>(std::move(impermissible_node)), "")
180  { }
181 
182  update_status_t kind() const noexcept { return kind_; }
183  node_t impermissible_node() const { return impermissible_node_.value(); }
184 
185 private:
186  update_status_t kind_;
187  optional<node_t> impermissible_node_;
188 };
189 
199 void update(const instance_t& destination, const template_t& source);
200 
201 namespace detail_ {
202 
203 template <node::kind_t Kind>
204 status_t set_node_parameters_nothrow(
205  const instance::handle_t instance_handle,
206  const node::handle_t node_handle,
207  const typename node::detail_::kind_traits<Kind>::raw_parameters_type raw_params)
208 {
209  auto raw_params_maybe_ptr = node::detail_::maybe_add_ptr<Kind>(raw_params);
210  return node::detail_::kind_traits<Kind>::instance_setter(instance_handle, node_handle, raw_params_maybe_ptr);
211 }
212 
213 } // namespace detail_
214 
215 
216 template <node::kind_t Kind>
217 void set_node_parameters(
218  const instance_t& instance,
219  const node_t& node,
220  const node::parameters_t<Kind> parameters);
221 
222 } // namespace instance
223 
224 namespace detail_ {
225 
226 inline void launch_graph_in_current_context(stream::handle_t stream_handle, instance::handle_t graph_instance_handle)
227 {
228  auto status = cuGraphLaunch(graph_instance_handle, stream_handle);
229  throw_if_error_lazy(status, "Trying to launch "
230  + instance::detail_::identify(graph_instance_handle) + " on " + stream::detail_::identify(stream_handle));
231 }
232 
233 inline void launch(context::handle_t context_handle, stream::handle_t stream_handle, instance::handle_t graph_instance_handle)
234 {
235  context::current::detail_::scoped_override_t set_context_for_this_scope(context_handle);
236  launch_graph_in_current_context(stream_handle, graph_instance_handle);
237 }
238 
239 } // namespace detail_
240 
241 class instance_t {
242 public: // data types
243  using handle_type = instance::handle_t;
244 
245 public: // getters
246  template_::handle_t template_handle() const noexcept { return template_handle_; }
247  handle_type handle() const noexcept { return handle_; }
248  bool is_owning() const noexcept { return owning_; }
249 
250 protected: // constructors
251  instance_t(template_::handle_t template_handle, handle_type handle, bool owning) noexcept
252  : template_handle_(template_handle), handle_(handle), owning_(owning)
253  { }
254 
255 public: // constructors & destructor
256  instance_t(const instance_t& other) noexcept = delete;
257 
258  instance_t(instance_t&& other) noexcept : instance_t(other.template_handle_, other.handle_, other.owning_)
259  {
260  other.owning_ = false;
261  }
262  ~instance_t()
263  {
264  if (owning_) cuGraphExecDestroy(handle_);
265  }
266 
267 public: // operators
268  instance_t& operator=(const instance_t&) = delete;
269  instance_t& operator=(instance_t&& other) noexcept
270  {
271  ::std::swap(template_handle_, other.template_handle_);
272  ::std::swap(handle_, other.handle_);
273  ::std::swap(owning_, other.owning_);
274  return *this;
275  }
276 
277 
278 public: // friends
279  friend instance_t instance::wrap(template_::handle_t template_handle, handle_type handle, bool is_owning) noexcept;
280 
281 public: // non-mutators
282  void update(const template_t& update_source) const
283  {
284  instance::update(*this, update_source);
285  }
286 
287  void launch(const stream_t& stream) const
288  {
289  graph::launch(*this, stream);
290  }
291 #if CUDA_VERSION >= 11010
292  void upload(const stream_t& stream) const;
293 #endif // CUDA_VERSION >= 11010
294 
295 #if CUDA_VERSION >= 12000
296  bool frees_allocations_before_relaunch() const
297  {
298  instance::detail_::flags_t flags;
299  auto status = cuGraphExecGetFlags (handle_, &flags);
300  throw_if_error_lazy(status, "Obtaining execution graph instance flags");
301  return flags & CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH;
302  }
303 
304  bool uses_node_priorities() const
305  {
306  instance::detail_::flags_t flags;
307  auto status = cuGraphExecGetFlags (handle_, &flags);
308  throw_if_error_lazy(status, "Obtaining execution graph instance flags");
309  return flags & CUDA_GRAPH_INSTANTIATE_FLAG_USE_NODE_PRIORITY;
310  }
311 
312 #endif
313 
314  template <node::kind_t Kind>
315  void set_node_parameters(const node_t& node, node::parameters_t<Kind> new_parameters)
316  {
317  instance::set_node_parameters<Kind>(*this, node, ::std::move(new_parameters));
318  }
319 
320  template <node::kind_t Kind>
321  void set_node_parameters(const node::typed_node_t<Kind>& node)
322  {
323  instance::set_node_parameters<Kind>(*this, node);
324  }
325 
326 protected:
327  template_::handle_t template_handle_;
328  handle_type handle_;
329  bool owning_;
330 };
331 
344 void upload(const instance_t& instance, const stream_t& stream);
345 
346 namespace instance {
347 
348 inline instance_t wrap(template_::handle_t template_handle, handle_t handle, bool is_owning) noexcept
349 {
350  return instance_t{template_handle, handle, is_owning};
351 }
352 
/// Named boolean constants, usable as self-explanatory arguments in place of
/// bare true/false values for the boolean graph instantiation options. Every
/// "do"/positive alias is true and every "dont"/"no"/negative alias is false.
enum : bool {
    do_free_previous_allocations_before_relaunch = true,
    auto_free = true,
    dont_free_previous_allocations_before_relaunch = false,
    no_auto_free = false,
#if CUDA_VERSION >= 12000

    do_upload_on_instantiation = true,
    dont_upload_on_instantiation = false,
    auto_upload = true,
    no_auto_upload = false,
    manual_upload = false,

    make_launchable_from_device_code = true,
    // Used to be true, i.e. using this alias requested the very thing it
    // is named for avoiding
    dont_make_launchable_from_device_code = false,
    do_make_device_launchable = true,
    dont_make_device_launchable = false,
#endif // CUDA_VERSION >= 12000
#if CUDA_VERSION >= 11700

    do_use_per_node_priorities = true,
    do_use_per_node_priority = true,
    dont_use_per_node_priorities = false,
    // Used to be true, in disagreement with the plural-form alias above
    dont_use_per_node_priority = false,
    use_stream_priority = false
#endif // CUDA_VERSION >= 11700
};
380 
381 namespace detail_ {
382 
#if CUDA_VERSION >= 11040
/// Combines the boolean instantiation options into a single flags value,
/// with each option contributing its corresponding CUDA driver flag if and
/// only if it is true. The options available depend on the CUDA version.
inline flags_t build_flags(
    bool free_previous_allocations_before_relaunch
#if CUDA_VERSION >= 12000
    , bool upload_on_instantiation
    , bool make_device_launchable
#endif // CUDA_VERSION >= 12000
#if CUDA_VERSION >= 11700
    , bool use_per_node_priorities
#endif // CUDA_VERSION >= 11700
    )
{
    flags_t combined_flags { 0 };
    if (free_previous_allocations_before_relaunch) {
        combined_flags |= CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH;
    }
#if CUDA_VERSION >= 12000
    if (upload_on_instantiation) {
        combined_flags |= CUDA_GRAPH_INSTANTIATE_FLAG_UPLOAD;
    }
    if (make_device_launchable) {
        combined_flags |= CUDA_GRAPH_INSTANTIATE_FLAG_DEVICE_LAUNCH;
    }
#endif // CUDA_VERSION >= 12000
#if CUDA_VERSION >= 11700
    if (use_per_node_priorities) {
        combined_flags |= CUDA_GRAPH_INSTANTIATE_FLAG_USE_NODE_PRIORITY;
    }
#endif // CUDA_VERSION >= 11700
    return combined_flags;
}
#endif // CUDA_VERSION >= 11040
407 
408 inline ::std::string identify(const instance_t& instance)
409 {
410  return identify(instance.handle()) + " instantiated from "
411  + template_::detail_::identify(instance.template_handle());
412 }
413 
414 inline ::std::string identify(const instance_t& instance, const template_t& template_)
415 {
416  return identify(instance.handle()) + " instantiated from "
417  + template_::detail_::identify(template_);
418 }
419 
420 } // namespace detail_
421 
422 template <node::kind_t Kind>
423 void set_node_parameters(
424  const instance_t& instance,
425  const node_t& node,
426  node::parameters_t<Kind> parameters)
427 {
428  auto status = detail_::set_node_parameters_nothrow<Kind>(
429  instance.handle(), node.handle(), node::detail_::kind_traits<Kind>::marshal(parameters));
430  throw_if_error_lazy(status, "Setting parameters of " + node::detail_::identify(node)
431  + " in " + instance::detail_::identify(instance));
432 }
433 
434 
435 template <node::kind_t Kind>
436 void set_node_parameters(
437  const instance_t& instance,
438  const node::typed_node_t<Kind>& node_with_new_params)
439 {
440  return set_node_parameters<Kind>(
441  instance, static_cast<node_t&>(node_with_new_params), node_with_new_params.parameters());
442 }
443 
444 
445 } // namespace instance
446 
447 inline instance_t instantiate(
448  const template_t& template_
449 #if CUDA_VERSION >= 11040
450  , bool free_previous_allocations_before_relaunch = false
451 #endif
452 #if CUDA_VERSION >= 12000
453  , bool upload_on_instantiation = false
454  , bool make_device_launchable = false
455 #endif
456 #if CUDA_VERSION >= 11700
457  , bool use_per_node_priorities = false
458 #endif
459 )
460 {
461 #if CUDA_VERSION >= 11040
462  instance::detail_::flags_t flags = instance::detail_::build_flags(
463  free_previous_allocations_before_relaunch
464 #if CUDA_VERSION >= 12000
465  , upload_on_instantiation, make_device_launchable
466 #endif
467 #if CUDA_VERSION >= 11700
468  , use_per_node_priorities
469 #endif
470  );
471 #endif // CUDA_VERSION >= 11040
472  instance::handle_t instance_handle;
473 #if CUDA_VERSION >= 11040
474  auto status = cuGraphInstantiateWithFlags(&instance_handle, template_.handle(), static_cast<unsigned long long>(flags));
475  throw_if_error_lazy(status, "Instantiating " + template_::detail_::identify(template_) );
476 #else
477  static constexpr const size_t log_buffer_size { 2048 };
478  auto log_buffer = make_unique_span<char>(log_buffer_size);
479  node::handle_t error_node;
480  auto status = cuGraphInstantiate(&instance_handle, template_.handle(), &error_node, log_buffer.data(), log_buffer_size);
481  throw_if_error_lazy(status, "Instantiating " + template_::detail_::identify(template_) + ": error at "
482  + node::detail_::identify(error_node) + " ; log buffer contents:\n" + log_buffer.data());
483 #endif // CUDA_VERSION >= 11000
484  static constexpr const bool is_owning { true };
485  return instance::wrap(template_.handle(), instance_handle, is_owning);
486 }
487 
488 void launch(const cuda::stream_t& stream, const instance_t& instance);
489 
490 } // namespace graph
491 
492 } // namespace cuda
493 
494 #endif // CUDA_VERSION >= 10000
495 
496 #endif //CUDA_API_WRAPPERS_INSTANCE_HPP
Proxy class for a CUDA stream.
Definition: stream.hpp:246
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
constexpr bool is_failure(status_t status)
Determine whether the API call returning the specified status had failed.
Definition: error.hpp:209
void launch(Kernel &&kernel, launch_configuration_t launch_configuration, KernelParameters &&... parameters)
Variant of enqueue_launch for use with the default stream in the current context. ...
Definition: kernel_launch.hpp:394
named_t
Aliases for CUDA status codes.
Definition: error.hpp:36
inline ::std::string describe(status_t status)
Obtain a brief textual explanation for a specified kind of CUDA Runtime API status or error code...
Definition: error.hpp:215
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we've failed - which also ensures no string is constructed unle...
Definition: error.hpp:316
bool operator==(const context_t &lhs, const context_t &rhs) noexcept
Definition: context.hpp:762
CUarray handle_t
Raw CUDA driver handle for arrays (of any dimension)
Definition: array.hpp:34
array_t< T, NumDimensions > wrap(device::id_t device_id, context::handle_t context_handle, handle_t handle, dimensions_t< NumDimensions > dimensions) noexcept
Wrap an existing CUDA array in an array_t instance.
Definition: array.hpp:264
CUstream handle_t
The CUDA driver's raw handle for streams.
Definition: types.hpp:239
Graph template node proxy (base-)class base-class node_t and supporting code.
constexpr bool is_success(status_t status)
Determine whether the API call returning the specified status had succeeded.
Definition: error.hpp:203
CUresult status_t
Indicates either the result (success or error index) of a CUDA Runtime or Driver API call...
Definition: types.hpp:77