8 #ifndef CUDA_API_WRAPPERS_INSTANCE_HPP 9 #define CUDA_API_WRAPPERS_INSTANCE_HPP 11 #if CUDA_VERSION >= 10000 15 #include "../types.hpp" 31 using update_status_t = CUgraphExecUpdateResult;
33 namespace update_status {
35 enum named_t : ::std::underlying_type<update_status_t>::type {
36 success = CU_GRAPH_EXEC_UPDATE_SUCCESS,
37 failure_for_unexpected_reason = CU_GRAPH_EXEC_UPDATE_ERROR,
38 topology_has_changed = CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED,
39 node_type_has_changed = CU_GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED,
40 kernel_node_function_has_changed = CU_GRAPH_EXEC_UPDATE_ERROR_FUNCTION_CHANGED,
41 unsupported_kind_of_parameter_change = CU_GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED,
42 unsupported_aspect_of_node = CU_GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED,
43 #if CUDA_VERSION >= 11020 44 unsupported_kind_of_kernel_node_function_change = CU_GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE,
45 #if CUDA_VERSION >= 11060 46 unsupported_kind_of_node_attributes_change = CU_GRAPH_EXEC_UPDATE_ERROR_ATTRIBUTES_CHANGED,
47 #endif // CUDA_VERSION >= 11060 48 #endif // CUDA_VERSION >= 11020 51 constexpr
inline bool operator==(
const update_status_t &lhs,
const named_t &rhs) noexcept {
return lhs ==
static_cast<update_status_t
>(rhs); }
52 constexpr
inline bool operator!=(
const update_status_t &lhs,
const named_t &rhs) noexcept {
return lhs !=
static_cast<update_status_t
>(rhs); }
53 constexpr
inline bool operator==(
const named_t &lhs,
const update_status_t &rhs) noexcept {
return static_cast<update_status_t
>(lhs) == rhs; }
54 constexpr
inline bool operator!=(
const named_t &lhs,
const update_status_t &rhs) noexcept {
return static_cast<update_status_t
>(lhs) != rhs; }
58 constexpr
const char *
const descriptions[] = {
60 "failure for an unexpected reason described in the return value of the function",
61 "topology has changed",
62 "node type has changed",
63 "kernel node function has changed",
64 "parameters changed in an unsupported way",
65 "something about the node is not supported",
66 "unsupported kind of kernel node function change",
67 "unsupported kind of node attributes change" 70 inline bool is_node_specific(update_status_t update_status)
73 update_status != success and
74 update_status != failure_for_unexpected_reason and
75 update_status != topology_has_changed and
76 update_status != unsupported_kind_of_parameter_change;
85 using flags_t = cuuint64_t;
87 inline const char *
describe(instance::update_status_t update_status)
89 return instance::update_status::detail_::descriptions[update_status];
93 instance::update_status_t update_status,
97 #if CUDA_VERSION >= 13010 101 auto status = cuGraphExecGetId(handle, &
id);
105 #endif // CUDA_VERSION >= 13010 132 void launch(
const instance_t& instance,
const stream_t& stream);
139 inline ::std::string
describe(graph::instance::update_status_t status)
144 ::std::string
describe(graph::instance::update_status_t update_status, optional<graph::node_t> node);
149 inline constexpr
bool is_success(graph::instance::update_status_t status)
151 return status == graph::instance::update_status::success;
157 constexpr
bool is_failure(graph::instance::update_status_t status) {
return not
is_success(status); }
167 ::std::string identify(
const instance_t &instance);
// NOTE(review): extraction-garbled fragment — the original rendering dropped
// several source lines here (the constructor's name line, access specifiers,
// braces). Code tokens below are left exactly as extracted; only comments added.
//
// Exception class thrown when updating an execution-graph instance fails;
// carries the kind of failure and - when the failure is node-specific -
// the node to which the update could not be applied.
172 class update_failure :
public ::std::runtime_error {
174 using parent = ::std::runtime_error;
// Constructor parameters: the failure kind, the offending node (if any), and a
// message prefix — NOTE(review): the constructor's opening line is missing
// from this extraction.
177 update_status_t kind,
178 optional<node_t>&& impermissible_node,
179 ::std::string&& what_arg) noexcept
// what() text: "<what_arg>: " (prefix omitted when what_arg is empty) followed
// by a description of the failure kind and node.
181 parent((what_arg.empty() ?
"" : what_arg +
": ") +
describe(kind, impermissible_node)),
183 impermissible_node_(::std::move(impermissible_node))
// Convenience constructor: node-specific failure with no message prefix.
188 update_failure(update_status_t kind, node_t impermissible_node) noexcept
189 : update_failure(kind, optional<node_t>(::std::move(impermissible_node)),
"")
// The kind of update failure which occurred.
192 update_status_t kind() const noexcept {
return kind_; }
// The node which could not be updated; optional::value() throws here if the
// failure was not node-specific.
193 node_t impermissible_node()
const {
return impermissible_node_.value(); }
// Data members — NOTE(review): the access-specifier line preceding these is
// missing from this extraction.
196 update_status_t kind_;
197 optional<node_t> impermissible_node_;
209 void update(
const instance_t& destination,
const template_t& source);
213 template <node::kind_t Kind>
214 status_t set_node_parameters_nothrow(
217 const typename node::detail_::kind_traits<Kind>::raw_parameters_type raw_params)
219 auto raw_params_maybe_ptr = node::detail_::maybe_add_ptr<Kind>(raw_params);
220 return node::detail_::kind_traits<Kind>::instance_setter(instance_handle, node_handle, raw_params_maybe_ptr);
226 template <node::kind_t Kind>
227 void set_node_parameters(
228 const instance_t& instance,
230 const node::parameters_t<Kind> parameters);
238 auto status = cuGraphLaunch(graph_instance_handle, stream_handle);
240 + instance::detail_::identify(graph_instance_handle) +
" on " + stream::detail_::identify(stream_handle));
245 context::current::detail_::scoped_override_t set_context_for_this_scope(context_handle);
246 launch_graph_in_current_context(stream_handle, graph_instance_handle);
// NOTE(review): extraction-garbled fragment of the instance_t class (a wrapper
// around a driver-level executable-graph handle). The class header, member
// declarations and several braces were dropped by the extraction; code tokens
// below are left exactly as extracted, only comments are added.
//
// Raw driver handle of this executable graph instance.
257 handle_type handle() const noexcept {
return handle_; }
// Whether this wrapper owns (and will destroy) the underlying handle.
258 bool is_owning() const noexcept {
return owning_; }
// Member-initializer list of a constructor whose signature line is missing
// from this extraction.
262 : template_handle_(template_handle), handle_(handle), owning_(owning)
// Copying a graph instance is not supported.
266 instance_t(
const instance_t& other) noexcept =
delete;
// Move construction: take over the source's handles, then mark the source
// non-owning so its destructor does not destroy the transferred handle.
268 instance_t(instance_t&& other) noexcept : instance_t(other.template_handle_, other.handle_, other.owning_)
270 other.owning_ =
false;
// Destructor: destroys the driver-level executable graph — presumably gated on
// owning_, but the guard lines are missing from this extraction; confirm.
272 ~instance_t() DESTRUCTOR_EXCEPTION_SPEC
275 auto status = cuGraphExecDestroy(handle_);
276 #if THROW_IN_DESTRUCTORS 285 instance_t& operator=(
const instance_t&) =
delete;
// Move assignment implemented via member-wise swap; the moved-from object
// takes over this object's previous state and releases it on destruction.
286 instance_t& operator=(instance_t&& other) noexcept
288 ::std::swap(template_handle_, other.template_handle_);
289 ::std::swap(handle_, other.handle_);
290 ::std::swap(owning_, other.owning_);
// Update this instance from a (compatible) graph template.
299 void update(
const template_t& update_source)
const 301 instance::update(*
this, update_source);
// Schedule the instance for execution on a stream; upload() (CUDA >= 11.1)
// pre-loads it onto the device without launching.
304 void launch(
const stream_t& stream)
const 308 #if CUDA_VERSION >= 11010 309 void upload(
const stream_t& stream)
const;
// Query the instantiation flags recorded by the driver for this instance
// (CUDA >= 12.0): auto-free-on-launch and per-node-priority bits.
310 #endif // CUDA_VERSION >= 11010 312 #if CUDA_VERSION >= 12000 313 bool frees_allocations_before_relaunch()
const 315 instance::detail_::flags_t flags;
316 auto status = cuGraphExecGetFlags (handle_, &flags);
318 return flags & CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH;
321 bool uses_node_priorities()
const 323 instance::detail_::flags_t flags;
324 auto status = cuGraphExecGetFlags (handle_, &flags);
326 return flags & CUDA_GRAPH_INSTANTIATE_FLAG_USE_NODE_PRIORITY;
// Numeric id of this instance (CUDA >= 13.1); the enclosing method's
// signature line is missing from this extraction.
331 #if CUDA_VERSION >= 13010 336 return instance::detail_::get_id(handle_);
// Set the parameters of one node of this instance, either from an explicit
// parameters structure or from a typed node carrying its own parameters;
// delegates to the free function instance::set_node_parameters.
338 #endif // CUDA_VERSION >= 13010 340 template <node::kind_t Kind>
341 void set_node_parameters(
const node_t& node, node::parameters_t<Kind> new_parameters)
343 instance::set_node_parameters<Kind>(*
this, node, ::std::move(new_parameters));
346 template <node::kind_t Kind>
347 void set_node_parameters(
const node::typed_node_t<Kind>& node)
349 instance::set_node_parameters<Kind>(*
this, node);
// Pre-load an executable graph instance onto the device via a stream, without
// launching it.
370 void upload(
const instance_t& instance,
const stream_t& stream);
// Tail of a wrap() factory whose header line is missing from this extraction.
376 return instance_t{template_handle, handle, is_owning};
// Boolean convenience aliases, apparently intended as readable arguments for
// instantiate()'s flag parameters — the enclosing declaration's opening line
// is missing from this extraction.
380 do_free_previous_allocations_before_relaunch =
true,
382 dont_free_previous_allocations_before_relaunch =
false,
383 no_auto_free =
false,
384 #if CUDA_VERSION >= 12000 386 do_upload_on_instantiation =
true,
387 dont_upload_on_instantiation =
false,
389 no_auto_upload =
false,
390 manual_upload =
false,
392 make_launchable_from_device_code =
true,
// BUG(review): the alias below is set to `true`, contradicting both its own
// `dont_` name and dont_make_device_launchable = false two entries down —
// almost certainly should be `false`. Left untouched pending confirmation.
393 dont_make_launchable_from_device_code =
true,
394 do_make_device_launchable =
true,
395 dont_make_device_launchable =
false,
396 #endif // CUDA_VERSION >= 12000 397 #if CUDA_VERSION >= 11700 399 do_use_per_node_priorities =
true,
400 do_use_per_node_priority =
true,
401 dont_use_per_node_priorities =
false,
// BUG(review): `true` here contradicts dont_use_per_node_priorities = false
// just above — the singular and plural aliases should agree; almost certainly
// should be `false`. Left untouched pending confirmation.
402 dont_use_per_node_priority =
true,
403 use_stream_priority =
false 404 #endif // CUDA_VERSION >= 11700 409 #if CUDA_VERSION >= 11040 410 inline flags_t build_flags(
411 bool free_previous_allocations_before_relaunch
412 #
if CUDA_VERSION >= 12000
413 ,
bool upload_on_instantiation
414 ,
bool make_device_launchable
416 #
if CUDA_VERSION >= 11700
417 ,
bool use_per_node_priorities
422 (free_previous_allocations_before_relaunch ? CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH : 0)
423 #if CUDA_VERSION >= 12000 424 | (upload_on_instantiation ? CUDA_GRAPH_INSTANTIATE_FLAG_UPLOAD : 0)
425 | (make_device_launchable ? CUDA_GRAPH_INSTANTIATE_FLAG_DEVICE_LAUNCH : 0)
427 #if CUDA_VERSION >= 11700 428 | (use_per_node_priorities ? CUDA_GRAPH_INSTANTIATE_FLAG_USE_NODE_PRIORITY : 0)
432 #endif // CUDA_VERSION >= 11040 434 inline ::std::string identify(
const instance_t& instance)
436 return identify(instance.handle()) +
" instantiated from " 437 + template_::detail_::identify(instance.template_handle());
440 inline ::std::string identify(
const instance_t& instance,
const template_t& template_)
442 return identify(instance.handle()) +
" instantiated from " 443 + template_::detail_::identify(template_);
448 template <node::kind_t Kind>
449 void set_node_parameters(
450 const instance_t& instance,
452 node::parameters_t<Kind> parameters)
454 auto status = detail_::set_node_parameters_nothrow<Kind>(
455 instance.handle(), node.handle(), node::detail_::kind_traits<Kind>::marshal(parameters));
457 +
" in " + instance::detail_::identify(instance));
461 template <node::kind_t Kind>
462 void set_node_parameters(
463 const instance_t& instance,
464 const node::typed_node_t<Kind>& node_with_new_params)
466 return set_node_parameters<Kind>(
467 instance,
static_cast<node_t&
>(node_with_new_params), node_with_new_params.parameters());
// NOTE(review): extraction-garbled fragment — the function's opening brace,
// local variable declarations (instance_handle, error_node) and some error
// handling lines were dropped; code tokens are left exactly as extracted.
//
// Create an executable instance of a graph template. Optional flags (gated by
// CUDA version) control freeing of allocations before relaunch, upload on
// instantiation, device-side launchability and per-node priorities.
473 inline instance_t instantiate(
474 const template_t& template_
475 #
if CUDA_VERSION >= 11040
476 ,
bool free_previous_allocations_before_relaunch =
false 478 #
if CUDA_VERSION >= 12000
479 ,
bool upload_on_instantiation =
false 480 ,
bool make_device_launchable =
false 482 #
if CUDA_VERSION >= 11700
483 ,
bool use_per_node_priorities =
// Fold the boolean parameters into a driver flags word.
false 487 #if CUDA_VERSION >= 11040 488 instance::detail_::flags_t flags = instance::detail_::build_flags(
489 free_previous_allocations_before_relaunch
490 #
if CUDA_VERSION >= 12000
491 , upload_on_instantiation, make_device_launchable
493 #
if CUDA_VERSION >= 11700
494 , use_per_node_priorities
// CUDA >= 11.4 path: instantiate with the flags word directly.
497 #endif // CUDA_VERSION >= 11040 499 #if CUDA_VERSION >= 11040 500 auto status = cuGraphInstantiateWithFlags(&instance_handle, template_.handle(), flags);
// Pre-11.4 fallback path: the older cuGraphInstantiate reports the failing
// node and writes a diagnostic log into a caller-supplied buffer.
503 static constexpr
const size_t log_buffer_size { 2048 };
504 auto log_buffer = make_unique_span<char>(log_buffer_size);
506 auto status = cuGraphInstantiate(&instance_handle, template_.handle(), &error_node, log_buffer.data(), log_buffer_size);
507 throw_if_error_lazy(status,
"Instantiating " + template_::detail_::identify(template_) +
": error at " 508 + node::detail_::identify(error_node) +
" ; log buffer contents:\n" + log_buffer.data());
// NOTE(review): the #endif comment below says ">= 11000" but the matching
// visible #if (orig. line 499) says ">= 11040" — stale comment; confirm against
// the opener (an #else on a dropped line may exist).
509 #endif // CUDA_VERSION >= 11000 510 static constexpr
const bool is_owning {
true };
// Hand ownership of the new driver handle to an instance_t wrapper.
511 return instance::wrap(template_.handle(), instance_handle, is_owning);
520 #endif // CUDA_VERSION >= 10000 522 #endif //CUDA_API_WRAPPERS_INSTANCE_HPP Proxy class for a CUDA stream.
Definition: stream.hpp:258
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
constexpr bool is_failure(status_t status)
Determine whether the API call returning the specified status had failed.
Definition: error.hpp:220
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:852
void launch(Kernel &&kernel, launch_configuration_t launch_configuration, KernelParameters &&... parameters)
Variant of enqueue_launch for use with the default stream in the current context. ...
Definition: kernel_launch.hpp:396
named_t
Aliases for CUDA status codes.
Definition: error.hpp:36
inline ::std::string describe(status_t status)
Obtain a brief textual explanation for a specified kind of CUDA Runtime API status or error code...
Definition: error.hpp:226
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we've failed - which also ensures no string is constructed unle...
Definition: error.hpp:327
bool operator==(const context_t &lhs, const context_t &rhs) noexcept
Definition: context.hpp:768
CUarray handle_t
Raw CUDA driver handle for arrays (of any dimension)
Definition: array.hpp:34
array_t< T, NumDimensions > wrap(device::id_t device_id, context::handle_t context_handle, handle_t handle, dimensions_t< NumDimensions > dimensions) noexcept
Wrap an existing CUDA array in an array_t instance.
Definition: array.hpp:271
CUstream handle_t
The CUDA driver's raw handle for streams.
Definition: types.hpp:236
Graph template node proxy (base-)class base-class node_t and supporting code.
constexpr bool is_success(status_t status)
Determine whether the API call returning the specified status had succeeded.
Definition: error.hpp:214
CUresult status_t
Indicates either the result (success or error index) of a CUDA Runtime or Driver API call...
Definition: types.hpp:74