8 #ifndef CUDA_API_WRAPPERS_INSTANCE_HPP 9 #define CUDA_API_WRAPPERS_INSTANCE_HPP 11 #if CUDA_VERSION >= 10000 15 #include "../types.hpp" 31 using update_status_t = CUgraphExecUpdateResult;
33 namespace update_status {
// Readable, strongly-named aliases for the CUDA driver's graph-instance
// update result codes (CUgraphExecUpdateResult), declared with the same
// underlying type as update_status_t so values convert/compare losslessly.
// NOTE(review): the stray integers at the start of these lines ("35", "36",
// ...) are original source line numbers fused in by the text extraction;
// the enum's closing brace is on a line the extraction dropped.
35 enum named_t : ::std::underlying_type<update_status_t>::type {
// The instance update succeeded
36 success = CU_GRAPH_EXEC_UPDATE_SUCCESS,
// The update failed for a reason not covered by the more specific codes
37 failure_for_unexpected_reason = CU_GRAPH_EXEC_UPDATE_ERROR,
38 topology_has_changed = CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED,
39 node_type_has_changed = CU_GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED,
40 kernel_node_function_has_changed = CU_GRAPH_EXEC_UPDATE_ERROR_FUNCTION_CHANGED,
41 unsupported_kind_of_parameter_change = CU_GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED,
42 unsupported_aspect_of_node = CU_GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED,
// Result codes only defined by CUDA 11.2 / 11.6 and later driver APIs:
43 #if CUDA_VERSION >= 11020 44 unsupported_kind_of_kernel_node_function_change = CU_GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE,
45 #if CUDA_VERSION >= 11060 46 unsupported_kind_of_node_attributes_change = CU_GRAPH_EXEC_UPDATE_ERROR_ATTRIBUTES_CHANGED,
47 #endif // CUDA_VERSION >= 11060 48 #endif // CUDA_VERSION >= 11020 51 constexpr
inline bool operator==(const update_status_t &lhs, const named_t &rhs) noexcept
{
	// Convert the named constant to the raw driver type, then compare
	return static_cast<update_status_t>(rhs) == lhs;
}
52 constexpr
inline bool operator!=(
const update_status_t &lhs,
const named_t &rhs) noexcept {
return lhs !=
static_cast<update_status_t
>(rhs); }
53 constexpr
inline bool operator==(
const named_t &lhs,
const update_status_t &rhs) noexcept {
return static_cast<update_status_t
>(lhs) == rhs; }
54 constexpr
inline bool operator!=(
const named_t &lhs,
const update_status_t &rhs) noexcept {
return static_cast<update_status_t
>(lhs) != rhs; }
// Human-readable descriptions, indexed by the numeric value of an
// update_status_t (used by describe() further down in this file).
// NOTE(review): no entry for `success` (value 0) is visible here - it is
// presumably on a line dropped by the extraction; confirm against the full
// source, since describe(success) would otherwise index out of bounds.
58 const char *
const descriptions[] = {
60 "failure for an unexpected reason described in the return value of the function",
61 "topology has changed",
62 "node type has changed",
63 "kernel node function has changed",
64 "parameters changed in an unsupported way",
65 "something about the node is not supported",
66 "unsupported kind of kernel node function change",
// is_node_specific: true when the update failure concerns one specific node
// rather than the graph/instance as a whole. Its `return` keyword and braces
// appear to be on lines the extraction dropped.
67 "unsupported kind of node attributes change" 70 inline bool is_node_specific(update_status_t update_status)
73 (update_status != success) and
74 (update_status != failure_for_unexpected_reason) and
75 (update_status != topology_has_changed) and
76 (update_status != unsupported_kind_of_parameter_change);
// Raw 64-bit flags word for graph-instance flags (see the
// cuGraphExecGetFlags / cuGraphInstantiateWithFlags usage below)
85 using flags_t = cuuint64_t;
// Obtain a terse description string for an update status code.
// NOTE(review): indexes descriptions[] with no bounds check - relies on the
// array covering every update_status_t value; verify in the full source.
87 inline const char *
describe(instance::update_status_t update_status)
89 return instance::update_status::detail_::descriptions[update_status];
// Parameter fragment of a further declaration whose surrounding lines were
// dropped by the extraction:
93 instance::update_status_t update_status,
// Enqueue a launch of the instantiated graph on the given stream
// (presumably backed by cuGraphLaunch, as used by the detail_ helpers
// further down - confirm in the full source).
122 void launch(
const instance_t& instance,
const stream_t& stream);
// Describe an update status as an owning ::std::string (status-only overload).
129 inline ::std::string
describe(graph::instance::update_status_t status)
// Overload which also mentions the node the failure pertains to, when one
// is provided.
134 ::std::string
describe(graph::instance::update_status_t update_status, optional<graph::node_t> node);
// True when the update status indicates the update went through.
139 inline constexpr
bool is_success(graph::instance::update_status_t status)
141 return status == graph::instance::update_status::success;
147 constexpr
bool is_failure(graph::instance::update_status_t status) {
return not
is_success(status); }
// Produce a string identifying the instance, for error messages and the like.
157 ::std::string identify(
const instance_t &instance);
// Exception thrown when updating an executable graph instance from a template
// fails; carries the kind of failure and, when relevant, the offending node.
// (The class' closing brace is on a line dropped by the extraction.)
162 class update_failure :
public ::std::runtime_error {
164 using parent = ::std::runtime_error;
// Main constructor: builds the what() message from the optional prefix
// string and the describe()'d failure kind.
// NOTE(review): initialization of kind_ is not visible here - likely on a
// dropped line of the init list; confirm against the full source.
167 update_status_t kind,
168 optional<node_t>&& impermissible_node,
169 ::std::string&& what_arg) noexcept
171 parent((what_arg.empty() ?
"" : what_arg +
": ") +
describe(kind, impermissible_node)),
173 impermissible_node_(std::move(impermissible_node))
// Convenience constructor for when the offending node is known.
178 update_failure(update_status_t kind, node_t impermissible_node) noexcept
179 : update_failure(kind, optional<node_t>(std::move(impermissible_node)),
"")
// The kind of update failure which occurred.
182 update_status_t kind() const noexcept {
return kind_; }
// The node the failure pertains to; throws (via optional::value()) when
// no node was recorded.
183 node_t impermissible_node()
const {
return impermissible_node_.value(); }
186 update_status_t kind_;
187 optional<node_t> impermissible_node_;
// Update the destination instance to match the (modified) source template,
// without re-instantiating it.
199 void update(
const instance_t& destination,
const template_t& source);
// Set a node's parameters within an executable instance, returning the raw
// driver status rather than throwing; takes already-marshalled parameters.
203 template <node::kind_t Kind>
204 status_t set_node_parameters_nothrow(
207 const typename node::detail_::kind_traits<Kind>::raw_parameters_type raw_params)
// Some driver setters take the parameters struct by pointer; maybe_add_ptr
// adapts the argument accordingly per node kind.
209 auto raw_params_maybe_ptr = node::detail_::maybe_add_ptr<Kind>(raw_params);
210 return node::detail_::kind_traits<Kind>::instance_setter(instance_handle, node_handle, raw_params_maybe_ptr);
// Throwing, public-facing counterpart (defined further down in this file).
216 template <node::kind_t Kind>
217 void set_node_parameters(
218 const instance_t& instance,
220 const node::parameters_t<Kind> parameters);
// Fragments of the detail_ launch helpers; their signatures and the error
// check around `status` are on lines dropped by the extraction - confirm
// against the full source.
228 auto status = cuGraphLaunch(graph_instance_handle, stream_handle);
230 + instance::detail_::identify(graph_instance_handle) +
" on " + stream::detail_::identify(stream_handle));
// Variant which first switches the current context for this scope (RAII
// override object), then delegates to the in-current-context helper.
235 context::current::detail_::scoped_override_t set_context_for_this_scope(context_handle);
236 launch_graph_in_current_context(stream_handle, graph_instance_handle);
// The raw executable-graph handle wrapped by this object.
247 handle_type handle() const noexcept {
return handle_; }
// Whether this wrapper is responsible for destroying the handle.
248 bool is_owning() const noexcept {
return owning_; }
// Constructor initializer list (its signature is on a dropped line).
252 : template_handle_(template_handle), handle_(handle), owning_(owning)
// Not copyable - the wrapper may own its driver handle.
256 instance_t(
const instance_t& other) noexcept =
delete;
// Move constructor: copies the handles and ownership flag, then strips
// ownership from the source so its destructor will not destroy the handle.
258 instance_t(instance_t&& other) noexcept : instance_t(other.template_handle_, other.handle_, other.owning_)
260 other.owning_ =
false;
// Destructor body: destroy the executable graph only when owning.
264 if (owning_) cuGraphExecDestroy(handle_);
268 instance_t& operator=(
const instance_t&) =
delete;
// Move assignment by swapping fields; the moved-from object's destructor
// then releases whatever this object previously held.
269 instance_t& operator=(instance_t&& other) noexcept
271 ::std::swap(template_handle_, other.template_handle_);
272 ::std::swap(handle_, other.handle_);
273 ::std::swap(owning_, other.owning_);
// Update this instance from a (modified) template; see instance::update().
282 void update(
const template_t& update_source)
const 284 instance::update(*
this, update_source);
// Enqueue a launch of this instance on the given stream.
287 void launch(
const stream_t& stream)
const 291 #if CUDA_VERSION >= 11010 292 void upload(
const stream_t& stream)
const;
// Whether this instance was created with the auto-free-on-launch flag set.
// NOTE(review): `status` appears unused in these two getters - the error
// check is presumably on lines dropped by the extraction; confirm.
293 #endif // CUDA_VERSION >= 11010 295 #if CUDA_VERSION >= 12000 296 bool frees_allocations_before_relaunch()
const 298 instance::detail_::flags_t flags;
299 auto status = cuGraphExecGetFlags (handle_, &flags);
301 return flags & CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH;
// Whether this instance was created honoring per-node priorities.
304 bool uses_node_priorities()
const 306 instance::detail_::flags_t flags;
307 auto status = cuGraphExecGetFlags (handle_, &flags);
309 return flags & CUDA_GRAPH_INSTANTIATE_FLAG_USE_NODE_PRIORITY;
// Set the parameters of one node within this already-instantiated graph;
// delegates to the free function instance::set_node_parameters.
314 template <node::kind_t Kind>
315 void set_node_parameters(
const node_t& node, node::parameters_t<Kind> new_parameters)
317 instance::set_node_parameters<Kind>(*
this, node, ::std::move(new_parameters));
// Overload taking a typed node which carries its own new parameters.
320 template <node::kind_t Kind>
321 void set_node_parameters(
const node::typed_node_t<Kind>& node)
323 instance::set_node_parameters<Kind>(*
this, node);
// Upload (pre-load) the instance onto the device via the given stream,
// without launching it.
344 void upload(
const instance_t& instance,
const stream_t& stream);
// Tail of instance::wrap(): construct a wrapper around an existing handle.
350 return instance_t{template_handle, handle, is_owning};
354 do_free_previous_allocations_before_relaunch =
true,
356 dont_free_previous_allocations_before_relaunch =
false,
357 no_auto_free =
false,
358 #if CUDA_VERSION >= 12000 360 do_upload_on_instantiation =
true,
361 dont_upload_on_instantiation =
false,
363 no_auto_upload =
false,
364 manual_upload =
false,
366 make_launchable_from_device_code =
true,
367 dont_make_launchable_from_device_code =
true,
368 do_make_device_launchable =
true,
369 dont_make_device_launchable =
false,
370 #endif // CUDA_VERSION >= 12000 371 #if CUDA_VERSION >= 11700 373 do_use_per_node_priorities =
true,
374 do_use_per_node_priority =
true,
375 dont_use_per_node_priorities =
false,
376 dont_use_per_node_priority =
true,
377 use_stream_priority =
false 378 #endif // CUDA_VERSION >= 11700 383 #if CUDA_VERSION >= 11040 384 inline flags_t build_flags(
// Parameter list of build_flags(); the lone `#` / `if` / `,` fragments are
// the extraction's line-splitting of ordinary preprocessor conditionals.
385 bool free_previous_allocations_before_relaunch
386 #
if CUDA_VERSION >= 12000
387 ,
bool upload_on_instantiation
388 ,
bool make_device_launchable
390 #
if CUDA_VERSION >= 11700
391 ,
bool use_per_node_priorities
// Body: OR together one driver flag bit per enabled boolean. The `return`
// keyword and closing punctuation are on lines dropped by the extraction.
396 (free_previous_allocations_before_relaunch ? CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH : 0)
397 #if CUDA_VERSION >= 12000 398 | (upload_on_instantiation ? CUDA_GRAPH_INSTANTIATE_FLAG_UPLOAD : 0)
399 | (make_device_launchable ? CUDA_GRAPH_INSTANTIATE_FLAG_DEVICE_LAUNCH : 0)
401 #if CUDA_VERSION >= 11700 402 | (use_per_node_priorities ? CUDA_GRAPH_INSTANTIATE_FLAG_USE_NODE_PRIORITY : 0)
// Identify an instance by its own handle plus the template it came from
// (handle-based identify() overload is declared elsewhere).
406 #endif // CUDA_VERSION >= 11040 408 inline ::std::string identify(
const instance_t& instance)
410 return identify(instance.handle()) +
" instantiated from " 411 + template_::detail_::identify(instance.template_handle());
// Overload which names an explicitly-provided template instead.
414 inline ::std::string identify(
const instance_t& instance,
const template_t& template_)
416 return identify(instance.handle()) +
" instantiated from " 417 + template_::detail_::identify(template_);
// Set a node's parameters in an instantiated graph: marshal the typed
// parameters into the driver's raw struct, call the nothrow setter, and
// throw on failure (the throw_if_error_lazy head is on a dropped line).
422 template <node::kind_t Kind>
423 void set_node_parameters(
424 const instance_t& instance,
426 node::parameters_t<Kind> parameters)
428 auto status = detail_::set_node_parameters_nothrow<Kind>(
429 instance.handle(), node.handle(), node::detail_::kind_traits<Kind>::marshal(parameters));
431 +
" in " + instance::detail_::identify(instance));
// Convenience overload: the typed node carries its own replacement
// parameters; delegates to the overload above.
// NOTE(review): static_cast from a const reference to `node_t&` would not
// compile as written - likely `const node_t&` in the full source; confirm.
435 template <node::kind_t Kind>
436 void set_node_parameters(
437 const instance_t& instance,
438 const node::typed_node_t<Kind>& node_with_new_params)
440 return set_node_parameters<Kind>(
441 instance,
static_cast<node_t&
>(node_with_new_params), node_with_new_params.parameters());
// Instantiate an executable instance of the given graph template. The
// optional booleans (all defaulting to false/off) are only available on
// sufficiently new CUDA versions, hence the conditionals inside the
// parameter list (split oddly by the extraction).
447 inline instance_t instantiate(
448 const template_t& template_
449 #
if CUDA_VERSION >= 11040
450 ,
bool free_previous_allocations_before_relaunch =
false 452 #
if CUDA_VERSION >= 12000
453 ,
bool upload_on_instantiation =
false 454 ,
bool make_device_launchable =
false 456 #
if CUDA_VERSION >= 11700
457 ,
bool use_per_node_priorities =
false 461 #if CUDA_VERSION >= 11040 462 instance::detail_::flags_t flags = instance::detail_::build_flags(
463 free_previous_allocations_before_relaunch
464 #
if CUDA_VERSION >= 12000
465 , upload_on_instantiation, make_device_launchable
467 #
if CUDA_VERSION >= 11700
468 , use_per_node_priorities
// CUDA >= 11.4: single-call instantiation taking the flags word.
471 #endif // CUDA_VERSION >= 11040 473 #if CUDA_VERSION >= 11040 474 auto status = cuGraphInstantiateWithFlags(&instance_handle, template_.handle(),
static_cast<unsigned long long>(flags));
// Older-CUDA branch (the #else is on a dropped line - confirm):
// cuGraphInstantiate reports the failing node and fills a caller-provided
// error-log buffer, both of which go into the thrown message.
477 static constexpr
const size_t log_buffer_size { 2048 };
478 auto log_buffer = make_unique_span<char>(log_buffer_size);
480 auto status = cuGraphInstantiate(&instance_handle, template_.handle(), &error_node, log_buffer.data(), log_buffer_size);
481 throw_if_error_lazy(status,
"Instantiating " + template_::detail_::identify(template_) +
": error at " 482 + node::detail_::identify(error_node) +
" ; log buffer contents:\n" + log_buffer.data());
// NOTE(review): this #endif's trailing comment says ">= 11000" while the
// branch was opened with ">= 11040" - the comment looks stale; confirm.
483 #endif // CUDA_VERSION >= 11000 484 static constexpr
const bool is_owning {
true };
485 return instance::wrap(template_.handle(), instance_handle, is_owning);
494 #endif // CUDA_VERSION >= 10000 496 #endif //CUDA_API_WRAPPERS_INSTANCE_HPP Proxy class for a CUDA stream.
Definition: stream.hpp:246
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
constexpr bool is_failure(status_t status)
Determine whether the API call returning the specified status had failed.
Definition: error.hpp:209
void launch(Kernel &&kernel, launch_configuration_t launch_configuration, KernelParameters &&... parameters)
Variant of enqueue_launch for use with the default stream in the current context. ...
Definition: kernel_launch.hpp:394
named_t
Aliases for CUDA status codes.
Definition: error.hpp:36
inline ::std::string describe(status_t status)
Obtain a brief textual explanation for a specified kind of CUDA Runtime API status or error code...
Definition: error.hpp:215
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we've failed - which also ensures no string is constructed unless it is actually needed.
Definition: error.hpp:316
bool operator==(const context_t &lhs, const context_t &rhs) noexcept
Definition: context.hpp:762
CUarray handle_t
Raw CUDA driver handle for arrays (of any dimension)
Definition: array.hpp:34
array_t< T, NumDimensions > wrap(device::id_t device_id, context::handle_t context_handle, handle_t handle, dimensions_t< NumDimensions > dimensions) noexcept
Wrap an existing CUDA array in an array_t instance.
Definition: array.hpp:264
CUstream handle_t
The CUDA driver's raw handle for streams.
Definition: types.hpp:239
Graph template node proxy (base-)class base-class node_t and supporting code.
constexpr bool is_success(status_t status)
Determine whether the API call returning the specified status had succeeded.
Definition: error.hpp:203
CUresult status_t
Indicates either the result (success or error index) of a CUDA Runtime or Driver API call...
Definition: types.hpp:77