8 #ifndef MULTI_WRAPPER_IMPLS_GRAPHS_HPP_     9 #define MULTI_WRAPPER_IMPLS_GRAPHS_HPP_    11 #include "../device.hpp"    12 #include "../pointer.hpp"    13 #include "../memory.hpp"    14 #include "../primary_context.hpp"    15 #include "../stream.hpp"    16 #include "../virtual_memory.hpp"    17 #include "../kernel.hpp"    18 #include "../event.hpp"    19 #include "../kernels/apriori_compiled.hpp"    20 #include "../current_context.hpp"    21 #include "../graph/node.hpp"    22 #include "../graph/template.hpp"    23 #include "../graph/instance.hpp"    24 #include "../graph/typed_node.hpp"    26 #if CUDA_VERSION >= 10000    32 #if CUDA_VERSION >= 11060    34 inline bool is_enabled_in(
const node_t& node, 
const instance_t& instance)
    37     auto status = cuGraphNodeGetEnabled(instance.handle(), node.handle(), &result);
    38     throw_if_error_lazy(status, 
"Determining whether " + node::detail_::identify(node) + 
" is active in " + instance::detail_::identify(instance));
    42 inline void set_enabled_in(
const node_t& node, 
const instance_t& instance, 
bool enabled)
    44     auto status = cuGraphNodeSetEnabled(instance.handle(), node.handle(), enabled);
    45     throw_if_error_lazy(status, 
"Enabling " + node::detail_::identify(node) + 
" in " + instance::detail_::identify(instance));
    47 #endif // CUDA_VERSION >= 11060    49 inline void launch(
const instance_t& instance, 
const stream_t& stream)
    51     context::current::detail_::scoped_override_t set_context_for_current_scope(stream.context_handle());
    52     auto status = cuGraphLaunch(instance.handle(), stream.handle());
    53     throw_if_error_lazy(status, 
"Launching " + instance::detail_::identify(instance) + 
" on " + stream::detail_::identify(stream));
    58 #if CUDA_VERSION >= 11010    59 inline void upload(
const instance_t& instance, 
const stream_t& stream)
    61     context::current::detail_::scoped_override_t set_context_for_current_scope(stream.context_handle());
    62     auto status = cuGraphUpload(instance.handle(), stream.handle());
    63     throw_if_error_lazy(status, 
"Uploading " + instance::detail_::identify(instance) + 
" on " + stream::detail_::identify(stream));
    65 #endif // CUDA_VERSION >= 11010    67 inline void update(
const instance_t& destination, 
const template_t& source)
    69 #if CUDA_VERSION < 12000    71     instance::update_status_t update_status;
    72     auto status = cuGraphExecUpdate(destination.handle(), source.handle(), &impermissible_node_handle, &update_status);
    74         throw instance::update_failure(update_status, 
node::wrap(source.handle(), impermissible_node_handle));
    77     CUgraphExecUpdateResultInfo update_result_info;
    78     auto status = cuGraphExecUpdate(destination.handle(), source.handle(), &update_result_info);
    81         throw instance::update_failure(update_result_info.result, 
node::wrap(source.handle(), update_result_info.errorNode));
    93 #if CUDA_VERSION >= 11010    94 inline void instance_t::upload(
const stream_t& stream)
 const    96     instance::upload(*
this, stream);
    98 #endif // CUDA_VERSION >= 11010   100 inline instance_t template_t::instantiate(
   101 #
if CUDA_VERSION >= 11040
   102     bool free_previous_allocations_before_relaunch
   104 #
if CUDA_VERSION >= 11700
   105     , 
bool use_per_node_priorities
   107 #
if CUDA_VERSION >= 12000
   108     , 
bool upload_on_instantiation
   109     , 
bool make_device_launchable
   113     return graph::instantiate(
   115 #
if CUDA_VERSION >= 11040
   116         , free_previous_allocations_before_relaunch
   118 #
if CUDA_VERSION >= 11700
   119         , use_per_node_priorities
   121 #
if CUDA_VERSION >= 12000
   122         , upload_on_instantiation
   123         , make_device_launchable
   132 inline ::std::string identify(
const node_t &node)
   134     return identify(node.handle(), node.containing_graph_handle());
   137 inline auto kind_traits<kind_t::child_graph>::marshal(
const parameters_type& params) -> raw_parameters_type
   139     return params.handle();
   147 inline template_t node_t::containing_graph() const noexcept
   149     static constexpr 
const bool dont_take_ownership { 
false };
   150     return template_::wrap(containing_graph_handle(), dont_take_ownership);
   158     instance::update_status_t  update_status,
   162     ::std::string result = 
describe(update_status);
   163     if (node_handle != node::no_handle) {
   164         result += node::detail_::identify(node_handle, graph_template_handle);
   190 namespace template_ {
   194 inline ::std::string identify(
const template_t& graph_template)
   196     return identify(graph_template.handle());
   205 inline ::std::string 
describe(graph::instance::update_status_t update_status, optional<graph::node_t> node)
   209            graph::instance::detail_::describe(update_status, node.value().handle(), node.value().containing_graph_handle());
// NOTE(review): fragment of a stream-capture-terminating routine — the
// enclosing definition's opening lines were lost in extraction. What remains
// shows a cuStreamEndCapture call on the stream's raw handle, writing the
// newly-completed graph's handle into new_graph, followed by the tail of an
// error-check message. Recover the full definition from the repository copy.
    218     auto status = cuStreamEndCapture(stream.
handle(), &new_graph);
    220         "Completing the capture of operations into a graph on " + stream::detail_::identify(stream));
// Enqueue a launch of the executable graph @p graph_instance on this
// enqueue object's associated stream.
// NOTE(review): the method body (original lines 228-233) was lost in
// extraction — only the signature, trailing const, and the file's closing
// #endif directives remain visible; recover the body from the repository copy.
   227 inline void stream_t::enqueue_t::graph_launch(
const graph::instance_t& graph_instance)
 const   234 #endif // CUDA_VERSION >= 10000   236 #endif // MULTI_WRAPPER_IMPLS_GRAPHS_HPP_ Proxy class for a CUDA stream. 
Definition: stream.hpp:246
 
stream::handle_t handle() const noexcept
The raw CUDA handle for a stream which this class wraps. 
Definition: stream.hpp:257
 
Definitions and functionality wrapping CUDA APIs. 
Definition: array.hpp:22
 
constexpr bool is_failure(status_t status)
Determine whether the API call returning the specified status had failed. 
Definition: error.hpp:209
 
void launch(Kernel &&kernel, launch_configuration_t launch_configuration, KernelParameters &&... parameters)
Variant of enqueue_launch for use with the default stream in the current context. ...
Definition: kernel_launch.hpp:394
 
inline ::std::string describe(status_t status)
Obtain a brief textual explanation for a specified kind of CUDA Runtime API status or error code...
Definition: error.hpp:215
 
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we've failed — which also ensures no string is constructed unless an error has actually occurred.
Definition: error.hpp:316
 
CUarray handle_t
Raw CUDA driver handle for arrays (of any dimension) 
Definition: array.hpp:34
 
array_t< T, NumDimensions > wrap(device::id_t device_id, context::handle_t context_handle, handle_t handle, dimensions_t< NumDimensions > dimensions) noexcept
Wrap an existing CUDA array in an array_t instance. 
Definition: array.hpp:264