cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
graph.hpp
Go to the documentation of this file.
1 
7 #pragma once
8 #ifndef MULTI_WRAPPER_IMPLS_GRAPHS_HPP_
9 #define MULTI_WRAPPER_IMPLS_GRAPHS_HPP_
10 
11 #include "../device.hpp"
12 #include "../pointer.hpp"
13 #include "../memory.hpp"
14 #include "../primary_context.hpp"
15 #include "../stream.hpp"
16 #include "../virtual_memory.hpp"
17 #include "../kernel.hpp"
18 #include "../event.hpp"
19 #include "../kernels/apriori_compiled.hpp"
20 #include "../current_context.hpp"
21 #include "../graph/node.hpp"
22 #include "../graph/template.hpp"
23 #include "../graph/instance.hpp"
24 #include "../graph/typed_node.hpp"
25 
26 #if CUDA_VERSION >= 10000
27 
28 namespace cuda {
29 
30 namespace graph {
31 
32 #if CUDA_VERSION >= 11060
33 
34 inline bool is_enabled_in(const node_t& node, const instance_t& instance)
35 {
36  unsigned result;
37  auto status = cuGraphNodeGetEnabled(instance.handle(), node.handle(), &result);
38  throw_if_error_lazy(status, "Determining whether " + node::detail_::identify(node) + " is active in " + instance::detail_::identify(instance));
39  return (result == 1);
40 }
41 
42 inline void set_enabled_in(const node_t& node, const instance_t& instance, bool enabled)
43 {
44  auto status = cuGraphNodeSetEnabled(instance.handle(), node.handle(), enabled);
45  throw_if_error_lazy(status, "Enabling " + node::detail_::identify(node) + " in " + instance::detail_::identify(instance));
46 }
47 #endif // CUDA_VERSION >= 11060
48 
49 inline void launch(const instance_t& instance, const stream_t& stream)
50 {
51  context::current::detail_::scoped_override_t set_context_for_current_scope(stream.context_handle());
52  auto status = cuGraphLaunch(instance.handle(), stream.handle());
53  throw_if_error_lazy(status, "Launching " + instance::detail_::identify(instance) + " on " + stream::detail_::identify(stream));
54 }
55 
56 namespace instance {
57 
58 #if CUDA_VERSION >= 11010
59 inline void upload(const instance_t& instance, const stream_t& stream)
60 {
61  context::current::detail_::scoped_override_t set_context_for_current_scope(stream.context_handle());
62  auto status = cuGraphUpload(instance.handle(), stream.handle());
63  throw_if_error_lazy(status, "Uploading " + instance::detail_::identify(instance) + " on " + stream::detail_::identify(stream));
64 }
65 #endif // CUDA_VERSION >= 11010
66 
// Update an already-instantiated executable graph to match a (modified)
// graph template, without re-instantiating it from scratch.
//
// Throws instance::update_failure on failure, carrying the update status
// and the node the driver reported as preventing the update.
inline void update(const instance_t& destination, const template_t& source)
{
#if CUDA_VERSION < 12000
	// Pre-12.0 driver API: the offending node and the update status are
	// returned through two separate out-parameters
	node::handle_t impermissible_node_handle{};
	instance::update_status_t update_status;
	auto status = cuGraphExecUpdate(destination.handle(), source.handle(), &impermissible_node_handle, &update_status);
	if (is_failure(status)) {
		throw instance::update_failure(update_status, node::wrap(source.handle(), impermissible_node_handle));
	}
#else
	// CUDA 12.0+ bundles the update result details into a single struct
	CUgraphExecUpdateResultInfo update_result_info;
	auto status = cuGraphExecUpdate(destination.handle(), source.handle(), &update_result_info);
	if (is_failure(status)) {
		// TODO: Add support for reporting errors involving edges, not just single nodes
		throw instance::update_failure(update_result_info.result, node::wrap(source.handle(), update_result_info.errorNode));
	}
#endif
}
85 
86 } // namespace instance
87 
88 //inline void instance_t::launch(const stream_t& stream) const
89 //{
90 // instance::launch(*this, stream);
91 //}
92 
93 #if CUDA_VERSION >= 11010
// Pre-load this executable graph instance onto the device via @p stream,
// so a subsequent launch need not perform the upload itself; delegates to
// the free-standing instance::upload().
inline void instance_t::upload(const stream_t& stream) const
{
	instance::upload(*this, stream);
}
98 #endif // CUDA_VERSION >= 11010
99 
// Create an executable instance of this graph template.
//
// The set of available instantiation options grows with the CUDA version;
// every flag is forwarded verbatim to the free-standing graph::instantiate(),
// which performs the actual driver call.
inline instance_t template_t::instantiate(
#if CUDA_VERSION >= 11040
	// presumably maps to the driver's auto-free-on-relaunch instantiation
	// flag — confirm against graph::instantiate()
	bool free_previous_allocations_before_relaunch
#endif
#if CUDA_VERSION >= 11700
	// honor per-node priorities rather than a single instance-wide one —
	// confirm against graph::instantiate()
	, bool use_per_node_priorities
#endif
#if CUDA_VERSION >= 12000
	, bool upload_on_instantiation
	, bool make_device_launchable
#endif
)
{
	return graph::instantiate(
		*this
#if CUDA_VERSION >= 11040
		, free_previous_allocations_before_relaunch
#endif
#if CUDA_VERSION >= 11700
		, use_per_node_priorities
#endif
#if CUDA_VERSION >= 12000
		, upload_on_instantiation
		, make_device_launchable
#endif
	);
}
127 
128 namespace node {
129 
130 namespace detail_ {
131 
132 inline ::std::string identify(const node_t &node)
133 {
134  return identify(node.handle(), node.containing_graph_handle());
135 }
136 
// A child-graph node's parameter is a graph template; its raw CUDA
// representation is simply that template's driver handle.
inline auto kind_traits<kind_t::child_graph>::marshal(const parameters_type& params) -> raw_parameters_type
{
	return params.handle();
}
141 
142 } // namespace detail_
143 
144 
145 } // namespace node
146 
147 inline template_t node_t::containing_graph() const noexcept
148 {
149  static constexpr const bool dont_take_ownership { false };
150  return template_::wrap(containing_graph_handle(), dont_take_ownership);
151 }
152 
153 namespace instance {
154 
155 namespace detail_ {
156 
157 inline ::std::string describe(
158  instance::update_status_t update_status,
159  node::handle_t node_handle,
160  template_::handle_t graph_template_handle)
161 {
162  ::std::string result = describe(update_status);
163  if (node_handle != node::no_handle) {
164  result += node::detail_::identify(node_handle, graph_template_handle);
165  }
166  return result;
167 }
168 
169 /*
170 inline ::std::string identify(instance::handle_t handle)
171 {
172  return "execution graph instance at " + cuda::detail_::ptr_as_hex(handle);
173 }
174 
175 inline ::std::string identify(instance::handle_t handle, template_::handle_t template_handle)
176 {
177  return identify(handle) + " within " + graph::template_::detail_::identify(template_handle);
178 }
179 
180 inline ::std::string identify(const instance_t& instance)
181 {
182  return identify(instance.handle(), instance.template_handle());
183 }
184 */
185 
186 } // namespace detail_
187 
188 } // namespace instance
189 
190 namespace template_ {
191 
192 namespace detail_ {
193 
// Identify a graph template in human-readable form, by delegating to the
// raw-handle overload of identify().
inline ::std::string identify(const template_t& graph_template)
{
	return identify(graph_template.handle());
}
198 
199 } // namespace detail_
200 
201 } // namespace template_
202 
203 } // namespace graph
204 
205 inline ::std::string describe(graph::instance::update_status_t update_status, optional<graph::node_t> node)
206 {
207  return node ?
208  describe(update_status) :
209  graph::instance::detail_::describe(update_status, node.value().handle(), node.value().containing_graph_handle());
210 }
211 
212 namespace stream {
213 namespace capture {
214 
215 inline graph::template_t end(const cuda::stream_t& stream)
216 {
217  graph::template_::handle_t new_graph;
218  auto status = cuStreamEndCapture(stream.handle(), &new_graph);
219  throw_if_error_lazy(status,
220  "Completing the capture of operations into a graph on " + stream::detail_::identify(stream));
221  return graph::template_::wrap(new_graph, do_take_ownership);
222 }
223 
224 } // namespace capture
225 } // namespace stream
226 
// Enqueue a launch of @p graph_instance on this enqueue-object's
// associated stream; delegates to the free-standing graph::launch().
inline void stream_t::enqueue_t::graph_launch(const graph::instance_t& graph_instance) const
{
	graph::launch(graph_instance, associated_stream);
}
231 
232 } // namespace cuda
233 
234 #endif // CUDA_VERSION >= 10000
235 
236 #endif // MULTI_WRAPPER_IMPLS_GRAPHS_HPP_
237 
Proxy class for a CUDA stream.
Definition: stream.hpp:246
stream::handle_t handle() const noexcept
The raw CUDA handle for a stream which this class wraps.
Definition: stream.hpp:257
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
constexpr bool is_failure(status_t status)
Determine whether the API call returning the specified status had failed.
Definition: error.hpp:209
void launch(Kernel &&kernel, launch_configuration_t launch_configuration, KernelParameters &&... parameters)
Variant of enqueue_launch for use with the default stream in the current context. ...
Definition: kernel_launch.hpp:394
inline ::std::string describe(status_t status)
Obtain a brief textual explanation for a specified kind of CUDA Runtime API status or error code...
Definition: error.hpp:215
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we've failed - which also ensures no string is constructed unle...
Definition: error.hpp:316
CUarray handle_t
Raw CUDA driver handle for arrays (of any dimension)
Definition: array.hpp:34
array_t< T, NumDimensions > wrap(device::id_t device_id, context::handle_t context_handle, handle_t handle, dimensions_t< NumDimensions > dimensions) noexcept
Wrap an existing CUDA array in an array_t instance.
Definition: array.hpp:264