cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
in_library.hpp
Go to the documentation of this file.
1 
6 #pragma once
7 #ifndef CUDA_API_WRAPPERS_IN_LIBRARY_KERNEL_HPP_
8 #define CUDA_API_WRAPPERS_IN_LIBRARY_KERNEL_HPP_
9 
10 #if CUDA_VERSION >= 12000
11 
12 #include "../library.hpp"
13 
14 #include <type_traits>
15 
16 namespace cuda {
17 
19 class kernel_t;
20 class context_t;
22 
23 namespace library {
24 
26 class kernel_t;
28 
29 } // namespace library
30 
31 namespace detail_ {
32 
33 template <typename Kernel>
34 struct is_library_kernel : ::std::is_same<typename ::std::decay<Kernel>::type, library::kernel_t> { };
35 
36 } // namespace detail_
37 
38 // TODO: Avoid the copy?
39 kernel_t contextualize(const library::kernel_t& kernel, const context_t& context);
40 
41 namespace library {
42 
43 namespace kernel {
44 
45 using handle_t = CUkernel;
48 // using cuda::kernel::apriori_compiled::attributes_t;
49 
50 namespace detail_ {
51 
52 // Note: library kernels never hold a PC refcount unit, nor do they own anything;
53 // only the library wrapper owns (and it's not associated with the kernel).
54 kernel_t wrap(library::handle_t library_handle, kernel::handle_t handle);
55 
56 inline ::std::string identify(kernel::handle_t handle)
57 {
58  return "library kernel at " + cuda::detail_::ptr_as_hex(handle);
59 }
60 
61 inline ::std::string identify(library::handle_t library_handle, kernel::handle_t handle)
62 {
63  return identify(handle) + " within " + library::detail_::identify(library_handle);
64 }
65 
66 ::std::string identify(const kernel_t &kernel);
67 
68 inline ::std::pair<cuda::kernel::handle_t, status_t> contextualize_in_current_context(
69  const kernel::handle_t& library_kernel_handle)
70 {
71  cuda::kernel::handle_t contextualized_kernel_handle;
72  auto status = cuKernelGetFunction(&contextualized_kernel_handle, library_kernel_handle);
73  return {contextualized_kernel_handle, status};
74 }
75 
76 inline cuda::kernel::handle_t contextualize(
77  const handle_t& kernel_handle,
78  const context::handle_t context_handle)
79 {
80  CAW_SET_SCOPE_CONTEXT(context_handle);
81  auto handle_and_status = contextualize_in_current_context(kernel_handle);
82  throw_if_error_lazy(handle_and_status.second, "Failed placing " + identify(kernel_handle) + " in "
83  + context::detail_::identify(context_handle));
84  return handle_and_status.first;
85 }
86 
87 inline attribute_value_t get_attribute(
88  handle_t library_kernel_handle,
89  device::id_t device_id,
90  kernel::attribute_t attribute)
91 {
92  attribute_value_t value;
93  auto status = cuKernelGetAttribute(&value, attribute, library_kernel_handle, device_id);
94  throw_if_error_lazy(status, ::std::string("Failed getting attribute ")
95  + cuda::kernel::detail_::attribute_name(attribute) + " for " + identify(library_kernel_handle)
96  + " on " + device::detail_::identify(device_id));
97  return value;
98 }
99 
100 inline void set_attribute(
101  kernel::handle_t library_kernel_handle,
102  device::id_t device_id,
103  kernel::attribute_t attribute,
104  attribute_value_t value)
105 {
106  auto status = cuKernelSetAttribute(attribute, value, library_kernel_handle, device_id);
107  throw_if_error_lazy(status, ::std::string("Failed setting attribute ")
108  + cuda::kernel::detail_::attribute_name(attribute) + " value to " + ::std::to_string(value)
109  + " for " + identify(library_kernel_handle) + " on " + device::detail_::identify(device_id));
110 }
111 
112 } // namespace detail
113 
114 attribute_value_t get_attribute(
115  const library::kernel_t& library_kernel,
116  kernel::attribute_t attribute,
117  const device_t& device);
118 
119 inline void set_attribute(
120  const library::kernel_t& library_kernel,
121  kernel::attribute_t attribute,
122  const device_t& device,
123  attribute_value_t value);
124 
125 } // namespace kernel
126 
131 class kernel_t {
132 public: // getters
133  kernel::handle_t handle() const noexcept { return handle_; }
134  library::handle_t library_handle() const noexcept { return library_handle_; }
135  library_t library() const noexcept { return library::detail_::wrap(library_handle_); }
136 
137 public: // type_conversions
138 
139 public: // non-mutators
140 
141 #if CUDA_VERSION >= 12300
142 
147  const char* name() const
148  {
149  if (name_ != nullptr) { return name_; }
150  const char* result;
151  auto status = cuKernelGetName(&result, handle_);
152  throw_if_error_lazy(status, "Retrieving the name of " + kernel::detail_::identify(*this));
153  name_ = result;
154  return name_;
155  }
156 #endif
157  cuda::kernel_t contextualize(const context_t& context) const;
158 
159 protected: // ctors & dtor
160  kernel_t(library::handle_t library_handle, kernel::handle_t handle)
161  :
162  library_handle_(library_handle), handle_(handle) {}
163 
164 public: // ctors & dtor
165  kernel_t(const kernel_t &) = default;
166  kernel_t(kernel_t&& other) = default;
167 
168 public: // friends
170 
171 protected: // data members
172  library::handle_t library_handle_;
173  kernel::handle_t handle_;
174  mutable const char* name_ { nullptr }; // The name is cached after having been retrieved for the first time
175 }; // kernel_t
176 
177 namespace kernel {
178 namespace detail_ {
179 
180 inline kernel_t wrap(library::handle_t library_handle, kernel::handle_t handle)
181 {
182  return {library_handle, handle};
183 }
184 
185 inline ::std::string identify(const kernel_t& library_kernel)
186 {
187  return identify(library_kernel.library_handle(), library_kernel.handle());
188 }
189 
190 } // namespace detail_
191 
192 inline kernel_t get(const library_t& library, const char* name)
193 {
194  auto kernel_handle = cuda::library::detail_::get_kernel_in_current_context(library.handle(), name);
195  return kernel::detail_::wrap(library.handle(), kernel_handle);
196 }
197 
198 } // namespace kernel
199 
200 } // namespace library
201 
202 inline library::kernel_t library_t::get_kernel(const char* name) const
203 {
204  return library::kernel::get(*this, name);
205 }
206 
207 inline library::kernel_t library_t::get_kernel(const ::std::string& name) const
208 {
209  return get_kernel(name.c_str());
210 }
211 
212 inline library::kernel_t library_t::get_kernel(const context_t& context, const char* name) const
213 {
215  return library::kernel::get(*this, name);
216 }
217 
218 inline library::kernel_t library_t::get_kernel(const context_t& context, const ::std::string& name) const
219 {
220  return get_kernel(context, name.c_str());
221 }
222 
223 } // namespace cuda
224 
225 #endif // CUDA_VERSION >= 12000
226 
227 #endif // CUDA_API_WRAPPERS_IN_LIBRARY_KERNEL_HPP_
228 
int attribute_value_t
The uniform type the CUDA driver uses for all kernel attributes; it is typically more appropriate to ...
Definition: types.hpp:988
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:850
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we&#39;ve failed - which also ensures no string is constructed unle...
Definition: error.hpp:316
CUarray handle_t
Raw CUDA driver handle for arrays (of any dimension)
Definition: array.hpp:34
#define CUDA_CONTEXT_FOR_THIS_SCOPE(_cuda_context)
This macro will set the current device for the remainder of the scope in which it is invoked...
Definition: current_context.hpp:267
array_t< T, NumDimensions > wrap(device::id_t device_id, context::handle_t context_handle, handle_t handle, dimensions_t< NumDimensions > dimensions) noexcept
Wrap an existing CUDA array in an array_t instance.
Definition: array.hpp:264
A non-owning wrapper for CUDA kernels - whether they be __global__ functions compiled apriori...
Definition: kernel.hpp:159
CUfunction_attribute attribute_t
Raw CUDA driver selector of a kernel attribute.
Definition: types.hpp:983