cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
library.hpp
Go to the documentation of this file.
1 
7 #pragma once
8 #ifndef CUDA_API_WRAPPERS_LIBRARY_HPP_
9 #define CUDA_API_WRAPPERS_LIBRARY_HPP_
10 
11 #if CUDA_VERSION >= 12000
12 
13 #include "module.hpp"
14 #include "error.hpp"
15 
16 #if __cplusplus >= 201703L
17 #include <filesystem>
18 #endif
19 
20 namespace cuda {
21 
23 class context_t;
24 class module_t;
25 class library_t;
26 class kernel_t;
28 
29 namespace library {
30 
31 using handle_t = CUlibrary;
32 
33 namespace kernel {
34 
35 using handle_t = CUkernel; // Don't be confused; a context-associated kernel is a CUfunction :-(
36 
37 } // namespace kernel
38 
39 namespace detail_ {
40 
41 using option_t = CUlibraryOption;
42 
43 } // namespace detail_
44 
45 class kernel_t; // A kernel stored within a library; strangely, a context-associated kernel is a CUfunction.
46 
47 namespace detail_ {
48 
// Forward declaration of the library-handle wrapping function; its definition
// appears further down in this file, after library_t itself is defined.
//
// handle          the raw library handle to wrap
// take_ownership  passed on as the wrapper's "owning" flag; an owning
//                 library_t unloads the library when it is destroyed
49 inline library_t wrap(
50  handle_t handle,
51  bool take_ownership = false) noexcept;
52 
53 inline ::std::string identify(const library::handle_t &handle)
54 {
55  return ::std::string("library ") + cuda::detail_::ptr_as_hex(handle);
56 }
57 
// Forward declaration of the overload describing a library_t wrapper rather
// than a raw handle; it is defined further down in this file, where it
// forwards to the raw-handle overload using library.handle().
58 ::std::string identify(const library_t &library);
59 
60 inline status_t unload_nothrow(handle_t handle) noexcept
61 {
62  return cuLibraryUnload(handle);
63 }
64 
65 inline void unload(handle_t handle)
66 {
67  auto status = unload_nothrow(handle);
68  throw_if_error_lazy(status, ::std::string{"Failed unloading "}
69  + library::detail_::identify(handle));
70 }
71 
72 } // namespace detail_
73 
// Declaration of the container-taking variant of library creation; only
// participates in overload resolution for types satisfying
// cuda::detail_::is_kinda_like_contiguous_container. It is defined near the
// end of this file, where library_data.data() is forwarded to the
// raw-pointer create() overload.
//
// Note that, unlike the raw-pointer overload, no default values are given
// here for link_options and code_is_preserved.
80 template <typename ContiguousContainer,
82  cuda::detail_::enable_if_t<cuda::detail_::is_kinda_like_contiguous_container<ContiguousContainer>::value, bool> = true >
83 library_t create(
84  ContiguousContainer library_data,
85  optional<link::options_t> link_options,
86  bool code_is_preserved);
88 
89 
90 namespace detail_ {
91 
92 inline kernel::handle_t get_kernel_in_current_context(handle_t library_handle, const char* name)
93 {
94  library::kernel::handle_t kernel_handle;
95  auto status = cuLibraryGetKernel(&kernel_handle, library_handle, name);
96  throw_if_error_lazy(status, ::std::string{"Failed obtaining kernel "}
97  + name + "' from " + library::detail_::identify(library_handle));
98  return kernel_handle;
99 }
100 
101 inline kernel::handle_t get_kernel(context::handle_t context_handle, handle_t library_handle, const char* name)
102 {
103  CAW_SET_SCOPE_CONTEXT(context_handle);
104  return get_kernel_in_current_context(library_handle, name);
105 }
106 
107 } // namespace detail_
108 
// Declarations of the free functions obtaining a library::kernel_t wrapper,
// by name, from a library - without and with an explicitly-specified context.
// Their definitions are not part of this section of the code.
//
// NOTE(review): the second overload takes a non-const context_t&, while the
// library_t::get_kernel() members taking a context take a const context_t& -
// confirm whether the non-const reference is intentional.
109 inline kernel_t get_kernel(const library_t& library, const char* name);
110 inline kernel_t get_kernel(context_t& context, const library_t& library, const char* name);
111 
112 } // namespace library
113 
// Declarations of the functions obtaining memory regions, by name, from a
// library: get_global() wraps cuLibraryGetGlobal() and get_managed_region()
// wraps cuLibraryGetManaged(). Both are defined further down in this file,
// after the definition of library_t.
114 memory::region_t get_global(const context_t& context, const library_t& library, const char* name);
115 memory::region_t get_managed_region(const library_t& library, const char* name);
116 
117 namespace module {
118 
// Declarations of the functions obtaining a module from a library. The
// variant taking a context is defined further down in this file (using
// cuLibraryGetModule()); the definition of the variant taking only a library
// is not part of this section of the code.
119 module_t create(const context_t& context, const library_t& library);
120 module_t create(const library_t& library);
121 
122 } // namespace module
123 
// Declaration of the wrapper for cuLibraryGetUnifiedFunction(), which yields
// a pointer for the function named `symbol` in the library; it is defined
// further down in this file.
124 void* get_unified_function(const context_t& context, const library_t& library, const char* symbol);
125 
130 class library_t {
131 
132 public: // getters
133 
134  library::handle_t handle() const { return handle_; }
135 
147  library::kernel_t get_kernel(const context_t& context, const char* name) const;
148  library::kernel_t get_kernel(const context_t& context, const ::std::string& name) const;
149  library::kernel_t get_kernel(const char* name) const;
150  library::kernel_t get_kernel(const ::std::string& name) const;
151 
152  memory::region_t get_global(const char* name) const
153  {
154  return cuda::get_global(context::current::get(), *this, name);
155  }
156 
157  memory::region_t get_global(const ::std::string& name) const
158  {
159  return get_global(name.c_str());
160  }
161 
162  memory::region_t get_managed(const char* name) const
163  {
164  return cuda::get_managed_region(*this, name);
165  }
166 
167  memory::region_t get_managed(const ::std::string& name) const
168  {
169  return get_managed(name.c_str());
170  }
171 
172 protected: // constructors
173 
174  library_t(library::handle_t handle, bool owning) noexcept
175  : handle_(handle), owning_(owning)
176  { }
177 
178 public: // friendship
179 
180  friend library_t library::detail_::wrap(library::handle_t, bool) noexcept;
181 
182 public: // constructors and destructor
183 
184  library_t(const library_t&) = delete;
185 
186  library_t(library_t&& other) noexcept : library_t(other.handle_, other.owning_)
187  {
188  other.owning_ = false;
189  };
190 
191  ~library_t() DESTRUCTOR_EXCEPTION_SPEC
192  {
193  if (not owning_) { return; }
194 #ifdef THROW_IN_DESTRUCTORS
195  library::detail_::unload(handle_);
196 #else
197  library::detail_::unload_nothrow(handle_);
198 #endif
199  }
200 
201 public: // operators
202 
203  library_t& operator=(const library_t&) = delete;
204  library_t& operator=(library_t&& other) noexcept
205  {
206  ::std::swap(handle_, other.handle_);
207  ::std::swap(owning_, other.owning_);
208  return *this;
209  }
210 
211 protected: // data members
212  library::handle_t handle_;
213  bool owning_;
214  // this field is mutable only for enabling move construction; other
215  // than in that case it must not be altered
216 };
217 
218 inline memory::region_t get_global(const context_t& context, const library_t& library, const char* name)
219 {
220  CUdeviceptr dptr;
221  size_t size;
222  auto result = cuLibraryGetGlobal(&dptr, &size, library.handle(), name);
223  throw_if_error_lazy(result,
224  ::std::string("Obtaining the memory address and size for the global object '") + name + "' from "
225  + library::detail_::identify(library) + " in context " + context::detail_::identify(context));
226  return { memory::as_pointer(dptr), size };
227  // Note: Nothing is holding a PC refcount unit here!
228 }
229 
230 // More library item getters
231 namespace library {
232 
233 } // namespace library
234 
235 inline memory::region_t get_managed_region(const library_t& library, const char* name)
236 {
237  memory::device::address_t region_start;
238  size_t region_size;
239  auto status = cuLibraryGetManaged(&region_start, &region_size, library.handle(), name);
240  throw_if_error_lazy(status, ::std::string("Failed obtaining the managed memory region '") + name
241  + "' from " + library::detail_::identify(library));
242  return { memory::as_pointer(region_start), region_size };
243 }
244 
245 namespace module {
246 
250 inline module_t create(const context_t& context, const library_t& library)
251 {
252  CAW_SET_SCOPE_CONTEXT(context.handle());
253  module::handle_t new_handle;
254  auto status = cuLibraryGetModule(&new_handle, library.handle());
255  throw_if_error_lazy(status, ::std::string("Failed creating a module '") +
256  + "' from " + library::detail_::identify(library) + " in " + context::detail_::identify(context));
257  constexpr const bool is_owning { true };
258  return module::detail_::wrap(context.device_id(), context.handle(), new_handle,
259  is_owning, do_hold_primary_context_refcount_unit);
260  // TODO: We could consider adding a variant of this function taking a context&&, and using that
261  // to decide whether or not to hold a PC refcount unit
262 }
263 
264 } // namespace module
265 
266 // I really have no idea what this does!
267 inline void* get_unified_function(const context_t& context, const library_t& library, const char* symbol)
268 {
269  CAW_SET_SCOPE_CONTEXT(context.handle());
270  void* function_ptr;
271  auto status = cuLibraryGetUnifiedFunction(&function_ptr, library.handle(), symbol);
272  throw_if_error_lazy(status, ::std::string("Failed obtaining a pointer for function '") + symbol
273  + "' from " + library::detail_::identify(library) + " in " + context::detail_::identify(context));
274  return function_ptr;
275 }
276 
277 namespace library {
278 
279 namespace detail_ {
280 
281 template <typename Creator, typename DataSource, typename ErrorStringGenerator>
282 library_t create(
283  Creator creator,
284  DataSource data_source,
285  ErrorStringGenerator error_string_generator,
286  const link::options_t& link_options = {},
287  bool code_is_preserved = false)
288 {
289  handle_t new_lib_handle;
290  auto raw_link_opts = link::detail_::marshal(link_options);
291  struct {
292  detail_::option_t options[1];
293  void* values[1];
294  unsigned count;
295  } raw_opts = { { CU_LIBRARY_BINARY_IS_PRESERVED }, { &code_is_preserved }, 1 };
296  auto status = creator(
297  &new_lib_handle, data_source,
298  const_cast<link::detail_::option_t*>(raw_link_opts.options()),
299  const_cast<void**>(raw_link_opts.values()), raw_link_opts.count(),
300  raw_opts.options, raw_opts.values, raw_opts.count
301  );
302  throw_if_error_lazy(status,
303  ::std::string("Failed loading a compiled CUDA code library from ") + error_string_generator());
304  bool do_take_ownership{true};
305  return detail_::wrap(new_lib_handle, do_take_ownership);
306 }
307 
308 } // namespace detail_
309 
321 inline library_t load_from_file(
323  const char* path,
324  const link::options_t& link_options = {},
325  bool code_is_preserved = false)
326 {
327  return detail_::create(
328  cuLibraryLoadFromFile, path,
329  [path]() { return ::std::string("file ") + path; },
330  link_options, code_is_preserved);
331 }
332 
333 inline library_t load_from_file(
334  const ::std::string& path,
335  const link::options_t& link_options = {},
336  bool code_is_preserved = false)
337 {
338  return load_from_file(path.c_str(), link_options, code_is_preserved);
339 }
340 
341 #if __cplusplus >= 201703L
342 
343 inline library_t load_from_file(
344  const ::std::filesystem::path& path,
345  const link::options_t& link_options = {},
346  bool code_is_preserved = false)
347 {
348  return load_from_file(path.c_str(), link_options, code_is_preserved);
349 }
350 
351 #endif
352 
354 namespace detail_ {
355 
356 inline library_t wrap(handle_t handle, bool take_ownership) noexcept
357 {
358  return library_t{handle, take_ownership};
359 }
360 
361 } // namespace detail_
362 
370 inline library_t create(
371  const void* module_data,
372  const link::options_t& link_options = {},
373  bool code_is_preserved = false)
374 {
375  return detail_::create(
376  cuLibraryLoadData, module_data,
377  [module_data]() { return ::std::string("data at ") + cuda::detail_::ptr_as_hex(module_data); },
378  link_options, code_is_preserved);
379 }
380 
381 
382 // TODO: Use an optional to reduce the number of functions here... when the
383 // library starts requiring C++14.
384 
385 namespace detail_ {
386 
387 inline ::std::string identify(const library_t& library)
388 {
389  return identify(library.handle());
390 }
391 
392 } // namespace detail_
393 
394 template <typename ContiguousContainer,
395  cuda::detail_::enable_if_t<cuda::detail_::is_kinda_like_contiguous_container<ContiguousContainer>::value, bool> >
396 library_t create(
397  ContiguousContainer library_data,
398  optional<link::options_t> link_options,
399  bool code_is_preserved)
400 {
401  return create(library_data.data(), link_options, code_is_preserved);
402 }
403 
404 } // namespace library
405 
406 } // namespace cuda
407 
408 #endif // CUDA_VERSION >= 12000
409 
410 #endif // CUDA_API_WRAPPERS_LIBRARY_HPP_
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
device::id_t count()
Get the number of CUDA devices usable on the system (with the current CUDA library and kernel driver)...
Definition: miscellany.hpp:63
detail_::region_helper< memory::region_t > region_t
A child class of the generic region_t with some managed-memory-specific functionality.
Definition: memory.hpp:1974
STL namespace.
module_t load_from_file(const context_t &context, const char *path)
Load a module from an appropriate compiled or semi-compiled file, allocating all relevant resources f...
Definition: module.hpp:321
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we've failed - which also ensures no string is constructed unle...
Definition: error.hpp:327
CUarray handle_t
Raw CUDA driver handle for arrays (of any dimension)
Definition: array.hpp:34
array_t< T, NumDimensions > wrap(device::id_t device_id, context::handle_t context_handle, handle_t handle, dimensions_t< NumDimensions > dimensions) noexcept
Wrap an existing CUDA array in an array_t instance.
Definition: array.hpp:271
void * as_pointer(device::address_t address) noexcept
Definition: types.hpp:702
CUdeviceptr address_t
The numeric type which can represent the range of memory addresses on a CUDA device.
Definition: types.hpp:674
CUresult status_t
Indicates either the result (success or error index) of a CUDA Runtime or Driver API call...
Definition: types.hpp:74