cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
module.hpp
Go to the documentation of this file.
1 
6 #pragma once
7 #ifndef CUDA_API_WRAPPERS_MODULE_HPP_
8 #define CUDA_API_WRAPPERS_MODULE_HPP_
9 
10 #include "context.hpp"
11 #include "primary_context.hpp"
12 #include "kernel.hpp"
13 #include "memory.hpp"
14 #include "array.hpp"
15 #include "link_options.hpp"
16 #include <array>
17 
18 #if __cplusplus >= 201703L
19 #include <filesystem>
20 #endif
21 
22 namespace cuda {
23 
25 class device_t;
26 class context_t;
27 class module_t;
28 class kernel_t;
30 
31 namespace module {
32 
33 // The CUDA driver's raw handle for modules
34 using handle_t = CUmodule;
35 
36 namespace detail_ {
37 
40 inline module_t wrap(
41  device::id_t device_id,
42  context::handle_t context_handle,
43  handle_t handle,
44  bool take_ownership = false,
45  bool holds_primary_context_refcount_unit = false) noexcept;
46 
47 inline ::std::string identify(const module::handle_t &handle)
48 {
49  return ::std::string("module ") + cuda::detail_::ptr_as_hex(handle);
50 }
51 
52 inline ::std::string identify(const module::handle_t &handle, context::handle_t context_handle)
53 {
54  return identify(handle) + " in " + context::detail_::identify(context_handle);
55 }
56 
57 inline ::std::string identify(const module::handle_t &handle, context::handle_t context_handle, device::id_t device_id)
58 {
59  return identify(handle) + " in " + context::detail_::identify(context_handle, device_id);
60 }
61 
62 ::std::string identify(const module_t &module);
63 
64 inline void destroy(handle_t handle, context::handle_t context_handle, device::id_t device_id);
65 
66 #if CUDA_VERSION >= 12040
67 inline unique_span<kernel::handle_t> get_kernel_handles(handle_t module_handle, size_t num_kernels)
68 {
69  auto result = make_unique_span<kernel::handle_t>(num_kernels);
70  auto status = cuModuleEnumerateFunctions(result.data(), (unsigned int) num_kernels, module_handle);
71  throw_if_error_lazy(status, "Failed enumerating the kernels in " + module::detail_::identify(module_handle));
72  return result;
73 }
74 #endif
75 
76 } // namespace detail_
77 
89 template <typename Locus, typename ContiguousContainer,
91  cuda::detail_::enable_if_t<cuda::detail_::is_kinda_like_contiguous_container<ContiguousContainer>::value, bool> = true >
92 module_t create(
93  Locus&& locus,
94  ContiguousContainer module_data,
95  const link::options_t& link_options);
96 
97 template <typename Locus, typename ContiguousContainer,
98  cuda::detail_::enable_if_t<cuda::detail_::is_kinda_like_contiguous_container<ContiguousContainer>::value, bool> = true >
99 module_t create(
100  Locus&& locus,
101  ContiguousContainer module_data);
103 
104 #if CUDA_VERSION >= 12030
105 using loading_mode_t = CUmoduleLoadingMode;
106 
107 inline loading_mode_t loading_mode() {
108  loading_mode_t result;
109  auto status = cuModuleGetLoadingMode(&result);
110  throw_if_error_lazy(status, "Failed obtaining CUDA module loading mode");
111  return result;
112 }
113 #endif
114 
115 } // namespace module
116 
123 class module_t {
124 
125 public: // getters
128  module::handle_t handle() const { return handle_; }
129  context::handle_t context_handle() const { return context_handle_; }
130  device::id_t device_id() const { return device_id_; }
132 
134  context_t context() const;
135 
137  device_t device() const;
138 
148  cuda::kernel_t get_kernel(const char* name) const;
149 
151  cuda::kernel_t get_kernel(const ::std::string& name) const
152  {
153  return get_kernel(name.c_str());
154  }
155 
157  memory::region_t get_global_region(const char* name) const
158  {
159  CUdeviceptr dptr;
160  size_t size;
161  auto result = cuModuleGetGlobal(&dptr, &size, handle_, name);
162  throw_if_error_lazy(result, "Obtaining the address and size of a named global object");
163  return { memory::as_pointer(dptr), size };
164  }
165 
166 #if CUDA_VERSION >= 12040
167  size_t get_num_kernels() const
168  {
169  unsigned result;
170  auto status = cuModuleGetFunctionCount(&result, handle_);
171  throw_if_error_lazy(status, "Failed determining function count for " + module::detail_::identify(*this));
172  return result;
173  }
174 
175  unique_span<kernel_t> get_kernels() const
176  {
177  auto num_kernels = get_num_kernels();
178  // It's ok if the number is 0!
179  auto handles = module::detail_::get_kernel_handles(handle_, num_kernels);
180  auto gen = [&](size_t i) { return kernel::wrap(device_id_, context_handle_, handles[i]); };
181  return generate_unique_span<kernel_t>(handles.size(), gen);
182  }
183 #endif // CUDA_VERSION >= 12040
184 
185  // TODO: Implement a surface reference and texture reference class rather than these raw pointers.
186 
187 #if CUDA_VERSION < 12000
188  CUsurfref get_surface(const char* name) const;
191 
196  CUtexref get_texture_reference(const char* name) const;
197 #endif
198 
199 protected: // constructors
200 
201  module_t(
202  device::id_t device_id,
203  context::handle_t context,
204  module::handle_t handle,
205  bool owning,
206  bool holds_primary_context_refcount_unit)
207  noexcept
208  : device_id_(device_id), context_handle_(context), handle_(handle), owning_(owning),
209  holds_pc_refcount_unit_(holds_primary_context_refcount_unit)
210  { }
211 
212 public: // friendship
213 
214  friend module_t module::detail_::wrap(device::id_t, context::handle_t, module::handle_t, bool, bool) noexcept;
215 
216 public: // constructors and destructor
217 
218  module_t(const module_t&) = delete;
219 
220  module_t(module_t&& other) noexcept :
221  module_t(
222  other.device_id_,
223  other.context_handle_,
224  other.handle_,
225  other.owning_,
226  other.holds_pc_refcount_unit_)
227  {
228  other.owning_ = false;
229  other.holds_pc_refcount_unit_ = false;
230  };
231 
232  // Note: It is up to the user of this class to ensure that it is destroyed _before_ the context
233  // in which it was created; and one needs to be particularly careful about this point w.r.t.
234  // primary contexts
235  ~module_t() noexcept(false)
236  {
237  if (owning_) {
238  module::detail_::destroy(handle_, context_handle_, device_id_);
239  }
240  // TODO: DRY
241  if (holds_pc_refcount_unit_) {
242 #ifdef NDEBUG
243  device::primary_context::detail_::decrease_refcount_nothrow(device_id_);
244  // Note: "Swallowing" any potential error to avoid ::std::terminate(); also,
245  // because a failure probably means the primary context is inactive already
246 #else
247  device::primary_context::detail_::decrease_refcount(device_id_);
248 #endif
249  }
250  }
251 
252 public: // operators
253 
254  module_t& operator=(const module_t&) = delete;
255  module_t& operator=(module_t&& other) noexcept
256  {
257  ::std::swap(device_id_, other.device_id_);
258  ::std::swap(context_handle_, other.context_handle_);
259  ::std::swap(handle_, other.handle_);
260  ::std::swap(owning_, other.owning_);
261  ::std::swap(holds_pc_refcount_unit_, holds_pc_refcount_unit_);
262  return *this;
263  }
264 
265 protected: // data members
266  device::id_t device_id_;
267  context::handle_t context_handle_;
268  module::handle_t handle_;
269  bool owning_;
270  // this field is mutable only for enabling move construction; other
271  // than in that case it must not be altered
272  bool holds_pc_refcount_unit_;
273  // When context_handle_ is the handle of a primary context, this module
274  // may be "keeping that context alive" through the refcount - in which
275  // case it must release its refcount unit on destruction
276 };
277 
278 namespace module {
279 
280 namespace detail_ {
281 
282 inline module_t load_from_file_in_current_context(
283  device::id_t current_context_device_id,
284  context::handle_t current_context_handle,
285  const char * path,
286  bool holds_primary_context_refcount_unit = false)
287 {
288  handle_t new_module_handle;
289  auto status = cuModuleLoad(&new_module_handle, path);
290  throw_if_error_lazy(status, ::std::string("Failed loading a module from file ") + path);
291  bool do_take_ownership{true};
292  return wrap(
293  current_context_device_id,
294  current_context_handle,
295  new_module_handle,
296  do_take_ownership,
297  holds_primary_context_refcount_unit);
298 }
299 
300 } // namespace detail_
301 
302 
318  const context_t& context,
319  const char* path)
320 {
321  CAW_SET_SCOPE_CONTEXT(context.handle());
322  return detail_::load_from_file_in_current_context(context.device_id(), context.handle(), path);
323 }
324 
327  const context_t& context,
328  const ::std::string& path)
329 {
330  return load_from_file(context, path.c_str());
331 }
332 
340  const device_t& device,
341  const char* path);
342 
349 inline module_t load_from_file(
350  const device_t& device,
351  const ::std::string& path)
352 {
353  return load_from_file(device, path.c_str());
354 }
355 
362 module_t load_from_file(const char* path);
363 
365 inline module_t load_from_file(const ::std::string& path)
366 {
367  return load_from_file(path.c_str());
368 }
369 
370 #if __cplusplus >= 201703L
371 inline module_t load_from_file(
373  const device_t& device,
374  const ::std::filesystem::path& path)
375 {
376  return load_from_file(device, path.c_str());
377 }
378 
380 inline module_t load_from_file(
381  const ::std::filesystem::path& path)
382 {
383  return load_from_file(device::current::get(), path);
384 }
385 #endif
386 
387 namespace detail_ {
388 
389 inline module_t wrap(
390  device::id_t device_id,
391  context::handle_t context_handle,
392  handle_t module_handle,
393  bool take_ownership,
394  bool hold_pc_refcount_unit
395 ) noexcept
396 {
397  return module_t{device_id, context_handle, module_handle, take_ownership, hold_pc_refcount_unit};
398 }
399 
409 module_t create(const context_t& context, const void* module_data, const link::options_t& link_options);
410 
412 module_t create(const context_t& context, const void* module_data);
413 
414 inline void destroy(handle_t handle, context::handle_t context_handle, device::id_t device_id)
415 {
416  CAW_SET_SCOPE_CONTEXT(context_handle);
417  auto status = cuModuleUnload(handle);
418  throw_if_error_lazy(status, "Failed unloading " + identify(handle, context_handle, device_id));
419 }
420 
421 } // namespace detail_
422 
423 // TODO: Use an optional to reduce the number of functions here... when the
424 // library starts requiring C++14.
425 
426 namespace detail_ {
427 
428 inline ::std::string identify(const module_t& module)
429 {
430  return identify(module.handle(), module.context_handle(), module.device_id());
431 }
432 
433 inline context_t get_context_for(const context_t& locus) { return locus; }
434 inline device::primary_context_t get_context_for(device_t& locus);
435 
436 } // namespace detail_
437 
449 template <typename Locus, typename ContiguousContainer,
450  cuda::detail_::enable_if_t<cuda::detail_::is_kinda_like_contiguous_container<ContiguousContainer>::value, bool>>
451 module_t create(
452  Locus&& locus,
453  ContiguousContainer module_data)
454 {
455  auto context = detail_::get_context_for(locus);
456  return detail_::create(context, module_data.data());
457 }
458 
467 // Note: The following may create the primary context of a device!
468 template <typename Locus, typename ContiguousContainer,
469  cuda::detail_::enable_if_t<cuda::detail_::is_kinda_like_contiguous_container<ContiguousContainer>::value, bool>>
470 module_t create(
471  Locus&& locus,
472  ContiguousContainer module_data,
473  const link::options_t& link_options)
474 {
475  auto context = detail_::get_context_for(locus);
476  return detail_::create(context, module_data.data(), link_options);
477 }
478 
479 } // namespace module
480 
481 } // namespace cuda
482 
483 #endif // CUDA_API_WRAPPERS_MODULE_HPP_
Wrapper class for a CUDA context.
Definition: context.hpp:244
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
detail_::region_helper< memory::region_t > region_t
A child class of the generic region_t with some managed-memory-specific functionality.
Definition: memory.hpp:1960
CUcontext handle_t
Raw CUDA driver handle for a context; see {context_t}.
Definition: types.hpp:878
A class for holding the primary context of a CUDA device.
Definition: primary_context.hpp:112
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:850
Wrapper class for a CUDA code module.
Definition: module.hpp:123
cuda::kernel_t get_kernel(const ::std::string &name) const
Obtains a kernel constituting part of this module.
Definition: module.hpp:151
Contains a proxy class for CUDA execution contexts.
module_t load_from_file(const context_t &context, const char *path)
Load a module from an appropriate compiled or semi-compiled file, allocating all relevant resources f...
Definition: module.hpp:317
Contains a proxy class for CUDA arrays - GPU memory with 2-D or 3-D locality and hardware support for...
module::handle_t handle() const
Getters for the module object&#39;s raw constituent fields.
Definition: module.hpp:128
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we&#39;ve failed - which also ensures no string is constructed unle...
Definition: error.hpp:316
void * as_pointer(device::address_t address) noexcept
Definition: types.hpp:700
Contains a base wrapper class for CUDA kernels - both statically and dynamically compiled; and some r...
A non-owning wrapper for CUDA kernels - whether they be __global__ functions compiled apriori...
Definition: kernel.hpp:159
memory::region_t get_global_region(const char *name) const
Get the mapping of a named memory region in this module to actual memory.
Definition: module.hpp:157
CUmodule handle_t
Raw CUDA driver handle of a module of compiled code; see module_t.
Definition: module.hpp:34
Wrapper class for a CUDA device.
Definition: device.hpp:135
freestanding wrapper functions for working with CUDA&#39;s various kinds of memory spaces, arranged into a relevant namespace hierarchy.