cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
module.hpp
Go to the documentation of this file.
1 
6 #pragma once
7 #ifndef CUDA_API_WRAPPERS_MODULE_HPP_
8 #define CUDA_API_WRAPPERS_MODULE_HPP_
9 
10 #include "context.hpp"
11 #include "primary_context.hpp"
12 #include "kernel.hpp"
13 #include "memory.hpp"
14 #include "link_options.hpp"
15 
16 #if __cplusplus >= 201703L
17 #include <filesystem>
18 #endif
19 
20 namespace cuda {
21 
23 class device_t;
24 class context_t;
25 class module_t;
26 class kernel_t;
28 
29 namespace module {
30 
31 // The CUDA driver's raw handle for modules
32 using handle_t = CUmodule;
33 
34 namespace detail_ {
35 
38 inline module_t wrap(
39  device::id_t device_id,
40  context::handle_t context_handle,
41  handle_t handle,
42  bool take_ownership = false,
43  bool holds_primary_context_refcount_unit = false) noexcept;
44 
45 inline ::std::string identify(const module::handle_t &handle)
46 {
47  return ::std::string("module ") + cuda::detail_::ptr_as_hex(handle);
48 }
49 
50 inline ::std::string identify(const module::handle_t &handle, context::handle_t context_handle)
51 {
52  return identify(handle) + " in " + context::detail_::identify(context_handle);
53 }
54 
55 inline ::std::string identify(const module::handle_t &handle, context::handle_t context_handle, device::id_t device_id)
56 {
57  return identify(handle) + " in " + context::detail_::identify(context_handle, device_id);
58 }
59 
60 ::std::string identify(const module_t &module);
61 
62 inline void unload(handle_t handle, context::handle_t context_handle, device::id_t device_id)
63 {
64  CAW_SET_SCOPE_CONTEXT(context_handle);
65  auto status = cuModuleUnload(handle);
66  throw_if_error_lazy(status, "Failed unloading " + identify(handle, context_handle, device_id));
67 }
68 
69 #if CUDA_VERSION >= 12040
70 inline unique_span<kernel::handle_t> get_kernel_handles(handle_t module_handle, size_t num_kernels)
71 {
72  auto result = make_unique_span<kernel::handle_t>(num_kernels);
73  auto status = cuModuleEnumerateFunctions(result.data(), (unsigned int) num_kernels, module_handle);
74  throw_if_error_lazy(status, "Failed enumerating the kernels in " + module::detail_::identify(module_handle));
75  return result;
76 }
77 #endif
78 
79 } // namespace detail_
80 
92 template <typename Locus, typename ContiguousContainer,
94  cuda::detail_::enable_if_t<cuda::detail_::is_kinda_like_contiguous_container<ContiguousContainer>::value, bool> = true >
95 module_t create(
96  Locus&& locus,
97  ContiguousContainer module_data,
98  const link::options_t& link_options);
99 
100 template <typename Locus, typename ContiguousContainer,
101  cuda::detail_::enable_if_t<cuda::detail_::is_kinda_like_contiguous_container<ContiguousContainer>::value, bool> = true >
102 module_t create(
103  Locus&& locus,
104  ContiguousContainer module_data);
106 
107 #if CUDA_VERSION >= 12030
108 using loading_mode_t = CUmoduleLoadingMode;
109 
110 inline loading_mode_t loading_mode() {
111  loading_mode_t result;
112  auto status = cuModuleGetLoadingMode(&result);
113  throw_if_error_lazy(status, "Failed obtaining CUDA module loading mode");
114  return result;
115 }
116 #endif
117 
118 } // namespace module
119 
126 class module_t {
127 
128 public: // getters
131  module::handle_t handle() const { return handle_; }
132  context::handle_t context_handle() const { return context_handle_; }
133  device::id_t device_id() const { return device_id_; }
135 
137  context_t context() const;
138 
140  device_t device() const;
141 
151  cuda::kernel_t get_kernel(const char* name) const;
152 
154  cuda::kernel_t get_kernel(const ::std::string& name) const
155  {
156  return get_kernel(name.c_str());
157  }
158 
160  memory::region_t get_global_region(const char* name) const
161  {
162  CUdeviceptr dptr;
163  size_t size;
164  auto result = cuModuleGetGlobal(&dptr, &size, handle_, name);
165  throw_if_error_lazy(result, "Obtaining the address and size of a named global object");
166  return { memory::as_pointer(dptr), size };
167  }
168 
169 #if CUDA_VERSION >= 12040
170  size_t get_num_kernels() const
171  {
172  unsigned result;
173  auto status = cuModuleGetFunctionCount(&result, handle_);
174  throw_if_error_lazy(status, "Failed determining function count for " + module::detail_::identify(*this));
175  return result;
176  }
177 
178  unique_span<kernel_t> get_kernels() const
179  {
180  auto num_kernels = get_num_kernels();
181  // It's ok if the number is 0!
182  auto handles = module::detail_::get_kernel_handles(handle_, num_kernels);
183  auto gen = [&](size_t i) { return kernel::wrap(device_id_, context_handle_, handles[i]); };
184  return generate_unique_span<kernel_t>(handles.size(), gen);
185  }
186 #endif // CUDA_VERSION >= 12040
187 
188  // TODO: Implement a surface reference and texture reference class rather than these raw pointers.
189 
190 #if CUDA_VERSION < 12000
191  CUsurfref get_surface(const char* name) const;
194 
199  CUtexref get_texture_reference(const char* name) const;
200 #endif
201 
202 protected: // constructors
203 
204  module_t(
205  device::id_t device_id,
206  context::handle_t context,
207  module::handle_t handle,
208  bool owning,
209  bool holds_primary_context_refcount_unit)
210  noexcept
211  : device_id_(device_id), context_handle_(context), handle_(handle), owning_(owning),
212  holds_pc_refcount_unit_(holds_primary_context_refcount_unit)
213  { }
214 
215 public: // friendship
216 
217  friend module_t module::detail_::wrap(device::id_t, context::handle_t, module::handle_t, bool, bool) noexcept;
218 
219 public: // constructors and destructor
220 
221  module_t(const module_t&) = delete;
222 
223  module_t(module_t&& other) noexcept :
224  module_t(
225  other.device_id_,
226  other.context_handle_,
227  other.handle_,
228  other.owning_,
229  other.holds_pc_refcount_unit_)
230  {
231  other.owning_ = false;
232  other.holds_pc_refcount_unit_ = false;
233  };
234 
235  // Note: It is up to the user of this class to ensure that it is unloaded _before_ the context
236  // in which it was created; and one needs to be particularly careful about this point w.r.t.
237  // primary contexts
238  ~module_t() DESTRUCTOR_EXCEPTION_SPEC
239  {
240  if (owning_) {
241 #ifdef THROW_IN_DESTRUCTORS
242  try
243 #endif
244  {
245  module::detail_::unload(handle_, context_handle_, device_id_);
246  }
247 #ifdef THROW_IN_DESTRUCTORS
248  catch (...) {}
249 #endif
250  }
251  if (holds_pc_refcount_unit_) {
252  device::primary_context::detail_::decrease_refcount_in_dtor(device_id_);
253  }
254  }
255 
256 public: // operators
257 
258  module_t& operator=(const module_t&) = delete;
259  module_t& operator=(module_t&& other) noexcept
260  {
261  ::std::swap(device_id_, other.device_id_);
262  ::std::swap(context_handle_, other.context_handle_);
263  ::std::swap(handle_, other.handle_);
264  ::std::swap(owning_, other.owning_);
265  ::std::swap(holds_pc_refcount_unit_, holds_pc_refcount_unit_);
266  return *this;
267  }
268 
269 protected: // data members
270  device::id_t device_id_;
271  context::handle_t context_handle_;
272  module::handle_t handle_;
273  bool owning_;
274  // this field is mutable only for enabling move construction; other
275  // than in that case it must not be altered
276  bool holds_pc_refcount_unit_;
277  // When context_handle_ is the handle of a primary context, this module
278  // may be "keeping that context alive" through the refcount - in which
279  // case it must release its refcount unit on destruction
280 };
281 
282 namespace module {
283 
284 namespace detail_ {
285 
286 inline module_t load_from_file_in_current_context(
287  device::id_t current_context_device_id,
288  context::handle_t current_context_handle,
289  const char * path,
290  bool holds_primary_context_refcount_unit = false)
291 {
292  handle_t new_module_handle;
293  auto status = cuModuleLoad(&new_module_handle, path);
294  throw_if_error_lazy(status, ::std::string("Failed loading a module from file ") + path);
295  bool do_take_ownership{true};
296  return wrap(
297  current_context_device_id,
298  current_context_handle,
299  new_module_handle,
300  do_take_ownership,
301  holds_primary_context_refcount_unit);
302 }
303 
304 } // namespace detail_
305 
306 
322  const context_t& context,
323  const char* path)
324 {
325  CAW_SET_SCOPE_CONTEXT(context.handle());
326  return detail_::load_from_file_in_current_context(context.device_id(), context.handle(), path);
327 }
328 
331  const context_t& context,
332  const ::std::string& path)
333 {
334  return load_from_file(context, path.c_str());
335 }
336 
344  const device_t& device,
345  const char* path);
346 
353 inline module_t load_from_file(
354  const device_t& device,
355  const ::std::string& path)
356 {
357  return load_from_file(device, path.c_str());
358 }
359 
366 module_t load_from_file(const char* path);
367 
369 inline module_t load_from_file(const ::std::string& path)
370 {
371  return load_from_file(path.c_str());
372 }
373 
374 #if __cplusplus >= 201703L
375 inline module_t load_from_file(
377  const device_t& device,
378  const ::std::filesystem::path& path)
379 {
380  return load_from_file(device, path.c_str());
381 }
382 
384 inline module_t load_from_file(
385  const ::std::filesystem::path& path)
386 {
387  return load_from_file(device::current::get(), path);
388 }
389 #endif
390 
391 namespace detail_ {
392 
393 inline module_t wrap(
394  device::id_t device_id,
395  context::handle_t context_handle,
396  handle_t module_handle,
397  bool take_ownership,
398  bool hold_pc_refcount_unit
399 ) noexcept
400 {
401  return module_t{device_id, context_handle, module_handle, take_ownership, hold_pc_refcount_unit};
402 }
403 
413 module_t create(const context_t& context, const void* module_data, const link::options_t& link_options);
414 
416 module_t create(const context_t& context, const void* module_data);
417 
418 } // namespace detail_
419 
420 // TODO: Use an optional to reduce the number of functions here... when the
421 // library starts requiring C++14.
422 
423 namespace detail_ {
424 
425 inline ::std::string identify(const module_t& module)
426 {
427  return identify(module.handle(), module.context_handle(), module.device_id());
428 }
429 
430 inline context_t get_context_for(const context_t& locus) { return locus; }
431 inline device::primary_context_t get_context_for(const device_t& locus);
432 
433 } // namespace detail_
434 
446 template <typename Locus, typename ContiguousContainer,
447  cuda::detail_::enable_if_t<cuda::detail_::is_kinda_like_contiguous_container<ContiguousContainer>::value, bool>>
448 module_t create(
449  Locus&& locus,
450  ContiguousContainer module_data)
451 {
452  auto context = detail_::get_context_for(locus);
453  return detail_::create(context, module_data.data());
454 }
455 
464 // Note: The following may create the primary context of a device!
465 template <typename Locus, typename ContiguousContainer,
466  cuda::detail_::enable_if_t<cuda::detail_::is_kinda_like_contiguous_container<ContiguousContainer>::value, bool>>
467 module_t create(
468  Locus&& locus,
469  ContiguousContainer module_data,
470  const link::options_t& link_options)
471 {
472  auto context = detail_::get_context_for(locus);
473  return detail_::create(context, module_data.data(), link_options);
474 }
475 
476 } // namespace module
477 
478 } // namespace cuda
479 
480 #endif // CUDA_API_WRAPPERS_MODULE_HPP_
Wrapper class for a CUDA context.
Definition: context.hpp:249
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
detail_::region_helper< memory::region_t > region_t
A child class of the generic region_t with some managed-memory-specific functionality.
Definition: memory.hpp:1974
CUcontext handle_t
Raw CUDA driver handle for a context; see {context_t}.
Definition: types.hpp:880
A class for holding the primary context of a CUDA device.
Definition: primary_context.hpp:122
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:852
Wrapper class for a CUDA code module.
Definition: module.hpp:126
cuda::kernel_t get_kernel(const ::std::string &name) const
Obtains a kernel constituting part of this module.
Definition: module.hpp:154
Contains a proxy class for CUDA execution contexts.
module_t load_from_file(const context_t &context, const char *path)
Load a module from an appropriate compiled or semi-compiled file, allocating all relevant resources f...
Definition: module.hpp:321
module::handle_t handle() const
Getters for the module object&#39;s raw constituent fields.
Definition: module.hpp:131
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we&#39;ve failed - which also ensures no string is constructed unle...
Definition: error.hpp:327
void * as_pointer(device::address_t address) noexcept
Definition: types.hpp:702
Contains a base wrapper class for CUDA kernels - both statically and dynamically compiled; and some r...
A non-owning wrapper for CUDA kernels - whether they be __global__ functions compiled apriori...
Definition: kernel.hpp:159
memory::region_t get_global_region(const char *name) const
Get the mapping of a named memory region in this module to actual memory.
Definition: module.hpp:160
CUmodule handle_t
Raw CUDA driver handle of a module of compiled code; see module_t.
Definition: module.hpp:32
Wrapper class for a CUDA device.
Definition: device.hpp:135
freestanding wrapper functions for working with CUDA&#39;s various kinds of memory spaces, arranged into a relevant namespace hierarchy.