#ifndef CUDA_API_WRAPPERS_MODULE_HPP_
#define CUDA_API_WRAPPERS_MODULE_HPP_

// ...

#if __cplusplus >= 201703L
#include <filesystem>
#endif

// ... (the following declarations live in namespace cuda::module::detail_)

inline module_t wrap(
	device::id_t       device_id,
	context::handle_t  context_handle,
	handle_t           handle,
	bool               take_ownership = false,
	bool               holds_primary_context_refcount_unit = false) noexcept;
inline ::std::string identify(handle_t handle)
{
	return ::std::string("module ") + cuda::detail_::ptr_as_hex(handle);
}

inline ::std::string identify(handle_t handle, context::handle_t context_handle)
{
	return identify(handle) + " in " + context::detail_::identify(context_handle);
}

inline ::std::string identify(handle_t handle, context::handle_t context_handle, device::id_t device_id)
{
	return identify(handle) + " in " + context::detail_::identify(context_handle, device_id);
}
::std::string identify(const module_t& module);
#if CUDA_VERSION >= 12040
inline unique_span<kernel::handle_t> get_kernel_handles(handle_t module_handle, size_t num_kernels)
{
	auto result = make_unique_span<kernel::handle_t>(num_kernels);
	auto status = cuModuleEnumerateFunctions(result.data(), (unsigned int) num_kernels, module_handle);
	throw_if_error_lazy(status, "Failed enumerating the kernels in " + module::detail_::identify(module_handle));
	return result;
}
#endif // CUDA_VERSION >= 12040
// ... (namespace cuda::module)

/// Create a module of compiled code from raw module image data, in the context
/// (or for the device) indicated by @p locus.
template <typename Locus, typename ContiguousContainer,
	cuda::detail_::enable_if_t<cuda::detail_::is_kinda_like_contiguous_container<ContiguousContainer>::value, bool> = true>
module_t create(
	Locus&&                 locus,
	ContiguousContainer     module_data,
	const link::options_t&  link_options);

template <typename Locus, typename ContiguousContainer,
	cuda::detail_::enable_if_t<cuda::detail_::is_kinda_like_contiguous_container<ContiguousContainer>::value, bool> = true>
module_t create(
	Locus&&                 locus,
	ContiguousContainer     module_data);
#if CUDA_VERSION >= 12030
/// The mode (eager or lazy) in which the driver loads modules of compiled code
using loading_mode_t = CUmoduleLoadingMode;

/// @returns the module loading mode with which the driver is currently configured
inline loading_mode_t loading_mode() {
	loading_mode_t result;
	auto status = cuModuleGetLoadingMode(&result);
	throw_if_error_lazy(status, "Failed obtaining the module loading mode");
	return result;
}
#endif // CUDA_VERSION >= 12030
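// Usage sketch (illustrative; not part of the original header). Assuming the
// library's umbrella header <cuda/api.hpp>, this checks whether the driver is set
// up for lazy module loading, e.g. after exporting CUDA_MODULE_LOADING=LAZY:
//
//     #include <cuda/api.hpp>
//     #include <iostream>
//
//     int main()
//     {
//         cuda::device::current::get(); // also ensures the driver is initialized
//         auto mode = cuda::module::loading_mode();
//         std::cout << "Module loading is "
//             << ((mode == CU_MODULE_LAZY_LOADING) ? "lazy" : "eager") << '\n';
//     }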
// ... (namespace cuda)

/// Wrapper class for a CUDA code module
class module_t {

public: // getters and non-mutators

	// ... (getters for the module's raw handle, its context's handle, and its device's id)

	/// Obtains a kernel constituting part of this module
	cuda::kernel_t get_kernel(const ::std::string& name) const
	{
		return get_kernel(name.c_str());
	}
	/// Get the mapping of a named memory region in this module to actual (device) memory
	memory::region_t get_global_region(const char* name) const
	{
		CUdeviceptr dptr;
		size_t size;
		auto result = cuModuleGetGlobal(&dptr, &size, handle_, name);
		throw_if_error_lazy(result, "Failed obtaining the address and size of a named global object");
		return { memory::as_pointer(dptr), size };
	}
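	// Usage sketch (illustrative; not from the original header): locating a __device__
	// global in a loaded module and overwriting it from the host. The file name and the
	// global's name ("lookup_table") are hypothetical, and this assumes region_t's
	// start()/size() accessors and a memory::copy(void*, const void*, size_t) overload;
	// check memory.hpp for the exact copy overloads available.
	//
	//     auto device = cuda::device::current::get();
	//     auto module = cuda::module::load_from_file(device, "kernels.cubin");
	//     auto region = module.get_global_region("lookup_table");
	//     std::vector<float> host_table(region.size() / sizeof(float), 1.0f);
	//     cuda::memory::copy(region.start(), host_table.data(), region.size());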
#if CUDA_VERSION >= 12040
	/// @returns the number of kernel functions contained in this module
	size_t get_num_kernels() const
	{
		unsigned int result;
		auto status = cuModuleGetFunctionCount(&result, handle_);
		throw_if_error_lazy(status, "Failed determining function count for " + module::detail_::identify(*this));
		return result;
	}

	/// @returns (non-owning) kernel proxy objects for all kernels contained in this module
	unique_span<kernel_t> get_kernels() const
	{
		auto num_kernels = get_num_kernels();
		auto handles = module::detail_::get_kernel_handles(handle_, num_kernels);
		auto gen = [&](size_t i) { return kernel::wrap(device_id_, context_handle_, handles[i]); };
		return generate_unique_span<kernel_t>(handles.size(), gen);
	}
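	// Usage sketch (illustrative; not from the original header; requires CUDA 12.4+):
	// enumerating the kernels of a module loaded from a hypothetical fatbin file.
	//
	//     auto device  = cuda::device::current::get();
	//     auto module  = cuda::module::load_from_file(device, "kernels.fatbin");
	//     auto kernels = module.get_kernels();  // unique_span<kernel_t>
	//     std::cout << "The module holds " << kernels.size() << " kernel(s)\n";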
#endif // CUDA_VERSION >= 12040

#if CUDA_VERSION < 12000
	CUsurfref get_surface(const char* name) const;

	CUtexref get_texture_reference(const char* name) const;
#endif // CUDA_VERSION < 12000
	// ... (constructors and destructor)

	module_t(
		device::id_t       device_id,
		context::handle_t  context,
		module::handle_t   handle,
		bool               owning,
		bool               holds_primary_context_refcount_unit)
	: device_id_(device_id), context_handle_(context), handle_(handle), owning_(owning),
	  holds_pc_refcount_unit_(holds_primary_context_refcount_unit)
	{ }
	module_t(module_t&& other) noexcept : module_t(
		other.device_id_,
		other.context_handle_,
		other.handle_,
		other.owning_,
		other.holds_pc_refcount_unit_)
	{
		other.owning_ = false;
		other.holds_pc_refcount_unit_ = false;
	}
	~module_t() noexcept(false)
	{
		if (owning_) {
			module::detail_::destroy(handle_, context_handle_, device_id_);
		}
		if (holds_pc_refcount_unit_) {
#ifdef NDEBUG
			// Swallow any error, to avoid termination while possibly already unwinding
			device::primary_context::detail_::decrease_refcount_nothrow(device_id_);
#else
			device::primary_context::detail_::decrease_refcount(device_id_);
#endif
		}
	}
	module_t& operator=(module_t&& other) noexcept
	{
		::std::swap(device_id_, other.device_id_);
		::std::swap(context_handle_, other.context_handle_);
		::std::swap(handle_, other.handle_);
		::std::swap(owning_, other.owning_);
		::std::swap(holds_pc_refcount_unit_, other.holds_pc_refcount_unit_);
		return *this;
	}
protected: // data members

	device::id_t       device_id_;
	context::handle_t  context_handle_;
	module::handle_t   handle_;
	bool               owning_;
	bool               holds_pc_refcount_unit_;
}; // class module_t
// ... (namespace cuda::module::detail_)

inline module_t load_from_file_in_current_context(
	device::id_t       current_context_device_id,
	context::handle_t  current_context_handle,
	const char*        path,
	bool               holds_primary_context_refcount_unit = false)
{
	handle_t new_module_handle;
	auto status = cuModuleLoad(&new_module_handle, path);
	throw_if_error_lazy(status, ::std::string("Failed loading a module from file ") + path);
	bool do_take_ownership{true};
	return wrap(
		current_context_device_id,
		current_context_handle,
		new_module_handle,
		do_take_ownership,
		holds_primary_context_refcount_unit);
}
// ... (namespace cuda::module)

/// Load a module from an appropriate compiled or semi-compiled file, allocating all
/// relevant resources for it, within the given context
inline module_t load_from_file(const context_t& context, const char* path)
{
	CAW_SET_SCOPE_CONTEXT(context.handle());
	return detail_::load_from_file_in_current_context(context.device_id(), context.handle(), path);
}

inline module_t load_from_file(const context_t& context, const ::std::string& path)
{
	return load_from_file(context, path.c_str());
}

// ...

inline module_t load_from_file(const device_t& device, const ::std::string& path)
{
	return load_from_file(device, path.c_str());
}

module_t load_from_file(const char* path);

inline module_t load_from_file(const ::std::string& path)
{
	return load_from_file(path.c_str());
}
#if __cplusplus >= 201703L

inline module_t load_from_file(const device_t& device, const ::std::filesystem::path& path)
{
	return load_from_file(device, path.c_str());
}

inline module_t load_from_file(const ::std::filesystem::path& path)
{
	return load_from_file(device::current::get(), path);
}

#endif // __cplusplus >= 201703L
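// Usage sketch (illustrative; not from the original header; the filesystem-path
// overloads require C++17). The file name and kernel name are hypothetical; the
// kernel name must be the one appearing in the compiled binary (possibly mangled):
//
//     namespace fs = ::std::filesystem;
//     auto device = cuda::device::current::get();
//     auto module = cuda::module::load_from_file(device, fs::path{"kernels/saxpy.cubin"});
//     auto kernel = module.get_kernel("saxpy");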
// ... (namespace cuda::module::detail_)

inline module_t wrap(
	device::id_t       device_id,
	context::handle_t  context_handle,
	module::handle_t   module_handle,
	bool               take_ownership,
	bool               hold_pc_refcount_unit) noexcept
{
	return module_t{device_id, context_handle, module_handle, take_ownership, hold_pc_refcount_unit};
}
// ...

inline void destroy(handle_t handle, context::handle_t context_handle, device::id_t device_id)
{
	CAW_SET_SCOPE_CONTEXT(context_handle);
	auto status = cuModuleUnload(handle);
	throw_if_error_lazy(status, "Failed unloading " + identify(handle, context_handle, device_id));
}
inline ::std::string identify(const module_t& module)
{
	return identify(module.handle(), module.context_handle(), module.device_id());
}
// ... (namespace cuda::module)

template <typename Locus, typename ContiguousContainer,
	cuda::detail_::enable_if_t<cuda::detail_::is_kinda_like_contiguous_container<ContiguousContainer>::value, bool>>
module_t create(
	Locus&&             locus,
	ContiguousContainer module_data)
{
	auto context = detail_::get_context_for(locus);
	return detail_::create(context, module_data.data());
}
template <typename Locus, typename ContiguousContainer,
	cuda::detail_::enable_if_t<cuda::detail_::is_kinda_like_contiguous_container<ContiguousContainer>::value, bool>>
module_t create(
	Locus&&                 locus,
	ContiguousContainer     module_data,
	const link::options_t&  link_options)
{
	auto context = detail_::get_context_for(locus);
	return detail_::create(context, module_data.data(), link_options);
}
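// Usage sketch (illustrative; not from the original header): creating a module from
// an in-memory image, e.g. PTX text generated by NVRTC or read from disk. The helper
// obtain_ptx_somehow() is hypothetical; any contiguous container whose data() points
// at a complete module image should satisfy the ContiguousContainer requirement, and
// PTX text passed this way must be NUL-terminated. Passing a device as the locus is
// assumed to target that device's primary context.
//
//     std::vector<char> ptx = obtain_ptx_somehow();     // hypothetical helper
//     auto device = cuda::device::current::get();
//     auto module = cuda::module::create(device, ptx);
//     auto kernel = module.get_kernel("my_kernel");     // hypothetical kernel name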
// ...

#endif // CUDA_API_WRAPPERS_MODULE_HPP_