7 #ifndef CUDA_API_WRAPPERS_MODULE_HPP_ 8 #define CUDA_API_WRAPPERS_MODULE_HPP_ 16 #if __cplusplus >= 201703L 42 bool take_ownership =
false,
43 bool holds_primary_context_refcount_unit =
false) noexcept;
47 return ::std::string(
"module ") + cuda::detail_::ptr_as_hex(handle);
52 return identify(handle) +
" in " + context::detail_::identify(context_handle);
57 return identify(handle) +
" in " + context::detail_::identify(context_handle, device_id);
60 ::std::string identify(
const module_t &module);
64 CAW_SET_SCOPE_CONTEXT(context_handle);
65 auto status = cuModuleUnload(handle);
66 throw_if_error_lazy(status,
"Failed unloading " + identify(handle, context_handle, device_id));
69 #if CUDA_VERSION >= 12040 70 inline unique_span<kernel::handle_t> get_kernel_handles(
handle_t module_handle,
size_t num_kernels)
72 auto result = make_unique_span<kernel::handle_t>(num_kernels);
73 auto status = cuModuleEnumerateFunctions(result.data(), (
unsigned int) num_kernels, module_handle);
74 throw_if_error_lazy(status,
"Failed enumerating the kernels in " + module::detail_::identify(module_handle));
92 template <
typename Locus,
typename ContiguousContainer,
94 cuda::detail_::enable_if_t<cuda::detail_::is_kinda_like_contiguous_container<ContiguousContainer>::value,
bool> =
true >
97 ContiguousContainer module_data,
100 template <
typename Locus,
typename ContiguousContainer,
101 cuda::detail_::enable_if_t<cuda::detail_::is_kinda_like_contiguous_container<ContiguousContainer>::value,
bool> =
true >
104 ContiguousContainer module_data);
107 #if CUDA_VERSION >= 12030 108 using loading_mode_t = CUmoduleLoadingMode;
110 inline loading_mode_t loading_mode() {
111 loading_mode_t result;
112 auto status = cuModuleGetLoadingMode(&result);
156 return get_kernel(name.c_str());
164 auto result = cuModuleGetGlobal(&dptr, &size, handle_, name);
169 #if CUDA_VERSION >= 12040 170 size_t get_num_kernels()
const 173 auto status = cuModuleGetFunctionCount(&result, handle_);
174 throw_if_error_lazy(status,
"Failed determining function count for " + module::detail_::identify(*
this));
178 unique_span<kernel_t> get_kernels()
const 180 auto num_kernels = get_num_kernels();
182 auto handles = module::detail_::get_kernel_handles(handle_, num_kernels);
183 auto gen = [&](
size_t i) {
return kernel::wrap(device_id_, context_handle_, handles[i]); };
184 return generate_unique_span<kernel_t>(handles.size(), gen);
186 #endif // CUDA_VERSION >= 12040 190 #if CUDA_VERSION < 12000 191 CUsurfref get_surface(
const char* name)
const;
199 CUtexref get_texture_reference(
const char* name)
const;
209 bool holds_primary_context_refcount_unit)
211 : device_id_(device_id), context_handle_(context), handle_(handle), owning_(owning),
212 holds_pc_refcount_unit_(holds_primary_context_refcount_unit)
226 other.context_handle_,
229 other.holds_pc_refcount_unit_)
231 other.owning_ =
false;
232 other.holds_pc_refcount_unit_ =
false;
238 ~
module_t() DESTRUCTOR_EXCEPTION_SPEC
241 #ifdef THROW_IN_DESTRUCTORS 245 module::detail_::unload(handle_, context_handle_, device_id_);
247 #ifdef THROW_IN_DESTRUCTORS 251 if (holds_pc_refcount_unit_) {
252 device::primary_context::detail_::decrease_refcount_in_dtor(device_id_);
261 ::std::swap(device_id_, other.device_id_);
262 ::std::swap(context_handle_, other.context_handle_);
263 ::std::swap(handle_, other.handle_);
264 ::std::swap(owning_, other.owning_);
265 ::std::swap(holds_pc_refcount_unit_, holds_pc_refcount_unit_);
276 bool holds_pc_refcount_unit_;
286 inline module_t load_from_file_in_current_context(
290 bool holds_primary_context_refcount_unit =
false)
292 handle_t new_module_handle;
293 auto status = cuModuleLoad(&new_module_handle, path);
295 bool do_take_ownership{
true};
297 current_context_device_id,
298 current_context_handle,
301 holds_primary_context_refcount_unit);
325 CAW_SET_SCOPE_CONTEXT(context.handle());
326 return detail_::load_from_file_in_current_context(context.device_id(), context.handle(), path);
332 const ::std::string& path)
355 const ::std::string& path)
357 return load_from_file(device, path.c_str());
366 module_t load_from_file(
const char* path);
369 inline module_t load_from_file(const ::std::string& path)
371 return load_from_file(path.c_str());
374 #if __cplusplus >= 201703L 378 const ::std::filesystem::path& path)
380 return load_from_file(device, path.c_str());
385 const ::std::filesystem::path& path)
387 return load_from_file(device::current::get(), path);
398 bool hold_pc_refcount_unit
401 return module_t{device_id, context_handle, module_handle, take_ownership, hold_pc_refcount_unit};
425 inline ::std::string identify(
const module_t& module)
427 return identify(module.
handle(), module.context_handle(), module.device_id());
446 template <
typename Locus,
typename ContiguousContainer,
447 cuda::detail_::enable_if_t<cuda::detail_::is_kinda_like_contiguous_container<ContiguousContainer>::value,
bool>>
450 ContiguousContainer module_data)
452 auto context = detail_::get_context_for(locus);
453 return detail_::create(context, module_data.data());
465 template <
typename Locus,
typename ContiguousContainer,
466 cuda::detail_::enable_if_t<cuda::detail_::is_kinda_like_contiguous_container<ContiguousContainer>::value,
bool>>
469 ContiguousContainer module_data,
472 auto context = detail_::get_context_for(locus);
473 return detail_::create(context, module_data.data(), link_options);
480 #endif // CUDA_API_WRAPPERS_MODULE_HPP_ A convenience class for holding, setting and inspecting options for a CUDA binary code linking proces...
Definition: link_options.hpp:130
Wrapper class for a CUDA context.
Definition: context.hpp:249
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
detail_::region_helper< memory::region_t > region_t
A child class of the generic region_t with some managed-memory-specific functionality.
Definition: memory.hpp:1974
CUcontext handle_t
Raw CUDA driver handle for a context; see {context_t}.
Definition: types.hpp:880
A class for holding the primary context of a CUDA device.
Definition: primary_context.hpp:122
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:852
Wrapper class for a CUDA code module.
Definition: module.hpp:126
cuda::kernel_t get_kernel(const ::std::string &name) const
Obtains a kernel constituting part of this module.
Definition: module.hpp:154
Contains a proxy class for CUDA execution contexts.
module_t load_from_file(const context_t &context, const char *path)
Load a module from an appropriate compiled or semi-compiled file, allocating all relevant resources f...
Definition: module.hpp:321
module::handle_t handle() const
Getters for the module object's raw constituent fields.
Definition: module.hpp:131
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we've failed - which also ensures no string is constructed unle...
Definition: error.hpp:327
void * as_pointer(device::address_t address) noexcept
Definition: types.hpp:702
Contains a base wrapper class for CUDA kernels - both statically and dynamically compiled; and some r...
A non-owning wrapper for CUDA kernels - whether they be __global__ functions compiled apriori...
Definition: kernel.hpp:159
Contains cuda::link::options_t class and related definitions.
memory::region_t get_global_region(const char *name) const
Get the mapping of a named memory region in this module to actual memory.
Definition: module.hpp:160
CUmodule handle_t
Raw CUDA driver handle of a module of compiled code; see module_t.
Definition: module.hpp:32
Wrapper class for a CUDA device.
Definition: device.hpp:135
freestanding wrapper functions for working with CUDA's various kinds of memory spaces, arranged into a relevant namespace hierarchy.