eyalroz/cuda-api-wrappers/module_8hpp_source.html

 #pragma once
 #ifndef CUDA_API_WRAPPERS_MODULE_HPP_
 #define CUDA_API_WRAPPERS_MODULE_HPP_

 #include "context.hpp"
 #include "primary_context.hpp"
 #include "kernel.hpp"
 #include "memory.hpp"
 #include "array.hpp"
 #include "link_options.hpp"
 #include <array>

 #if __cplusplus >= 201703L
 #include <filesystem>
 #endif

 namespace cuda {

 class device_t;
 class context_t;
 class module_t;
 class kernel_t;

 namespace module {

 // The CUDA driver's raw handle for modules
 using handle_t = CUmodule;

 namespace detail_ {

 // Forward declaration of the factory which builds a module_t around an existing
 // raw module handle; it is defined further down in this file, once module_t is a
 // complete type. The two boolean parameters default to false here, so a call
 // passing only the first three arguments yields a wrapper which neither owns the
 // module nor holds a primary-context reference-count unit.
 inline module_t wrap(
     device::id_t            device_id,
     context::handle_t       context_handle,
     handle_t                handle,
     bool                    take_ownership = false,
     bool                    holds_primary_context_refcount_unit = false) noexcept;

 // Describes a module using only its raw handle: the text "module " followed by
 // whatever ptr_as_hex() produces for the handle.
 inline ::std::string identify(const module::handle_t &handle)
 {
     ::std::string description("module ");
     description += cuda::detail_::ptr_as_hex(handle);
     return description;
 }

 // Describes a module together with its context, in the form
 // "<module description> in <context description>".
 inline ::std::string identify(const module::handle_t &handle, context::handle_t context_handle)
 {
     const auto module_part = identify(handle);
     const auto context_part = context::detail_::identify(context_handle);
     return module_part + " in " + context_part;
 }

 // Describes a module together with its context and device, in the form
 // "<module description> in <context-and-device description>"; the latter part is
 // produced by context::detail_::identify(context_handle, device_id).
 inline ::std::string identify(const module::handle_t &handle, context::handle_t context_handle, device::id_t device_id)
 {
     const auto module_part = identify(handle);
     const auto context_part = context::detail_::identify(context_handle, device_id);
     return module_part + " in " + context_part;
 }

 // Forward declaration: describes a module_t wrapper object. The definition appears
 // later in this file, after module_t has been fully defined.
 ::std::string identify(const module_t &module);

 // Forward declaration of the routine which unloads a module; it is declared this
 // early because the module_t destructor calls it, while its definition only
 // appears after the class.
 inline void destroy(handle_t handle, context::handle_t context_handle, device::id_t device_id);

 #if CUDA_VERSION >= 12040
 // Retrieves raw handles for the kernels contained in a module.
 //
 // @param module_handle raw handle of the module whose kernels are enumerated
 // @param num_kernels   number of handle slots to allocate and pass to the driver
 // @return a unique_span with num_kernels elements, filled in by cuModuleEnumerateFunctions
 //
 // A failure status from the driver is reported via throw_if_error_lazy.
 inline unique_span<kernel::handle_t> get_kernel_handles(handle_t module_handle, size_t num_kernels)
 {
     auto kernel_handles = make_unique_span<kernel::handle_t>(num_kernels);
     // The driver API takes the count as an unsigned int rather than a size_t
     const auto requested_count = static_cast<unsigned int>(num_kernels);
     auto enumeration_status = cuModuleEnumerateFunctions(kernel_handles.data(), requested_count, module_handle);
     throw_if_error_lazy(enumeration_status, "Failed enumerating the kernels in " + module::detail_::identify(module_handle));
     return kernel_handles;
 }
 #endif

 } // namespace detail_

 // Declaration of the module-creation overload which takes link options; it is
 // defined near the end of this file. The defaulted non-type template parameter
 // removes this overload from consideration unless
 // is_kinda_like_contiguous_container<ContiguousContainer>::value is true.
 template <typename Locus, typename ContiguousContainer,
     cuda::detail_::enable_if_t<cuda::detail_::is_kinda_like_contiguous_container<ContiguousContainer>::value, bool> = true >
 module_t create(
     Locus&&                 locus,
     ContiguousContainer     module_data,
     const link::options_t&  link_options);

 // Declaration of the module-creation overload which takes no link options; it is
 // defined near the end of this file. As with the overload above, the defaulted
 // non-type template parameter restricts ContiguousContainer to types for which
 // is_kinda_like_contiguous_container<ContiguousContainer>::value is true.
 template <typename Locus, typename ContiguousContainer,
     cuda::detail_::enable_if_t<cuda::detail_::is_kinda_like_contiguous_container<ContiguousContainer>::value, bool> = true >
 module_t create(
     Locus&&              locus,
     ContiguousContainer  module_data);

 #if CUDA_VERSION >= 12030
 // Alias for the CUDA driver's module-loading-mode type
 using loading_mode_t = CUmoduleLoadingMode;

 // Asks the CUDA driver for the module loading mode.
 //
 // @return the value written by cuModuleGetLoadingMode
 //
 // A failure status from the driver is reported via throw_if_error_lazy.
 inline loading_mode_t loading_mode()
 {
     loading_mode_t mode;
     const auto query_status = cuModuleGetLoadingMode(&mode);
     throw_if_error_lazy(query_status, "Failed obtaining CUDA module loading mode");
     return mode;
 }
 #endif

 } // namespace module

 // Wrapper class for a CUDA code module.
 //
 // An instance records the raw module handle along with the handle of its context
 // and the id of its device. It may own the module - in which case the destructor
 // unloads it via module::detail_::destroy() - and may additionally hold a
 // reference-count unit on the device's primary context, which is released on
 // destruction.
 //
 // Instances are movable but not copyable. The value constructor is protected;
 // module::detail_::wrap() is a friend and is the way instances get constructed.
 class module_t {

 public: // getters
     // Raw driver handle of the wrapped module
     module::handle_t handle() const { return handle_; }
     // Raw handle of the context recorded for this module
     context::handle_t context_handle() const { return context_handle_; }
     // Id of the device recorded for this module
     device::id_t device_id() const { return device_id_; }

     // Wrapper-object getters for the module's context and device; both are only
     // declared here and defined out of class
     context_t context() const;

     device_t device() const;

     // Obtains a kernel of this module by name (declared here, defined out of class)
     cuda::kernel_t get_kernel(const char* name) const;

     // Convenience overload: forwards to get_kernel(const char*)
     cuda::kernel_t get_kernel(const ::std::string& name) const
     {
         return get_kernel(name.c_str());
     }

     // Looks up a named global object of this module.
     //
     // @param name the name passed on to cuModuleGetGlobal
     // @return a region made up of the address and the size which the driver reports
     //
     // A failure status from the driver is reported via throw_if_error_lazy.
     memory::region_t get_global_region(const char* name) const
     {
         CUdeviceptr dptr;
         size_t size;
         auto result = cuModuleGetGlobal(&dptr, &size, handle_, name);
         throw_if_error_lazy(result, "Obtaining the address and size of a named global object");
         return { memory::as_pointer(dptr), size };
     }

 #if CUDA_VERSION >= 12040
     // @return the function count which cuModuleGetFunctionCount reports for this module
     size_t get_num_kernels() const
     {
         unsigned result;
         auto status = cuModuleGetFunctionCount(&result, handle_);
         throw_if_error_lazy(status, "Failed determining function count for " + module::detail_::identify(*this));
         return result;
     }

     // @return kernel_t wrappers for all kernels of this module, each created from the
     // corresponding raw handle along with this module's device id and context handle
     unique_span<kernel_t> get_kernels() const
     {
         auto num_kernels = get_num_kernels();
         // It's ok if the number is 0!
         auto handles = module::detail_::get_kernel_handles(handle_, num_kernels);
         auto gen = [&](size_t i) { return kernel::wrap(device_id_, context_handle_, handles[i]); };
         return generate_unique_span<kernel_t>(handles.size(), gen);
     }
 #endif // CUDA_VERSION >= 12040

     // TODO: Implement a surface reference and texture reference class rather than these raw pointers.

 #if CUDA_VERSION < 12000
     CUsurfref get_surface(const char* name) const;

     CUtexref get_texture_reference(const char* name) const;
 #endif

 protected: // constructors

     // Member-wise constructor; performs no driver calls, merely records its arguments.
     //
     // @param device_id  id of the device to record
     // @param context    raw handle of the context to record
     // @param handle     raw handle of the module being wrapped
     // @param owning     whether the destructor should unload the module
     // @param holds_primary_context_refcount_unit whether the destructor should release
     //     a primary-context reference-count unit for the device
     module_t(
         device::id_t device_id,
         context::handle_t context,
         module::handle_t handle,
         bool owning,
         bool holds_primary_context_refcount_unit)
     noexcept
         : device_id_(device_id), context_handle_(context), handle_(handle), owning_(owning),
           holds_pc_refcount_unit_(holds_primary_context_refcount_unit)
     { }

 public: // friendship

     friend module_t module::detail_::wrap(device::id_t, context::handle_t, module::handle_t, bool, bool) noexcept;

 public: // constructors and destructor

     module_t(const module_t&) = delete;

     // Move construction: takes over the other wrapper's fields, and clears the other
     // wrapper's ownership and refcount-holding flags, so that its destructor will
     // neither unload the module nor release a refcount unit.
     module_t(module_t&& other) noexcept :
         module_t(
             other.device_id_,
             other.context_handle_,
             other.handle_,
             other.owning_,
             other.holds_pc_refcount_unit_)
     {
         other.owning_ = false;
         other.holds_pc_refcount_unit_ = false;
     }

     // Note: It is up to the user of this class to ensure that it is destroyed _before_ the context
     // in which it was created; and one needs to be particularly careful about this point w.r.t.
     // primary contexts
     ~module_t() noexcept(false)
     {
         if (owning_) {
             module::detail_::destroy(handle_, context_handle_, device_id_);
         }
         // TODO: DRY
         if (holds_pc_refcount_unit_) {
 #ifdef NDEBUG
             device::primary_context::detail_::decrease_refcount_nothrow(device_id_);
                 // Note: "Swallowing" any potential error to avoid ::std::terminate(); also,
                 // because a failure probably means the primary context is inactive already
 #else
             device::primary_context::detail_::decrease_refcount(device_id_);
 #endif
         }
     }

 public: // operators

     module_t& operator=(const module_t&) = delete;

     // Move assignment, implemented as a member-wise swap: after it, `other` holds what
     // used to be this object's state, and releases it (if there's anything to release)
     // when `other` itself is destroyed.
     module_t& operator=(module_t&& other) noexcept
     {
         ::std::swap(device_id_, other.device_id_);
         ::std::swap(context_handle_, other.context_handle_);
         ::std::swap(handle_, other.handle_);
         ::std::swap(owning_, other.owning_);
         // The refcount-holding flag must travel with the rest of the state; otherwise
         // the two destructors would release primary-context refcount units according
         // to flags belonging to the wrong module
         ::std::swap(holds_pc_refcount_unit_, other.holds_pc_refcount_unit_);
         return *this;
     }

 protected: // data members
     device::id_t       device_id_;
     context::handle_t  context_handle_;
     module::handle_t   handle_;
     bool               owning_;
         // this field is mutable only for enabling move construction; other
         // than in that case it must not be altered
     bool holds_pc_refcount_unit_;
         // When context_handle_ is the handle of a primary context, this module
         // may be "keeping that context alive" through the refcount - in which
         // case it must release its refcount unit on destruction
 };

 namespace module {

 namespace detail_ {

 // Loads a module from a file with cuModuleLoad and wraps the resulting handle in an
 // owning module_t. The driver call takes no context argument; the device id and
 // context handle passed in are merely recorded in the returned wrapper.
 //
 // @param current_context_device_id  device id to record in the wrapper
 // @param current_context_handle     context handle to record in the wrapper
 // @param path                       path of the file to load; also used in the error message
 // @param holds_primary_context_refcount_unit  passed on to wrap() as-is
 // @return a module_t which takes ownership of the newly-loaded module
 //
 // A failure status from the driver is reported via throw_if_error_lazy.
 inline module_t load_from_file_in_current_context(
     device::id_t            current_context_device_id,
     context::handle_t       current_context_handle,
     const char *            path,
     bool                    holds_primary_context_refcount_unit = false)
 {
     handle_t loaded_handle;
     auto load_status = cuModuleLoad(&loaded_handle, path);
     throw_if_error_lazy(load_status, ::std::string("Failed loading a module from file ") + path);
     return wrap(
         current_context_device_id,
         current_context_handle,
         loaded_handle,
         /* take_ownership = */ true,
         holds_primary_context_refcount_unit);
 }

 } // namespace detail_


 // Loads a module from a file into a given context.
 //
 // @param context the context for the module; it is set as the scope's context (via
 //     CAW_SET_SCOPE_CONTEXT) for the duration of the loading
 // @param path    path of the file to load
 // @return a module_t wrapping the newly-loaded module
 inline module_t load_from_file(
     const context_t&        context,
     const char*             path)
 {
     const auto context_handle = context.handle();
     CAW_SET_SCOPE_CONTEXT(context_handle);
     return detail_::load_from_file_in_current_context(context.device_id(), context_handle, path);
 }

 // Convenience overload taking the path as an ::std::string; forwards to the
 // (context, const char*) overload.
 inline module_t load_from_file(
     const context_t&        context,
     const ::std::string&    path)
 {
     const char* raw_path = path.c_str();
     return load_from_file(context, raw_path);
 }

 // Loads a module from a file, given a device rather than a context. Only declared
 // here; the definition is not part of this section of the file.
 module_t load_from_file(
     const device_t&         device,
     const char*             path);

 // Convenience overload taking the path as an ::std::string; forwards to the
 // (device, const char*) overload.
 inline module_t load_from_file(
     const device_t&         device,
     const ::std::string&    path)
 {
     const char* raw_path = path.c_str();
     return load_from_file(device, raw_path);
 }

 // Loads a module from a file, with neither a device nor a context specified by the
 // caller. Only declared here; the definition is not part of this section of the file.
 module_t load_from_file(const char* path);

 // Convenience overload taking the path as an ::std::string; forwards to the
 // single-parameter const char* overload.
 inline module_t load_from_file(const ::std::string& path)
 {
     const char* raw_path = path.c_str();
     return load_from_file(raw_path);
 }

 #if __cplusplus >= 201703L
 // Convenience overload taking the path as an ::std::filesystem::path; forwards to
 // the (device, ::std::string) overload.
 //
 // @param device the device for which to load the module
 // @param path   filesystem path of the file to load
 // @return a module_t wrapping the newly-loaded module
 inline module_t load_from_file(
     const device_t&                 device,
     const ::std::filesystem::path&  path)
 {
     // path::c_str() yields a `const path::value_type*`, and value_type is wchar_t on
     // Windows - so it cannot portably be passed where a `const char*` is expected.
     // path::string() produces a narrow-character string on every platform.
     return load_from_file(device, path.string());
 }

 // Convenience overload taking only a filesystem path: obtains a device via
 // device::current::get(), and forwards to the (device, filesystem path) overload.
 inline module_t load_from_file(
     const ::std::filesystem::path&  path)
 {
     const auto current_device = device::current::get();
     return load_from_file(current_device, path);
 }
 #endif

 namespace detail_ {

 // Builds a module_t wrapper around an existing raw module handle. No driver calls
 // are made; the arguments are handed to module_t's (protected) constructor, which
 // this function may access by virtue of being a friend of the class.
 //
 // @param device_id              id of the device to record in the wrapper
 // @param context_handle         raw handle of the context to record in the wrapper
 // @param module_handle          raw handle of the module being wrapped
 // @param take_ownership         whether the wrapper unloads the module on destruction
 // @param hold_pc_refcount_unit  whether the wrapper releases a primary-context
 //     reference-count unit on destruction
 inline module_t wrap(
     device::id_t            device_id,
     context::handle_t       context_handle,
     handle_t                module_handle,
     bool                    take_ownership,
     bool                    hold_pc_refcount_unit
 ) noexcept
 {
     return module_t(
         device_id,
         context_handle,
         module_handle,
         take_ownership,
         hold_pc_refcount_unit);
 }

 // Non-template workhorses behind the module::create() templates, taking the module
 // data as an untyped pointer - with and without link options. Both are only
 // declared here; their definitions are not part of this section of the file.
 module_t create(const context_t& context, const void* module_data, const link::options_t& link_options);

 module_t create(const context_t& context, const void* module_data);

 // Unloads a module with cuModuleUnload, after setting the module's context as the
 // scope's context (via CAW_SET_SCOPE_CONTEXT).
 //
 // @param handle          raw handle of the module to unload
 // @param context_handle  raw handle of the module's context
 // @param device_id       id of the module's device; only used for the error message
 //
 // A failure status from the driver is reported via throw_if_error_lazy.
 inline void destroy(handle_t handle, context::handle_t context_handle, device::id_t device_id)
 {
     CAW_SET_SCOPE_CONTEXT(context_handle);
     const auto unload_status = cuModuleUnload(handle);
     throw_if_error_lazy(
         unload_status,
         "Failed unloading " + identify(handle, context_handle, device_id));
 }

 } // namespace detail_

 // TODO: Use an optional to reduce the number of functions here... when the
 // library starts requiring C++14.

 namespace detail_ {

 // Describes a module_t wrapper, by applying the raw-handle-based identify() to the
 // wrapper's module handle, context handle and device id.
 inline ::std::string identify(const module_t& module)
 {
     const auto module_handle = module.handle();
     const auto context_handle = module.context_handle();
     const auto device_id = module.device_id();
     return identify(module_handle, context_handle, device_id);
 }

 // When the locus is a context already, the context to use is a copy of that locus.
 inline context_t get_context_for(const context_t& locus)
 {
     return locus;
 }
 // Device-locus counterpart of the overload above, returning a primary context
 // wrapper; only declared here, the definition is not part of this section of the file.
 // NOTE(review): the parameter is a non-const reference, so a const device_t locus
 // cannot bind to it - confirm that this is intended.
 inline device::primary_context_t get_context_for(device_t& locus);

 } // namespace detail_

 // Creates a module from in-memory module data, without link options.
 //
 // @param locus        the object for which to create the module; it is mapped to a
 //     context using detail_::get_context_for()
 // @param module_data  a container whose data() points at the module's data
 // @return the module_t produced by detail_::create()
 template <typename Locus, typename ContiguousContainer,
     cuda::detail_::enable_if_t<cuda::detail_::is_kinda_like_contiguous_container<ContiguousContainer>::value, bool>>
 module_t create(
     Locus&&             locus,
     ContiguousContainer module_data)
 {
     auto target_context = detail_::get_context_for(locus);
     const void* raw_module_data = module_data.data();
     return detail_::create(target_context, raw_module_data);
 }

 // Creates a module from in-memory module data, applying the given link options.
 //
 // Note: This may create the primary context of a device!
 //
 // @param locus         the object for which to create the module; it is mapped to a
 //     context using detail_::get_context_for()
 // @param module_data   a container whose data() points at the module's data
 // @param link_options  options passed on, as-is, to detail_::create()
 // @return the module_t produced by detail_::create()
 template <typename Locus, typename ContiguousContainer,
     cuda::detail_::enable_if_t<cuda::detail_::is_kinda_like_contiguous_container<ContiguousContainer>::value, bool>>
 module_t create(
     Locus&&                 locus,
     ContiguousContainer     module_data,
     const link::options_t&  link_options)
 {
     auto target_context = detail_::get_context_for(locus);
     const void* raw_module_data = module_data.data();
     return detail_::create(target_context, raw_module_data, link_options);
 }

 } // namespace module

 } // namespace cuda

 #endif // CUDA_API_WRAPPERS_MODULE_HPP_
cuda::link::options_t
A convenience class for holding, setting and inspecting options for a CUDA binary code linking proces...
Definition: link_options.hpp:130

cuda::context_t
Wrapper class for a CUDA context.
Definition: context.hpp:244

cuda
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22

cuda::memory::managed::region_t
detail_::region_helper< memory::region_t > region_t
A child class of the generic region_t with some managed-memory-specific functionality.
Definition: memory.hpp:1960

cuda::context::handle_t
CUcontext handle_t
Raw CUDA driver handle for a context; see {context_t}.
Definition: types.hpp:878

cuda::device::primary_context_t
A class for holding the primary context of a CUDA device.
Definition: primary_context.hpp:112

primary_context.hpp

cuda::device::id_t
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:850

cuda::module_t
Wrapper class for a CUDA code module.
Definition: module.hpp:123

cuda::module_t::get_kernel
cuda::kernel_t get_kernel(const ::std::string &name) const
Obtains a kernel constituting part of this module.
Definition: module.hpp:151

context.hpp
Contains a proxy class for CUDA execution contexts.

cuda::module::load_from_file
module_t load_from_file(const context_t &context, const char *path)
Load a module from an appropriate compiled or semi-compiled file, allocating all relevant resources f...
Definition: module.hpp:317

array.hpp
Contains a proxy class for CUDA arrays - GPU memory with 2-D or 3-D locality and hardware support for...

cuda::module_t::handle
module::handle_t handle() const
Getters for the module object's raw constituent fields.
Definition: module.hpp:128

throw_if_error_lazy
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we've failed - which also ensures no string is constructed unle...
Definition: error.hpp:316

cuda::memory::as_pointer
void * as_pointer(device::address_t address) noexcept
Definition: types.hpp:700

kernel.hpp
Contains a base wrapper class for CUDA kernels - both statically and dynamically compiled; and some r...

cuda::kernel_t
A non-owning wrapper for CUDA kernels - whether they be __global__ functions compiled apriori...
Definition: kernel.hpp:159

link_options.hpp
Contains cuda::link::options_t class and related definitions.

cuda::module_t::get_global_region
memory::region_t get_global_region(const char *name) const
Get the mapping of a named memory region in this module to actual memory.
Definition: module.hpp:157

cuda::module::handle_t
CUmodule handle_t
Raw CUDA driver handle of a module of compiled code; see module_t.
Definition: module.hpp:34

cuda::device_t
Wrapper class for a CUDA device.
Definition: device.hpp:135

memory.hpp
freestanding wrapper functions for working with CUDA's various kinds of memory spaces, arranged into a relevant namespace hierarchy.