// Header preamble (include guard, a CUDA_VERSION >= 12000 gate, and the
// includes), followed by the is_library_kernel type trait.
//
// is_library_kernel<Kernel> derives from ::std::is_same, comparing the decayed
// Kernel type against library::kernel_t; its ::value is therefore true exactly
// when Kernel - with references and top-level cv-qualifiers stripped by
// ::std::decay - is library::kernel_t.
7 #ifndef CUDA_API_WRAPPERS_IN_LIBRARY_KERNEL_HPP_ 8 #define CUDA_API_WRAPPERS_IN_LIBRARY_KERNEL_HPP_ 10 #if CUDA_VERSION >= 12000 12 #include "../library.hpp" 14 #include <type_traits> 33 template <
typename Kernel>
34 struct is_library_kernel : ::std::is_same<typename ::std::decay<Kernel>::type, library::kernel_t> { };
39 kernel_t contextualize(
const library::kernel_t& kernel,
const context_t& context);
// Fragment of a single-handle identify(); its signature is not visible in this
// listing. Describes a library kernel by the hexadecimal rendering of its raw
// handle, as produced by cuda::detail_::ptr_as_hex().
58 return "library kernel at " + cuda::detail_::ptr_as_hex(handle);
// Fragment of a two-handle identify(); its signature is not visible in this
// listing. Extends the single-handle description of the kernel with
// " within " followed by the description of the library given by
// `library_handle`.
63 return identify(handle) +
" within " + library::detail_::identify(library_handle);
66 ::std::string identify(
const kernel_t &kernel);
// Non-throwing helper: passes `library_kernel_handle` to cuKernelGetFunction()
// and returns the resulting handle paired with the driver's status code. The
// status is NOT checked here - the caller is expected to inspect `.second`
// before using `.first`.
// The parameter list (original line 69) and the braces are missing from this
// listing; the body reads a single name, `library_kernel_handle`.
// NOTE(review): going by the function's name, the lookup is relative to the
// current context - no context is passed to the driver call.
68 inline ::std::pair<cuda::kernel::handle_t, status_t> contextualize_in_current_context(
71 cuda::kernel::handle_t contextualized_kernel_handle;
72 auto status = cuKernelGetFunction(&contextualized_kernel_handle, library_kernel_handle);
73 return {contextualized_kernel_handle, status};
// Handle-level contextualize(). The parameter list and opening brace (original
// lines 77-79) are missing from this listing; the body uses `kernel_handle`
// and `context_handle`.
// Steps visible here:
//  1. CAW_SET_SCOPE_CONTEXT(context_handle) - presumably makes that context
//     current for the rest of this scope; confirm against the macro.
//  2. contextualize_in_current_context() yields a (handle, status) pair.
//  3. throw_if_error_lazy() throws on a failure status, with a message naming
//     both the kernel and the context.
//  4. On success, the contextualized handle is returned.
76 inline cuda::kernel::handle_t contextualize(
80 CAW_SET_SCOPE_CONTEXT(context_handle);
81 auto handle_and_status = contextualize_in_current_context(kernel_handle);
82 throw_if_error_lazy(handle_and_status.second,
"Failed placing " + identify(kernel_handle) +
" in " 83 + context::detail_::identify(context_handle));
84 return handle_and_status.first;
// Handle-level attribute getter. Part of the parameter list (original lines
// 88-89), the line opening the error check (94) and the return statement are
// missing from this listing; the visible body uses `library_kernel_handle`
// and `device_id`.
// Queries `attribute` for the kernel on the given device through
// cuKernelGetAttribute(), which writes the result into `value`.
// The string pieces after the driver call belong to an error message naming
// the attribute, the kernel and the device - presumably the tail of a
// throw_if_error_lazy(status, ...) call; confirm against the full source.
87 inline attribute_value_t get_attribute(
90 kernel::attribute_t attribute)
92 attribute_value_t value;
93 auto status = cuKernelGetAttribute(&value, attribute, library_kernel_handle, device_id);
95 + cuda::kernel::detail_::attribute_name(attribute) +
" for " + identify(library_kernel_handle)
96 +
" on " + device::detail_::identify(device_id));
// Handle-level attribute setter. Part of the parameter list (original lines
// 101-102) and the line opening the error check (107) are missing from this
// listing; the visible body uses `library_kernel_handle` and `device_id`.
// Applies `value` to `attribute` for the kernel on the given device through
// cuKernelSetAttribute().
// The string pieces after the driver call belong to an error message naming
// the attribute, the value, the kernel and the device - presumably the tail of
// a throw_if_error_lazy(status, ...) call; confirm against the full source.
100 inline void set_attribute(
103 kernel::attribute_t attribute,
104 attribute_value_t value)
106 auto status = cuKernelSetAttribute(attribute, value, library_kernel_handle, device_id);
108 + cuda::kernel::detail_::attribute_name(attribute) +
" value to " + ::std::to_string(value)
109 +
" for " + identify(library_kernel_handle) +
" on " + device::detail_::identify(device_id));
114 attribute_value_t get_attribute(
115 const library::kernel_t& library_kernel,
116 kernel::attribute_t attribute,
117 const device_t& device);
119 inline void set_attribute(
120 const library::kernel_t& library_kernel,
121 kernel::attribute_t attribute,
122 const device_t& device,
123 attribute_value_t value);
// Member function name(), preceded by a CUDA_VERSION >= 12300 gate (original
// line 141; presumably still open here - the matching #endif is not visible).
// Returns the cached `name_` when it is non-null; otherwise asks the driver
// via cuKernelGetName(), which writes into `result`.
// The declaration of `result`, the handling of `status` and the final return
// are missing from this listing.
141 #if CUDA_VERSION >= 12300 147 const char* name()
const 149 if (name_ !=
nullptr) {
return name_; }
151 auto status = cuKernelGetName(&result, handle_);
// Tail of a kernel_t constructor whose signature is not visible in this
// listing: the member-initializer list stores the two handles it was given,
// and the constructor body is empty.
162 library_handle_(library_handle), handle_(handle) {}
165 kernel_t(
const kernel_t &) =
default;
166 kernel_t(kernel_t&& other) =
default;
174 mutable const char* name_ {
nullptr };
// Tail of a function whose signature is not visible in this listing
// (NOTE(review): presumably a wrap() factory for kernel_t - confirm).
// Brace-initializes the return value from the library handle and the kernel
// handle.
182 return {library_handle, handle};
185 inline ::std::string identify(
const kernel_t& library_kernel)
187 return identify(library_kernel.library_handle(), library_kernel.handle());
// Free function get(): obtains a kernel of `library` by `name`.
// Visible part: the raw kernel handle comes from
// cuda::library::detail_::get_kernel_in_current_context(), given the library's
// handle and the name.
// The rest of the body (original line 195 onward), including the return
// statement that produces the kernel_t, is missing from this listing.
192 inline kernel_t
get(
const library_t&
library,
const char* name)
194 auto kernel_handle = cuda::library::detail_::get_kernel_in_current_context(library.handle(), name);
202 inline library::kernel_t library_t::get_kernel(
const char* name)
const 204 return library::kernel::get(*
this, name);
207 inline library::kernel_t library_t::get_kernel(const ::std::string& name)
const 209 return get_kernel(name.c_str());
// Context-taking overload of get_kernel(). The visible statement forwards to
// library::kernel::get() with this library and `name`.
// NOTE(review): `context` is not used in the lines visible here. This listing
// omits original line 214, which may be where the context is made current
// (compare CAW_SET_SCOPE_CONTEXT in the handle-level contextualize()) -
// confirm against the full source before treating this as a defect.
212 inline library::kernel_t library_t::get_kernel(
const context_t& context,
const char* name)
const 215 return library::kernel::get(*
this, name);
218 inline library::kernel_t library_t::get_kernel(
const context_t& context, const ::std::string& name)
const 220 return get_kernel(context, name.c_str());
225 #endif // CUDA_VERSION >= 12000 227 #endif // CUDA_API_WRAPPERS_IN_LIBRARY_KERNEL_HPP_ int attribute_value_t
The uniform type the CUDA driver uses for all kernel attributes; it is typically more appropriate to ...
Definition: types.hpp:988
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:850
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we've failed - which also ensures no string is constructed unle...
Definition: error.hpp:316
CUarray handle_t
Raw CUDA driver handle for arrays (of any dimension)
Definition: array.hpp:34
#define CUDA_CONTEXT_FOR_THIS_SCOPE(_cuda_context)
This macro will set the current context for the remainder of the scope in which it is invoked...
Definition: current_context.hpp:267
array_t< T, NumDimensions > wrap(device::id_t device_id, context::handle_t context_handle, handle_t handle, dimensions_t< NumDimensions > dimensions) noexcept
Wrap an existing CUDA array in an array_t instance.
Definition: array.hpp:264
A non-owning wrapper for CUDA kernels - whether they be __global__ functions compiled a priori...
Definition: kernel.hpp:159
A host-side binary object with embedded device code; a .o file.
CUfunction_attribute attribute_t
Raw CUDA driver selector of a kernel attribute.
Definition: types.hpp:983