35 #ifndef CUDA_API_WRAPPERS_KERNEL_LAUNCH_CUH_ 36 #define CUDA_API_WRAPPERS_KERNEL_LAUNCH_CUH_ 41 #if CUDA_VERSION >= 12000 46 #if CUDA_VERSION >= 9000 48 #include <cuda_runtime.h> 49 #endif // CUDA_VERSION >= 9000 51 #include <type_traits> 111 struct kernel_parameter_decay {
113 using U = typename ::std::remove_reference<P>::type;
115 using type = typename ::std::conditional<
116 ::std::is_array<U>::value,
117 typename ::std::remove_extent<U>::type*,
118 typename ::std::conditional<
119 ::std::is_function<U>::value,
120 typename ::std::add_pointer<U>::type,
127 using kernel_parameter_decay_t =
typename kernel_parameter_decay<P>::type;
129 template<
typename Fun>
130 struct is_function_ptr: bool_constant<
131 ::std::is_pointer<Fun>::value and ::std::is_function<typename ::std::remove_pointer<Fun>::type>::value> { };
133 inline void collect_argument_addresses(
void**) { }
135 template <
typename Arg,
typename... Args>
136 inline void collect_argument_addresses(
void** collected_addresses, Arg&& arg, Args&&... args)
138 collected_addresses[0] =
const_cast<void*
>(
static_cast<const void*
>(&arg));
139 collect_argument_addresses(collected_addresses + 1, ::std::forward<Args>(args)...);
142 template<
typename Kernel,
typename... KernelParameters>
143 struct enqueue_launch_helper {
145 Kernel&& kernel_function,
148 KernelParameters &&... parameters)
const;
151 template<
typename Kernel,
typename... KernelParameters>
153 bool_constant<false>,
154 bool_constant<false>,
155 Kernel&& kernel_function,
158 KernelParameters&&... parameters);
160 template<
typename Kernel,
typename... KernelParameters>
163 bool_constant<false>,
167 KernelParameters&&... parameters);
169 template<
typename Kernel,
typename... KernelParameters>
171 bool_constant<false>,
176 KernelParameters&&... parameters);
178 inline void enqueue_kernel_launch_by_handle_in_current_context(
179 kernel::handle_t kernel_function_handle,
184 const void** marshalled_arguments);
186 template<
typename KernelFunction,
typename... KernelParameters>
187 void enqueue_raw_kernel_launch_in_current_context(
188 KernelFunction&& kernel_function,
193 KernelParameters&&... parameters)
200 using decayed_kf_type = typename ::std::decay<KernelFunction>::type;
201 static_assert(::std::is_function<decayed_kf_type>::value or is_function_ptr<decayed_kf_type>::value,
202 "Only a bona fide function can be launched as a CUDA kernel");
210 >>>(::std::forward<KernelParameters>(parameters)...);
214 #if CUDA_VERSION < 9000 216 "Only CUDA versions 9.0 and later support launching kernels with additional" 217 "arguments, e.g block cooperation");
223 static constexpr
const auto non_zero_num_params =
224 sizeof...(KernelParameters) == 0 ? 1 :
sizeof...(KernelParameters);
225 void* argument_ptrs[non_zero_num_params];
229 detail_::collect_argument_addresses(argument_ptrs, ::std::forward<KernelParameters>(parameters)...);
230 #if CUDA_VERSION >= 11000 231 kernel::handle_t kernel_function_handle = kernel::apriori_compiled::detail_::get_handle( (
const void*) kernel_function);
232 enqueue_kernel_launch_by_handle_in_current_context(
233 kernel_function_handle,
237 launch_configuration,
238 const_cast<const void**>(argument_ptrs));
240 #else // CUDA_VERSION is at least 9000 but under 11000 242 (void) context_handle;
243 auto status = cudaLaunchCooperativeKernel(
244 (
const void *) kernel_function,
245 (dim3)(uint3)launch_configuration.
dimensions.grid,
246 (dim3)(uint3)launch_configuration.
dimensions.block,
249 cudaStream_t(stream_handle));
251 #endif // CUDA_VERSION >= 11000 252 #endif // CUDA_VERSION >= 9000 273 template<
typename... KernelParameters>
274 struct raw_kernel_typegen {
283 using type = void(*)(cuda::detail_::kernel_parameter_decay_t<KernelParameters>...);
294 template<
typename... KernelParameters>
295 typename detail_::raw_kernel_typegen<KernelParameters...>::type
298 using raw_kernel_t =
typename detail_::raw_kernel_typegen<KernelParameters ...>::type;
299 return reinterpret_cast<raw_kernel_t
>(
const_cast<void *
>(kernel.
ptr()));
306 template<
typename... KernelParameters>
312 KernelParameters &&... parameters)
const;
346 template<
typename Kernel,
typename... KernelParameters>
351 KernelParameters&&... parameters);
358 template<
typename Kernel,
typename... KernelParameters>
362 KernelParameters&&... parameters);
384 template <
typename SpanOfConstVo
idPtrLike>
390 SpanOfConstVoidPtrLike marshalled_arguments);
392 #if CUDA_VERSION >= 12000 393 template <
typename SpanOfConstVo
idPtrLike>
395 const library::kernel_t& kernel,
398 SpanOfConstVoidPtrLike marshalled_arguments);
400 #endif // CUDA_VERSION >= 12000 404 #endif // CUDA_API_WRAPPERS_KERNEL_LAUNCH_CUH_ Proxy class for a CUDA stream.
Definition: stream.hpp:258
bool has_nondefault_attributes() const
Determine whether the configuration includes launch attributes different than the default values...
Definition: launch_configuration.hpp:156
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
The full set of possible configuration parameters for launching a kernel on a GPU.
Definition: launch_configuration.hpp:69
CUcontext handle_t
Raw CUDA driver handle for a context; see {context_t}.
Definition: types.hpp:880
A trait characterizing those types which can be used as kernel parameters.
Definition: kernel_launch.hpp:76
constexpr grid::dimensions_t single_block()
A named constructor idiom for grid::dimensions_t, which, when used, will result in a grid with a sing...
Definition: kernel_launch.hpp:92
grid::composite_dimensions_t dimensions
Dimensions of the launch grid in blocks, and of the individual blocks in the grid.
Definition: launch_configuration.hpp:71
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:852
void ensure_none(const ::std::string &message) noexcept(false)
Does nothing (unless throwing an exception)
Definition: error.hpp:449
void launch(Kernel &&kernel, launch_configuration_t launch_configuration, KernelParameters &&... parameters)
Variant of enqueue_launch for use with the default stream in the current context. ...
Definition: kernel_launch.hpp:396
A richer (kind-of-a-)wrapper for CUDA's dim3 class, used to specify dimensions for blocks (in terms o...
Definition: types.hpp:322
void enqueue_launch(Kernel &&kernel, const stream_t &stream, launch_configuration_t launch_configuration, KernelParameters &&... parameters)
Enqueues a kernel on a stream (=queue) on the current CUDA device.
Definition: kernel_launch.hpp:25
constexpr grid::block_dimensions_t single_thread_per_block()
A named constructor idiom for grid::dimensions_t, which, when used, will result in a grid whose block...
Definition: kernel_launch.hpp:98
A (base?) class for exceptions raised by CUDA code; these errors are thrown by essentially all CUDA R...
Definition: error.hpp:282
A subclass of the kernel_t interface for kernels being functions marked as global in source files and...
Definition: apriori_compiled.hpp:310
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we've failed - which also ensures no string is constructed unle...
Definition: error.hpp:327
typename is_valid_kernel_argument< T >::type is_valid_kernel_argument_t
A convenience type using the is_valid_kernel_argument trait struct.
Definition: kernel_launch.hpp:82
void launch_type_erased(const kernel_t &kernel, const stream_t &stream, launch_configuration_t launch_configuration, SpanOfConstVoidPtrLike marshalled_arguments)
Launch a kernel with the arguments pre-marshalled into the (main) form which the CUDA driver's launch...
Definition: kernel_launch.hpp:413
memory::shared::size_t dynamic_shared_memory_size
The number of bytes each grid block may use, in addition to the statically-allocated shared memory da...
Definition: launch_configuration.hpp:77
Contains the class cuda::launch_configuration_t and some supporting code.
CUstream handle_t
The CUDA driver's raw handle for streams.
Definition: types.hpp:236
Contains a base wrapper class for CUDA kernels - both statically and dynamically compiled; and some r...
const void * ptr() const noexcept
Access the raw __global__ kernel function pointer - without any type information. ...
Definition: apriori_compiled.hpp:319
A non-owning wrapper for CUDA kernels - whether they be __global__ functions compiled apriori...
Definition: kernel.hpp:159
detail_::raw_kernel_typegen< KernelParameters... >::type unwrap(const kernel::apriori_compiled_t &kernel)
A function similar to ::std::any_cast for retrieving the function pointer wrapped by a cuda::kernel::...
Definition: kernel_launch.hpp:296
The cuda::library::kernel_t class and related code.
An implementation of a subclass of kernel_t for kernels compiled together with the host-side program...