35 #ifndef CUDA_API_WRAPPERS_KERNEL_LAUNCH_CUH_ 36 #define CUDA_API_WRAPPERS_KERNEL_LAUNCH_CUH_ 41 #if CUDA_VERSION >= 12000 46 #if CUDA_VERSION >= 9000 48 #include <cuda_runtime.h> 49 #endif // CUDA_VERSION >= 9000 51 #include <type_traits> 83 struct kernel_parameter_decay {
85 using U = typename ::std::remove_reference<P>::type;
87 using type = typename ::std::conditional<
88 ::std::is_array<U>::value,
89 typename ::std::remove_extent<U>::type*,
90 typename ::std::conditional<
91 ::std::is_function<U>::value,
92 typename ::std::add_pointer<U>::type,
99 using kernel_parameter_decay_t =
typename kernel_parameter_decay<P>::type;
101 template<
typename Fun>
102 struct is_function_ptr: bool_constant<
103 ::std::is_pointer<Fun>::value and ::std::is_function<typename ::std::remove_pointer<Fun>::type>::value> { };
105 inline void collect_argument_addresses(
void**) { }
107 template <
typename Arg,
typename... Args>
108 inline void collect_argument_addresses(
void** collected_addresses, Arg&& arg, Args&&... args)
110 collected_addresses[0] =
const_cast<void*
>(
static_cast<const void*
>(&arg));
111 collect_argument_addresses(collected_addresses + 1, ::std::forward<Args>(args)...);
115 template<
typename Kernel,
typename... KernelParameters>
116 struct enqueue_launch_helper {
118 Kernel&& kernel_function,
121 KernelParameters &&... parameters)
const;
124 template<
typename Kernel,
typename... KernelParameters>
126 bool_constant<false>,
127 bool_constant<false>,
128 Kernel&& kernel_function,
131 KernelParameters&&... parameters);
133 template<
typename Kernel,
typename... KernelParameters>
136 bool_constant<false>,
140 KernelParameters&&... parameters);
142 template<
typename Kernel,
typename... KernelParameters>
144 bool_constant<false>,
149 KernelParameters&&... parameters);
151 inline void enqueue_kernel_launch_by_handle_in_current_context(
152 kernel::handle_t kernel_function_handle,
157 const void** marshalled_arguments);
159 template<
typename KernelFunction,
typename... KernelParameters>
160 void enqueue_raw_kernel_launch_in_current_context(
161 KernelFunction&& kernel_function,
166 KernelParameters&&... parameters)
173 using decayed_kf_type = typename ::std::decay<KernelFunction>::type;
174 static_assert(::std::is_function<decayed_kf_type>::value or is_function_ptr<decayed_kf_type>::value,
175 "Only a bona fide function can be launched as a CUDA kernel");
183 >>>(::std::forward<KernelParameters>(parameters)...);
187 #if CUDA_VERSION < 9000 189 "Only CUDA versions 9.0 and later support launching kernels with additional" 190 "arguments, e.g block cooperation");
196 static constexpr
const auto non_zero_num_params =
197 sizeof...(KernelParameters) == 0 ? 1 :
sizeof...(KernelParameters);
198 void* argument_ptrs[non_zero_num_params];
202 detail_::collect_argument_addresses(argument_ptrs, ::std::forward<KernelParameters>(parameters)...);
203 #if CUDA_VERSION >= 11000 204 kernel::handle_t kernel_function_handle = kernel::apriori_compiled::detail_::get_handle( (
const void*) kernel_function);
205 enqueue_kernel_launch_by_handle_in_current_context(
206 kernel_function_handle,
210 launch_configuration,
211 const_cast<const void**>(argument_ptrs));
213 #else // CUDA_VERSION is at least 9000 but under 11000 215 (void) context_handle;
216 auto status = cudaLaunchCooperativeKernel(
217 (
const void *) kernel_function,
218 (dim3)(uint3)launch_configuration.
dimensions.grid,
219 (dim3)(uint3)launch_configuration.
dimensions.block,
222 cudaStream_t(stream_handle));
224 #endif // CUDA_VERSION >= 11000 225 #endif // CUDA_VERSION >= 9000 246 template<
typename... KernelParameters>
247 struct raw_kernel_typegen {
256 using type = void(*)(cuda::detail_::kernel_parameter_decay_t<KernelParameters>...);
267 template<
typename... KernelParameters>
268 typename detail_::raw_kernel_typegen<KernelParameters...>::type
271 using raw_kernel_t =
typename detail_::raw_kernel_typegen<KernelParameters ...>::type;
272 return reinterpret_cast<raw_kernel_t
>(
const_cast<void *
>(kernel.
ptr()));
279 template<
typename... KernelParameters>
285 KernelParameters &&... parameters)
const;
319 template<
typename Kernel,
typename... KernelParameters>
324 KernelParameters&&... parameters);
331 template<
typename Kernel,
typename... KernelParameters>
335 KernelParameters&&... parameters);
357 template <
typename SpanOfConstVo
idPtrLike>
363 SpanOfConstVoidPtrLike marshalled_arguments);
365 #if CUDA_VERSION >= 12000 366 template <
typename SpanOfConstVo
idPtrLike>
368 const library::kernel_t& kernel,
371 SpanOfConstVoidPtrLike marshalled_arguments);
373 #endif // CUDA_VERSION >= 12000 377 #endif // CUDA_API_WRAPPERS_KERNEL_LAUNCH_CUH_ Proxy class for a CUDA stream.
Definition: stream.hpp:246
bool has_nondefault_attributes() const
Determine whether the configuration includes launch attributes different than the default values...
Definition: launch_configuration.hpp:156
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
The full set of possible configuration parameters for launching a kernel on a GPU.
Definition: launch_configuration.hpp:69
CUcontext handle_t
Raw CUDA driver handle for a context; see {context_t}.
Definition: types.hpp:878
constexpr grid::dimensions_t single_block()
A named constructor idiom for grid::dimensions_t, which, when used, will result in a grid with a sing...
Definition: kernel_launch.hpp:64
grid::composite_dimensions_t dimensions
Dimensions of the launch grid in blocks, and of the individual blocks in the grid.
Definition: launch_configuration.hpp:71
CUdevice id_t
Numeric ID of a CUDA device used by the CUDA Runtime API.
Definition: types.hpp:850
void ensure_none(const ::std::string &message) noexcept(false)
Does nothing (unless throwing an exception)
Definition: error.hpp:438
void launch(Kernel &&kernel, launch_configuration_t launch_configuration, KernelParameters &&... parameters)
Variant of enqueue_launch for use with the default stream in the current context. ...
Definition: kernel_launch.hpp:394
A richer (kind-of-a-)wrapper for CUDA's dim3 class, used to specify dimensions for blocks (in terms o...
Definition: types.hpp:325
void enqueue_launch(Kernel &&kernel, const stream_t &stream, launch_configuration_t launch_configuration, KernelParameters &&... parameters)
Enqueues a kernel on a stream (=queue) on the current CUDA device.
Definition: kernel_launch.hpp:25
constexpr grid::block_dimensions_t single_thread_per_block()
A named constructor idiom for grid::dimensions_t, which, when used, will result in a grid whose block...
Definition: kernel_launch.hpp:70
A (base?) class for exceptions raised by CUDA code; these errors are thrown by essentially all CUDA R...
Definition: error.hpp:271
A subclass of the kernel_t interface for kernels being functions marked as global in source files and...
Definition: apriori_compiled.hpp:310
#define throw_if_error_lazy(status__,...)
A macro for only throwing an error if we've failed - which also ensures no string is constructed unle...
Definition: error.hpp:316
void launch_type_erased(const kernel_t &kernel, const stream_t &stream, launch_configuration_t launch_configuration, SpanOfConstVoidPtrLike marshalled_arguments)
Launch a kernel with the arguments pre-marshalled into the (main) form which the CUDA driver's launch...
Definition: kernel_launch.hpp:411
memory::shared::size_t dynamic_shared_memory_size
The number of bytes each grid block may use, in addition to the statically-allocated shared memory da...
Definition: launch_configuration.hpp:77
Contains the class cuda::launch_configuration_t and some supporting code.
CUstream handle_t
The CUDA driver's raw handle for streams.
Definition: types.hpp:239
Contains a base wrapper class for CUDA kernels - both statically and dynamically compiled; and some r...
const void * ptr() const noexcept
Access the raw __global__ kernel function pointer - without any type information. ...
Definition: apriori_compiled.hpp:319
A non-owning wrapper for CUDA kernels - whether they be __global__ functions compiled apriori...
Definition: kernel.hpp:159
detail_::raw_kernel_typegen< KernelParameters... >::type unwrap(const kernel::apriori_compiled_t &kernel)
A function similar to ::std::any_cast for retrieving the function pointer wrapped by a cuda::kernel::...
Definition: kernel_launch.hpp:269
The cuda::library::kernel_t class and related code.
An implementation of a subclass of kernel_t for kernels compiled together with the host-side program...