/**
 * validate_compatibility(kernel, launch_config): checks a launch configuration
 * against a specific kernel. (The include guard and the #include lines share
 * the next physical line with the start of this function's signature.)
 *
 * The checks run in this order:
 *   1. validate() on the launch configuration alone;
 *   2. validate_block_dimension_compatibility() with the kernel and the
 *      configuration's block dimensions;
 *   3. validate_dyanmic_shared_memory_size() with the kernel and the
 *      configuration's dynamic shared memory size;
 *   4. the validate_compatibility() overload taking kernel.device() and the
 *      configuration.
 *
 * @param kernel         the kernel the configuration is checked against; its
 *                       device() is used for the final check
 * @param launch_config  the configuration to check (taken by value)
 *
 * The function is declared noexcept(false); presumably a failed check is
 * reported by throwing, but the helpers are defined outside this text -
 * TODO confirm.
 *
 * NOTE(review): the embedded listing numbers skip (22 -> 24, 26 -> 29), so the
 * function's brace lines and possibly other lines are absent from this text.
 */
8 #ifndef CUDA_API_WRAPPERS_MULTI_WRAPPERS_LAUNCH_CONFIGURATION_HPP 9 #define CUDA_API_WRAPPERS_MULTI_WRAPPERS_LAUNCH_CONFIGURATION_HPP 11 #include "../launch_configuration.hpp" 12 #include "../kernel.hpp" 13 #include "../device.hpp" 14 #include "../event.hpp" 20 inline void validate_compatibility(
21 const kernel_t& kernel,
22 launch_configuration_t launch_config) noexcept(
false)
// Kernel-independent check of the configuration itself.
24 validate(launch_config);
// Kernel-specific checks: block dimensions, then dynamic shared memory size.
25 validate_block_dimension_compatibility(kernel, launch_config.dimensions.block);
// NOTE(review): "dyanmic" is the identifier as spelled at this call site; it
// must match the helper's declaration elsewhere - verify before renaming.
26 validate_dyanmic_shared_memory_size(kernel, launch_config.dynamic_shared_memory_size);
// Finally, delegate to the overload that takes the kernel's device.
29 validate_compatibility(kernel.device(), launch_config);
/**
 * marshal(): translates a launch_configuration_t into the driver's
 * CUlaunchConfig structure, writing the launch attributes into storage
 * supplied by the caller. Only compiled when CUDA_VERSION >= 12000.
 *
 * @param config             the launch configuration to translate
 * @param attribute_storage  receives one CUlaunchAttribute for each option
 *                           that is enabled in config, followed by one
 *                           CU_LAUNCH_ATTRIBUTE_IGNORE entry. No size check is
 *                           visible here; the visible code can write up to
 *                           seven entries (six attributes plus the trailing
 *                           one), so the span presumably must be at least that
 *                           large - TODO confirm against callers.
 * @return a CUlaunchConfig; the visible initializer elements are the grid
 *         dimensions, the block dimensions, the dynamic shared memory size and
 *         attribute_storage.data().
 *
 * The function is declared noexcept(true).
 *
 * NOTE(review): the embedded listing numbers skip (34 -> 36, 43 -> 46,
 * 79 -> 82, 88 -> 90, 90 -> 94), so some original lines - a parameter between
 * config and attribute_storage, the guard of the cluster-dimension section,
 * the return statement, two initializer elements and the closing braces - are
 * absent from this text. Consult the original header before relying on it.
 */
32 #if CUDA_VERSION >= 12000 33 inline CUlaunchConfig marshal(
34 const launch_configuration_t &config,
36 span<CUlaunchAttribute> attribute_storage) noexcept(
true)
// Count of attributes written so far; doubles as the next free index in
// attribute_storage.
38 unsigned int num_attributes = 0;
// Block cooperation requested: add the "cooperative" launch attribute.
40 if (config.block_cooperation) {
41 auto &attr_value = attribute_storage[num_attributes++];
42 attr_value.id = CU_LAUNCH_ATTRIBUTE_COOPERATIVE;
43 attr_value.value.cooperative = 1;
// Cluster dimensions attribute: copies the x/y/z cluster dimensions from the
// configuration.
// NOTE(review): the condition guarding this section (listing line 45) is not
// present in this text.
46 auto &attr_value = attribute_storage[num_attributes++];
47 attr_value.id = CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION;
48 attr_value.value.clusterDim.x = config.clustering.cluster_dimensions.x;
49 attr_value.value.clusterDim.y = config.clustering.cluster_dimensions.y;
50 attr_value.value.clusterDim.z = config.clustering.cluster_dimensions.z;
// A non-default cluster scheduling policy is passed on as a preference. The
// enum value is cast directly, so its enumerators presumably mirror the
// CUclusterSchedulingPolicy values - TODO confirm.
52 if (config.clustering.scheduling_policy != cluster_scheduling_policy_t::default_) {
53 auto &attribute = attribute_storage[num_attributes++];
54 attribute.id = CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE;
55 attribute.value.clusterSchedulingPolicyPreference =
56 static_cast<CUclusterSchedulingPolicy
>(config.clustering.scheduling_policy);
// Programmatically dependent launch: allow programmatic stream serialization.
59 if (config.programmatically_dependent_launch) {
60 auto &attr_value = attribute_storage[num_attributes++];
61 attr_value.id = CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION;
62 attr_value.value.programmaticStreamSerializationAllowed = 1;
// Programmatic completion event: only added when an event is set. The member
// is tested for truthiness and accessed with ->, so it is presumably a
// pointer-like or optional-like holder of an event - TODO confirm.
64 if (config.programmatic_completion.event) {
65 auto &attr_value = attribute_storage[num_attributes++];
66 attr_value.id = CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT;
67 attr_value.value.programmaticEvent.event = config.programmatic_completion.event->handle();
69 attr_value.value.programmaticEvent.triggerAtBlockStart =
70 config.programmatic_completion.trigger_event_at_block_start;
// Kernel should run in the remote memory synchronization domain.
73 if (config.in_remote_memory_synchronization_domain) {
74 auto &attr_value = attribute_storage[num_attributes++];
75 attr_value.id = CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN;
76 attr_value.value.memSyncDomain = CU_LAUNCH_MEM_SYNC_DOMAIN_REMOTE;
// Trailing "ignore" entry: written at index num_attributes WITHOUT
// incrementing the counter, so it is not included in the attribute count.
79 attribute_storage[num_attributes] = {CU_LAUNCH_ATTRIBUTE_IGNORE, {}, {}};
// Elements of the resulting CUlaunchConfig (the enclosing return statement is
// not visible in this text): grid dimensions, block dimensions, dynamic shared
// memory size, then the attribute array.
82 config.dimensions.grid.x,
83 config.dimensions.grid.y,
84 config.dimensions.grid.z,
85 config.dimensions.block.x,
86 config.dimensions.block.y,
87 config.dimensions.block.z,
88 config.dynamic_shared_memory_size,
90 attribute_storage.data(),
94 #endif // CUDA_VERSION >= 12000 100 #endif //CUDA_API_WRAPPERS_MULTI_WRAPPERS_LAUNCH_CONFIGURATION_HPP Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
CUstream handle_t
The CUDA driver's raw handle for streams.
Definition: types.hpp:239
static constexpr __host__ __device__ dimensions_t point() noexcept
Dimensions of a single point - trivial in all axes.
Definition: types.hpp:363