8 #ifndef CUDA_API_WRAPPERS_MULTI_WRAPPERS_LAUNCH_CONFIGURATION_HPP     9 #define CUDA_API_WRAPPERS_MULTI_WRAPPERS_LAUNCH_CONFIGURATION_HPP    11 #include "../launch_configuration.hpp"    12 #include "../kernel.hpp"    13 #include "../device.hpp"    14 #include "../event.hpp"    20 inline void validate_compatibility(
    21     const kernel_t& kernel,
    22     launch_configuration_t launch_config) noexcept(
false)
    24     validate(launch_config);
    25     validate_block_dimension_compatibility(kernel, launch_config.dimensions.block);
    26     validate_dyanmic_shared_memory_size(kernel, launch_config.dynamic_shared_memory_size);
    29     validate_compatibility(kernel.device(), launch_config);
    32 #if CUDA_VERSION >= 12000    33 inline CUlaunchConfig marshal(
    34     const launch_configuration_t &config,
    36     span<CUlaunchAttribute> attribute_storage) noexcept(
true)
    38     unsigned int num_attributes = 0;
    40     if (config.block_cooperation) {
    41         auto &attr_value = attribute_storage[num_attributes++];
    42         attr_value.id = CU_LAUNCH_ATTRIBUTE_COOPERATIVE;
    43         attr_value.value.cooperative = 1;
    46         auto &attr_value = attribute_storage[num_attributes++];
    47         attr_value.id = CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION;
    48         attr_value.value.clusterDim.x = config.clustering.cluster_dimensions.x;
    49         attr_value.value.clusterDim.y = config.clustering.cluster_dimensions.y;
    50         attr_value.value.clusterDim.z = config.clustering.cluster_dimensions.z;
    52     if (config.clustering.scheduling_policy != cluster_scheduling_policy_t::default_) {
    53         auto &attribute = attribute_storage[num_attributes++];
    54         attribute.id = CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE;
    55         attribute.value.clusterSchedulingPolicyPreference =
    56             static_cast<CUclusterSchedulingPolicy
>(config.clustering.scheduling_policy);
    59     if (config.programmatically_dependent_launch) {
    60         auto &attr_value = attribute_storage[num_attributes++];
    61         attr_value.id = CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION;
    62         attr_value.value.programmaticStreamSerializationAllowed = 1;
    64     if (config.programmatic_completion.event) {
    65         auto &attr_value = attribute_storage[num_attributes++];
    66         attr_value.id = CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT;
    67         attr_value.value.programmaticEvent.event = config.programmatic_completion.event->handle();
    69         attr_value.value.programmaticEvent.triggerAtBlockStart =
    70             config.programmatic_completion.trigger_event_at_block_start;
    73     if (config.in_remote_memory_synchronization_domain) {
    74         auto &attr_value = attribute_storage[num_attributes++];
    75         attr_value.id = CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN;
    76         attr_value.value.memSyncDomain = CU_LAUNCH_MEM_SYNC_DOMAIN_REMOTE;
    79     attribute_storage[num_attributes] = {CU_LAUNCH_ATTRIBUTE_IGNORE, {}, {}};
    82         config.dimensions.grid.x,
    83         config.dimensions.grid.y,
    84         config.dimensions.grid.z,
    85         config.dimensions.block.x,
    86         config.dimensions.block.y,
    87         config.dimensions.block.z,
    88         config.dynamic_shared_memory_size,
    90         attribute_storage.data(),
#endif // CUDA_VERSION >= 12000

#endif //CUDA_API_WRAPPERS_MULTI_WRAPPERS_LAUNCH_CONFIGURATION_HPP

Definitions and functionality wrapping CUDA APIs. 
Definition: array.hpp:22
 
CUstream handle_t
The CUDA driver's raw handle for streams. 
Definition: types.hpp:239
 
static constexpr __host__ __device__ dimensions_t point() noexcept
Dimensions of a single point - trivial in all axes. 
Definition: types.hpp:363