cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
launch_configuration.hpp
Go to the documentation of this file.
1 
7 #pragma once
8 #ifndef CUDA_API_WRAPPERS_MULTI_WRAPPERS_LAUNCH_CONFIGURATION_HPP
9 #define CUDA_API_WRAPPERS_MULTI_WRAPPERS_LAUNCH_CONFIGURATION_HPP
10 
11 #include "../launch_configuration.hpp"
12 #include "../kernel.hpp"
13 #include "../device.hpp"
14 #include "../event.hpp"
15 
16 namespace cuda {
17 
18 namespace detail_ {
19 
20 inline void validate_compatibility(
21  const kernel_t& kernel,
22  launch_configuration_t launch_config) noexcept(false)
23 {
24  validate(launch_config);
25  validate_block_dimension_compatibility(kernel, launch_config.dimensions.block);
26  validate_dyanmic_shared_memory_size(kernel, launch_config.dynamic_shared_memory_size);
27  // Uncomment if we actually get such checks
28  // validate_grid_dimension_compatibility(kernel, launch_config.dimensions.grid);
29  validate_compatibility(kernel.device(), launch_config);
30 }
31 
32 #if CUDA_VERSION >= 12000
33 inline CUlaunchConfig marshal(
34  const launch_configuration_t &config,
35  const stream::handle_t stream_handle,
36  span<CUlaunchAttribute> attribute_storage) noexcept(true)
37 {
38  unsigned int num_attributes = 0;
39  // TODO: What about CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW ?
40  if (config.block_cooperation) {
41  auto &attr_value = attribute_storage[num_attributes++];
42  attr_value.id = CU_LAUNCH_ATTRIBUTE_COOPERATIVE;
43  attr_value.value.cooperative = 1;
44  }
45  if (grid::dimensions_t::point() != config.clustering.cluster_dimensions) {
46  auto &attr_value = attribute_storage[num_attributes++];
47  attr_value.id = CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION;
48  attr_value.value.clusterDim.x = config.clustering.cluster_dimensions.x;
49  attr_value.value.clusterDim.y = config.clustering.cluster_dimensions.y;
50  attr_value.value.clusterDim.z = config.clustering.cluster_dimensions.z;
51  }
52  if (config.clustering.scheduling_policy != cluster_scheduling_policy_t::default_) {
53  auto &attribute = attribute_storage[num_attributes++];
54  attribute.id = CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE;
55  attribute.value.clusterSchedulingPolicyPreference =
56  static_cast<CUclusterSchedulingPolicy>(config.clustering.scheduling_policy);
57  }
58  // TODO: CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE
59  if (config.programmatically_dependent_launch) {
60  auto &attr_value = attribute_storage[num_attributes++];
61  attr_value.id = CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION;
62  attr_value.value.programmaticStreamSerializationAllowed = 1;
63  }
64  if (config.programmatic_completion.event) {
65  auto &attr_value = attribute_storage[num_attributes++];
66  attr_value.id = CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT;
67  attr_value.value.programmaticEvent.event = config.programmatic_completion.event->handle();
68  // TODO: What about the flags?
69  attr_value.value.programmaticEvent.triggerAtBlockStart =
70  config.programmatic_completion.trigger_event_at_block_start;
71  }
72  // What about CU_LAUNCH_ATTRIBUTE_PRIORITY ?
73  if (config.in_remote_memory_synchronization_domain) {
74  auto &attr_value = attribute_storage[num_attributes++];
75  attr_value.id = CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN;
76  attr_value.value.memSyncDomain = CU_LAUNCH_MEM_SYNC_DOMAIN_REMOTE;
77  }
78  // What about CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP ?
79  attribute_storage[num_attributes] = {CU_LAUNCH_ATTRIBUTE_IGNORE, {}, {}};
80 
81  return {
82  config.dimensions.grid.x,
83  config.dimensions.grid.y,
84  config.dimensions.grid.z,
85  config.dimensions.block.x,
86  config.dimensions.block.y,
87  config.dimensions.block.z,
88  config.dynamic_shared_memory_size,
89  stream_handle,
90  attribute_storage.data(),
91  num_attributes
92  };
93 }
94 #endif // CUDA_VERSION >= 12000
95 
96 } // namespace detail_
97 
98 } // namespace cuda
99 
100 #endif //CUDA_API_WRAPPERS_MULTI_WRAPPERS_LAUNCH_CONFIGURATION_HPP
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
CUstream handle_t
The CUDA driver's raw handle for streams.
Definition: types.hpp:239
static constexpr __host__ __device__ dimensions_t point() noexcept
Dimensions of a single point - trivial in all axes.
Definition: types.hpp:363