cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
link_options.hpp
Go to the documentation of this file.
1 
6 #pragma once
7 #ifndef CUDA_API_WRAPPERS_ASSEMBLY_AND_LINK_OPTIONS_HPP_
8 #define CUDA_API_WRAPPERS_ASSEMBLY_AND_LINK_OPTIONS_HPP_
9 
11 #include "device_properties.hpp"
12 #include "types.hpp"
13 
14 #include <array>
15 
16 namespace cuda {
17 
19 class module_t;
21 
22 namespace link {
23 
32 };
33 
34 namespace detail_ {
35 
// Shorthand for `CUjit_option`: the type of the option identifiers which
// marshalled_options_t keeps in its option buffer.
37 using option_t = CUjit_option;
38 
49 struct marshalled_options_t {
51  using size_type = unsigned;
52 
55  constexpr static const size_type max_num_options { CU_JIT_NUM_OPTIONS };
56 
57 protected:
58  ::std::array<option_t, max_num_options> option_buffer;
59  ::std::array<void*, max_num_options> value_buffer;
60  size_type count_ { 0 };
61 public:
62  void push_back(option_t option)
63  {
64  if (count_ >= max_num_options) {
65  throw ::std::invalid_argument("Attempt to push back the same option a second time");
66  // If each option is pushed back at most once, the count cannot exist the number
67  // of possible options. In fact, it can't even reach it because some options contradict.
68  //
69  // Note: This check will not catch all repeat push-backs, nor the case of conflicting
70  // options - the cuLink methods will catch those. We just want to avoid overflow.
71  }
72  option_buffer[count_] = option;
73  count_++;
74  }
75 protected:
76  template <typename I>
77  void* process_value(typename ::std::enable_if<::std::is_integral<I>::value, I>::type value)
78  {
79  return reinterpret_cast<void*>(static_cast<uintptr_t>(value));
80  }
81 
82  template <typename T>
83  void* process_value(T* value)
84  {
85  return static_cast<void*>(value);
86  }
87 
88  void* process_value(bool value) { return process_value<int>(value ? 1 : 0); }
89 
90  void* process_value(caching_mode_t<memory_operation_t::load> value)
91  {
92  using ut = typename ::std::underlying_type<caching_mode_t<memory_operation_t::load>>::type;
93  return process_value(static_cast<ut>(value));
94  }
95 
96 public:
103  template <typename T>
104  void push_back(option_t option, T value)
105  {
106  push_back(option);
107  process_value(value);
108  // Now set value_buffer[count-1]...
109  value_buffer[count_-1] = process_value(value);
110  }
111 
115  const option_t* options() const { return option_buffer.data(); }
116  const void * const * values() const { return value_buffer.data(); }
117  size_type count() const { return count_; }
119 };
120 
121 } // namespace detail_
122 
131 
133  struct {
135  optional<span<char>> info;
136 
138  optional<span<char>> error;
139 
141  bool verbose;
142  } logs;
143 
147  bool obtain_target_from_cuda_context { true };
148 
151  optional<fallback_strategy_for_binary_code_t> fallback_strategy_for_binary_code;
152 
153  // Ignoring the "internal purposes only" options:
154  //
155  // CU_JIT_NEW_SM3X_OPT
156  // CU_JIT_FAST_COMPILE
157  // CU_JIT_GLOBAL_SYMBOL_NAMES
158  // CU_JIT_GLOBAL_SYMBOL_ADDRESSES
159  // CU_JIT_GLOBAL_SYMBOL_COUNT
160  //
161 };
162 
163 namespace detail_ {
164 
167 inline marshalled_options_t marshal(const options_t& link_options)
168 {
169  marshalled_options_t marshalled{};
170  const auto& lo = link_options;
171 
172  if (lo.max_num_registers_per_thread) {
173  marshalled.push_back(CU_JIT_MAX_REGISTERS, lo.max_num_registers_per_thread.value());
174  }
175 
176  if (lo.min_num_threads_per_block) {
177  marshalled.push_back(CU_JIT_THREADS_PER_BLOCK, lo.min_num_threads_per_block.value());
178  }
179 
180  if (lo.logs.info) {
181  marshalled.push_back(CU_JIT_INFO_LOG_BUFFER, lo.logs.info.value().data());
182  marshalled.push_back(CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES, lo.logs.info.value().size());
183  }
184 
185  if (lo.logs.error) {
186  marshalled.push_back(CU_JIT_ERROR_LOG_BUFFER, lo.logs.error.value().data());
187  marshalled.push_back(CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, lo.logs.error.value().size());
188  }
189 
190  if (lo.optimization_level) {
191  marshalled.push_back(CU_JIT_OPTIMIZATION_LEVEL, lo.optimization_level.value());
192  }
193 
194  if (lo.obtain_target_from_cuda_context) {
195  marshalled.push_back(CU_JIT_TARGET_FROM_CUCONTEXT);
196  }
197  else if (lo.specific_target) {
198  marshalled.push_back(CU_JIT_TARGET, lo.specific_target.value().as_combined_number());
199  }
200 
201  if (lo.fallback_strategy_for_binary_code) {
202  marshalled.push_back(CU_JIT_FALLBACK_STRATEGY, lo.fallback_strategy_for_binary_code.value());
203  }
204 
205  if (lo.generate_debug_info) {
206  marshalled.push_back(CU_JIT_GENERATE_DEBUG_INFO);
207  }
208 
209  if (lo.generate_source_line_info) {
210  marshalled.push_back(CU_JIT_GENERATE_LINE_INFO);
211  }
212 
213  if (lo.generate_source_line_info) {
214  marshalled.push_back(CU_JIT_GENERATE_LINE_INFO);
215  }
216 
217  if (lo.logs.verbose) {
218  marshalled.push_back(CU_JIT_LOG_VERBOSE);
219  }
220 
221  if (lo.default_load_caching_mode()) {
222  marshalled.push_back(CU_JIT_CACHE_MODE, lo.default_load_caching_mode().value());
223  }
224 
225  return marshalled;
226 }
227 
228 } // namespace detail_
229 
230 // TODO: Compiler "output options":
231 //
232 // threads per block targeted
233 // compilation wall time
234 // amount written to info log
235 
236 } // namespace link
237 
238 } // namespace cuda
239 
240 #endif // CUDA_API_WRAPPERS_ASSEMBLY_AND_LINK_OPTIONS_HPP_
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
device::id_t count()
Get the number of CUDA devices usable on the system (with the current CUDA library and kernel driver)...
Definition: miscellany.hpp:63
A subset of the options for compiling PTX code into SASS, usable both with the CUDA driver and with N...
Definition: common_ptx_compilation_options.hpp:149
Definitions and utility functions relating to just-in-time compilation and linking of CUDA PTX code...
Fundamental CUDA-related type definitions.
Classes representing specific and overall properties of CUDA devices.