7 #ifndef CUDA_API_WRAPPERS_ASSEMBLY_AND_LINK_OPTIONS_HPP_ 8 #define CUDA_API_WRAPPERS_ASSEMBLY_AND_LINK_OPTIONS_HPP_ 37 using option_t = CUjit_option;
49 struct marshalled_options_t {
51 using size_type = unsigned;
55 constexpr
static const size_type max_num_options { CU_JIT_NUM_OPTIONS };
58 ::std::array<option_t, max_num_options> option_buffer;
59 ::std::array<void*, max_num_options> value_buffer;
60 size_type count_ { 0 };
62 void push_back(option_t option)
64 if (count_ >= max_num_options) {
65 throw ::std::invalid_argument(
"Attempt to push back the same option a second time");
72 option_buffer[count_] = option;
77 void* process_value(typename ::std::enable_if<::std::is_integral<I>::value, I>::type value)
79 return reinterpret_cast<void*
>(
static_cast<uintptr_t
>(value));
83 void* process_value(T* value)
85 return static_cast<void*
>(value);
88 void* process_value(
bool value) {
return process_value<int>(value ? 1 : 0); }
90 void* process_value(caching_mode_t<memory_operation_t::load> value)
92 using ut = typename ::std::underlying_type<caching_mode_t<memory_operation_t::load>>::type;
93 return process_value(static_cast<ut>(value));
103 template <
typename T>
104 void push_back(option_t option, T value)
107 process_value(value);
109 value_buffer[count_-1] = process_value(value);
115 const option_t* options()
const {
return option_buffer.data(); }
116 const void *
const * values()
const {
return value_buffer.data(); }
117 size_type
count()
const {
return count_; }
/// When set (the default), marshal() emits `CU_JIT_TARGET_FROM_CUCONTEXT`
/// and does not consult `specific_target`.
bool obtain_target_from_cuda_context { true };
167 inline marshalled_options_t marshal(
const options_t& link_options)
169 marshalled_options_t marshalled{};
170 const auto& lo = link_options;
172 if (lo.max_num_registers_per_thread) {
173 marshalled.push_back(CU_JIT_MAX_REGISTERS, lo.max_num_registers_per_thread.value());
176 if (lo.min_num_threads_per_block) {
177 marshalled.push_back(CU_JIT_THREADS_PER_BLOCK, lo.min_num_threads_per_block.value());
181 marshalled.push_back(CU_JIT_INFO_LOG_BUFFER, lo.logs.info.value().data());
182 marshalled.push_back(CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES, lo.logs.info.value().size());
186 marshalled.push_back(CU_JIT_ERROR_LOG_BUFFER, lo.logs.error.value().data());
187 marshalled.push_back(CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, lo.logs.error.value().size());
190 if (lo.optimization_level) {
191 marshalled.push_back(CU_JIT_OPTIMIZATION_LEVEL, lo.optimization_level.value());
194 if (lo.obtain_target_from_cuda_context) {
195 marshalled.push_back(CU_JIT_TARGET_FROM_CUCONTEXT);
197 else if (lo.specific_target) {
198 marshalled.push_back(CU_JIT_TARGET, lo.specific_target.value().as_combined_number());
201 if (lo.fallback_strategy_for_binary_code) {
202 marshalled.push_back(CU_JIT_FALLBACK_STRATEGY, lo.fallback_strategy_for_binary_code.value());
205 if (lo.generate_debug_info) {
206 marshalled.push_back(CU_JIT_GENERATE_DEBUG_INFO);
209 if (lo.generate_source_line_info) {
210 marshalled.push_back(CU_JIT_GENERATE_LINE_INFO);
213 if (lo.generate_source_line_info) {
214 marshalled.push_back(CU_JIT_GENERATE_LINE_INFO);
217 if (lo.logs.verbose) {
218 marshalled.push_back(CU_JIT_LOG_VERBOSE);
221 if (lo.default_load_caching_mode()) {
222 marshalled.push_back(CU_JIT_CACHE_MODE, lo.default_load_caching_mode().value());
#endif // CUDA_API_WRAPPERS_ASSEMBLY_AND_LINK_OPTIONS_HPP_
A convenience class for holding, setting and inspecting options for a CUDA binary code linking process.
Definition: link_options.hpp:130
fallback_strategy_for_binary_code_t
Possible strategies for obtaining fully-compiled binary code for a target device when it is not immediately available.
Definition: link_options.hpp:26
optional< span< char > > error
Information regarding errors in the logging process (i.e. its "standard error" stream) ...
Definition: link_options.hpp:138
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
device::id_t count()
Get the number of CUDA devices usable on the system (with the current CUDA library and kernel driver)...
Definition: miscellany.hpp:63
optional< fallback_strategy_for_binary_code_t > fallback_strategy_for_binary_code
Possible strategy for obtaining fully-compiled binary code when it is not simply available in the input.
Definition: link_options.hpp:151
A subset of the options for compiling PTX code into SASS, usable both with the CUDA driver and with N...
Definition: common_ptx_compilation_options.hpp:149
optional< span< char > > info
Non-error information regarding the logging process (i.e. its "standard output" stream) ...
Definition: link_options.hpp:135
Prefer using existing fully-compiled (binary) code, for a compatible but not identical target device...
Definition: link_options.hpp:31
Definitions and utility functions relating to just-in-time compilation and linking of CUDA PTX code...
Prefer compiling available PTX code to produce fully-compiled binary code.
Definition: link_options.hpp:28
bool verbose
Control whether the info and error logging will be verbose.
Definition: link_options.hpp:141
Fundamental CUDA-related type definitions.
Classes representing specific and overall properties of CUDA devices.