eyalroz/cuda-api-wrappers/link__options_8hpp_source.html

 #pragma once
 #ifndef CUDA_API_WRAPPERS_ASSEMBLY_AND_LINK_OPTIONS_HPP_
 #define CUDA_API_WRAPPERS_ASSEMBLY_AND_LINK_OPTIONS_HPP_

 #include "common_ptx_compilation_options.hpp"
 #include "device_properties.hpp"
 #include "types.hpp"

 #include <array>

 namespace cuda {

 class module_t;

 namespace link {

 /**
  * Possible strategies for obtaining fully-compiled binary code for a
  * target device.
  *
  * @note The enumerator values are spelled out explicitly since @ref detail_::marshal
  * passes them on as the value of the `CU_JIT_FALLBACK_STRATEGY` option.
  */
 enum fallback_strategy_for_binary_code_t {
     /// Prefer compiling available PTX code to produce fully-compiled binary code.
     prefer_compiling_ptx = 0,

     /// Prefer using existing fully-compiled (binary) code, for a compatible
     /// but not identical target device.
     prefer_using_compatible_binary = 1,
 };

 namespace detail_ {

 /// The type of a single JIT option identifier (the `CUjit_option` enumeration);
 /// this is the element type of the marshalled options buffer.
 using option_t = CUjit_option;

 struct marshalled_options_t {
     using size_type = unsigned;

     constexpr static const size_type max_num_options { CU_JIT_NUM_OPTIONS };

 protected:
     ::std::array<option_t, max_num_options> option_buffer;
     ::std::array<void*, max_num_options> value_buffer;
     size_type count_ { 0 };
 public:
     void push_back(option_t option)
     {
         if (count_ >= max_num_options) {
             throw ::std::invalid_argument("Attempt to push back the same option a second time");
             // If each option is pushed back at most once, the count cannot exist the number
             // of possible options. In fact, it can't even reach it because some options contradict.
             //
             // Note: This check will not catch all repeat push-backs, nor the case of conflicting
             // options - the cuLink methods will catch those. We just want to avoid overflow.
         }
         option_buffer[count_] = option;
         count_++;
     }
 protected:
     template <typename I>
     void* process_value(typename ::std::enable_if<::std::is_integral<I>::value, I>::type value)
     {
         return reinterpret_cast<void*>(static_cast<uintptr_t>(value));
     }

     template <typename T>
     void* process_value(T* value)
     {
         return static_cast<void*>(value);
     }

     void* process_value(bool value) { return process_value<int>(value ? 1 : 0); }

     void* process_value(caching_mode_t<memory_operation_t::load> value)
     {
         using ut = typename ::std::underlying_type<caching_mode_t<memory_operation_t::load>>::type;
         return process_value(static_cast<ut>(value));
     }

 public:
     template <typename T>
     void push_back(option_t option, T value)
     {
         push_back(option);
         process_value(value);
         // Now set value_buffer[count-1]...
         value_buffer[count_-1] = process_value(value);
     }

     const option_t* options() const { return option_buffer.data(); }
     const void * const * values() const { return value_buffer.data(); }
     size_type count() const { return count_; }
 };

 } // namespace detail_

 /**
  * A convenience class for holding, setting and inspecting options for a CUDA
  * binary code linking process. These are the options defined here, in addition
  * to those inherited from @ref rtc::common_ptx_compilation_options_t .
  */
 struct options_t final : public rtc::common_ptx_compilation_options_t {

     /// Logging-related options
     struct {
         /// Optional buffer for non-error information regarding the linking process
         /// (i.e. its "standard output" stream); when set, its address and size are
         /// passed on as the info log buffer.
         optional<span<char>> info;

         /// Optional buffer for information regarding errors in the linking process
         /// (i.e. its "standard error" stream); when set, its address and size are
         /// passed on as the error log buffer.
         optional<span<char>> error;

         /// Control whether the info and error logging will be verbose. Explicitly
         /// initialized, so that a default-initialized options_t never holds an
         /// indeterminate value here.
         bool verbose { false };
     } logs;

     /// When true, the link target is taken from the current CUDA context, and
     /// any specific target set in these options is disregarded when marshalling.
     bool obtain_target_from_cuda_context { true };

     /// Possible strategy for obtaining fully-compiled binary code when it is not
     /// simply available in the input; when unset, no fallback strategy option is
     /// marshalled at all.
     optional<fallback_strategy_for_binary_code_t> fallback_strategy_for_binary_code;

     // Ignoring the "internal purposes only" options;
     //
     //   CU_JIT_NEW_SM3X_OPT
     //   CU_JIT_FAST_COMPILE
     //   CU_JIT_GLOBAL_SYMBOL_NAMES
     //   CU_JIT_GLOBAL_SYMBOL_ADDRESSES
     //   CU_JIT_GLOBAL_SYMBOL_COUNT
     //
 };

 namespace detail_ {

 /**
  * Translates a link options structure into the parallel option/value buffers
  * held by a @ref marshalled_options_t .
  *
  * Only options which are actually set get marshalled. If
  * `obtain_target_from_cuda_context` is true, a specific target - even if one
  * is set - is not marshalled.
  *
  * @param link_options the options to marshal. The log buffers it refers to (if
  * any) are passed on by address, not copied.
  * @return the marshalled options
  */
 inline marshalled_options_t marshal(const options_t& link_options)
 {
     marshalled_options_t marshalled{};
     const auto& lo = link_options;

     if (lo.max_num_registers_per_thread) {
         marshalled.push_back(CU_JIT_MAX_REGISTERS, lo.max_num_registers_per_thread.value());
     }

     if (lo.min_num_threads_per_block) {
         marshalled.push_back(CU_JIT_THREADS_PER_BLOCK, lo.min_num_threads_per_block.value());
     }

     if (lo.logs.info) {
         marshalled.push_back(CU_JIT_INFO_LOG_BUFFER, lo.logs.info.value().data());
         marshalled.push_back(CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES, lo.logs.info.value().size());
     }

     if (lo.logs.error) {
         marshalled.push_back(CU_JIT_ERROR_LOG_BUFFER, lo.logs.error.value().data());
         marshalled.push_back(CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, lo.logs.error.value().size());
     }

     if (lo.optimization_level) {
         marshalled.push_back(CU_JIT_OPTIMIZATION_LEVEL, lo.optimization_level.value());
     }

     // The context-derived target takes precedence over a specific target
     if (lo.obtain_target_from_cuda_context) {
         marshalled.push_back(CU_JIT_TARGET_FROM_CUCONTEXT);
     }
     else if (lo.specific_target) {
         marshalled.push_back(CU_JIT_TARGET, lo.specific_target.value().as_combined_number());
     }

     if (lo.fallback_strategy_for_binary_code) {
         marshalled.push_back(CU_JIT_FALLBACK_STRATEGY, lo.fallback_strategy_for_binary_code.value());
     }

     // The following are integer-valued on/off options, with 0 meaning "off";
     // so, to switch them on, the option must be accompanied by a non-zero
     // value - merely listing the option would leave it disabled.

     if (lo.generate_debug_info) {
         marshalled.push_back(CU_JIT_GENERATE_DEBUG_INFO, true);
     }

     if (lo.generate_source_line_info) {
         marshalled.push_back(CU_JIT_GENERATE_LINE_INFO, true);
     }

     if (lo.logs.verbose) {
         marshalled.push_back(CU_JIT_LOG_VERBOSE, true);
     }

     const auto load_caching_mode = lo.default_load_caching_mode();
     if (load_caching_mode) {
         marshalled.push_back(CU_JIT_CACHE_MODE, load_caching_mode.value());
     }

     return marshalled;
 }

 } // namespace detail_

 // TODO: Compiler "output options":
 //
 // threads per block targeted
 // compilation wall time
 // amount written to info log

 } // namespace link

 } // namespace cuda

 #endif // CUDA_API_WRAPPERS_ASSEMBLY_AND_LINK_OPTIONS_HPP_
cuda::link::options_t
A convenience class for holding, setting and inspecting options for a CUDA binary code linking proces...
Definition: link_options.hpp:130

cuda::link::fallback_strategy_for_binary_code_t
fallback_strategy_for_binary_code_t
Possible strategies for obtaining fully-compiled binary code for a target device when it is not immed...
Definition: link_options.hpp:26

cuda::link::options_t::error
optional< span< char > > error
Information regarding errors in the linking process (i.e. its "standard error" stream) ...
Definition: link_options.hpp:138

cuda
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22

cuda::device::count
device::id_t count()
Get the number of CUDA devices usable on the system (with the current CUDA library and kernel driver)...
Definition: miscellany.hpp:63

cuda::link::options_t::fallback_strategy_for_binary_code
optional< fallback_strategy_for_binary_code_t > fallback_strategy_for_binary_code
Possible strategy for obtaining fully-compiled binary code when it is not simply available in the inp...
Definition: link_options.hpp:151

cuda::rtc::common_ptx_compilation_options_t
A subset of the options for compiling PTX code into SASS, usable both with the CUDA driver and with N...
Definition: common_ptx_compilation_options.hpp:149

cuda::link::options_t::info
optional< span< char > > info
Non-error information regarding the linking process (i.e. its "standard output" stream) ...
Definition: link_options.hpp:135

cuda::link::prefer_using_compatible_binary
Prefer using existing fully-compiled (binary) code, for a compatible but not identical target device...
Definition: link_options.hpp:31

common_ptx_compilation_options.hpp
Definitions and utility functions relating to just-in-time compilation and linking of CUDA PTX code...

cuda::link::prefer_compiling_ptx
Prefer compiling available PTX code to produce fully-compiled binary code.
Definition: link_options.hpp:28

cuda::link::options_t::verbose
bool verbose
Control whether the info and error logging will be verbose.
Definition: link_options.hpp:141

types.hpp
Fundamental CUDA-related type definitions.

device_properties.hpp
Classes representing specific and overall properties of CUDA devices.