eyalroz/cuda-api-wrappers/compilation__options_8hpp_source.html

 #pragma once
 #ifndef CUDA_API_WRAPPERS_RTC_COMPILATION_OPTIONS_HPP_
 #define CUDA_API_WRAPPERS_RTC_COMPILATION_OPTIONS_HPP_

 #include "cuda/api/detail/option_marshalling.hpp"

 #include "../api/device_properties.hpp"
 #include "../api/device.hpp"
 #include "../api/common_ptx_compilation_options.hpp"

 #include <cstring>
 #include <iterator>
 #include <limits>
 #include <sstream>
 #include <stdexcept>
 #include <string>
 #include <unordered_map>
 #include <unordered_set>
 #include <vector>

 namespace cuda {

 namespace rtc {

 /**
  * The dialects of the C++ language which may be selected when compiling
  * CUDA C++ source code.
  *
  * @note The numeric value of each enumerator doubles as an index into
  * @ref detail_::cpp_dialect_names , so the two must be kept in sync.
  */
 enum class cpp_dialect_t {
     cpp03 = 0,
     cpp11 = 1,
     cpp14 = 2,
     cpp17 = 3,
     last = cpp17
 };

 namespace detail_ {

 /// The length, in characters, of each of the names in @ref cpp_dialect_names
 static constexpr const size_t language_dialect_name_length { 5 };

 /// The name of each dialect, indexed by the numeric value of its @ref cpp_dialect_t enumerator
 constexpr const char* cpp_dialect_names[] =  {
     "c++03",
     "c++11",
     "c++14",
     "c++17",
 };

 // The lookup below, and the option rendering code, index the table by
 // enumerator value; make sure nobody extends one without the other.
 static_assert(
     sizeof(cpp_dialect_names) / sizeof(cpp_dialect_names[0]) ==
         static_cast<size_t>(cpp_dialect_t::last) + 1,
     "cpp_dialect_names must hold exactly one name per cpp_dialect_t enumerator");

 /**
  * Translates the name of a C++ dialect into the corresponding enumerator.
  *
  * @param dialect_name a nul-terminated name, e.g. "c++14"; the comparison
  * against the known names is exact (case-sensitive, no surrounding whitespace)
  * @return the dialect bearing that name
  * @throws ::std::invalid_argument if @p dialect_name is null, or is not
  * the name of any known dialect
  */
 inline cpp_dialect_t cpp_dialect_from_name(const char* dialect_name) noexcept(false)
 {
     // Passing a null pointer to strcmp() is undefined behavior, so reject it up front
     if (dialect_name == nullptr) {
         throw ::std::invalid_argument("No C++ dialect name was provided (null pointer)");
     }
     for(auto known_dialect = static_cast<int>(cpp_dialect_t::cpp03);
         known_dialect <= static_cast<int>(cpp_dialect_t::last);
         known_dialect++)
     {
         if (::std::strcmp(detail_::cpp_dialect_names[known_dialect], dialect_name) == 0) {
             return static_cast<cpp_dialect_t>(known_dialect);
         }
     }
     throw ::std::invalid_argument(::std::string("No C++ dialect named \"") + dialect_name + '"');
 }

 } // namespace detail_

 namespace error {

 /**
  * Possible ways of handling a potentially problematic finding by the
  * compiler in the program source code.
  *
  * @note The numeric values are used as indices by
  * @ref detail_::option_name_part .
  */
 enum handling_method_t {
     raise_error = 0,
     suppress    = 1,
     warn        = 2
 };

 /// Errors, or problematic findings, by the compiler are identified by a number of this type
 using number_t = unsigned;

 namespace detail_ {

 /**
  * Obtains the fragment of a compiler option name which corresponds to
  * a way of handling a finding; the CUDA C++ option marshalling code
  * appends it to "--diag-".
  *
  * @param method one of the enumerators of @ref handling_method_t
  * @return "error", "suppress" or "warn", for @ref raise_error,
  * @ref suppress and @ref warn respectively
  */
 inline const char* option_name_part(handling_method_t method)
 {
     // Ordered by the numeric values of the handling_method_t enumerators
     static constexpr const char* name_part_by_method[] = { "error", "suppress", "warn" };
     const auto position = static_cast<unsigned>(method);
     return name_part_by_method[position];
 }

 } // namespace detail_

 } // namespace error

 /**
  * Compilation options common to all kinds of JIT-compilable programs.
  *
  * @tparam Kind the kind of source code the options apply to
  */
 template <source_kind_t Kind>
 struct compilation_options_base_t {
     template <typename T>
     using optional = cuda::optional<T>;

     /// Target devices in terms of CUDA compute capability.
     ::std::unordered_set<cuda::device::compute_capability_t> targets_;

 public:
     // TODO: Drop the following methods and make targets a custom
     // inner class which can be assigned, added to or subtracted from

     /**
      * Have the compilation also target a specific compute capability.
      *
      * Targets which were set or added earlier are kept; adding a compute
      * capability which is already targeted has no effect. Use
      * @ref set_target to replace the existing targets instead.
      *
      * @param compute_capability the additional compute capability to target
      * @return this options object, to allow for chaining of calls
      */
     compilation_options_base_t& add_target(device::compute_capability_t compute_capability)
     {
         // Note: No clearing of targets_ here - that is what distinguishes
         // adding a target from setting the (single) target
         targets_.insert(compute_capability);
         return *this;
     }

     /**
      * Have the compilation target one specific compute capability only,
      * discarding any targets set or added earlier.
      *
      * @param compute_capability the sole compute capability to target
      * @return this options object, to allow for chaining of calls
      */
     compilation_options_base_t& set_target(device::compute_capability_t compute_capability)
     {
         targets_.clear();
         targets_.insert(compute_capability);
         return *this;
     }

     /**
      * Have the compilation target only the compute capability of a
      * specific device, discarding any targets set or added earlier.
      *
      * @param device the device whose compute capability is to be targeted
      * @return this options object, to allow for chaining of calls
      */
     compilation_options_base_t& set_target(device_t device)
     {
         return set_target(device.compute_capability());
     }
 }; // compilation_options_base_t

 /**
  * Named values for the compilation optimization level (as in -O1, -O2 etc.),
  * usable wherever an @ref rtc::optimization_level_t is expected.
  * `no_optimization` and `maximum_optimization` are aliases for the lowest
  * and the highest of the levels named here.
  */
 enum : rtc::optimization_level_t {
     O0 = 0,
     no_optimization = O0,
     O1 = 1,
     O2 = 2,
     O3 = 3,
     maximum_optimization = O3
 };

 /**
  * Options to be passed to one of the NVIDIA JIT compilers along with a
  * program's source code.
  *
  * Only declared here; this file specializes it for the `ptx` and the
  * `cuda_cpp` kinds of source.
  *
  * @tparam Kind the kind of source code the options apply to
  */
 template <source_kind_t Kind>
 class compilation_options_t;

 /**
  * Options for JIT-compilation of CUDA PTX code.
  *
  * Most fields correspond to a single option of the PTX compiler; see the
  * `gadget` specialization for this class, in the `marshalling::detail_`
  * namespace, for the exact option string each field is rendered into.
  */
 template <>
 class compilation_options_t<ptx> final :
     public compilation_options_base_t<ptx>,
     public common_ptx_compilation_options_t {
 public:
     using parent = compilation_options_base_t<ptx>;
     using parent::parent;

     /// Makes the PTX compiler run without producing any CUBIN output (for PTX
     /// verification only); targets are then rendered as `compute_XY` rather than `sm_XY`
     bool parse_without_code_generation { false };

     /// Allow the JIT compiler to perform expensive optimizations using maximum
     /// available resources; only rendered when an optimization level below O2 is set
     bool allow_expensive_optimizations_below_O2 { false };

     /// Compile as patch code for CUDA tools.
     bool compile_as_tools_patch { false };

     /// Expecting only whole-programs to be directly usable, allow some calls
     /// to not be resolved until later (rendered as `--extensible-whole-program`)
     bool compile_extensible_whole_program { false };

     /// Enable the contraction of multiplications-followed-by-additions (or
     /// subtractions) into single fused instructions
     bool use_fused_multiply_add { true };

     /// Print code generation statistics along with the compilation log.
     bool verbose { false };

     /// Prevent the compiler from merging consecutive basic blocks
     bool dont_merge_basicblocks { false };

     /// The equivalent of suppressing all findings which currently trigger a warning.
     bool disable_warnings { false };

     /// Disable use of the "optimizer constant bank" feature.
     bool disable_optimizer_constants { false };

     /// Prevents the optimizing away of the return instruction at the end of a
     /// program (a kernel)
     bool return_at_end_of_kernel { false };

     /// Generate relocatable references for variables and preserve relocations
     /// generated for them (rendered as `--preserve-relocs`)
     bool preserve_variable_relocations { false };

     /// Warnings about situations likely to result in poor performance or other problems.
     struct {
         bool double_precision_ops { false };
         bool local_memory_use { false };
         bool registers_spill_to_local_memory { false };
         // Note: the only one of these flags which is on by default
         bool indeterminable_stack_size { true };
         // Does the PTX compiler library actually support this? ptxas does, but the PTX compilation API
         // doesn't mention it
         bool double_demotion { false };
     } situation_warnings;

     /// Limits on the number of registers which generated object code (of
     /// different kinds) is allowed to use; an unset limit is not rendered at all
     struct {
         optional<rtc::ptx_register_count_t> kernel {};
         optional<rtc::ptx_register_count_t> device_function {};
     } maximum_register_counts;

     /// A pair of optional caching modes: one for loads and one for stores
     struct caching_mode_spec_t {
         optional<caching_mode_t<memory_operation_t::load>> load {};
         optional<caching_mode_t<memory_operation_t::store>> store {};
     };
     struct {
         /// The caching mode to be used for instructions which don't specify a caching mode.
         caching_mode_spec_t default_ {};
         /// Rendered via the `--force-load-cache` and `--force-store-cache` options
         caching_mode_spec_t forced {};
     } caching_modes;

     /// Get a reference to the caching mode the compiler will be told to use
     /// as the default, for load instructions which don't explicitly specify
     /// a particular caching mode.
     optional<caching_mode_t<memory_operation_t::load>>& default_load_caching_mode() override
     {
         return caching_modes.default_.load;
     }

     /// Get (a copy of) the caching mode the compiler will be told to use as the
     /// default, for load instructions which don't explicitly specify one
     optional<caching_mode_t<memory_operation_t::load>> default_load_caching_mode() const override
     {
         return caching_modes.default_.load;
     }

     /// Specifies the GPU kernels, or __global__ functions in CUDA-C++ terms, or
     /// .entry functions in PTX terms; rendered as a comma-separated list
     ::std::vector<::std::string> mangled_entry_function_names;

     // NOTE(review): the following three methods are only declared here, and no
     // definition is visible in this file; presumably they are intended as aliases
     // for mangled_entry_function_names - confirm they are defined somewhere
     // before relying on them.
     ::std::vector<::std::string>& entries();
     ::std::vector<::std::string>& kernels();
     ::std::vector<::std::string>& kernel_names();
 }; // compilation_options_t<ptx>

 /**
  * Options for JIT-compilation of CUDA C++ code.
  *
  * The exact compiler option string each field is rendered into can be found
  * in the `gadget` specialization for this class, in the
  * `marshalling::detail_` namespace.
  */
 template <>
 class compilation_options_t<cuda_cpp> final :
     public compilation_options_base_t<cuda_cpp>,
     public common_ptx_compilation_options_t
 {
 public:
     using parent = compilation_options_base_t<cuda_cpp>;
     using parent::parent;

     /// Do extensible whole program compilation of device code.
     bool compile_extensible_whole_program { false };

     /// If debug mode is enabled, perform limited optimizations of device code
     /// rather than none at all
     bool optimize_device_code_in_debug_mode { false };

     /// Allow the use of the 128-bit __int128 type in the code.
     bool support_128bit_integers { false };

     /// Emit a remark when a function is inlined
     bool indicate_function_inlining { false };

 #if CUDA_VERSION >= 11200 && CUDA_VERSION <= 12200

     /// When set, `--version-ident=true` is passed to the compiler; only
     /// available for the CUDA versions in the range checked above
     bool compiler_self_identification { false };
 #endif // #if CUDA_VERSION >= 11200 && CUDA_VERSION <= 12200

     /// Stop compilation after the front-end has verified the program's syntax.
     bool syntax_check_only { false };

     /// Have the compiler not provide support for various builtins
     /// (rendered as `--minimal`)
     bool less_builtins { false };

     /// Specify the maximum amount of registers that GPU functions can use;
     /// when unset, no limit is passed to the compiler
     optional<size_t> maximum_register_count { };

     /// When performing single-precision floating-point operations, flush
     /// denormal values to zero
     bool flush_denormal_floats_to_zero { false };

     /// For single-precision floating-point square root, use IEEE
     /// round-to-nearest mode (true) or a faster approximation (false)
     bool use_precise_square_root { true };

     /// For single-precision floating-point division and reciprocals, use IEEE
     /// round-to-nearest mode (true) or a faster approximation (false)
     bool use_precise_division { true };

     /// Enables (disables) the contraction of floating-point multiplies and
     /// adds/subtracts into floating-point multiply-add operations
     bool use_fused_multiply_add { true };

     /// Make use of fast math operations. When set, the four individual
     /// floating-point settings above are not rendered.
     bool use_fast_math { false };

     /// Do not compile fully into PTX/Cubin (rendered as `--dlink-time-opt`)
     bool link_time_optimization { false };

     /// Implicitly add the directories of source files as include file search paths
     bool source_dirs_in_include_path { true };

     /// Enables more aggressive device code vectorization in the LTO IR optimizer.
     bool extra_device_vectorization { false };

     /// The dialect of C++ as which the compiler will be forced to interpret
     /// the program source code; when unset, the choice is left to the compiler
     optional<cpp_dialect_t> language_dialect { };

     /// Preprocessor macros to have the compiler define, without specifying a particular value.
     ::std::unordered_set<::std::string> no_value_defines;

     /// Preprocessor macros to tell the compiler to specifically _un_define.
     ::std::unordered_set<::std::string> undefines;

     /// Preprocessor macros to have the compiler define to specific values.
     ::std::unordered_map<::std::string,::std::string> valued_defines;

     /// Have the compiler treat all warnings as though they were suppressed, and print nothing.
     bool disable_warnings { false };

     /// Treat all kernel pointer parameters as if they had the restrict (or __restrict) qualifier.
     bool assume_restrict { false };

     /// Assume functions without an explicit specification of their execution
     /// space are __device__ functions
     bool default_execution_space_is_device { false };

     /// Display (error) numbers for warning messages, in addition to the message itself
     bool display_error_numbers { true };

     /// Extra options for the PTX compiler (a.k.a. "PTX optimizing assembler").
     ::std::string ptxas;

     /// A sequence of directories to be searched for headers.
     ::std::vector<::std::string> additional_include_paths;

     /// Header files to preinclude during preprocessing of the source.
     ::std::vector<::std::string> preinclude_files;

     /// Provide builtin definitions of ::std::move and ::std::forward.
     bool builtin_move_and_forward { true };

     /// Use setrlimit() to increase the stack size to the maximum the OS allows.
     bool increase_stack_limit_to_max { true };

     /// Provide builtin definitions of ::std::initializer_list class and member functions.
     bool builtin_initializer_list { true };

     /// Support for additional, arbitrary options which may not be covered by
     /// other fields in this class; each is rendered verbatim
     ::std::vector<::std::string> extra_options;

     /// For each compiler finding number listed, the non-default way in which
     /// the compiler is to handle findings of that number
     ::std::unordered_map<error::number_t, error::handling_method_t> error_handling_overrides;

 public: // "shorthands" for more complex option setting

     /// Let the compiler interpret the program source code using its
     /// default assumption for the C++ language dialect
     compilation_options_t& clear_language_dialect()
     {
         language_dialect = {};
         return *this;
     }

     /// Set which dialect of the C++ language the compiler will try to
     /// interpret the program source code as
     compilation_options_t& set_language_dialect(cpp_dialect_t dialect)
     {
         language_dialect = dialect;
         return *this;
     }

     /**
      * Set the language dialect by its name, e.g. "c++14".
      *
      * @param dialect_name the name of the dialect; a null pointer or an
      * empty string clear the dialect setting instead
      * @throws ::std::invalid_argument if the (non-empty) name is not that
      * of a known dialect
      */
     compilation_options_t& set_language_dialect(const char* dialect_name)
     {
         const bool no_name_given = (dialect_name == nullptr or *dialect_name == '\0');
         if (no_name_given) {
             return clear_language_dialect();
         }
         const auto named_dialect = detail_::cpp_dialect_from_name(dialect_name);
         return set_language_dialect(named_dialect);
     }

     /// @copydoc set_language_dialect(const char*)
     compilation_options_t& set_language_dialect(const ::std::string& dialect_name)
     {
         if (dialect_name.empty()) {
             return clear_language_dialect();
         }
         return set_language_dialect(dialect_name.c_str());
     }

     /// Ignore compiler findings of the specified number (rather than
     /// warning about them or raising an error)
     compilation_options_t& suppress_error(error::number_t error_number)
     {
         error_handling_overrides[error_number] = error::suppress;
         return *this;
     }

     /// Treat compiler findings of the specified number as an error (rather
     /// than suppressing them or just warning about them)
     compilation_options_t& treat_as_error(error::number_t error_number)
     {
         error_handling_overrides[error_number] = error::raise_error;
         return *this;
     }

     /// Treat compiler findings of the specified number as warnings (rather
     /// than raising an error or ignoring them)
     compilation_options_t& warn_about(error::number_t error_number)
     {
         error_handling_overrides[error_number] = error::warn;
         return *this;
     }
 }; // compilation_options_t<cuda_cpp>

 /**
  * Renders a compilation options object as a single string, by forwarding
  * to `marshalling::render()`.
  *
  * @tparam CompilationOptions the type of the options object to render
  * @param opts the options to render
  * @return whatever `marshalling::render()` produces for @p opts, as a string
  */
 template <typename CompilationOptions>
 inline ::std::string render(const CompilationOptions& opts)
 {
     return marshalling::render(opts);
 }

 } // namespace rtc

 namespace marshalling {

 namespace detail_ {

 /**
  * Marshals the options for JIT-compiling PTX code into the option strings
  * the PTX compiler is to receive.
  */
 template <typename MarshalTarget, typename Delimiter>
 struct gadget<rtc::compilation_options_t<ptx>, MarshalTarget, Delimiter> {
     /**
      * Streams every option which is in effect into the marshalling target.
      *
      * @param opts the compilation options to marshal
      * @param marshalled the target into which option text is streamed with `<<`
      * @param delimiter passed on to the `opt_start` object which is streamed
      * ahead of every option
      * @param need_delimiter_after_last_option when true, `opt_start` is
      * streamed once more after the last option
      */
     static void process(
         const rtc::compilation_options_t<ptx> &opts,
         MarshalTarget &marshalled, Delimiter delimiter,
         bool need_delimiter_after_last_option)
     {
         opt_start_t<Delimiter> opt_start { delimiter };
         // TODO: Consider taking an option to be verbose in specifying compilation flags, and setting option values
         //  even when they are the compiler defaults.

         // flags
         if (opts.generate_relocatable_device_code)  { marshalled << opt_start << "--compile-only";                  }
         if (opts.compile_as_tools_patch)            { marshalled << opt_start << "--compile-as-tools-patch";        }
         if (opts.generate_debug_info)               { marshalled << opt_start << "--device-debug";                  }
         if (opts.generate_source_line_info)         { marshalled << opt_start << "--generate-line-info";            }
         if (opts.compile_extensible_whole_program)  { marshalled << opt_start << "--extensible-whole-program";      }
         if (not opts.use_fused_multiply_add)        { marshalled << opt_start << "--fmad false";                    }
         if (opts.verbose)                           { marshalled << opt_start << "--verbose";                       }
         if (opts.dont_merge_basicblocks)            { marshalled << opt_start << "--dont-merge-basicblocks";        }
         {
             const auto& osw = opts.situation_warnings;
             if (osw.double_precision_ops)            { marshalled << opt_start << "--warn-on-double-precision-use";   }
             if (osw.local_memory_use)                { marshalled << opt_start << "--warn-on-local-memory-usage";     }
             if (osw.registers_spill_to_local_memory) { marshalled << opt_start << "--warn-on-spills";                 }
             if (not osw.indeterminable_stack_size)   { marshalled << opt_start << "--suppress-stack-size-warning";    }
             // NOTE(review): unlike the stack size warning above, the suppression here is emitted
             // when the flag is _set_ rather than when it is cleared, which looks inverted
             // for a field of `situation_warnings` - confirm the intended semantics.
             if (osw.double_demotion)                 { marshalled << opt_start << "--suppress-double-demote-warning"; }
         }
         if (opts.disable_warnings)                  { marshalled << opt_start << "--disable-warnings";              }
         if (opts.disable_optimizer_constants)       { marshalled << opt_start << "--disable-optimizer-constants";   }


         if (opts.return_at_end_of_kernel)           { marshalled << opt_start << "--return-at-end";                 }
         if (opts.preserve_variable_relocations)     { marshalled << opt_start << "--preserve-relocs";               }

         // Non-flag single-value options

         if (opts.optimization_level) {
             // The option name must be separated from its value, lest we
             // render, e.g., "--opt-level3"
             marshalled << opt_start << "--opt-level=" << opts.optimization_level.value();
             if (opts.optimization_level.value() < rtc::O2
                 and opts.allow_expensive_optimizations_below_O2)
             {
                 marshalled << opt_start << "--allow-expensive-optimizations";
             }
         }

         if (opts.maximum_register_counts.kernel) {
             marshalled << opt_start << "--maxrregcount " << opts.maximum_register_counts.kernel.value();
         }
         if (opts.maximum_register_counts.device_function) {
             marshalled << opt_start << "--device-function-maxrregcount " << opts.maximum_register_counts.device_function.value();
         }

         {
             const auto& ocm = opts.caching_modes;
             if (ocm.default_.load)  { marshalled << opt_start << "--def-load-cache "    << ocm.default_.load.value();  }
             if (ocm.default_.store) { marshalled << opt_start << "--def-store-cache "   << ocm.default_.store.value(); }
             if (ocm.forced.load)    { marshalled << opt_start << "--force-load-cache "  << ocm.forced.load.value();    }
             if (ocm.forced.store)   { marshalled << opt_start << "--force-store-cache " << ocm.forced.store.value();   }
         }

         // Multi-value options

         // When only parsing, a virtual ("compute") architecture is named rather than a real ("sm") one
         const auto target_prefix = opts.parse_without_code_generation ? "compute" : "sm";
         for(const auto& target : opts.targets_) {
             marshalled << opt_start << "--gpu-name=" << target_prefix << '_'  << target.as_combined_number();
         }

         if (not opts.mangled_entry_function_names.empty()) {
             // A single option, whose value is the comma-separated list of
             // entry function names; the '=' separates the name from the value,
             // lest we render, e.g., "--entryfoo,bar"
             marshalled << opt_start << "--entry=";
             bool first = true;
             for (const auto &entry: opts.mangled_entry_function_names) {
                 if (first) { first = false; }
                 else { marshalled << ','; }
                 marshalled << entry;
             }
         }

         if (need_delimiter_after_last_option) {
             marshalled << opt_start; // If no options were marshalled, this does nothing
         }
     }
 };

 /**
  * Marshals the options for JIT-compiling CUDA C++ code into the option
  * strings the compiler is to receive.
  */
 template <typename MarshalTarget, typename Delimiter>
 struct gadget<rtc::compilation_options_t<cuda_cpp>, MarshalTarget, Delimiter> {
     /**
      * Streams every option which is in effect into the marshalling target.
      *
      * @param opts the compilation options to marshal
      * @param marshalled the target into which option text is streamed with `<<`
      * @param delimiter passed on to the `opt_start` object which is streamed
      * ahead of every option
      * @param need_delimiter_after_last_option when true, `opt_start` is
      * streamed once more after the last option
      */
     static void process(
         const rtc::compilation_options_t<cuda_cpp>& opts, MarshalTarget& marshalled, Delimiter delimiter,
         bool need_delimiter_after_last_option)
     {
         opt_start_t<Delimiter> opt_start { delimiter };
         // Flags; each is only rendered when its field differs from the field's default value
         if (opts.generate_relocatable_device_code)  { marshalled << opt_start << "--relocatable-device-code=true";      }
         if (opts.compile_extensible_whole_program)  { marshalled << opt_start << "--extensible-whole-program=true";     }
         if (opts.generate_debug_info)               { marshalled << opt_start << "--device-debug";                      }
         if (opts.generate_source_line_info)         { marshalled << opt_start << "--generate-line-info";                }
         if (opts.support_128bit_integers)           { marshalled << opt_start << "--device-int128";                     }
         if (opts.indicate_function_inlining)        { marshalled << opt_start << "--optimization-info=inline";          }
 #if CUDA_VERSION >= 11200 && CUDA_VERSION <= 12200
         if (opts.compiler_self_identification)      { marshalled << opt_start << "--version-ident=true";                }
 #endif // CUDA_VERSION >= 11200 && CUDA_VERSION <= 12200
         if (opts.syntax_check_only)                 { marshalled << opt_start << "--fdevice-syntax-only";               }
         if (opts.less_builtins)                     { marshalled << opt_start << "--minimal";                           }
         if (not opts.builtin_initializer_list)      { marshalled << opt_start << "--builtin-initializer-list=false";    }
         if (not opts.source_dirs_in_include_path)   { marshalled << opt_start << "--no-source-include";                 }
         if (opts.extra_device_vectorization)        { marshalled << opt_start << "--extra-device-vectorization";        }
         if (opts.disable_warnings)                  { marshalled << opt_start << "--disable-warnings";                  }
         if (opts.assume_restrict)                   { marshalled << opt_start << "--restrict";                          }
         if (opts.default_execution_space_is_device) { marshalled << opt_start << "--device-as-default-execution-space"; }
         if (not opts.display_error_numbers)         { marshalled << opt_start << "--no-display-error-number";           }
         if (not opts.builtin_move_and_forward)      { marshalled << opt_start << "--builtin-move-forward=false";        }
         if (not opts.increase_stack_limit_to_max)   { marshalled << opt_start << "--modify-stack-limit=false";          }
         if (opts.link_time_optimization)            { marshalled << opt_start << "--dlink-time-opt";                    }
         // The individual floating-point settings are only rendered when fast math is not requested
         if (opts.use_fast_math)                     { marshalled << opt_start << "--use_fast_math";                     }
         else {
             if (opts.flush_denormal_floats_to_zero) { marshalled << opt_start << "--ftz";                               }
             if (not opts.use_precise_square_root)   { marshalled << opt_start << "--prec-sqrt=false";                   }
             if (not opts.use_precise_division)      { marshalled << opt_start << "--prec-div=false";                    }
             if (not opts.use_fused_multiply_add)    { marshalled << opt_start << "--fmad=false";                        }
         }
         if (opts.optimize_device_code_in_debug_mode) {
             marshalled << opt_start << "--dopt=on";
         }
         if (not opts.ptxas.empty()) {
             marshalled << opt_start << "--ptxas-options=" << opts.ptxas;

         }

         // Single-value options; only rendered when a value has been set

         if (opts.language_dialect) {
             // The dialect's enumerator value serves as the index of its name
             marshalled << opt_start << "--std=" << rtc::detail_::cpp_dialect_names[static_cast<unsigned>(opts.language_dialect.value())];
         }

         if (opts.maximum_register_count) {
             marshalled << opt_start << "--maxrregcount=" << opts.maximum_register_count.value();
         }

         // Multi-value options

         // Before CUDA 11.0, targets are specified as virtual ("compute") architectures
         // rather than as real ("sm") ones
         for(const auto& target : opts.targets_) {
     #if CUDA_VERSION < 11000
             marshalled << opt_start << "--gpu-architecture=compute_" << target.as_combined_number();
     #else
             marshalled << opt_start << "--gpu-architecture=sm_" << target.as_combined_number();
     #endif
         }

         for(const auto& def : opts.undefines) {
             marshalled << opt_start << "-U" << def;
             // Note: Could alternatively use "--undefine-macro=" instead of "-U"
         }


         for(const auto& def : opts.no_value_defines) {
             marshalled << opt_start << "-D" << def;
             // Note: Could alternatively use "--define-macro=" instead of "-D"
         }

         // def.first is the macro's name, def.second is the value to define it to
         for(const auto& def : opts.valued_defines) {
             marshalled << opt_start << "-D" << def.first << '=' << def.second;
         }

         for(const auto& path : opts.additional_include_paths) {
             marshalled << opt_start << "--include-path=" << path;
         }

         for(const auto& preinclude_file : opts.preinclude_files) {
             marshalled << opt_start << "--pre-include=" << preinclude_file;
         }

         // Rendered as one of --diag-error=N, --diag-suppress=N or --diag-warn=N;
         // override.first is the finding's number, override.second is the handling method
         for(const auto& override : opts.error_handling_overrides) {
             marshalled
                 << opt_start << "--diag-" << rtc::error::detail_::option_name_part(override.second)
                 << '=' << override.first ;
         }

         // Arbitrary user-provided options are passed verbatim, after all others
         for(const auto& extra_opt : opts.extra_options) {
             marshalled << opt_start << extra_opt;
         }

         if (need_delimiter_after_last_option) {
             marshalled << opt_start; // If no options were marshalled, this does nothing
         }
     }
 };

 } // namespace detail_

 } // namespace marshalling

 } // namespace cuda

 #endif // CUDA_API_WRAPPERS_RTC_COMPILATION_OPTIONS_HPP_
cuda::rtc::compilation_options_t< ptx >::mangled_entry_function_names
::std::vector<::std::string > mangled_entry_function_names
Specifies the GPU kernels, or __global__ functions in CUDA-C++ terms, or .entry functions in PTX term...
Definition: compilation_options.hpp:283

cuda::rtc::compilation_options_t< cuda_cpp >::default_execution_space_is_device
bool default_execution_space_is_device
Assume functions without an explicit specification of their execution space are __device__ rather tha...
Definition: compilation_options.hpp:428

cuda::rtc::compilation_options_t< cuda_cpp >::ptxas
::std::string ptxas
Extra options for the PTX compiler (a.k.a. "PTX optimizing assembler").
Definition: compilation_options.hpp:434

cuda::rtc::compilation_options_t< cuda_cpp >::clear_language_dialect
compilation_options_t & clear_language_dialect()
Let the compiler interpret the program source code using its default-assumption for the C++ language ...
Definition: compilation_options.hpp:499

cuda::rtc::compilation_options_t< ptx >::allow_expensive_optimizations_below_O2
bool allow_expensive_optimizations_below_O2
Allow the JIT compiler to perform expensive optimizations using maximum available resources (memory a...
Definition: compilation_options.hpp:182

cuda::rtc::compilation_options_t< cuda_cpp >::use_fused_multiply_add
bool use_fused_multiply_add
Enables (disables) the contraction of floating-point multiplies and adds/subtracts into floating-poin...
Definition: compilation_options.hpp:388

cuda::cuda_cpp
The CUDA variant of C++, accepted by the NVRTC library.
Definition: types.hpp:41

cuda::rtc::compilation_options_t< cuda_cpp >::valued_defines
::std::unordered_map<::std::string,::std::string > valued_defines
Preprocessor macros to have the compiler define to specific values.
Definition: compilation_options.hpp:418

cuda::rtc::compilation_options_t< ptx >::compile_as_tools_patch
bool compile_as_tools_patch
Compile as patch code for CUDA tools.
Definition: compilation_options.hpp:193

cuda
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22

cuda::rtc::error::handling_method_t
handling_method_t
Possible ways of handling a potentially problematic finding by the compiler in the program source cod...
Definition: compilation_options.hpp:67

cuda::rtc::common_ptx_compilation_options_t::generate_relocatable_device_code
bool generate_relocatable_device_code
Generate relocatable code that can be linked with other relocatable device code.
Definition: common_ptx_compilation_options.hpp:195

cuda::rtc::compilation_options_t< cuda_cpp >::suppress_error
compilation_options_t & suppress_error(error::number_t error_number)
Ignore compiler findings of the specified number (rather than warnings about them or raising an error...
Definition: compilation_options.hpp:531

cuda::rtc::compilation_options_t< cuda_cpp >::optimize_device_code_in_debug_mode
bool optimize_device_code_in_debug_mode
If debug mode is enabled, perform limited optimizations of device code rather than none at all...
Definition: compilation_options.hpp:313

cuda::rtc::compilation_options_t< cuda_cpp >::extra_device_vectorization
bool extra_device_vectorization
Enables more aggressive device code vectorization in the LTO IR optimizer.
Definition: compilation_options.hpp:406

cuda::rtc::error::number_t
unsigned number_t
Errors, or problematic findings, by the compiler are identified by a number of this type...
Definition: compilation_options.hpp:70

cuda::rtc::compilation_options_t< ptx >::compile_extensible_whole_program
bool compile_extensible_whole_program
Expecting only whole-programs to be directly usable, allow some calls to not be resolved until device...
Definition: compilation_options.hpp:199

cuda::rtc::compilation_options_t< cuda_cpp >::use_fast_math
bool use_fast_math
Make use of fast math operations.
Definition: compilation_options.hpp:392

cuda::device::compute_capability_t
A numeric designator of the computational capabilities of a CUDA device.
Definition: device_properties.hpp:75

cuda::rtc::common_ptx_compilation_options_t::optimization_level
optional< optimization_level_t > optimization_level
Compilation optimization level (as in -O1, -O2 etc.)
Definition: common_ptx_compilation_options.hpp:158

cuda::rtc::compilation_options_t< cuda_cpp >::support_128bit_integers
bool support_128bit_integers
Allow the use of the 128-bit __int128 type in the code.
Definition: compilation_options.hpp:318

cuda::rtc::compilation_options_t< cuda_cpp >::set_language_dialect
compilation_options_t & set_language_dialect(const char *dialect_name)
Set which dialect of the C++ language the compiler will try to interpret the program source code as...
Definition: compilation_options.hpp:514

cuda::rtc::compilation_options_t< ptx >::default_load_caching_mode
optional< caching_mode_t< memory_operation_t::load > > default_load_caching_mode() const override
Get the caching mode the compiler will be told to use as the default, for load instructions which don...
Definition: compilation_options.hpp:272

cuda::rtc::compilation_options_t
Options to be passed to one of the NVIDIA JIT compilers along with a program's source code...
Definition: compilation_options.hpp:164

cuda::rtc::compilation_options_t< cuda_cpp >::link_time_optimization
bool link_time_optimization
Do not compile fully into PTX/Cubin.
Definition: compilation_options.hpp:399

cuda::rtc::compilation_options_t< cuda_cpp >
Options for JIT-compilation of CUDA C++ code.
Definition: compilation_options.hpp:292

cuda::rtc::compilation_options_t< cuda_cpp >::set_language_dialect
compilation_options_t & set_language_dialect(cpp_dialect_t dialect)
Set which dialect of the C++ language the compiler will try to interpret the program source code as...
Definition: compilation_options.hpp:507

cuda::rtc::compilation_options_t< cuda_cpp >::compile_extensible_whole_program
bool compile_extensible_whole_program
Do extensible whole program compilation of device code.
Definition: compilation_options.hpp:305

cuda::rtc::compilation_options_t< ptx >::default_load_caching_mode
optional< caching_mode_t< memory_operation_t::load > > & default_load_caching_mode() override
Get a reference to the caching mode the compiler will be told to use as the default, for load instructions which don't explicitly specify a particular caching mode.
Definition: compilation_options.hpp:265

cuda::rtc::compilation_options_t< cuda_cpp >::disable_warnings
bool disable_warnings
Have the compiler treat all warnings as though they were suppressed, and print nothing.
Definition: compilation_options.hpp:421

cuda::rtc::common_ptx_compilation_options_t
A subset of the options for compiling PTX code into SASS, usable both with the CUDA driver and with N...
Definition: common_ptx_compilation_options.hpp:149

cuda::rtc::compilation_options_t< cuda_cpp >::language_dialect
optional< cpp_dialect_t > language_dialect
The dialect of C++ as which the compiler will be forced to interpret the program source code...
Definition: compilation_options.hpp:409

cuda::rtc::compilation_options_t< cuda_cpp >::additional_include_paths
::std::vector<::std::string > additional_include_paths
A sequence of directories to be searched for headers.
Definition: compilation_options.hpp:445

cuda::rtc::compilation_options_t< cuda_cpp >::set_language_dialect
compilation_options_t & set_language_dialect(const ::std::string &dialect_name)
Set which dialect of the C++ language the compiler will try to interpret the program source code as...
Definition: compilation_options.hpp:522

cuda::rtc::compilation_options_t< ptx >::maximum_register_counts
struct cuda::rtc::compilation_options_t< ptx >::@21 maximum_register_counts
Limits on the number of registers which generated object code (of different kinds) is allowed to use...

cuda::rtc::compilation_options_t< cuda_cpp >::undefines
::std::unordered_set<::std::string > undefines
Preprocessor macros to tell the compiler to specifically _un_define.
Definition: compilation_options.hpp:415

cuda::rtc::compilation_options_t< ptx >::situation_warnings
struct cuda::rtc::compilation_options_t< ptx >::@20 situation_warnings
Warnings about situations likely to result in poor performance or other problems. ...

cuda::rtc::compilation_options_t< cuda_cpp >::syntax_check_only
bool syntax_check_only
Stop compilation after the front-end has verified the program's syntax.
Definition: compilation_options.hpp:339

cuda::rtc::compilation_options_t< ptx >::return_at_end_of_kernel
bool return_at_end_of_kernel
Prevents the optimizing away of the return instruction at the end of a program (a kernel...
Definition: compilation_options.hpp:225

cuda::rtc::compilation_options_t< ptx >::disable_optimizer_constants
bool disable_optimizer_constants
Disable use of the "optimizer constant bank" feature.
Definition: compilation_options.hpp:221

cuda::rtc::common_ptx_compilation_options_t::generate_debug_info
bool generate_debug_info
Generate debugging information associating SASS instructions to locations in the source, embedding it within the compilation output (-g)
Definition: common_ptx_compilation_options.hpp:169

cuda::rtc::compilation_options_t< cuda_cpp >::use_precise_division
bool use_precise_division
For single-precision floating-point division and reciprocals, use IEEE round-to-nearest mode or use a...
Definition: compilation_options.hpp:381

cuda::rtc::compilation_options_t< cuda_cpp >::assume_restrict
bool assume_restrict
Treat all kernel pointer parameters as if they had the restrict (or __restrict) qualifier.
Definition: compilation_options.hpp:424

cuda::rtc::compilation_options_base_t::add_target
compilation_options_base_t & add_target(device::compute_capability_t compute_capability)
Have the compilation also target a specific compute capability.
Definition: compilation_options.hpp:117

cuda::rtc::compilation_options_t< cuda_cpp >::builtin_move_and_forward
bool builtin_move_and_forward
Provide builtin definitions of ::std::move and ::std::forward.
Definition: compilation_options.hpp:464

cuda::rtc::common_ptx_compilation_options_t::generate_source_line_info
bool generate_source_line_info
Generate indications of which PTX/SASS instructions correspond to which lines of the source code...
Definition: common_ptx_compilation_options.hpp:165

cuda::rtc::compilation_options_t< cuda_cpp >::no_value_defines
::std::unordered_set<::std::string > no_value_defines
Preprocessor macros to have the compiler define, without specifying a particular value.
Definition: compilation_options.hpp:412

cuda::rtc::compilation_options_t< ptx >::preserve_variable_relocations
bool preserve_variable_relocations
Generate relocatable references for variables and preserve relocations generated for them in the link...
Definition: compilation_options.hpp:229

cuda::rtc::compilation_options_t< ptx >::dont_merge_basicblocks
bool dont_merge_basicblocks
Prevent the compiler from merging consecutive basic blocks (https://en.wikipedia.org/wiki/Basic_block...
Definition: compilation_options.hpp:215

cuda::rtc::compilation_options_t< cuda_cpp >::builtin_initializer_list
bool builtin_initializer_list
Provide builtin definitions of ::std::initializer_list class and member functions.
Definition: compilation_options.hpp:482

cuda::rtc::compilation_options_t< ptx >::parse_without_code_generation
bool parse_without_code_generation
Makes the PTX compiler run without producing any CUBIN output (for PTX verification only) ...
Definition: compilation_options.hpp:178

cuda::device_t::compute_capability
device::compute_capability_t compute_capability() const
Obtains the device's compute capability; see cuda::device::compute_capability_t.
Definition: device.hpp:415

cuda::ptx
NVIDIA's architecture-agnostic intermediate program representation language, known as PTX or Parall...
Definition: types.hpp:44

cuda::rtc::compilation_options_t< ptx >::verbose
bool verbose
Print code generation statistics along with the compilation log.
Definition: compilation_options.hpp:206

cuda::rtc::compilation_options_t< cuda_cpp >::use_precise_square_root
bool use_precise_square_root
For single-precision floating-point square root, use IEEE round-to-nearest mode or use a faster appro...
Definition: compilation_options.hpp:374

cuda::rtc::compilation_options_base_t::targets_
::std::unordered_set< cuda::device::compute_capability_t > targets_
Target devices in terms of CUDA compute capability.
Definition: compilation_options.hpp:105

cuda::rtc::compilation_options_t< cuda_cpp >::warn_about
compilation_options_t & warn_about(error::number_t error_number)
Treat compiler findings of the specified number as warnings (rather than raising an error or ignoring...
Definition: compilation_options.hpp:547

cuda::rtc::compilation_options_t< cuda_cpp >::preinclude_files
::std::vector<::std::string > preinclude_files
Header files to preinclude during preprocessing of the source.
Definition: compilation_options.hpp:457

cuda::rtc::compilation_options_t< ptx >::default_
caching_mode_spec_t default_
The caching mode to be used for instructions which don't specify a caching mode.
Definition: compilation_options.hpp:257

cuda::rtc::compilation_options_t< cuda_cpp >::treat_as_error
compilation_options_t & treat_as_error(error::number_t error_number)
Treat compiler findings of the specified number as an error (rather than suppressing them or just war...
Definition: compilation_options.hpp:539

cuda::rtc::compilation_options_t< cuda_cpp >::flush_denormal_floats_to_zero
bool flush_denormal_floats_to_zero
When performing single-precision floating-point operations, flush denormal values to zero...
Definition: compilation_options.hpp:367

cuda::rtc::compilation_options_t< ptx >::disable_warnings
bool disable_warnings
The equivalent of suppressing all findings which currently trigger a warning.
Definition: compilation_options.hpp:218

cuda::rtc::compilation_options_t< cuda_cpp >::display_error_numbers
bool display_error_numbers
Display (error) numbers for warning (and error?) messages, in addition to the message itself...
Definition: compilation_options.hpp:431

cuda::rtc::compilation_options_base_t
Compilation options common to all kinds of JIT-compilable programs.
Definition: compilation_options.hpp:86

cuda::rtc::compilation_options_t< cuda_cpp >::increase_stack_limit_to_max
bool increase_stack_limit_to_max
Use setrlimit() to increase the stack size to the maximum the OS allows.
Definition: compilation_options.hpp:475

cuda::device_t
Wrapper class for a CUDA device.
Definition: device.hpp:135

cuda::rtc::compilation_options_t< ptx >::use_fused_multiply_add
bool use_fused_multiply_add
Enable the contraction of multiplications-followed-by-additions (or subtractions) into single fused in...
Definition: compilation_options.hpp:203

cuda::rtc::compilation_options_t< ptx >
Options for JIT-compilation of CUDA PTX code.
Definition: compilation_options.hpp:168

cuda::rtc::compilation_options_t< cuda_cpp >::indicate_function_inlining
bool indicate_function_inlining
emit a remark when a function is inlined
Definition: compilation_options.hpp:323

cuda::rtc::compilation_options_t< cuda_cpp >::source_dirs_in_include_path
bool source_dirs_in_include_path
Implicitly add the directories of source files (TODO: Which source files?) as include file search pat...
Definition: compilation_options.hpp:403

cuda::rtc::compilation_options_t< cuda_cpp >::extra_options
::std::vector<::std::string > extra_options
Support for additional, arbitrary options which may not be covered by other fields in this class (e...
Definition: compilation_options.hpp:491

cuda::rtc::compilation_options_t< cuda_cpp >::less_builtins
bool less_builtins
Have the compiler not provide support for various builtins:
Definition: compilation_options.hpp:349

cuda::rtc::optimization_level_t
int optimization_level_t
The type used to specify the intensity, and extent of allowed implication, of optimization efforts...
Definition: common_ptx_compilation_options.hpp:26

cuda::rtc::compilation_options_t< cuda_cpp >::maximum_register_count
optional< size_t > maximum_register_count
Specify the maximum amount of registers that GPU functions can use.
Definition: compilation_options.hpp:360