cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
compilation_options.hpp
Go to the documentation of this file.
1 
7 #pragma once
8 #ifndef CUDA_API_WRAPPERS_RTC_COMPILATION_OPTIONS_HPP_
9 #define CUDA_API_WRAPPERS_RTC_COMPILATION_OPTIONS_HPP_
10 
11 #include "cuda/api/detail/option_marshalling.hpp"
12 
13 #include "../api/device_properties.hpp"
14 #include "../api/device.hpp"
15 #include "../api/common_ptx_compilation_options.hpp"
16 
17 #include <unordered_map>
18 #include <unordered_set>
19 #include <sstream>
20 #include <string>
21 #include <vector>
22 #include <cstring>
23 #include <limits>
24 #include <iterator>
25 
26 namespace cuda {
27 
28 namespace rtc {
29 
/// The dialects of the C++ language which can be named in compilation options.
///
/// @note The numeric value of each enumerator is used as an index into
/// @ref detail_::cpp_dialect_names , so the two must be kept in sync.
enum class cpp_dialect_t {
    cpp03 = 0,
    cpp11 = 1,
    cpp14 = 2,
    cpp17 = 3,
    last = cpp17
};

namespace detail_ {

/// The length, in characters, of each of the dialect names in @ref cpp_dialect_names
static constexpr const size_t language_dialect_name_length { 5 };

/// Names of the C++ dialects, indexed by the numeric value of the
/// corresponding @ref cpp_dialect_t enumerator
constexpr const char* cpp_dialect_names[] = {
    "c++03",
    "c++11",
    "c++14",
    "c++17",
};

// The lookup below (and the option marshalling code) index the names array
// using enumerator values; make a mismatch a compile-time error rather than
// an out-of-bounds read.
static_assert(
    sizeof(cpp_dialect_names) / sizeof(cpp_dialect_names[0]) ==
        static_cast<size_t>(cpp_dialect_t::last) + 1,
    "cpp_dialect_names must have exactly one name per cpp_dialect_t enumerator");

/**
 * Look up a C++ dialect by its name.
 *
 * @param dialect_name a nul-terminated dialect name, e.g. "c++14"; the
 * comparison against the known names is exact (case-sensitive)
 * @return the enumerator whose name equals @p dialect_name
 * @throws ::std::invalid_argument if @p dialect_name is null, or is not one
 * of the names in @ref cpp_dialect_names
 */
inline cpp_dialect_t cpp_dialect_from_name(const char* dialect_name) noexcept(false)
{
    // A null pointer must not reach strcmp() or ::std::string concatenation -
    // both have undefined behavior for it.
    if (dialect_name == nullptr) {
        throw ::std::invalid_argument("No C++ dialect name provided (null pointer)");
    }
    constexpr const auto num_dialects = static_cast<size_t>(cpp_dialect_t::last) + 1;
    for (size_t dialect_index = 0; dialect_index < num_dialects; dialect_index++) {
        if (::std::strcmp(cpp_dialect_names[dialect_index], dialect_name) == 0) {
            return static_cast<cpp_dialect_t>(dialect_index);
        }
    }
    throw ::std::invalid_argument(::std::string("No C++ dialect named \"") + dialect_name + '"');
}

} // namespace detail_
62 
namespace error {

/// Possible ways of handling a potentially problematic finding by the
/// compiler in the program source code
enum handling_method_t {
    raise_error = 0,
    suppress = 1,
    warn = 2
};

/// Errors, or problematic findings, by the compiler are identified by a
/// number of this type
typedef unsigned number_t;

namespace detail_ {

/// Obtain the string naming a handling method, for use as part of a
/// compiler option (the marshalling code places it after a "--diag-" prefix)
///
/// @param method one of the @ref handling_method_t enumerators
inline const char* option_name_part(handling_method_t method)
{
    // Ordered by the numeric value of the handling_method_t enumerators
    static constexpr const char* name_part_by_method[] = { "error", "suppress", "warn" };
    const auto method_index = static_cast<unsigned>(method);
    return name_part_by_method[method_index];
}

} // namespace detail_

} // namespace error
83 
85 template <source_kind_t Kind>
87  template <typename T>
88  using optional = cuda::optional<T>;
89 
105  ::std::unordered_set<cuda::device::compute_capability_t> targets_;
106 
107 public:
108  // TODO: Drop the following methods and make targets a custom
109  // inner class which can be assigned, added to or subtracted from
110 
118  {
119  targets_.clear();
120  targets_.insert(compute_capability);
121  return *this;
122  }
123 
130  compilation_options_base_t& set_target(device::compute_capability_t compute_capability)
132  {
133  targets_.clear();
134  add_target(compute_capability);
135  return *this;
136  }
137 
138  compilation_options_base_t& set_target(device_t device)
139  {
140  return set_target(device.compute_capability());
141  }
143 }; // compilation_options_base_t
144 
149  O0 = 0,
150  no_optimization = O0,
151  O1 = 1,
152  O2 = 2,
153  O3 = 3,
154  maximum_optimization = O3
155 };
156 
163 template <source_kind_t Kind>
165 
167 template <>
169  public compilation_options_base_t<ptx>,
171 public:
173  using parent = compilation_options_base_t<ptx>;
174  using parent::parent;
176 
178  bool parse_without_code_generation { false };
179 
182  bool allow_expensive_optimizations_below_O2 { false };
183 
193  bool compile_as_tools_patch { false };
194 
199  bool compile_extensible_whole_program { false };
200 
203  bool use_fused_multiply_add { true };
204 
206  bool verbose { false };
207 
215  bool dont_merge_basicblocks { false };
216 
218  bool disable_warnings { false };
219 
221  bool disable_optimizer_constants { false };
222 
225  bool return_at_end_of_kernel { false };
226 
229  bool preserve_variable_relocations { false };
230 
233  struct {
234  bool double_precision_ops { false };
235  bool local_memory_use { false };
236  bool registers_spill_to_local_memory { false };
237  bool indeterminable_stack_size { true };
238  // Does the PTX compiler library actually support this? ptxas does, but the PTX compilation API
239  // doesn't mention it
240  bool double_demotion { false };
241  } situation_warnings;
242 
245  struct {
246  optional<rtc::ptx_register_count_t> kernel {};
247  optional<rtc::ptx_register_count_t> device_function {};
248  } maximum_register_counts;
249 
251  struct caching_mode_spec_t {
252  optional<caching_mode_t<memory_operation_t::load>> load {};
253  optional<caching_mode_t<memory_operation_t::store>> store {};
254  };
255  struct {
257  caching_mode_spec_t default_ {};
260  caching_mode_spec_t forced {};
261  } caching_modes;
262 
265  optional<caching_mode_t<memory_operation_t::load>>& default_load_caching_mode() override
266  {
267  return caching_modes.default_.load;
268  }
269 
272  optional<caching_mode_t<memory_operation_t::load>> default_load_caching_mode() const override
273  {
274  return caching_modes.default_.load;
275  }
276 
283  ::std::vector<::std::string> mangled_entry_function_names;
284 
285  ::std::vector<::std::string>& entries();
286  ::std::vector<::std::string>& kernels();
287  ::std::vector<::std::string>& kernel_names();
288 }; // compilation_options_t<ptx>
289 
291 template <>
293  public compilation_options_base_t<cuda_cpp>,
295 {
296 public:
298  using parent::parent;
299 
305  bool compile_extensible_whole_program { false };
306 
313  bool optimize_device_code_in_debug_mode { false };
314 
318  bool support_128bit_integers { false };
319 
323  bool indicate_function_inlining { false };
324 
325 #if CUDA_VERSION >= 11200 && CUDA_VERSION <= 12200
326 
330  bool compiler_self_identification { false };
331 #endif // #if CUDA_VERSION >= 11200 && CUDA_VERSION <= 12200
332 
339  bool syntax_check_only { false };
340 
349  bool less_builtins { false };
350 
360  optional<size_t> maximum_register_count { };
361 
367  bool flush_denormal_floats_to_zero { false };
368 
374  bool use_precise_square_root { true };
375 
381  bool use_precise_division { true };
382 
388  bool use_fused_multiply_add { true };
389 
392  bool use_fast_math { false };
393 
399  bool link_time_optimization { false };
400 
403  bool source_dirs_in_include_path { true };
404 
406  bool extra_device_vectorization { false };
407 
409  optional<cpp_dialect_t> language_dialect { };
410 
412  ::std::unordered_set<::std::string> no_value_defines;
413 
415  ::std::unordered_set<::std::string> undefines;
416 
418  ::std::unordered_map<::std::string,::std::string> valued_defines;
419 
421  bool disable_warnings { false };
422 
424  bool assume_restrict { false };
425 
428  bool default_execution_space_is_device { false };
429 
431  bool display_error_numbers { true };
432 
434  ::std::string ptxas;
435 
445  ::std::vector<::std::string> additional_include_paths;
446 
457  ::std::vector<::std::string> preinclude_files;
458 
464  bool builtin_move_and_forward { true };
465 
475  bool increase_stack_limit_to_max { true };
476 
482  bool builtin_initializer_list { true };
483 
491  ::std::vector<::std::string> extra_options;
492 
493  ::std::unordered_map<error::number_t, error::handling_method_t> error_handling_overrides;
494 
495 public: // "shorthands" for more complex option setting
496 
500  {
501  language_dialect = {};
502  return *this;
503  }
504 
508  {
509  language_dialect = dialect;
510  return *this;
511  }
512 
514  compilation_options_t& set_language_dialect(const char* dialect_name)
515  {
516  return (dialect_name == nullptr or *dialect_name == '\0') ?
517  clear_language_dialect() :
518  set_language_dialect(detail_::cpp_dialect_from_name(dialect_name));
519  }
520 
522  compilation_options_t& set_language_dialect(const ::std::string& dialect_name)
523  {
524  return dialect_name.empty() ?
525  clear_language_dialect() :
526  set_language_dialect(dialect_name.c_str());
527  }
528 
532  {
533  error_handling_overrides[error_number] = error::suppress;
534  return *this;
535  }
536 
540  {
541  error_handling_overrides[error_number] = error::raise_error;
542  return *this;
543  }
544 
548  {
549  error_handling_overrides[error_number] = error::warn;
550  return *this;
551  }
552 }; // compilation_options_t<cuda_cpp>
553 
554 template <typename CompilationOptions>
555 inline ::std::string render(const CompilationOptions& opts)
556 {
557  return marshalling::render(opts);
558 }
559 
560 } // namespace rtc
561 
562 namespace marshalling {
563 
564 namespace detail_ {
565 
566 template <typename MarshalTarget, typename Delimiter>
567 struct gadget<rtc::compilation_options_t<ptx>, MarshalTarget, Delimiter> {
568  static void process(
570  MarshalTarget &marshalled, Delimiter delimiter,
571  bool need_delimiter_after_last_option)
572  {
573  opt_start_t<Delimiter> opt_start { delimiter };
574  // TODO: Consider taking an option to be verbose in specifying compilation flags, and setting option values
575  // even when they are the compiler defaults.
576 
577  // flags
578  if (opts.generate_relocatable_device_code) { marshalled << opt_start << "--compile-only"; }
579  if (opts.compile_as_tools_patch) { marshalled << opt_start << "--compile-as-tools-patch"; }
580  if (opts.generate_debug_info) { marshalled << opt_start << "--device-debug"; }
581  if (opts.generate_source_line_info) { marshalled << opt_start << "--generate-line-info"; }
582  if (opts.compile_extensible_whole_program) { marshalled << opt_start << "--extensible-whole-program"; }
583  if (not opts.use_fused_multiply_add) { marshalled << opt_start << "--fmad false"; }
584  if (opts.verbose) { marshalled << opt_start << "--verbose"; }
585  if (opts.dont_merge_basicblocks) { marshalled << opt_start << "--dont-merge-basicblocks"; }
586  {
587  const auto& osw = opts.situation_warnings;
588  if (osw.double_precision_ops) { marshalled << opt_start << "--warn-on-double-precision-use"; }
589  if (osw.local_memory_use) { marshalled << opt_start << "--warn-on-local-memory-usage"; }
590  if (osw.registers_spill_to_local_memory) { marshalled << opt_start << "--warn-on-spills"; }
591  if (not osw.indeterminable_stack_size) { marshalled << opt_start << "--suppress-stack-size-warning"; }
592  if (osw.double_demotion) { marshalled << opt_start << "--suppress-double-demote-warning"; }
593  }
594  if (opts.disable_warnings) { marshalled << opt_start << "--disable-warnings"; }
595  if (opts.disable_optimizer_constants) { marshalled << opt_start << "--disable-optimizer-constants"; }
596 
597 
598  if (opts.return_at_end_of_kernel) { marshalled << opt_start << "--return-at-end"; }
599  if (opts.preserve_variable_relocations) { marshalled << opt_start << "--preserve-relocs"; }
600 
601  // Non-flag single-value options
602 
603  if (opts.optimization_level) {
604  marshalled << opt_start << "--opt-level" << opts.optimization_level.value();
605  if (opts.optimization_level.value() < rtc::O2
607  {
608  marshalled << opt_start << "--allow-expensive-optimizations";
609  }
610  }
611 
612  if (opts.maximum_register_counts.kernel) {
613  marshalled << opt_start << "--maxrregcount " << opts.maximum_register_counts.kernel.value();
614  }
615  if (opts.maximum_register_counts.device_function) {
616  marshalled << opt_start << "--device-function-maxrregcount " << opts.maximum_register_counts.device_function.value();
617  }
618 
619  {
620  const auto& ocm = opts.caching_modes;
621  if (ocm.default_.load) { marshalled << opt_start << "--def-load-cache " << ocm.default_.load.value(); }
622  if (ocm.default_.store) { marshalled << opt_start << "--def-store-cache " << ocm.default_.store.value(); }
623  if (ocm.forced.load) { marshalled << opt_start << "--force-load-cache " << ocm.forced.load.value(); }
624  if (ocm.forced.store) { marshalled << opt_start << "--force-store-cache " << ocm.forced.store.value(); }
625  }
626 
627  // Multi-value options
628 
629  for(const auto& target : opts.targets_) {
630  auto prefix = opts.parse_without_code_generation ? "compute" : "sm";
631  marshalled << opt_start << "--gpu-name=" << prefix << '_' << target.as_combined_number();
632  }
633 
634  if (not opts.mangled_entry_function_names.empty()) {
635  marshalled << opt_start << "--entry";
636  bool first = true;
637  for (const auto &entry: opts.mangled_entry_function_names) {
638  if (first) { first = false; }
639  else { marshalled << ','; }
640  marshalled << entry;
641  }
642  }
643 
644  if (need_delimiter_after_last_option) {
645  marshalled << opt_start; // If no options were marshalled, this does nothing
646  }
647  }
648 };
649 
650 template <typename MarshalTarget, typename Delimiter>
651 struct gadget<rtc::compilation_options_t<cuda_cpp>, MarshalTarget, Delimiter> {
652  static void process(
653  const rtc::compilation_options_t<cuda_cpp>& opts, MarshalTarget& marshalled, Delimiter delimiter,
654  bool need_delimiter_after_last_option)
655  {
656  opt_start_t<Delimiter> opt_start { delimiter };
657  if (opts.generate_relocatable_device_code) { marshalled << opt_start << "--relocatable-device-code=true"; }
658  if (opts.compile_extensible_whole_program) { marshalled << opt_start << "--extensible-whole-program=true"; }
659  if (opts.generate_debug_info) { marshalled << opt_start << "--device-debug"; }
660  if (opts.generate_source_line_info) { marshalled << opt_start << "--generate-line-info"; }
661  if (opts.support_128bit_integers) { marshalled << opt_start << "--device-int128"; }
662  if (opts.indicate_function_inlining) { marshalled << opt_start << "--optimization-info=inline"; }
663 #if CUDA_VERSION >= 11200 && CUDA_VERSION <= 12200
664  if (opts.compiler_self_identification) { marshalled << opt_start << "--version-ident=true"; }
665 #endif // CUDA_VERSION >= 11200 && CUDA_VERSION <= 12200
666  if (opts.syntax_check_only) { marshalled << opt_start << "--fdevice-syntax-only"; }
667  if (opts.less_builtins) { marshalled << opt_start << "--minimal"; }
668  if (not opts.builtin_initializer_list) { marshalled << opt_start << "--builtin-initializer-list=false"; }
669  if (not opts.source_dirs_in_include_path) { marshalled << opt_start << "--no-source-include"; }
670  if (opts.extra_device_vectorization) { marshalled << opt_start << "--extra-device-vectorization"; }
671  if (opts.disable_warnings) { marshalled << opt_start << "--disable-warnings"; }
672  if (opts.assume_restrict) { marshalled << opt_start << "--restrict"; }
673  if (opts.default_execution_space_is_device) { marshalled << opt_start << "--device-as-default-execution-space"; }
674  if (not opts.display_error_numbers) { marshalled << opt_start << "--no-display-error-number"; }
675  if (not opts.builtin_move_and_forward) { marshalled << opt_start << "--builtin-move-forward=false"; }
676  if (not opts.increase_stack_limit_to_max) { marshalled << opt_start << "--modify-stack-limit=false"; }
677  if (opts.link_time_optimization) { marshalled << opt_start << "--dlink-time-opt"; }
678  if (opts.use_fast_math) { marshalled << opt_start << "--use_fast_math"; }
679  else {
680  if (opts.flush_denormal_floats_to_zero) { marshalled << opt_start << "--ftz"; }
681  if (not opts.use_precise_square_root) { marshalled << opt_start << "--prec-sqrt=false"; }
682  if (not opts.use_precise_division) { marshalled << opt_start << "--prec-div=false"; }
683  if (not opts.use_fused_multiply_add) { marshalled << opt_start << "--fmad=false"; }
684  }
686  marshalled << opt_start << "--dopt=on";
687  }
688  if (not opts.ptxas.empty()) {
689  marshalled << opt_start << "--ptxas-options=" << opts.ptxas;
690 
691  }
692 
693  if (opts.language_dialect) {
694  marshalled << opt_start << "--std=" << rtc::detail_::cpp_dialect_names[static_cast<unsigned>(opts.language_dialect.value())];
695  }
696 
697  if (opts.maximum_register_count) {
698  marshalled << opt_start << "--maxrregcount=" << opts.maximum_register_count.value();
699  }
700 
701  // Multi-value options
702 
703  for(const auto& target : opts.targets_) {
704  #if CUDA_VERSION < 11000
705  marshalled << opt_start << "--gpu-architecture=compute_" << target.as_combined_number();
706  #else
707  marshalled << opt_start << "--gpu-architecture=sm_" << target.as_combined_number();
708  #endif
709  }
710 
711  for(const auto& def : opts.undefines) {
712  marshalled << opt_start << "-U" << def;
713  // Note: Could alternatively use "--undefine-macro=" instead of "-U"
714  }
715 
716 
717  for(const auto& def : opts.no_value_defines) {
718  marshalled << opt_start << "-D" << def;
719  // Note: Could alternatively use "--define-macro=" instead of "-D"
720  }
721 
722  for(const auto& def : opts.valued_defines) {
723  marshalled << opt_start << "-D" << def.first << '=' << def.second;
724  }
725 
726  for(const auto& path : opts.additional_include_paths) {
727  marshalled << opt_start << "--include-path=" << path;
728  }
729 
730  for(const auto& preinclude_file : opts.preinclude_files) {
731  marshalled << opt_start << "--pre-include=" << preinclude_file;
732  }
733 
734  for(const auto& override : opts.error_handling_overrides) {
735  marshalled
736  << opt_start << "--diag-" << rtc::error::detail_::option_name_part(override.second)
737  << '=' << override.first ;
738  }
739 
740  for(const auto& extra_opt : opts.extra_options) {
741  marshalled << opt_start << extra_opt;
742  }
743 
744  if (need_delimiter_after_last_option) {
745  marshalled << opt_start; // If no options were marshalled, this does nothing
746  }
747  }
748 };
749 
750 } // namespace detail_
751 
752 } // namespace marshalling
753 
754 } // namespace cuda
755 
756 #endif // CUDA_API_WRAPPERS_RTC_COMPILATION_OPTIONS_HPP_
::std::vector<::std::string > mangled_entry_function_names
Specifies the GPU kernels, or __global__ functions in CUDA-C++ terms, or .entry functions in PTX term...
Definition: compilation_options.hpp:283
bool default_execution_space_is_device
Assume functions without an explicit specification of their execution space are __device__ rather tha...
Definition: compilation_options.hpp:428
::std::string ptxas
Extra options for the PTX compiler (a.k.a. "PTX optimizing assembler").
Definition: compilation_options.hpp:434
compilation_options_t & clear_language_dialect()
Let the compiler interpret the program source code using its default-assumption for the C++ language ...
Definition: compilation_options.hpp:499
bool allow_expensive_optimizations_below_O2
Allow the JIT compiler to perform expensive optimizations using maximum available resources (memory a...
Definition: compilation_options.hpp:182
bool use_fused_multiply_add
Enables (disables) the contraction of floating-point multiplies and adds/subtracts into floating-poin...
Definition: compilation_options.hpp:388
The CUDA variant of C++, accepted by the NVRTC library.
Definition: types.hpp:41
::std::unordered_map<::std::string,::std::string > valued_defines
Preprocessor macros to have the compiler define to specific values.
Definition: compilation_options.hpp:418
bool compile_as_tools_patch
Compile as patch code for CUDA tools.
Definition: compilation_options.hpp:193
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
handling_method_t
Possible ways of handling a potentially problematic finding by the compiler in the program source cod...
Definition: compilation_options.hpp:67
bool generate_relocatable_device_code
Generate relocatable code that can be linked with other relocatable device code.
Definition: common_ptx_compilation_options.hpp:195
compilation_options_t & suppress_error(error::number_t error_number)
Ignore compiler findings of the specified number (rather than warnings about them or raising an error...
Definition: compilation_options.hpp:531
bool optimize_device_code_in_debug_mode
If debug mode is enabled, perform limited optimizations of device code rather than none at all...
Definition: compilation_options.hpp:313
bool extra_device_vectorization
Enables more aggressive device code vectorization in the LTO IR optimizer.
Definition: compilation_options.hpp:406
unsigned number_t
Errors, or problematic findings, by the compiler are identified by a number of this type...
Definition: compilation_options.hpp:70
bool compile_extensible_whole_program
Expecting only whole-programs to be directly usable, allow some calls to not be resolved until device...
Definition: compilation_options.hpp:199
bool use_fast_math
Make use of fast math operations.
Definition: compilation_options.hpp:392
A numeric designator of the computational capabilities of a CUDA device.
Definition: device_properties.hpp:75
optional< optimization_level_t > optimization_level
Compilation optimization level (as in -O1, -O2 etc.)
Definition: common_ptx_compilation_options.hpp:158
bool support_128bit_integers
Allow the use of the 128-bit __int128 type in the code.
Definition: compilation_options.hpp:318
compilation_options_t & set_language_dialect(const char *dialect_name)
Set which dialect of the C++ language the compiler will try to interpret the program source code as...
Definition: compilation_options.hpp:514
optional< caching_mode_t< memory_operation_t::load > > default_load_caching_mode() const override
Get the caching mode the compiler will be told to use as the default, for load instructions which don...
Definition: compilation_options.hpp:272
Options to be passed to one of the NVIDIA JIT compilers along with a program's source code...
Definition: compilation_options.hpp:164
bool link_time_optimization
Do not compile fully into PTX/Cubin.
Definition: compilation_options.hpp:399
Options for JIT-compilation of CUDA C++ code.
Definition: compilation_options.hpp:292
compilation_options_t & set_language_dialect(cpp_dialect_t dialect)
Set which dialect of the C++ language the compiler will try to interpret the program source code as...
Definition: compilation_options.hpp:507
bool compile_extensible_whole_program
Do extensible whole program compilation of device code.
Definition: compilation_options.hpp:305
optional< caching_mode_t< memory_operation_t::load > > & default_load_caching_mode() override
Get a reference to the caching mode the compiler will be told to use as the default, for load instructions which don't explicitly specify a particular caching mode.
Definition: compilation_options.hpp:265
bool disable_warnings
Have the compiler treat all warnings as though they were suppressed, and print nothing.
Definition: compilation_options.hpp:421
A subset of the options for compiling PTX code into SASS, usable both with the CUDA driver and with N...
Definition: common_ptx_compilation_options.hpp:149
optional< cpp_dialect_t > language_dialect
The dialect of C++ as which the compiler will be forced to interpret the program source code...
Definition: compilation_options.hpp:409
::std::vector<::std::string > additional_include_paths
A sequence of directories to be searched for headers.
Definition: compilation_options.hpp:445
compilation_options_t & set_language_dialect(const ::std::string &dialect_name)
Set which dialect of the C++ language the compiler will try to interpret the program source code as...
Definition: compilation_options.hpp:522
struct cuda::rtc::compilation_options_t< ptx >::@21 maximum_register_counts
Limits on the number of registers which generated object code (of different kinds) is allowed to use...
::std::unordered_set<::std::string > undefines
Preprocessor macros to tell the compiler to specifically _un_define.
Definition: compilation_options.hpp:415
struct cuda::rtc::compilation_options_t< ptx >::@20 situation_warnings
Warnings about situations likely to result in poor performance or other problems. ...
bool syntax_check_only
Stop compilation after the front-end has verified the program's syntax.
Definition: compilation_options.hpp:339
bool return_at_end_of_kernel
Prevents the optimizing away of the return instruction at the end of a program (a kernel...
Definition: compilation_options.hpp:225
bool disable_optimizer_constants
Disable use of the "optimizer constant bank" feature.
Definition: compilation_options.hpp:221
bool generate_debug_info
Generate debugging information associating SASS instructions to locations in the source, embedding it within the compilation output (-g)
Definition: common_ptx_compilation_options.hpp:169
bool use_precise_division
For single-precision floating-point division and reciprocals, use IEEE round-to-nearest mode or use a...
Definition: compilation_options.hpp:381
bool assume_restrict
Treat all kernel pointer parameters as if they had the restrict (or __restrict) qualifier.
Definition: compilation_options.hpp:424
compilation_options_base_t & add_target(device::compute_capability_t compute_capability)
Have the compilation also target a specific compute capability.
Definition: compilation_options.hpp:117
bool builtin_move_and_forward
Provide builtin definitions of ::std::move and ::std::forward.
Definition: compilation_options.hpp:464
bool generate_source_line_info
Generate indications of which PTX/SASS instructions correspond to which lines of the source code...
Definition: common_ptx_compilation_options.hpp:165
::std::unordered_set<::std::string > no_value_defines
Preprocessor macros to have the compiler define, without specifying a particular value.
Definition: compilation_options.hpp:412
bool preserve_variable_relocations
Generate relocatable references for variables and preserve relocations generated for them in the link...
Definition: compilation_options.hpp:229
bool dont_merge_basicblocks
Prevent the compiler from merging consecutive basic blocks (https://en.wikipedia.org/wiki/Basic_block...
Definition: compilation_options.hpp:215
bool builtin_initializer_list
Provide builtin definitions of ::std::initializer_list class and member functions.
Definition: compilation_options.hpp:482
bool parse_without_code_generation
Makes the PTX compiler run without producing any CUBIN output (for PTX verification only) ...
Definition: compilation_options.hpp:178
device::compute_capability_t compute_capability() const
Obtains the device's compute capability; see cuda::device::compute_capability_t.
Definition: device.hpp:415
NVIDIA's architecture-independent intermediate program representation language, known as PTX or Parall...
Definition: types.hpp:44
bool verbose
Print code generation statistics along with the compilation log.
Definition: compilation_options.hpp:206
bool use_precise_square_root
For single-precision floating-point square root, use IEEE round-to-nearest mode or use a faster appro...
Definition: compilation_options.hpp:374
::std::unordered_set< cuda::device::compute_capability_t > targets_
Target devices in terms of CUDA compute capability.
Definition: compilation_options.hpp:105
compilation_options_t & warn_about(error::number_t error_number)
Treat compiler findings of the specified number as warnings (rather than raising an error or ignoring...
Definition: compilation_options.hpp:547
::std::vector<::std::string > preinclude_files
Header files to preinclude during preprocessing of the source.
Definition: compilation_options.hpp:457
caching_mode_spec_t default_
The caching mode to be used for instructions which don't specify a caching mode.
Definition: compilation_options.hpp:257
compilation_options_t & treat_as_error(error::number_t error_number)
Treat compiler findings of the specified number as an error (rather than suppressing them or just war...
Definition: compilation_options.hpp:539
bool flush_denormal_floats_to_zero
When performing single-precision floating-point operations, flush denormal values to zero...
Definition: compilation_options.hpp:367
bool disable_warnings
The equivalent of suppressing all findings which currently trigger a warning.
Definition: compilation_options.hpp:218
bool display_error_numbers
Display (error) numbers for warning (and error?) messages, in addition to the message itself...
Definition: compilation_options.hpp:431
Compilation options common to all kinds of JIT-compilable programs.
Definition: compilation_options.hpp:86
bool increase_stack_limit_to_max
Use setrlimit() to increase the stack size to the maximum the OS allows.
Definition: compilation_options.hpp:475
Wrapper class for a CUDA device.
Definition: device.hpp:135
bool use_fused_multiply_add
Enable the contraction of multiplications-followed-by-additions (or subtractions) into single fused in...
Definition: compilation_options.hpp:203
Options for JIT-compilation of CUDA PTX code.
Definition: compilation_options.hpp:168
bool indicate_function_inlining
Emit a remark when a function is inlined.
Definition: compilation_options.hpp:323
bool source_dirs_in_include_path
Implicitly add the directories of source files (TODO: Which source files?) as include file search pat...
Definition: compilation_options.hpp:403
::std::vector<::std::string > extra_options
Support for additional, arbitrary options which may not be covered by other fields in this class (e...
Definition: compilation_options.hpp:491
bool less_builtins
Have the compiler not provide support for various builtins:
Definition: compilation_options.hpp:349
int optimization_level_t
The type used to specify the intensity, and extent of allowed implication, of optimization efforts...
Definition: common_ptx_compilation_options.hpp:26
optional< size_t > maximum_register_count
Specify the maximum amount of registers that GPU functions can use.
Definition: compilation_options.hpp:360