// Include guard for this header, followed by memory_operation_t: the two kinds
// of memory operation (load, store) used as the template argument of `caching`.
8 #ifndef CUDA_API_WRAPPERS_COMMON_PTX_COMPILATION_OPTIONS_HPP_ 9 #define CUDA_API_WRAPPERS_COMMON_PTX_COMPILATION_OPTIONS_HPP_ 30 enum class memory_operation_t { load, store };
/// A helper struct for templatizing caching<Op>::mode.
/// Only declared here; the definitions visible in this file are the explicit
/// specializations for memory_operation_t::load and memory_operation_t::store.
33 template <memory_operation_t Op>
struct caching;
/// Caching modes applicable to memory-load instructions, plus their short names.
// NOTE(review): the line opening the enumeration that holds the enumerators
// below, and the closing braces, are not visible in this listing.
36 template <>
struct caching<memory_operation_t::load> {
// ca (value 0) - Cache at all levels, likely to be accessed again.
// The other names in this group are aliases for the same value.
// NOTE(review): `cash_in_l1_and_l2` looks like a misspelling of "cache"; it is a
// public enumerator name, so renaming it would break callers - consider adding
// a correctly-spelled alias alongside it instead.
50 ca = 0, all =
ca, cache_all =
ca, cache_at_all_levels =
ca, cash_in_l1_and_l2 =
ca,
// cg (value 1) - Cache at global level (cache in L2 and below, not L1).
// The other names in this group are aliases for the same value.
58 cg = 1, global =
cg, cache_global =
cg, cache_at_global_level =
cg, cache_in_l2_only = cache_at_global_level,
// cs (value 2) - Cache streaming, likely to be accessed once.
// The other names in this group are aliases for the same value.
69 cs = 2, evict_first =
cs, cache_as_evict_first =
cs, cache_streaming =
cs,
// lu (value 3) - Last use.
80 lu = 3, last_use =
lu,
// cv (value 4) - Don't cache and fetch again (consider cached system memory
// lines stale, fetch again).
90 cv = 4, dont_cache =
cv, fetch_again_and_dont_cache =
cv,
// Short names of the modes, ordered so that the string at index i belongs to
// the enumerator whose value is i (ca = 0 ... cv = 4).
92 static constexpr
const char* mode_names[] = {
"ca",
"cg",
"cs",
"lu",
"cv" };
/// Caching modes applicable to memory-store instructions, plus their short names.
// NOTE(review): the line opening the enumeration that holds the enumerators
// below, and the closing braces, are not visible in this listing.
96 template <>
struct caching<memory_operation_t::store> {
// wb (value 0); aliases: write_back, write_back_coherent_levels.
101 wb = 0, write_back = wb, write_back_coherent_levels = wb,
// cg (value 1); aliases: global, cache_global, cache_at_global_level.
102 cg = 1, global = cg, cache_global = cg, cache_at_global_level = cg,
// cs (value 2); aliases: evict_first, cache_as_evict_first, cache_streaming.
103 cs = 2, evict_first = cs, cache_as_evict_first = cs, cache_streaming = cs,
// wt (value 3); aliases: write_through, write_through_to_system_memory.
104 wt = 3, write_through = wt, write_through_to_system_memory = wt
// Short names of the modes, ordered so that the string at index i belongs to
// the enumerator whose value is i (wb = 0 ... wt = 3).
108 static constexpr
const char* mode_names[] = {
"wb",
"cg",
"cs",
"wt" };
112 template <memory_operation_t Op>
// Takes a caching mode for memory operation Op and returns a C string for it.
// NOTE(review): the function body is not visible in this listing; presumably it
// looks the mode up in caching<Op>::mode_names - confirm against the full header.
117 template <memory_operation_t Op>
118 const char* name(caching_mode_t<Op>
mode)
// Stream-insertion operator for caching modes: writes the string obtained from
// detail_::name() for the given mode into `os`, and returns the result of that
// insertion so calls can be chained.
126 template <memory_operation_t Op>
127 inline ::std::ostream& operator<< (::std::ostream& os, caching_mode_t<Op> lcm)
129 return os << detail_::name(lcm);
139 constexpr
const struct {
// Optional limit on the number of registers each thread may use; declared with
// an empty `{}` initializer.
152 optional<ptx_register_count_t> max_num_registers_per_thread{};
// Optional minimum number of threads per block; declared with an empty `{}`
// initializer.
// NOTE(review): how the compiler uses this value is not shown here - confirm.
155 optional<grid::block_dimension_t> min_num_threads_per_block{};
// Optional optimization level to request; declared with an empty `{}` initializer.
158 optional<optimization_level_t> optimization_level{};
// Whether to generate source line information; off (false) by default.
165 bool generate_source_line_info {
false};
// Whether to generate debug information; off (false) by default.
169 bool generate_debug_info {
false};
// Body of an accessor whose signature line is not visible in this listing:
// it yields the default_load_caching_mode_ member.
180 return default_load_caching_mode_;
// Const overload: returns, by value, the optional default caching mode for
// memory-load instructions held in default_load_caching_mode_. Declared virtual,
// so a derived class may override how the value is obtained.
183 virtual optional<caching_mode_t<memory_operation_t::load>> default_load_caching_mode()
const 185 return default_load_caching_mode_;
// Whether to generate relocatable device code; off (false) by default.
195 bool generate_relocatable_device_code {
false };
203 #endif // CUDA_API_WRAPPERS_COMMON_PTX_COMPILATION_OPTIONS_HPP_
A proxy class for CUDA devices, providing access to all Runtime API calls involving their use and management...
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
Cache streaming, likely to be accessed once.
Definition: common_ptx_compilation_options.hpp:69
mode
The combination of effects the execution of an instruction will have on the GPU caching mechanisms...
Definition: common_ptx_compilation_options.hpp:100
virtual optional< caching_mode_t< memory_operation_t::load > > & default_load_caching_mode()
see default_load_caching_mode_
Definition: common_ptx_compilation_options.hpp:178
optional< caching_mode_t< memory_operation_t::load > > default_load_caching_mode_
Which of the memory-load-instruction caching modes (see caching_mode_t) to use by default...
Definition: common_ptx_compilation_options.hpp:174
mode
The combination of effects the execution of an instruction will have on the GPU caching mechanisms...
Definition: common_ptx_compilation_options.hpp:40
ca - Cache at all levels, likely to be accessed again.
Definition: common_ptx_compilation_options.hpp:50
A subset of the options for compiling PTX code into SASS, usable both with the CUDA driver and with N...
Definition: common_ptx_compilation_options.hpp:149
Don't cache and fetch again (consider cached system memory lines stale, fetch again).
Definition: common_ptx_compilation_options.hpp:90
int16_t ptx_register_count_t
The type used to specify the maximum number of SM registers to use, to the PTX compiler.
Definition: common_ptx_compilation_options.hpp:22
Cache at global level (cache in L2 and below, not L1).
Definition: common_ptx_compilation_options.hpp:58
A helper struct for templatizing caching<Op>::mode.
Definition: common_ptx_compilation_options.hpp:33
Last use.
Definition: common_ptx_compilation_options.hpp:80
constexpr const struct (anonymous, in namespace cuda::rtc) valid_optimization_level_range
The range of optimization level values outside of which the compiler is certain not to support...
optional< device::compute_capability_t > specific_target
Which NVIDIA physical architecture to generate SASS code for.
Definition: common_ptx_compilation_options.hpp:161
Fundamental CUDA-related type definitions.
int optimization_level_t
The type used to specify the intensity, and extent of allowed implication, of optimization efforts...
Definition: common_ptx_compilation_options.hpp:26