cuda-api-wrappers
Thin C++-flavored wrappers for the CUDA Runtime API
common_ptx_compilation_options.hpp
Go to the documentation of this file.
1 
7 #pragma once
8 #ifndef CUDA_API_WRAPPERS_COMMON_PTX_COMPILATION_OPTIONS_HPP_
9 #define CUDA_API_WRAPPERS_COMMON_PTX_COMPILATION_OPTIONS_HPP_
10 
11 #include "types.hpp"
12 #include "device.hpp"
13 
14 #include <array>
15 
16 namespace cuda {
17 
18 namespace rtc {
19 
/// The type used to specify the maximum number of SM registers to use, to
/// the PTX compiler.
22 using ptx_register_count_t = int16_t;
23 
// NOTE(review): `optimization_level_t`, which is used further down in this
// file, is not defined on any line visible in this listing - presumably its
// definition sat between the alias above and the closing brace below; confirm
// against the real header.
27 
28 } // namespace rtc
29 
/// The kinds of memory-access operation for which a caching mode can be chosen.
enum class memory_operation_t { load, store };

/// A helper struct for templatizing caching<Op>::mode
template <memory_operation_t Op> struct caching;

/// Caching modes, and their short names, for memory load operations
template <> struct caching<memory_operation_t::load> {

    /**
     * The combination of effects the execution of an instruction will have on
     * the GPU caching mechanisms. Each group below is one distinct mode followed
     * by its aliases; all names in a group share the same value.
     */
    enum mode {
        /// ca - Cache at all levels, likely to be accessed again.
        ca = 0,
        all = ca,
        cache_all = ca,
        cache_at_all_levels = ca,
        cache_in_l1_and_l2 = ca,
        /// Misspelled variant of cache_in_l1_and_l2; kept so that code already
        /// using this name continues to compile.
        cash_in_l1_and_l2 = ca,

        /// Cache at global level (cache in L2 and below, not L1).
        cg = 1,
        global = cg,
        cache_global = cg,
        cache_at_global_level = cg,
        cache_in_l2_only = cache_at_global_level,

        /// Cache streaming, likely to be accessed once.
        cs = 2,
        evict_first = cs,
        cache_as_evict_first = cs,
        cache_streaming = cs,

        /// Last use.
        lu = 3,
        last_use = lu,

        /// Don't cache and fetch again (consider cached system memory lines
        /// stale, fetch again).
        cv = 4,
        dont_cache = cv,
        fetch_again_and_dont_cache = cv,
    };

    /// Short names of the modes; the entry at index i names the mode whose
    /// enumerator value is i.
    static constexpr const char* mode_names[] = { "ca", "cg", "cs", "lu", "cv" };
};
94 
96 template <> struct caching<memory_operation_t::store> {
97 
100  enum mode {
101  wb = 0, write_back = wb, write_back_coherent_levels = wb,
102  cg = 1, global = cg, cache_global = cg, cache_at_global_level = cg,
103  cs = 2, evict_first = cs, cache_as_evict_first = cs, cache_streaming = cs,
104  wt = 3, write_through = wt, write_through_to_system_memory = wt
105  };
106 
108  static constexpr const char* mode_names[] = { "wb", "cg", "cs", "wt" };
110 };
111 
112 template <memory_operation_t Op>
113 using caching_mode_t = typename caching<Op>::mode;
114 
115 namespace detail_ {
116 
117 template <memory_operation_t Op>
118 const char* name(caching_mode_t<Op> mode)
119 {
120  return caching<Op>::mode_names[static_cast<int>(mode)];
121 }
122 
123 } // namespace detail_
124 
126 template <memory_operation_t Op>
127 inline ::std::ostream& operator<< (::std::ostream& os, caching_mode_t<Op> lcm)
128 {
129  return os << detail_::name(lcm);
130 }
132 
133 namespace rtc {
134 
// NOTE(review): this listing skips several of its own line numbers right after
// the `maximum` member below. The text that ends the anonymous range struct
// (documented elsewhere as `valid_optimization_level_range`: the range of
// optimization level values outside of which the compiler is certain not to
// support them) and the text that opens the options struct named in the
// closing comment at the bottom are therefore not visible. Everything from
// `max_num_registers_per_thread` onwards presumably belongs to that second
// struct - confirm against the real header.
139 constexpr const struct {
140  optimization_level_t minimum;
141  optimization_level_t maximum;
143 
150 
// Optional cap on the number of registers per thread; unset by default.
152  optional<ptx_register_count_t> max_num_registers_per_thread{};
153 
// Optional minimum number of threads per block; unset by default.
155  optional<grid::block_dimension_t> min_num_threads_per_block{};
156 
// Optional optimization level; unset by default.
158  optional<optimization_level_t> optimization_level{};
159 
// Which NVIDIA physical architecture to generate SASS code for.
161  optional<device::compute_capability_t> specific_target;
162 
// Flag for generating source line information; off by default.
165  bool generate_source_line_info {false};
166 
// Flag for generating debug information; off by default.
169  bool generate_debug_info {false};
170 
// Which of the memory-load-instruction caching modes (see caching_mode_t) to
// use by default.
174  optional<caching_mode_t<memory_operation_t::load>> default_load_caching_mode_;
175 
// Mutable accessor: returns a reference to default_load_caching_mode_, so the
// caller can set or clear it. Virtual, so a derived type may override it.
178  virtual optional<caching_mode_t<memory_operation_t::load>>& default_load_caching_mode()
179  {
180  return default_load_caching_mode_;
181  }
182 
// Const accessor: returns a copy of default_load_caching_mode_.
183  virtual optional<caching_mode_t<memory_operation_t::load>> default_load_caching_mode() const
184  {
185  return default_load_caching_mode_;
186  }
188 
// Flag for generating relocatable device code; off by default.
195  bool generate_relocatable_device_code { false };
196 
197  // What about store caching?
// NOTE(review): the visible part of this type declares virtual member functions
// but no virtual destructor - check whether objects are ever deleted through a
// pointer to this type.
198 }; // common_ptx_compilation_options_t
199 
200 } // namespace rtc
201 } // namespace cuda
202 
203 #endif // CUDA_API_WRAPPERS_COMMON_PTX_COMPILATION_OPTIONS_HPP_
A proxy class for CUDA devices, providing access to all Runtime API calls involving their use and man...
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22
Cache streaming, likely to be accessed once.
Definition: common_ptx_compilation_options.hpp:69
mode
The combination of effects the execution of an instruction will have on the GPU caching mechanisms...
Definition: common_ptx_compilation_options.hpp:100
virtual optional< caching_mode_t< memory_operation_t::load > > & default_load_caching_mode()
see default_load_caching_mode_
Definition: common_ptx_compilation_options.hpp:178
optional< caching_mode_t< memory_operation_t::load > > default_load_caching_mode_
Which of the memory-load-instruction caching modes (see {caching_mode_t}) to use by default...
Definition: common_ptx_compilation_options.hpp:174
mode
The combination of effects the execution of an instruction will have on the GPU caching mechanisms...
Definition: common_ptx_compilation_options.hpp:40
ca - Cache at all levels, likely to be accessed again.
Definition: common_ptx_compilation_options.hpp:50
A subset of the options for compiling PTX code into SASS, usable both with the CUDA driver and with N...
Definition: common_ptx_compilation_options.hpp:149
Don't cache and fetch again (consider cached system memory lines stale, fetch again).
Definition: common_ptx_compilation_options.hpp:90
int16_t ptx_register_count_t
The type used to specify the maximum number of SM registers to use, to the PTX compiler.
Definition: common_ptx_compilation_options.hpp:22
Cache at global level (cache in L2 and below, not L1).
Definition: common_ptx_compilation_options.hpp:58
A helper struct for templatizing caching<Op>::mode.
Definition: common_ptx_compilation_options.hpp:33
Last use.
Definition: common_ptx_compilation_options.hpp:80
constexpr const struct cuda::rtc::@0 valid_optimization_level_range
The range of optimization level values outside of which the compiler is certain not to support...
optional< device::compute_capability_t > specific_target
Which NVIDIA physical architecture to generate SASS code for.
Definition: common_ptx_compilation_options.hpp:161
Fundamental CUDA-related type definitions.
int optimization_level_t
The type used to specify the intensity, and extent of allowed implication, of optimization efforts...
Definition: common_ptx_compilation_options.hpp:26