eyalroz/cuda-api-wrappers/common__ptx__compilation__options_8hpp_source.html

 #pragma once
 #ifndef CUDA_API_WRAPPERS_COMMON_PTX_COMPILATION_OPTIONS_HPP_
 #define CUDA_API_WRAPPERS_COMMON_PTX_COMPILATION_OPTIONS_HPP_

 #include "types.hpp"
 #include "device.hpp"

 #include <array>

 namespace cuda {

 namespace rtc {

 /// The type used to specify the intensity of optimization efforts
 /// requested from the PTX compiler.
 using optimization_level_t = int;

 /// The type used to specify, to the PTX compiler, the maximum number of
 /// SM registers to use.
 using ptx_register_count_t = int16_t;

 } // namespace rtc

 /// The kinds of memory operation for which caching modes are defined
 /// in this file.
 enum class memory_operation_t {
     load,
     store
 };

 /// A helper struct for templatizing caching<Op>::mode. Only declared
 /// here; the usable definitions are the explicit specializations.
 template <memory_operation_t Op>
 struct caching;

 /// Caching modes applicable to memory load instructions.
 template <> struct caching<memory_operation_t::load> {

     /// The combination of effects the execution of a load instruction will
     /// have on the GPU caching mechanisms. Each short two-letter enumerator
     /// is followed by more descriptive aliases with the same value.
     enum mode {
         /// ca - Cache at all levels, likely to be accessed again.
         ca = 0, all = ca, cache_all = ca, cache_at_all_levels = ca, cache_in_l1_and_l2 = ca,
         /// Misspelled alias of cache_in_l1_and_l2; kept only so that existing
         /// code using this name keeps compiling. Prefer cache_in_l1_and_l2.
         cash_in_l1_and_l2 = ca,

         /// cg - Cache at global level (cache in L2 and below, not L1).
         cg = 1, global = cg, cache_global = cg, cache_at_global_level = cg, cache_in_l2_only = cache_at_global_level,

         /// cs - Cache streaming, likely to be accessed once.
         cs = 2, evict_first = cs, cache_as_evict_first = cs, cache_streaming = cs,

         /// lu - Last use.
         lu = 3, last_use = lu,

         /// cv - Don't cache and fetch again (consider cached system memory
         /// lines stale, fetch again).
         cv = 4, dont_cache = cv, fetch_again_and_dont_cache = cv,
     };

     /// Short names of the modes, indexed by the numeric value of the mode.
     static constexpr const char* mode_names[] = { "ca", "cg", "cs", "lu", "cv" };
 };

 /// Caching modes applicable to memory store instructions.
 template <>
 struct caching<memory_operation_t::store> {

     /// The combination of effects the execution of a store instruction will
     /// have on the GPU caching mechanisms. Each short two-letter enumerator
     /// is followed by more descriptive aliases with the same value.
     enum mode {
         // Value 0: write-back
         wb = 0,
         write_back = wb,
         write_back_coherent_levels = wb,

         // Value 1: cache at global level
         cg = 1,
         global = cg,
         cache_global = cg,
         cache_at_global_level = cg,

         // Value 2: cache streaming
         cs = 2,
         evict_first = cs,
         cache_as_evict_first = cs,
         cache_streaming = cs,

         // Value 3: write-through
         wt = 3,
         write_through = wt,
         write_through_to_system_memory = wt
     };

     /// Short names of the modes, indexed by the numeric value of the mode.
     static constexpr const char* mode_names[] = { "wb", "cg", "cs", "wt" };
 };

 /// The caching-mode enumeration for the given kind of memory operation;
 /// shorthand for caching<Op>::mode. Because this names a nested type of
 /// caching<Op>, Op cannot be deduced from a function argument of this type.
 template <memory_operation_t Op>
 using caching_mode_t = typename caching<Op>::mode;

 namespace detail_ {

 /// Looks up the short name of a caching mode in caching<Op>::mode_names.
 ///
 /// @tparam Op the kind of memory operation; must be given explicitly, since
 ///     the parameter type is a nested type of caching<Op> and so does not
 ///     allow deducing it
 /// @param mode the caching mode; its numeric value is used, unchecked, as
 ///     the index into the names table
 /// @return the table entry for @p mode
 ///
 /// NOTE(review): the subscript below appears to odr-use mode_names, which
 /// has no out-of-class definition in this file; that should be harmless
 /// with C++17 and later, but confirm for earlier language standards.
 template <memory_operation_t Op>
 const char* name(caching_mode_t<Op> mode)
 {
     const auto index = static_cast<int>(mode);
     return caching<Op>::mode_names[index];
 }

 } // namespace detail_

 template <memory_operation_t Op>
 inline ::std::ostream& operator<< (::std::ostream& os, caching_mode_t<Op> lcm)
 {
     return os << detail_::name(lcm);
 }

 namespace rtc {

 /// The range of optimization level values outside of which the compiler
 /// is certain not to support the requested level.
 constexpr struct {
     optimization_level_t minimum, maximum;
 } valid_optimization_level_range { 0, 4 };

 /// A subset of the options for compiling PTX code into SASS.
 ///
 /// Options held in an `optional` are left unset (empty) by default.
 struct common_ptx_compilation_options_t {

     // This class has virtual member functions, so it can serve as a
     // polymorphic base; a virtual destructor makes destruction through a
     // base-class pointer well-defined. Declaring a destructor would
     // suppress the implicitly-declared move operations, so all special
     // member functions are explicitly defaulted to keep the copy and move
     // behavior the class had without the destructor.
     common_ptx_compilation_options_t() = default;
     common_ptx_compilation_options_t(const common_ptx_compilation_options_t&) = default;
     common_ptx_compilation_options_t(common_ptx_compilation_options_t&&) = default;
     common_ptx_compilation_options_t& operator=(const common_ptx_compilation_options_t&) = default;
     common_ptx_compilation_options_t& operator=(common_ptx_compilation_options_t&&) = default;
     virtual ~common_ptx_compilation_options_t() = default;

     /// Upper limit on the number of registers each thread may use, if set.
     optional<ptx_register_count_t> max_num_registers_per_thread{};

     /// Lower limit on the number of threads per block, if set.
     optional<grid::block_dimension_t> min_num_threads_per_block{};

     /// The requested optimization level, if set.
     optional<optimization_level_t> optimization_level{};

     /// Which NVIDIA physical architecture to generate SASS code for.
     optional<device::compute_capability_t> specific_target{};

     /// Whether to generate source line information.
     bool generate_source_line_info {false};

     /// Whether to generate debug information.
     bool generate_debug_info {false};

     /// Which of the memory-load-instruction caching modes (see
     /// caching_mode_t) to use by default.
     optional<caching_mode_t<memory_operation_t::load>> default_load_caching_mode_{};

     /// Mutable access to default_load_caching_mode_; virtual, so a derived
     /// class may substitute its own storage or behavior.
     virtual optional<caching_mode_t<memory_operation_t::load>>& default_load_caching_mode()
     {
         return default_load_caching_mode_;
     }

     /// Read access to default_load_caching_mode_; returns a copy.
     virtual optional<caching_mode_t<memory_operation_t::load>> default_load_caching_mode() const
     {
         return default_load_caching_mode_;
     }

     /// Whether to generate relocatable device code.
     bool generate_relocatable_device_code { false };

     // What about store caching?
 }; // common_ptx_compilation_options_t

 } // namespace rtc
 } // namespace cuda

 #endif // CUDA_API_WRAPPERS_COMMON_PTX_COMPILATION_OPTIONS_HPP_
device.hpp
A proxy class for CUDA devices, providing access to all Runtime API calls involving their use and man...

cuda
Definitions and functionality wrapping CUDA APIs.
Definition: array.hpp:22

cuda::caching< memory_operation_t::load >::cs
Cache streaming, likely to be accessed once.
Definition: common_ptx_compilation_options.hpp:69

cuda::caching< memory_operation_t::store >::mode
mode
The combination of effects the execution of an instruction will have on the GPU caching mechanisms...
Definition: common_ptx_compilation_options.hpp:100

cuda::rtc::common_ptx_compilation_options_t::default_load_caching_mode
virtual optional< caching_mode_t< memory_operation_t::load > > & default_load_caching_mode()
see default_load_caching_mode_
Definition: common_ptx_compilation_options.hpp:178

cuda::rtc::common_ptx_compilation_options_t::default_load_caching_mode_
optional< caching_mode_t< memory_operation_t::load > > default_load_caching_mode_
Which of the memory-load-instruction caching modes (see {caching_mode_t}) to use by default...
Definition: common_ptx_compilation_options.hpp:174

cuda::caching< memory_operation_t::load >::mode
mode
The combination of effects the execution of an instruction will have on the GPU caching mechanisms...
Definition: common_ptx_compilation_options.hpp:40

cuda::caching< memory_operation_t::load >::ca
ca - Cache at all levels, likely to be accessed again.
Definition: common_ptx_compilation_options.hpp:50

cuda::rtc::common_ptx_compilation_options_t
A subset of the options for compiling PTX code into SASS, usable both with the CUDA driver and with N...
Definition: common_ptx_compilation_options.hpp:149

cuda::caching< memory_operation_t::load >::cv
Don't cache and fetch again (consider cached system memory lines stale, fetch again).
Definition: common_ptx_compilation_options.hpp:90

cuda::rtc::ptx_register_count_t
int16_t ptx_register_count_t
The type used to specify the maximum number of SM registers to use, to the PTX compiler.
Definition: common_ptx_compilation_options.hpp:22

cuda::caching< memory_operation_t::load >::cg
Cache at global level (cache in L2 and below, not L1).
Definition: common_ptx_compilation_options.hpp:58

cuda::caching
A helper struct for templatizing caching<Op>::mode.
Definition: common_ptx_compilation_options.hpp:33

cuda::caching< memory_operation_t::load >::lu
Last use.
Definition: common_ptx_compilation_options.hpp:80

cuda::rtc::valid_optimization_level_range
constexpr const struct cuda::rtc::@0 valid_optimization_level_range
The range of optimization level values outside of which the compiler is certain not to support...

cuda::rtc::common_ptx_compilation_options_t::specific_target
optional< device::compute_capability_t > specific_target
Which NVIDIA physical architecture to generate SASS code for.
Definition: common_ptx_compilation_options.hpp:161

types.hpp
Fundamental CUDA-related type definitions.

cuda::rtc::optimization_level_t
int optimization_level_t
The type used to specify the intensity, and extent of allowed implication, of optimization efforts...
Definition: common_ptx_compilation_options.hpp:26