Expression Templates Library (ETL)
eval_selectors.hpp
Go to the documentation of this file.
1 //=======================================================================
2 // Copyright (c) 2014-2023 Baptiste Wicht
3 // Distributed under the terms of the MIT License.
4 // (See accompanying file LICENSE or copy at
5 // http://opensource.org/licenses/MIT)
6 //=======================================================================
7 
14 #pragma once
15 
16 namespace etl::detail {
17 
18 // Utilities
19 
26 template <vector_mode_t V, typename E, typename R>
27 constexpr bool are_vectorizable_select = vectorize_expr // ETL must be allowed to vectorize expressions
28  && decay_traits<R>::template vectorizable<V> // The LHS expression must be vectorizable
29  && decay_traits<E>::template vectorizable<V> // The RHS expression must be vectorizable
31  == decay_traits<R>::storage_order // Both expressions must have the same order
32  && get_intrinsic_traits<V>::template type<value_t<R>>::vectorizable // The LHS type must be vectorizable
33  && get_intrinsic_traits<V>::template type<value_t<E>>::vectorizable // The RHS type must be vectorizable
34  && std::is_same_v<
35  typename get_intrinsic_traits<V>::template type<value_t<R>>::intrinsic_type,
36  typename get_intrinsic_traits<V>::template type<value_t<E>>::intrinsic_type>;
37 
41 template <typename E, typename R>
42 constexpr bool are_vectorizable = (avx512_enabled && are_vectorizable_select<vector_mode_t::AVX512, E, R>)
43  || (avx_enabled && are_vectorizable_select<vector_mode_t::AVX, E, R>)
44  || (sse3_enabled && are_vectorizable_select<vector_mode_t::SSE3, E, R>);
45 
51 template <typename E, typename R>
53  return (avx512_enabled && are_vectorizable_select<vector_mode_t::AVX512, E, R>)
55  : (avx_enabled && are_vectorizable_select<vector_mode_t::AVX, E, R>)
57  : (sse3_enabled && are_vectorizable_select<vector_mode_t::SSE3, E, R>) ? vector_mode_t::SSE3 : vector_mode_t::NONE;
58 }
59 
60 //Selectors for assign
61 
67 template <typename E, typename R>
68 constexpr bool fast_assign = all_dma<E, R>;
69 
73 template <typename E, typename R>
74 constexpr bool gpu_assign = all_homogeneous<E, R> && !fast_assign<E, R> && all_gpu_computable<E, R> && is_dma<R> && !is_scalar<E>;
75 
79 template <typename E, typename R>
80 constexpr bool vectorized_assign = !fast_assign<E, R> && !gpu_assign<E, R> && are_vectorizable<E, R>;
81 
85 template <typename E, typename R>
86 constexpr bool direct_assign = !gpu_assign<E, R> && !are_vectorizable<E, R> && !is_dma<E> && is_dma<R>;
87 
91 template <typename E, typename R>
92 constexpr bool standard_assign = !is_dma<R>;
93 
94 //Selectors for compound operations
95 
99 template <typename E, typename R>
100 constexpr bool gpu_compound = all_homogeneous<E, R>&& all_gpu_computable<E, R>&& is_dma<R>&& cublas_enabled&& egblas_enabled;
101 
105 template <typename E, typename R>
106 constexpr bool vectorized_compound = !gpu_compound<E, R> && are_vectorizable<E, R>;
107 
111 template <typename E, typename R>
112 constexpr bool direct_compound = !gpu_compound<E, R> && !vectorized_compound<E, R> && is_dma<R>;
113 
117 template <typename E, typename R>
118 constexpr bool standard_compound = !gpu_compound<E, R> && !vectorized_compound<E, R> && !direct_compound<E, R>;
119 
120 //Selectors for compound div operation
121 
125 template <typename E, typename R>
126 constexpr bool gpu_compound_div = all_homogeneous<E, R>&& all_gpu_computable<E, R>&& is_dma<R>&& cublas_enabled&& egblas_enabled;
127 
131 template <typename E, typename R>
132 constexpr bool vectorized_compound_div = !gpu_compound_div<E, R> && (is_floating_t<value_t<E>> || is_complex_t<value_t<E>>)&&are_vectorizable<E, R>;
133 
137 template <typename E, typename R>
138 constexpr bool direct_compound_div = !gpu_compound_div<E, R> && !vectorized_compound_div<E, R> && is_dma<R>;
139 
143 template <typename E, typename R>
144 constexpr bool standard_compound_div = !gpu_compound_div<E, R> && !vectorized_compound_div<E, R> && !direct_compound_div<E, R>;
145 
146 //Selectors without GPU
147 
153 template <typename E, typename R>
154 constexpr bool fast_assign_no_gpu = all_dma<E, R>;
155 
159 template <typename E, typename R>
160 constexpr bool vectorized_assign_no_gpu = !fast_assign_no_gpu<E, R> && are_vectorizable<E, R>;
161 
165 template <typename E, typename R>
166 constexpr bool direct_assign_no_gpu = !are_vectorizable<E, R> && !is_dma<E> && is_dma<R>;
167 
171 template <typename E, typename R>
172 constexpr bool standard_assign_no_gpu = !is_dma<R>;
173 
174 //Selectors for compound operations
175 
179 template <typename E, typename R>
180 constexpr bool vectorized_compound_no_gpu = are_vectorizable<E, R>;
181 
185 template <typename E, typename R>
186 constexpr bool direct_compound_no_gpu = !vectorized_compound_no_gpu<E, R> && is_dma<R>;
187 
191 template <typename E, typename R>
192 constexpr bool standard_compound_no_gpu = !vectorized_compound_no_gpu<E, R> && !direct_compound_no_gpu<E, R>;
193 
194 //Selectors for compound div operation
195 
199 template <typename E, typename R>
200 constexpr bool vectorized_compound_div_no_gpu = (is_floating_t<value_t<E>> || is_complex_t<value_t<E>>)&&are_vectorizable<E, R>;
201 
205 template <typename E, typename R>
206 constexpr bool direct_compound_div_no_gpu = !vectorized_compound_div_no_gpu<E, R> && is_dma<R>;
207 
211 template <typename E, typename R>
212 constexpr bool standard_compound_div_no_gpu = !vectorized_compound_div_no_gpu<E, R> && !direct_compound_div_no_gpu<E, R>;
213 
214 } //end of namespace etl::detail
Traits to get the intrinsic traits for a vector mode.
Definition: vectorization.hpp:88
constexpr bool standard_compound_div_no_gpu
Integral constant indicating if a standard compound div assign is necessary.
Definition: eval_selectors.hpp:212
constexpr bool vectorized_compound_div
Integral constant indicating if a vectorized compound div assign is possible.
Definition: eval_selectors.hpp:132
constexpr bool direct_compound_no_gpu
Integral constant indicating if a direct compound assign is possible.
Definition: eval_selectors.hpp:186
constexpr bool vectorized_assign
Integral constant indicating if a vectorized assign is possible.
Definition: eval_selectors.hpp:80
vector_mode_t
Vectorization mode.
Definition: config.hpp:184
constexpr bool avx_enabled
Indicates if AVX is available.
Definition: config.hpp:205
No vectorization is available.
constexpr bool vectorized_compound_no_gpu
Integral constant indicating if a vectorized compound assign is possible.
Definition: eval_selectors.hpp:180
constexpr bool direct_assign
Integral constant indicating if a direct assign is possible.
Definition: eval_selectors.hpp:86
constexpr bool vectorized_compound
Integral constant indicating if a vectorized compound assign is possible.
Definition: eval_selectors.hpp:106
constexpr bool gpu_compound_div
Integral constant indicating if a GPU compound div assign is possible.
Definition: eval_selectors.hpp:126
constexpr bool standard_compound_div
Integral constant indicating if a standard compound div assign is necessary.
Definition: eval_selectors.hpp:144
SSE3 is the max vectorization available.
constexpr bool standard_assign
Integral constant indicating if a standard assign is necessary.
Definition: eval_selectors.hpp:92
constexpr vector_mode_t select_vector_mode()
Select a vector mode for the given assignment type.
Definition: eval_selectors.hpp:52
Definition: expression_builder.hpp:699
constexpr bool direct_compound
Integral constant indicating if a direct compound assign is possible.
Definition: eval_selectors.hpp:112
constexpr bool fast_assign_no_gpu
Integral constant indicating if a fast assign is possible.
Definition: eval_selectors.hpp:154
Traits to get information about ETL types.
Definition: tmp.hpp:68
constexpr bool egblas_enabled
Indicates if the EGBLAS library is available for ETL.
Definition: config.hpp:119
constexpr bool vectorized_assign_no_gpu
Integral constant indicating if a vectorized assign is possible.
Definition: eval_selectors.hpp:160
constexpr bool direct_compound_div
Integral constant indicating if a direct compound div assign is possible.
Definition: eval_selectors.hpp:138
constexpr bool cublas_enabled
Indicates if the NVIDIA CUBLAS library is available for ETL.
Definition: config.hpp:99
constexpr bool avx512_enabled
Indicates if AVX512 is available.
Definition: config.hpp:200
constexpr bool vectorize_expr
Indicates if the expressions can be automatically vectorized by ETL.
Definition: config.hpp:26
constexpr bool are_vectorizable_select
Traits to test if the given assignment is vectorizable with the given vector mode.
Definition: eval_selectors.hpp:27
constexpr bool gpu_compound
Integral constant indicating if a GPU compound assign is possible.
Definition: eval_selectors.hpp:100
constexpr bool standard_compound_no_gpu
Integral constant indicating if a standard compound assign is necessary.
Definition: eval_selectors.hpp:192
constexpr bool standard_assign_no_gpu
Integral constant indicating if a standard assign is necessary.
Definition: eval_selectors.hpp:172
AVX is the max vectorization available.
constexpr bool gpu_assign
Integral constant indicating if a GPU assign is possible.
Definition: eval_selectors.hpp:74
constexpr bool fast_assign
Integral constant indicating if a fast assign is possible.
Definition: eval_selectors.hpp:68
constexpr bool vectorized_compound_div_no_gpu
Integral constant indicating if a vectorized compound div assign is possible.
Definition: eval_selectors.hpp:200
constexpr bool sse3_enabled
Indicates if SSE3 is available.
Definition: config.hpp:215
AVX-512F is the max vectorization available.
constexpr bool direct_assign_no_gpu
Integral constant indicating if a direct assign is possible.
Definition: eval_selectors.hpp:166
constexpr bool standard_compound
Integral constant indicating if a standard compound assign is necessary.
Definition: eval_selectors.hpp:118
constexpr bool are_vectorizable
Integral constant indicating if vectorization is possible.
Definition: eval_selectors.hpp:42
constexpr bool direct_compound_div_no_gpu
Integral constant indicating if a direct compound div assign is possible.
Definition: eval_selectors.hpp:206