// Fragment of the shared base for the vectorized functors; only part of the
// definition is present in this listing.
21 template <vector_mode_t V>
// Loads from x at index i by forwarding to x.load<vect_impl>(i). The functors
// in this file call it on both their left-hand and right-hand expressions.
31 static inline auto load(T&& x,
size_t i) {
32 return x.template load<vect_impl>(i);
// Fragment of the functor performing a vectorized plain assignment (lhs = rhs).
// NOTE(review): several listing lines are missing here (declarations of N, IT
// and i, the condition choosing between the two code paths, closing braces).
42 template <vector_mode_t V>
51 template <
typename L_Expr,
typename R_Expr>
// Copies rhs into lhs one vector at a time; lhs is written through its
// stream/store members, rhs is read through load().
52 static void apply(L_Expr&& lhs, R_Expr&& rhs) {
// Raw pointer to the start of lhs memory.
57 auto* lhs_mem = lhs.memory_start();
// A scalar tail is needed unless padding is enabled and all_padded holds for
// both expression types.
59 constexpr
bool remainder = !
padding || !all_padded<L_Expr, R_Expr>;
// Upper bound of the vectorized loops: N itself when no tail is needed,
// otherwise prev_multiple(N, IT::size) (presumably N rounded down to a
// multiple of the vector width - confirm against prev_multiple).
61 const size_t last = remainder ? prev_multiple(N, IT::size) : N;
// Path 1: one vector per iteration, written with lhs.stream<vect_impl>.
// NOTE(review): the condition guarding this path is not visible in the listing.
69 for (; i < last; i += IT::size) {
70 lhs.template stream<vect_impl>(
load(rhs, i), i);
// Element-wise tail for indices in [last, N); the loop body is not shown.
73 for (; remainder && i < N; ++i) {
// Path 2: main loop unrolled four times, written with lhs.store<vect_impl>.
// Runs while four full vectors still fit before `last`.
83 for (; i + (IT::size * 3) < last; i += 4 * IT::size) {
84 lhs.template store<vect_impl>(
load(rhs, i + 0 * IT::size), i + 0 * IT::size);
85 lhs.template store<vect_impl>(
load(rhs, i + 1 * IT::size), i + 1 * IT::size);
86 lhs.template store<vect_impl>(
load(rhs, i + 2 * IT::size), i + 2 * IT::size);
87 lhs.template store<vect_impl>(
load(rhs, i + 3 * IT::size), i + 3 * IT::size);
// Leftover full vectors (fewer than four), one per iteration.
90 for (; i < last; i += IT::size) {
91 lhs.template store<vect_impl>(
load(rhs, i), i);
// Element-wise tail for indices in [last, N); the loop body is not shown.
94 for (; remainder && i < N; ++i) {
// Fragment of the functor performing a vectorized compound add (lhs += rhs).
// NOTE(review): the declarations of N, IT and i and the closing braces are on
// listing lines that are not present here.
103 template <vector_mode_t V>
112 template <
typename L_Expr,
typename R_Expr>
// Adds rhs to lhs in place: each vector of lhs is loaded, combined with the
// matching vector of rhs via vect_impl::add, and stored back at the same index.
113 static void apply(L_Expr&& lhs, R_Expr&& rhs) {
// Raw pointer to the start of lhs memory; used by the element-wise tail loop.
118 auto* lhs_mem = lhs.memory_start();
// A scalar tail is needed unless padding is enabled and all_padded holds for
// both expression types.
120 constexpr
bool remainder = !
padding || !all_padded<L_Expr, R_Expr>;
// Upper bound of the vectorized loops: N itself when no tail is needed,
// otherwise prev_multiple(N, IT::size) (presumably N rounded down to a
// multiple of the vector width - confirm against prev_multiple).
122 const size_t last = remainder ? prev_multiple(N, IT::size) : N;
// Main loop unrolled four times; runs while four full vectors still fit
// before `last`.
126 for (; i + (IT::size * 3) < last; i += 4 * IT::size) {
127 lhs.template store<vect_impl>(vect_impl::add(
load(lhs, i + 0 * IT::size),
load(rhs, i + 0 * IT::size)), i + 0 * IT::size);
128 lhs.template store<vect_impl>(vect_impl::add(
load(lhs, i + 1 * IT::size),
load(rhs, i + 1 * IT::size)), i + 1 * IT::size);
129 lhs.template store<vect_impl>(vect_impl::add(
load(lhs, i + 2 * IT::size),
load(rhs, i + 2 * IT::size)), i + 2 * IT::size);
130 lhs.template store<vect_impl>(vect_impl::add(
load(lhs, i + 3 * IT::size),
load(rhs, i + 3 * IT::size)), i + 3 * IT::size);
// Leftover full vectors (fewer than four), one per iteration.
133 for (; i < last; i += IT::size) {
134 lhs.template store<vect_impl>(vect_impl::add(
load(lhs, i),
load(rhs, i)), i);
// Element-wise tail for indices in [last, N), only when `remainder` is true.
137 for (; remainder && i < N; ++i) {
138 lhs_mem[i] += rhs[i];
// Fragment of the functor performing a vectorized compound subtract (lhs -= rhs).
// NOTE(review): the declarations of N, IT and i and the closing braces are on
// listing lines that are not present here.
146 template <vector_mode_t V>
155 template <
typename L_Expr,
typename R_Expr>
// Subtracts rhs from lhs in place: each vector of lhs is loaded, combined with
// the matching vector of rhs via vect_impl::sub (lhs first, rhs second), and
// stored back at the same index.
156 static void apply(L_Expr&& lhs, R_Expr&& rhs) {
// Raw pointer to the start of lhs memory; used by the element-wise tail loop.
161 auto* lhs_mem = lhs.memory_start();
// A scalar tail is needed unless padding is enabled and all_padded holds for
// both expression types.
163 constexpr
bool remainder = !
padding || !all_padded<L_Expr, R_Expr>;
// Upper bound of the vectorized loops: N itself when no tail is needed,
// otherwise prev_multiple(N, IT::size) (presumably N rounded down to a
// multiple of the vector width - confirm against prev_multiple).
165 const size_t last = remainder ? prev_multiple(N, IT::size) : N;
// Main loop unrolled four times; runs while four full vectors still fit
// before `last`.
169 for (; i + (IT::size * 3) < last; i += 4 * IT::size) {
170 lhs.template store<vect_impl>(vect_impl::sub(
load(lhs, i + 0 * IT::size),
load(rhs, i + 0 * IT::size)), i + 0 * IT::size);
171 lhs.template store<vect_impl>(vect_impl::sub(
load(lhs, i + 1 * IT::size),
load(rhs, i + 1 * IT::size)), i + 1 * IT::size);
172 lhs.template store<vect_impl>(vect_impl::sub(
load(lhs, i + 2 * IT::size),
load(rhs, i + 2 * IT::size)), i + 2 * IT::size);
173 lhs.template store<vect_impl>(vect_impl::sub(
load(lhs, i + 3 * IT::size),
load(rhs, i + 3 * IT::size)), i + 3 * IT::size);
// Leftover full vectors (fewer than four), one per iteration.
176 for (; i < last; i += IT::size) {
177 lhs.template store<vect_impl>(vect_impl::sub(
load(lhs, i),
load(rhs, i)), i);
// Element-wise tail for indices in [last, N), only when `remainder` is true.
180 for (; remainder && i < N; ++i) {
181 lhs_mem[i] -= rhs[i];
// Fragment of the functor performing a vectorized compound multiply (lhs *= rhs).
// NOTE(review): the declarations of N, IT and i and the closing braces are on
// listing lines that are not present here.
189 template <vector_mode_t V>
198 template <
typename L_Expr,
typename R_Expr>
// Multiplies lhs by rhs in place, element by element: each vector of lhs is
// loaded, combined with the matching vector of rhs via vect_impl::mul, and
// stored back at the same index.
199 static void apply(L_Expr&& lhs, R_Expr&& rhs) {
// Raw pointer to the start of lhs memory; used by the element-wise tail loop.
204 auto* lhs_mem = lhs.memory_start();
// A scalar tail is needed unless padding is enabled and all_padded holds for
// both expression types.
206 constexpr
bool remainder = !
padding || !all_padded<L_Expr, R_Expr>;
// Upper bound of the vectorized loops: N itself when no tail is needed,
// otherwise prev_multiple(N, IT::size) (presumably N rounded down to a
// multiple of the vector width - confirm against prev_multiple).
208 const size_t last = remainder ? prev_multiple(N, IT::size) : N;
// Main loop unrolled four times; runs while four full vectors still fit
// before `last`.
212 for (; i + (IT::size * 3) < last; i += 4 * IT::size) {
213 lhs.template store<vect_impl>(vect_impl::mul(
load(lhs, i + 0 * IT::size),
load(rhs, i + 0 * IT::size)), i + 0 * IT::size);
214 lhs.template store<vect_impl>(vect_impl::mul(
load(lhs, i + 1 * IT::size),
load(rhs, i + 1 * IT::size)), i + 1 * IT::size);
215 lhs.template store<vect_impl>(vect_impl::mul(
load(lhs, i + 2 * IT::size),
load(rhs, i + 2 * IT::size)), i + 2 * IT::size);
216 lhs.template store<vect_impl>(vect_impl::mul(
load(lhs, i + 3 * IT::size),
load(rhs, i + 3 * IT::size)), i + 3 * IT::size);
// Leftover full vectors (fewer than four), one per iteration.
219 for (; i < last; i += IT::size) {
220 lhs.template store<vect_impl>(vect_impl::mul(
load(lhs, i),
load(rhs, i)), i);
// Element-wise tail for indices in [last, N), only when `remainder` is true.
223 for (; remainder && i < N; ++i) {
224 lhs_mem[i] *= rhs[i];
// Fragment of the functor performing a vectorized compound divide (lhs /= rhs).
// NOTE(review): the declarations of N, IT and i and the closing braces are on
// listing lines that are not present here.
232 template <vector_mode_t V>
241 template <
typename L_Expr,
typename R_Expr>
// Divides lhs by rhs in place, element by element: each vector of lhs is
// loaded, combined with the matching vector of rhs via vect_impl::div (lhs
// first, rhs second), and stored back at the same index.
242 static void apply(L_Expr&& lhs, R_Expr&& rhs) {
// Raw pointer to the start of lhs memory; used by the element-wise tail loop.
247 auto* lhs_mem = lhs.memory_start();
// A scalar tail is needed unless padding is enabled and all_padded holds for
// both expression types.
249 constexpr
bool remainder = !
padding || !all_padded<L_Expr, R_Expr>;
// Upper bound of the vectorized loops: N itself when no tail is needed,
// otherwise prev_multiple(N, IT::size) (presumably N rounded down to a
// multiple of the vector width - confirm against prev_multiple).
251 const size_t last = remainder ? prev_multiple(N, IT::size) : N;
// Main loop unrolled four times; runs while four full vectors still fit
// before `last`.
255 for (; i + (IT::size * 3) < last; i += 4 * IT::size) {
256 lhs.template store<vect_impl>(vect_impl::div(
load(lhs, i + 0 * IT::size),
load(rhs, i + 0 * IT::size)), i + 0 * IT::size);
257 lhs.template store<vect_impl>(vect_impl::div(
load(lhs, i + 1 * IT::size),
load(rhs, i + 1 * IT::size)), i + 1 * IT::size);
258 lhs.template store<vect_impl>(vect_impl::div(
load(lhs, i + 2 * IT::size),
load(rhs, i + 2 * IT::size)), i + 2 * IT::size);
259 lhs.template store<vect_impl>(vect_impl::div(
load(lhs, i + 3 * IT::size),
load(rhs, i + 3 * IT::size)), i + 3 * IT::size);
// Leftover full vectors (fewer than four), one per iteration.
262 for (; i < last; i += IT::size) {
263 lhs.template store<vect_impl>(vect_impl::div(
load(lhs, i),
load(rhs, i)), i);
// Element-wise tail for indices in [last, N), only when `remainder` is true.
266 for (; remainder && i < N; ++i) {
267 lhs_mem[i] /= rhs[i];
constexpr bool padding
Indicates if ETL is allowed to pad matrices and vectors.
Definition: config.hpp:135
Traits to get the intrinsic traits for a vector mode.
Definition: vectorization.hpp:88
Functor for vectorized compound assign div.
Definition: vec_eval_functors.hpp:233
static void apply(L_Expr &&lhs, R_Expr &&rhs)
Compute the vectorized iterations of the loop using aligned store operations.
Definition: vec_eval_functors.hpp:113
Functor for vectorized compound assign add.
Definition: vec_eval_functors.hpp:104
Definition: expression_builder.hpp:699
Common base for vectorized functors.
Definition: vec_eval_functors.hpp:22
auto load(size_t x) const noexcept
Load several elements of the expression at once.
Definition: dyn_matrix_view.hpp:143
constexpr size_t stream_threshold
The threshold at which stream is used.
Definition: threshold.hpp:80
static void apply(L_Expr &&lhs, R_Expr &&rhs)
Compute the vectorized iterations of the loop using aligned store operations.
Definition: vec_eval_functors.hpp:156
Functor for vectorized assign.
Definition: vec_eval_functors.hpp:43
Functor for vectorized compound assign sub.
Definition: vec_eval_functors.hpp:147
constexpr size_t size(const E &expr) noexcept
Returns the size of the given ETL expression.
Definition: helpers.hpp:108
static void apply(L_Expr &&lhs, R_Expr &&rhs)
Compute the vectorized iterations of the loop using aligned store operations.
Definition: vec_eval_functors.hpp:52
static void apply(L_Expr &&lhs, R_Expr &&rhs)
Compute the vectorized iterations of the loop using aligned store operations.
Definition: vec_eval_functors.hpp:242
typename get_vector_impl< V >::type vect_impl
The vectorization type.
Definition: vec_eval_functors.hpp:23
Functor for vectorized compound assign mul.
Definition: vec_eval_functors.hpp:190
static void apply(L_Expr &&lhs, R_Expr &&rhs)
Compute the vectorized iterations of the loop using aligned store operations.
Definition: vec_eval_functors.hpp:199
static auto load(T &&x, size_t i)
Load a vector from the expression x at position i.
Definition: vec_eval_functors.hpp:31
typename decay_traits< E >::value_type value_t
Traits to extract the value type out of an ETL type.
Definition: tmp.hpp:81
constexpr bool streaming
Indicates if ETL is allowed to perform streaming (non-temporal writes).
Definition: config.hpp:130
Vectorization support when no vectorization is enabled.
Definition: no_vectorization.hpp:29