// NOTE: the `struct ... {` headers and closing `};` of both declarations are
// absent from this listing (only original lines 17, 19, 22 and 24 survive).
// Presumably this is is_axpy_left_left_impl -- TODO confirm against plus.hpp.
//
// Primary template: `value` is false for an arbitrary pair of operand types.
17 template <
typename L,
typename R>
19 static constexpr
bool value =
false;
// Partial specialization (its pattern line is not visible). The parameter names
// suggest the left operand is a binary_expr whose non-scalar side is RightExpr
// -- unverified. When it matches, the trait holds only if R is not a scalar.
22 template <
typename T0,
typename T1,
typename T2,
typename RightExpr,
typename R>
24 static constexpr
bool value = !is_scalar<R>;
// NOTE: struct headers / closing braces are missing from this listing.
// Presumably is_axpy_left_right_impl -- TODO confirm against plus.hpp.
//
// Primary template: `value` is false unless the specialization below matches.
29 template <
typename L,
typename R>
31 static constexpr
bool value =
false;
// Partial specialization (pattern line not visible). The parameter names
// suggest the left operand is a binary_expr whose non-scalar side is LeftExpr
// -- unverified. When it matches, the trait holds only if R is not a scalar.
34 template <
typename T0,
typename T1,
typename T2,
typename LeftExpr,
typename R>
36 static constexpr
bool value = !is_scalar<R>;
// NOTE: struct headers / closing braces are missing from this listing.
// Presumably is_axpy_right_left_impl -- TODO confirm against plus.hpp.
//
// Primary template: `value` is false unless the specialization below matches.
41 template <
typename L,
typename R>
43 static constexpr
bool value =
false;
// Partial specialization (pattern line not visible). Here the free operand is
// L, so the matched binary_expr is presumably the right operand, with RightExpr
// as its non-scalar side -- unverified. Both L and RightExpr must be
// non-scalar for the trait to hold.
46 template <
typename T0,
typename T1,
typename T2,
typename RightExpr,
typename L>
48 static constexpr
bool value = !is_scalar<L> && !is_scalar<RightExpr>;
// NOTE: struct headers / closing braces are missing from this listing.
// Presumably is_axpy_right_right_impl -- TODO confirm against plus.hpp.
//
// Primary template: `value` is false unless the specialization below matches.
53 template <
typename L,
typename R>
55 static constexpr
bool value =
false;
// Partial specialization (pattern line not visible). The free operand is L, so
// the matched binary_expr is presumably the right operand, with LeftExpr as its
// non-scalar side -- unverified.
//
// NOTE(review): only LeftExpr is tested here. The sibling specialization whose
// value is `!is_scalar<L> && !is_scalar<RightExpr>` also rejects a scalar L,
// and the `is_axpy_right_right` branch of gpu_compute adds no scalar guard of
// its own. Looks like `!is_scalar<L>` may be missing -- confirm.
58 template <
typename T0,
typename T1,
typename T2,
typename LeftExpr,
typename L>
60 static constexpr
bool value = !is_scalar<LeftExpr>;
// NOTE: struct headers / closing braces are missing from this listing.
// Presumably is_axpby_left_left_impl -- TODO confirm against plus.hpp.
//
// Primary template: `value` is false unless the specialization below matches.
65 template <
typename L,
typename R>
67 static constexpr
bool value =
false;
// Partial specialization. Only the second pattern argument is visible: the
// right operand is a binary_expr using mul_binary_op with an etl::scalar on its
// left side and RRightExpr on its right side. The pattern for the left operand
// (original line 71) is missing; the LRightExpr name suggests the same layout
// -- unverified.
70 template <
typename LT1,
typename LT2,
typename LT3,
typename LRightExpr,
typename RT1,
typename RT2,
typename RT3,
typename RRightExpr>
72 binary_expr<RT1, etl::scalar<RT2>, etl::mul_binary_op<RT3>, RRightExpr>> {
// Unconditionally true once the pattern matches.
73 static constexpr
bool value =
true;
// NOTE: struct headers / closing braces are missing from this listing.
// Presumably is_axpby_left_right_impl -- TODO confirm against plus.hpp.
//
// Primary template: `value` is false unless the specialization below matches.
78 template <
typename L,
typename R>
80 static constexpr
bool value =
false;
// Partial specialization. Only the second pattern argument is visible: the
// right operand is a binary_expr using mul_binary_op with RLeftExpr on its left
// side and an etl::scalar on its right side. The pattern for the left operand
// (original line 84) is missing; the LRightExpr name suggests its scalar sits
// on the left -- unverified.
83 template <
typename LT1,
typename LT2,
typename LT3,
typename LRightExpr,
typename RT1,
typename RT2,
typename RT3,
typename RLeftExpr>
85 binary_expr<RT1, RLeftExpr, etl::mul_binary_op<RT3>, etl::scalar<RT2>>> {
// Unconditionally true once the pattern matches.
86 static constexpr
bool value =
true;
// NOTE: struct headers / closing braces are missing from this listing.
// Presumably is_axpby_right_left_impl -- TODO confirm against plus.hpp.
//
// Primary template: `value` is false unless the specialization below matches.
91 template <
typename L,
typename R>
93 static constexpr
bool value =
false;
// Partial specialization. Only the second pattern argument is visible: the
// right operand is a binary_expr using mul_binary_op with an etl::scalar on its
// left side and RRightExpr on its right side. The pattern for the left operand
// (original line 97) is missing; the LLeftExpr name suggests its scalar sits on
// the right -- unverified.
96 template <
typename LT1,
typename LT2,
typename LT3,
typename LLeftExpr,
typename RT1,
typename RT2,
typename RT3,
typename RRightExpr>
98 binary_expr<RT1, etl::scalar<RT2>, etl::mul_binary_op<RT3>, RRightExpr>> {
// Unconditionally true once the pattern matches.
99 static constexpr
bool value =
true;
// NOTE: struct headers / closing braces are missing from this listing.
// Presumably is_axpby_right_right_impl -- TODO confirm against plus.hpp.
//
// Primary template: `value` is false unless the specialization below matches.
104 template <
typename L,
typename R>
106 static constexpr
bool value =
false;
// Partial specialization. Only the second pattern argument is visible: the
// right operand is a binary_expr using mul_binary_op with RLeftExpr on its left
// side and an etl::scalar on its right side. The pattern for the left operand
// (original line 110) is missing; the LLeftExpr name suggests the same layout
// -- unverified.
109 template <
typename LT1,
typename LT2,
typename LT3,
typename LLeftExpr,
typename RT1,
typename RT2,
typename RT3,
typename RLeftExpr>
111 binary_expr<RT1, RLeftExpr, etl::mul_binary_op<RT3>, etl::scalar<RT2>>> {
// Unconditionally true once the pattern matches.
112 static constexpr
bool value =
true;
// NOTE: the declarations that followed the next four template heads (original
// lines 118, 121, 124, 127) are missing from this listing. Presumably they are
// the is_axpby_left_left / left_right / right_left / right_right variable
// templates combined below -- TODO confirm.
117 template <
typename L,
typename R>
120 template <
typename L,
typename R>
123 template <
typename L,
typename R>
126 template <
typename L,
typename R>
// True when the operand pair (L, R) matches any one of the four axpby shapes.
129 template <
typename L,
typename R>
130 static constexpr
bool is_axpby = is_axpby_left_left<L, R> || is_axpby_right_right<L, R> || is_axpby_left_right<L, R> || is_axpby_right_left<L, R>;
// NOTE: as above, the declarations for the next four template heads (original
// lines 133, 136, 139, 142) are missing. Presumably the four is_axpy_*
// variable templates combined below -- TODO confirm.
132 template <
typename L,
typename R>
135 template <
typename L,
typename R>
138 template <
typename L,
typename R>
141 template <
typename L,
typename R>
// True when the operand pair (L, R) matches any one of the four axpy shapes.
144 template <
typename L,
typename R>
145 static constexpr
bool is_axpy = is_axpy_left_left<L, R> || is_axpy_left_right<L, R> || is_axpy_right_left<L, R> || is_axpy_right_right<L, R>;
// True when the addition matches either an axpy or an axpby shape. gpu_compute
// uses the negation of this to select its plain (non-special) branch.
147 template <
typename L,
typename R>
148 static constexpr
bool is_special_plus = is_axpy<L, R> || is_axpby<L, R>;
// Head of a class template parameterized on T. The `struct` line itself
// (original line 154) is missing from this listing; presumably the addition
// operator type of plus.hpp -- TODO confirm the name.
153 template <
typename T>
// Compile-time flag, always true. NOTE(review): presumably marks the operator
// as linear -- the exact meaning is defined by the consumers of this flag.
155 static constexpr
bool linear =
true;
// Compile-time flag, always true. NOTE(review): presumably advertises that the
// operator can be used from several threads -- confirm against the consumers.
156 static constexpr
bool thread_safe =
true;
// Compile-time flag, always false. NOTE(review): presumably tells the printing
// code that desc() is not a function-style name -- confirm.
157 static constexpr
bool desc_func =
false;
// Vectorization flag, templated on the vector mode V. The value is true for
// every V, i.e. it does not depend on the mode.
164 template <vector_mode_t V>
165 static constexpr
bool vectorizable =
true;
// Compile-time test of whether the addition of operand types L and R can be
// computed on the GPU for the enclosing template's value type T.
//
// Two cases are accepted:
//  * neither operand is a scalar, and egblas provides BOTH the axpy_3 and the
//    axpby_3 kernel for T's precision (s/d/c/z);
//  * exactly one operand is a scalar (is_scalar<L> != is_scalar<R>), and egblas
//    provides the scalar_add kernel for T's precision.
// When both operands are scalars neither case applies and the result is false.
170 template <
typename L,
typename R>
171 static constexpr
bool gpu_computable =
172 ((!is_scalar<L> && !is_scalar<R>)&&((is_single_precision_t<T> && impl::egblas::has_saxpy_3 && impl::egblas::has_saxpby_3)
173 || (is_double_precision_t<T> && impl::egblas::has_daxpy_3 && impl::egblas::has_daxpby_3)
174 || (is_complex_single_t<T> && impl::egblas::has_caxpy_3 && impl::egblas::has_caxpby_3)
175 || (is_complex_double_t<T> && impl::egblas::has_zaxpy_3 && impl::egblas::has_zaxpby_3)))
176 || ((is_scalar<L> != is_scalar<R>)&&((is_single_precision_t<T> && impl::egblas::has_scalar_sadd)
177 || (is_double_precision_t<T> && impl::egblas::has_scalar_dadd)
178 || (is_complex_single_t<T> && impl::egblas::has_scalar_cadd)
179 || (is_complex_double_t<T> && impl::egblas::has_scalar_zadd)));
// Template head for a declaration on original line 193, which is missing from
// this listing. Presumably the `vec_type` alias for vector implementation V
// -- TODO confirm.
192 template <
typename V = default_vec>
// Applies the operator to the two operands lhs and rhs and returns a T by
// value. The body (original lines 202+) is not visible in this listing.
201 static constexpr T
apply(
const T& lhs,
const T& rhs) noexcept {
// Template head for a vectorized overload whose signature line (original line
// 213) is missing; its visible body forwards both operands to V::add.
212 template <
typename V = default_vec>
214 return V::add(lhs, rhs);
// Fragment of a function whose signature (original line 226) is missing;
// presumably gpu_compute_hint(lhs, rhs, y) -- TODO confirm.
// The visible statement delegates to gpu_compute, passing `t3` as the output.
// `t3` is defined on a line that is not part of this listing.
225 template <
typename L,
typename R,
typename Y>
228 gpu_compute(lhs, rhs, t3);
// Computes lhs + rhs on the GPU into yy, choosing an egblas kernel at compile
// time from the shape of the operand types.
//
// lhs, rhs: the two operands of the addition.
// yy:       output expression; its GPU memory receives the result and its size
//           (etl::size(yy)) is the element count passed to every kernel.
// Returns Y& (the return statement is outside the visible lines).
//
// NOTE: this listing is incomplete. The statements that define `x`, `y`,
// `alpha` and `s` (gaps in the original numbering, e.g. 245-248, 343-349,
// 352-355) and the closing braces of the last branch and of the function are
// missing.
240 template <
typename L,
typename R,
typename Y>
241 static Y&
gpu_compute(
const L& lhs,
const R& rhs, Y& yy) noexcept {
// axpy, scalar on the left side of lhs: lhs_lhs carries the scalar value,
// lhs_rhs supplies the x stride and rhs the y stride. The branch is
// additionally guarded so that neither operand is itself a scalar.
242 if constexpr (!is_scalar<L> && !is_scalar<R> && is_axpy_left_left<L, R>) {
243 auto& lhs_lhs = lhs.get_lhs();
244 auto& lhs_rhs = lhs.get_rhs();
249 constexpr
auto incx = gpu_inc<decltype(lhs_rhs)>;
250 constexpr
auto incy = gpu_inc<decltype(rhs)>;
252 impl::egblas::axpy_3(
etl::size(yy), lhs_lhs.value, x.gpu_memory(), incx, y.gpu_memory(), incy, yy.gpu_memory(), 1);
253 }
// axpy, scalar on the right side of lhs: lhs_rhs carries the scalar value,
// lhs_lhs supplies the x stride.
else if constexpr (!is_scalar<L> && !is_scalar<R> && is_axpy_left_right<L, R>) {
254 auto& lhs_lhs = lhs.get_lhs();
255 auto& lhs_rhs = lhs.get_rhs();
260 constexpr
auto incx = gpu_inc<decltype(lhs_lhs)>;
261 constexpr
auto incy = gpu_inc<decltype(rhs)>;
263 impl::egblas::axpy_3(
etl::size(yy), lhs_rhs.value, x.gpu_memory(), incx, y.gpu_memory(), incy, yy.gpu_memory(), 1);
264 }
// axpy, scalar on the left side of rhs: rhs_lhs carries the scalar value,
// rhs_rhs supplies the x stride and lhs the y stride. There is no scalar guard
// here; this branch relies on the trait alone.
else if constexpr (is_axpy_right_left<L, R>) {
265 auto& rhs_lhs = rhs.get_lhs();
266 auto& rhs_rhs = rhs.get_rhs();
271 constexpr
auto incx = gpu_inc<decltype(rhs_rhs)>;
272 constexpr
auto incy = gpu_inc<decltype(lhs)>;
274 impl::egblas::axpy_3(
etl::size(yy), rhs_lhs.value, x.gpu_memory(), incx, y.gpu_memory(), incy, yy.gpu_memory(), 1);
275 }
// axpy, scalar on the right side of rhs: rhs_rhs carries the scalar value,
// rhs_lhs supplies the x stride and lhs the y stride. As in the previous
// branch, there is no scalar guard besides the trait.
else if constexpr (is_axpy_right_right<L, R>) {
276 auto& rhs_lhs = rhs.get_lhs();
277 auto& rhs_rhs = rhs.get_rhs();
282 constexpr
auto incx = gpu_inc<decltype(rhs_lhs)>;
283 constexpr
auto incy = gpu_inc<decltype(lhs)>;
285 impl::egblas::axpy_3(
etl::size(yy), rhs_rhs.value, x.gpu_memory(), incx, y.gpu_memory(), incy, yy.gpu_memory(), 1);
286 }
// axpby, scalars on the left of both operands: the two scalar values come from
// lhs_lhs and rhs_lhs, the strides from lhs_rhs and rhs_rhs.
else if constexpr (is_axpby_left_left<L, R>) {
287 auto& lhs_lhs = lhs.get_lhs();
288 auto& lhs_rhs = lhs.get_rhs();
290 auto& rhs_lhs = rhs.get_lhs();
291 auto& rhs_rhs = rhs.get_rhs();
296 constexpr
auto incx = gpu_inc<decltype(lhs_rhs)>;
297 constexpr
auto incy = gpu_inc<decltype(rhs_rhs)>;
299 impl::egblas::axpby_3(
etl::size(yy), lhs_lhs.value, x.gpu_memory(), incx, rhs_lhs.value, y.gpu_memory(), incy, yy.gpu_memory(), 1);
300 }
// axpby, scalar on the left of lhs and on the right of rhs: scalar values from
// lhs_lhs and rhs_rhs, strides from lhs_rhs and rhs_lhs.
else if constexpr (is_axpby_left_right<L, R>) {
301 auto& lhs_lhs = lhs.get_lhs();
302 auto& lhs_rhs = lhs.get_rhs();
304 auto& rhs_lhs = rhs.get_lhs();
305 auto& rhs_rhs = rhs.get_rhs();
310 constexpr
auto incx = gpu_inc<decltype(lhs_rhs)>;
311 constexpr
auto incy = gpu_inc<decltype(rhs_lhs)>;
313 impl::egblas::axpby_3(
etl::size(yy), lhs_lhs.value, x.gpu_memory(), incx, rhs_rhs.value, y.gpu_memory(), incy, yy.gpu_memory(), 1);
314 }
// axpby, scalar on the right of lhs and on the left of rhs: scalar values from
// lhs_rhs and rhs_lhs, strides from lhs_lhs and rhs_rhs.
else if constexpr (is_axpby_right_left<L, R>) {
315 auto& lhs_lhs = lhs.get_lhs();
316 auto& lhs_rhs = lhs.get_rhs();
318 auto& rhs_lhs = rhs.get_lhs();
319 auto& rhs_rhs = rhs.get_rhs();
324 constexpr
auto incx = gpu_inc<decltype(lhs_lhs)>;
325 constexpr
auto incy = gpu_inc<decltype(rhs_rhs)>;
327 impl::egblas::axpby_3(
etl::size(yy), lhs_rhs.value, x.gpu_memory(), incx, rhs_lhs.value, y.gpu_memory(), incy, yy.gpu_memory(), 1);
328 }
// axpby, scalars on the right of both operands: scalar values from lhs_rhs and
// rhs_rhs, strides from lhs_lhs and rhs_lhs.
else if constexpr (is_axpby_right_right<L, R>) {
329 auto& lhs_lhs = lhs.get_lhs();
330 auto& lhs_rhs = lhs.get_rhs();
332 auto& rhs_lhs = rhs.get_lhs();
333 auto& rhs_rhs = rhs.get_rhs();
338 constexpr
auto incx = gpu_inc<decltype(lhs_lhs)>;
339 constexpr
auto incy = gpu_inc<decltype(rhs_lhs)>;
341 impl::egblas::axpby_3(
etl::size(yy), lhs_rhs.value, x.gpu_memory(), incx, rhs_rhs.value, y.gpu_memory(), incy, yy.gpu_memory(), 1);
342 }
// Plain addition of two non-scalar operands that match no special shape: the
// same axpy_3 kernel is used with `alpha`, whose definition is not visible
// (presumably the value 1 -- TODO confirm).
else if constexpr (!is_scalar<L> && !is_scalar<R> && !is_special_plus<L, R>) {
346 constexpr
auto incx = gpu_inc<decltype(lhs)>;
347 constexpr
auto incy = gpu_inc<decltype(rhs)>;
350 impl::egblas::axpy_3(
etl::size(yy), alpha, x.gpu_memory(), incx, y.gpu_memory(), incy, yy.gpu_memory(), 1);
351 }
// scalar + expression: scalar_add is applied to yy's GPU memory with `s`.
// The lines that define `s` and prepare yy are not visible in this listing.
else if constexpr (is_scalar<L> && !is_scalar<R>) {
356 impl::egblas::scalar_add(yy.gpu_memory(),
etl::size(yy), 1,
s);
357 }
// expression + scalar: mirror image of the previous branch.
else if constexpr (!is_scalar<L> && is_scalar<R>) {
362 impl::egblas::scalar_add(yy.gpu_memory(),
etl::size(yy), 1,
s);
// Returns a std::string describing the operator. The body, and therefore the
// exact text returned, is not part of this listing.
375 static std::string
desc() noexcept {
auto s(T &&value)
Force the evaluation of the given expression.
Definition: stop.hpp:18
typename V::template vec_type< T > vec_type
Definition: plus.hpp:193
static Y & gpu_compute(const L &lhs, const R &rhs, Y &yy) noexcept
Compute the result of the operation using the GPU.
Definition: plus.hpp:241
static constexpr T apply(const T &lhs, const T &rhs) noexcept
Apply the binary operator on lhs and rhs.
Definition: plus.hpp:201
A binary expression.
Definition: binary_expr.hpp:18
auto load(size_t x) const noexcept
Load several elements of the expression at once.
Definition: dyn_matrix_view.hpp:143
Root namespace for the ETL library.
Definition: adapter.hpp:15
static std::string desc() noexcept
Returns a textual representation of the operator.
Definition: plus.hpp:375
decltype(auto) force_temporary_gpu_dim_only(E &&expr)
Force a temporary out of the expression, without copying its content.
Definition: temporary.hpp:223
Represents a scalar value.
Definition: concepts_base.hpp:19
static constexpr int complexity()
Estimate the complexity of the operator.
Definition: plus.hpp:185
constexpr size_t size(const E &expr) noexcept
Returns the size of the given ETL expression.
Definition: helpers.hpp:108
Binary operator for scalar multiplication.
Definition: div.hpp:13
static auto gpu_compute_hint(const L &lhs, const R &rhs, Y &y) noexcept
Compute the result of the operation using the GPU.
Definition: plus.hpp:226
decltype(auto) smart_gpu_compute_hint(E &expr, Y &y)
Compute the expression into a representation that is GPU up to date.
Definition: helpers.hpp:368
typename decay_traits< E >::value_type value_t
Traits to extract the value type out of an ETL type.
Definition: tmp.hpp:81
decltype(auto) smart_gpu_compute(X &x, Y &y)
Compute the expression into a representation that is GPU up to date and store this representation in ...
Definition: helpers.hpp:397
Binary operator for scalar addition.
Definition: plus.hpp:154