Expression Templates Library (ETL)
max.hpp
1 //=======================================================================
2 // Copyright (c) 2014-2023 Baptiste Wicht
3 // Distributed under the terms of the MIT License.
4 // (See accompanying file LICENSE or copy at
5 // http://opensource.org/licenses/MIT)
6 //=======================================================================
7 
8 #pragma once
9 
10 #include "etl/impl/egblas/max.hpp"
11 
12 namespace etl {
13 
17 template <typename T, typename E>
18 struct max_binary_op {
19  static constexpr bool linear = true;
20  static constexpr bool thread_safe = true;
21  static constexpr bool desc_func = true;
22 
28  template <vector_mode_t V>
29  static constexpr bool vectorizable = !is_complex_t<T>;
30 
34  template <typename L, typename R>
35  static constexpr bool gpu_computable = (all_single_precision<L, R> && impl::egblas::has_smax3) || (all_double_precision<L, R> && impl::egblas::has_dmax3)
36  || (all_complex_single_precision<L, R> && impl::egblas::has_cmax3)
37  || (all_complex_double_precision<L, R> && impl::egblas::has_zmax3);
38 
43  static constexpr int complexity() {
44  return 1;
45  }
46 
50  template <typename V = default_vec>
51  using vec_type = typename V::template vec_type<T>;
52 
59  static constexpr T apply(const T& x, E value) noexcept {
60  if (x > value) {
61  return x;
62  } else {
63  return value;
64  }
65  }
66 
74  template <typename V = default_vec>
75  static vec_type<V> load(const vec_type<V>& lhs, const vec_type<V>& rhs) noexcept {
76  return V::max(lhs, rhs);
77  }
78 
86  template <typename X, typename Y, typename YY>
87  static auto gpu_compute_hint(const X& x, const Y& y, YY&& yy) noexcept {
88  decltype(auto) t1 = smart_gpu_compute_hint(x, yy);
89  decltype(auto) t2 = smart_gpu_compute_hint(y, yy);
90 
91  constexpr size_t inca = gpu_inc<decltype(x)>;
92  constexpr size_t incb = gpu_inc<decltype(y)>;
93 
94  auto t3 = force_temporary_gpu_dim_only(t1);
95 
96  T alpha(1);
97  impl::egblas::max(etl::size(yy), alpha, t1.gpu_memory(), inca, t2.gpu_memory(), incb, t3.gpu_memory(), 1);
98 
99  return t3;
100  }
101 
108  template <typename X, typename Y, typename YY>
109  static YY& gpu_compute(const X& x, const Y& y, YY& yy) noexcept {
110  decltype(auto) t1 = smart_gpu_compute_hint(x, yy);
111  decltype(auto) t2 = smart_gpu_compute_hint(y, yy);
112 
113  constexpr size_t inca = gpu_inc<decltype(x)>;
114  constexpr size_t incb = gpu_inc<decltype(y)>;
115 
116  T alpha(1);
117  impl::egblas::max(etl::size(yy), alpha, t1.gpu_memory(), inca, t2.gpu_memory(), incb, yy.gpu_memory(), 1);
118 
119  yy.validate_gpu();
120  yy.invalidate_cpu();
121 
122  return yy;
123  }
124 
129  static std::string desc() noexcept {
130  return "max";
131  }
132 };
133 
134 } //end of namespace etl
typename V::template vec_type< T > vec_type
Definition: max.hpp:51
Binary operator for scalar maximum.
Definition: max.hpp:18
static constexpr bool thread_safe
Indicates if the operator is thread safe or not.
Definition: max.hpp:20
static constexpr T apply(const T &x, E value) noexcept
Apply the binary operator on x and value.
Definition: max.hpp:59
Root namespace for the ETL library.
Definition: adapter.hpp:15
static vec_type< V > load(const vec_type< V > &lhs, const vec_type< V > &rhs) noexcept
Compute several applications of the operator at a time.
Definition: max.hpp:75
static constexpr bool gpu_computable
Indicates if the operator can be computed on GPU.
Definition: max.hpp:35
static constexpr bool desc_func
Indicates if the description must be printed as function.
Definition: max.hpp:21
static constexpr bool vectorizable
Indicates if the expression is vectorizable using the given vector mode.
Definition: max.hpp:29
static std::string desc() noexcept
Returns a textual representation of the operator.
Definition: max.hpp:129
static YY & gpu_compute(const X &x, const Y &y, YY &yy) noexcept
Compute the result of the operation using the GPU.
Definition: max.hpp:109
decltype(auto) force_temporary_gpu_dim_only(E &&expr)
Force a temporary out of the expression, without copying its content.
Definition: temporary.hpp:223
constexpr size_t size(const E &expr) noexcept
Returns the size of the given ETL expression.
Definition: helpers.hpp:108
EGBLAS wrappers for the max operation.
static constexpr int complexity()
Estimate the complexity of operator.
Definition: max.hpp:43
static constexpr bool linear
Indicates if the operator is linear or not.
Definition: max.hpp:19
decltype(auto) smart_gpu_compute_hint(E &expr, Y &y)
Compute the expression into a representation that is GPU up to date.
Definition: helpers.hpp:368
static auto gpu_compute_hint(const X &x, const Y &y, YY &&yy) noexcept
Compute the result of the operation using the GPU.
Definition: max.hpp:87