Expression Templates Library (ETL)
tanh.hpp
1 //=======================================================================
2 // Copyright (c) 2014-2023 Baptiste Wicht
3 // Distributed under the terms of the MIT License.
4 // (See accompanying file LICENSE or copy at
5 // http://opensource.org/licenses/MIT)
6 //=======================================================================
7 
8 #pragma once
9 
10 #include "etl/impl/egblas/tanh.hpp"
11 
12 namespace etl {
13 
18 template <typename T>
19 struct tanh_unary_op {
20  static constexpr bool linear = true;
21  static constexpr bool thread_safe = true;
22 
28  template <vector_mode_t V>
29  static constexpr bool vectorizable =
30  (V == vector_mode_t::SSE3 && !is_complex_t<T>) || (V == vector_mode_t::AVX && !is_complex_t<T>) || (intel_compiler && !is_complex_t<T>);
31 
35  template <typename E>
36  static constexpr bool gpu_computable = (is_single_precision_t<T> && impl::egblas::has_stanh) || (is_double_precision_t<T> && impl::egblas::has_dtanh)
37  || (is_complex_single_t<T> && impl::egblas::has_ctanh) || (is_complex_double_t<T> && impl::egblas::has_ztanh);
38 
43  static constexpr int complexity() {
44  return 16;
45  }
46 
50  template <typename V = default_vec>
51  using vec_type = typename V::template vec_type<T>;
52 
58  static constexpr T apply(const T& x) noexcept {
59  return std::tanh(x);
60  }
61 
68  template <typename V = default_vec>
69  static vec_type<V> load(const vec_type<V>& x) noexcept {
70  auto ex = V::exp(x);
71  auto emx = V::exp(V::minus(x));
72  return V::div(V::sub(ex, emx), V::add(ex, emx));
73  }
74 
82  template <typename X, typename Y>
83  static auto gpu_compute_hint(const X& x, Y& y) noexcept {
84  decltype(auto) t1 = smart_gpu_compute_hint(x, y);
85 
86  auto t2 = force_temporary_gpu_dim_only(t1);
87 
88  T alpha(1.0);
89  impl::egblas::tanh(etl::size(y), alpha, t1.gpu_memory(), 1, t2.gpu_memory(), 1);
90 
91  return t2;
92  }
93 
100  template <typename X, typename Y>
101  static Y& gpu_compute(const X& x, Y& y) noexcept {
102  decltype(auto) t1 = select_smart_gpu_compute(x, y);
103 
104  T alpha(1.0);
105  impl::egblas::tanh(etl::size(y), alpha, t1.gpu_memory(), 1, y.gpu_memory(), 1);
106 
107  y.validate_gpu();
108  y.invalidate_cpu();
109 
110  return y;
111  }
112 
117  static std::string desc() noexcept {
118  return "tanh";
119  }
120 };
121 
126 template <typename TT>
127 struct tanh_unary_op<etl::complex<TT>> {
128  using T = etl::complex<TT>;
129 
130  static constexpr bool linear = true;
131  static constexpr bool thread_safe = true;
132 
138  template <vector_mode_t V>
139  static constexpr bool vectorizable = false;
140 
144  template <typename E>
145  static constexpr bool gpu_computable = (is_single_precision_t<T> && impl::egblas::has_stanh) || (is_double_precision_t<T> && impl::egblas::has_dtanh)
146  || (is_complex_single_t<T> && impl::egblas::has_ctanh) || (is_complex_double_t<T> && impl::egblas::has_ztanh);
147 
152  static constexpr int complexity() {
153  return 16;
154  }
155 
161  static constexpr T apply(const T& x) noexcept {
162  return etl::tanh(x);
163  }
164 
172  template <typename X, typename Y>
173  static auto gpu_compute_hint(const X& x, Y& y) noexcept {
174  decltype(auto) t1 = smart_gpu_compute_hint(x, y);
175 
176  auto t2 = force_temporary_gpu_dim_only(t1);
177 
178  T alpha(1.0);
179  impl::egblas::tanh(etl::size(y), alpha, t1.gpu_memory(), 1, t2.gpu_memory(), 1);
180 
181  return t2;
182  }
183 
190  template <typename X, typename Y>
191  static Y& gpu_compute(const X& x, Y& y) noexcept {
192  decltype(auto) t1 = select_smart_gpu_compute(x, y);
193 
194  T alpha(1.0);
195  impl::egblas::tanh(etl::size(y), alpha, t1.gpu_memory(), 1, y.gpu_memory(), 1);
196 
197  y.validate_gpu();
198  y.invalidate_cpu();
199 
200  return y;
201  }
202 
207  static std::string desc() noexcept {
208  return "tanh";
209  }
210 };
211 
212 } //end of namespace etl
static Y & gpu_compute(const X &x, Y &y) noexcept
Compute the result of the operation using the GPU.
Definition: tanh.hpp:191
Complex number implementation.
Definition: complex.hpp:31
EGBLAS wrappers for the tanh operation.
static constexpr bool vectorizable
Indicates if the expression is vectorizable using the given vector mode.
Definition: tanh.hpp:29
Unary operation computing the hyperbolic tangent.
Definition: tanh.hpp:19
decltype(auto) select_smart_gpu_compute(X &x, Y &y)
Compute the expression into a representation that is GPU up to date and possibly store this represent...
Definition: helpers.hpp:434
static std::string desc() noexcept
Returns a textual representation of the operator.
Definition: tanh.hpp:207
static std::string desc() noexcept
Returns a textual representation of the operator.
Definition: tanh.hpp:117
SSE3 is the max vectorization available.
static Y & gpu_compute(const X &x, Y &y) noexcept
Compute the result of the operation using the GPU.
Definition: tanh.hpp:101
constexpr bool intel_compiler
Indicates if the project is compiled with the Intel compiler.
Definition: config.hpp:225
Root namespace for the ETL library.
Definition: adapter.hpp:15
auto tanh(E &&value) -> detail::unary_helper< E, tanh_unary_op >
Apply hyperbolic tangent on each value of the given expression.
Definition: function_expression_builder.hpp:124
static constexpr T apply(const T &x) noexcept
Apply the unary operator on x.
Definition: tanh.hpp:58
static constexpr int complexity()
Estimate the complexity of operator.
Definition: tanh.hpp:152
static constexpr bool thread_safe
Indicates if the operator is thread safe or not.
Definition: tanh.hpp:21
decltype(auto) force_temporary_gpu_dim_only(E &&expr)
Force a temporary out of the expression, without copying its content.
Definition: temporary.hpp:223
constexpr size_t size(const E &expr) noexcept
Returns the size of the given ETL expression.
Definition: helpers.hpp:108
static constexpr bool linear
Indicates if the operator is linear.
Definition: tanh.hpp:20
auto exp(E &&value) -> detail::unary_helper< E, exp_unary_op >
Apply exponential on each value of the given expression.
Definition: function_expression_builder.hpp:154
static vec_type< V > load(const vec_type< V > &x) noexcept
Compute several applications of the operator at a time.
Definition: tanh.hpp:69
AVX is the max vectorization available.
typename V::template vec_type< T > vec_type
Definition: tanh.hpp:51
static constexpr bool gpu_computable
Indicates if the operator can be computed on GPU.
Definition: tanh.hpp:36
decltype(auto) smart_gpu_compute_hint(E &expr, Y &y)
Compute the expression into a representation that is GPU up to date.
Definition: helpers.hpp:368
static constexpr T apply(const T &x) noexcept
Apply the unary operator on x.
Definition: tanh.hpp:161
static auto gpu_compute_hint(const X &x, Y &y) noexcept
Compute the result of the operation using the GPU.
Definition: tanh.hpp:83
static auto gpu_compute_hint(const X &x, Y &y) noexcept
Compute the result of the operation using the GPU.
Definition: tanh.hpp:173
static constexpr int complexity()
Estimate the complexity of operator.
Definition: tanh.hpp:43