Expression Templates Library (ETL)
sigmoid.hpp
1 //=======================================================================
2 // Copyright (c) 2014-2023 Baptiste Wicht
3 // Distributed under the terms of the MIT License.
4 // (See accompanying file LICENSE or copy at
5 // http://opensource.org/licenses/MIT)
6 //=======================================================================
7 
8 #pragma once
9 
12 
13 namespace etl {
14 
19 template <typename T>
/*!
 * \brief Indicates if the operator is linear.
 */
21  static constexpr bool linear = true;
/*!
 * \brief Indicates if the operator is thread safe or not.
 */
22  static constexpr bool thread_safe = true;
23 
29  template <vector_mode_t V>
30  static constexpr bool vectorizable =
31  (V == vector_mode_t::SSE3 && !is_complex_t<T>) || (V == vector_mode_t::AVX && !is_complex_t<T>) || (intel_compiler && !is_complex_t<T>);
32 
/*!
 * \brief Indicates if the operator can be computed on GPU.
 *
 * True only when is_floating<E> holds and cudnn_enabled is set.
 *
 * \tparam E The type of the expression the operator is applied to
 *           (NOTE(review): inferred from the parameter name, confirm)
 */
36  template <typename E>
37  static constexpr bool gpu_computable = is_floating<E>&& cudnn_enabled;
38 
/*!
 * \brief Estimate the complexity of the operator.
 * \return A fixed estimate of the cost of one application of the operator
 */
static constexpr int complexity() {
    constexpr int estimated_cost = 12;
    return estimated_cost;
}
46 
/*!
 * \brief The vector type exposed by the vector implementation V for
 * the value type T.
 * \tparam V The vector implementation (default_vec when not specified)
 */
50  template <typename V = default_vec>
51  using vec_type = typename V::template vec_type<T>;
52 
58  static constexpr T apply(const T& x) {
59  return math::logistic_sigmoid(x);
60  }
61 
68  template <typename V = default_vec>
69  static vec_type<V> load(const vec_type<V>& x) noexcept {
70  auto one = V::set(T(1));
71 
72  auto t1 = V::minus(x);
73  auto t2 = V::exp(t1);
74  auto t3 = V::add(one, t2);
75  return V::div(one, t3);
76  }
77 
85  template <typename X, typename Y>
86  static auto gpu_compute_hint(const X& x, Y& y) noexcept {
87  decltype(auto) t1 = smart_gpu_compute_hint(x, y);
88 
89  auto t2 = force_temporary_gpu_dim_only(t1);
90 
91  auto n = etl::size(x);
92 
93  if (n < 8 * 1024 * 1024 && is_single_precision<Y> && impl::egblas::has_ssigmoid) {
94  impl::egblas::sigmoid(n, 1, t1.gpu_memory(), 1, t2.gpu_memory(), 1);
95  } else if (n < 1024 * 1024 && is_double_precision<Y> && impl::egblas::has_dsigmoid) {
96  impl::egblas::sigmoid(n, 1, t1.gpu_memory(), 1, t2.gpu_memory(), 1);
97  } else {
98  impl::cudnn::sigmoid(t1, t2);
99  }
100 
101  return t2;
102  }
109  template <typename X, typename Y>
110  static Y& gpu_compute(const X& x, Y& y) noexcept {
111  decltype(auto) t1 = select_smart_gpu_compute(x, y);
112 
113  auto n = etl::size(x);
114 
115  if (n < 8 * 1024 * 1024 && is_single_precision<Y> && impl::egblas::has_ssigmoid) {
116  impl::egblas::sigmoid(n, 1, t1.gpu_memory(), 1, y.gpu_memory(), 1);
117 
118  y.validate_gpu();
119  y.invalidate_cpu();
120  } else if (n < 1024 * 1024 && is_double_precision<Y> && impl::egblas::has_dsigmoid) {
121  impl::egblas::sigmoid(n, 1, t1.gpu_memory(), 1, y.gpu_memory(), 1);
122 
123  y.validate_gpu();
124  y.invalidate_cpu();
125  } else {
126  impl::cudnn::sigmoid(t1, y);
127  }
128 
129  return y;
130  }
131 
136  static std::string desc() noexcept {
137  return "sigmoid";
138  }
139 };
140 
145 template <typename T>
/*!
 * \brief Indicates if the operator is linear.
 */
147  static constexpr bool linear = true;
/*!
 * \brief Indicates if the operator is thread safe or not.
 */
148  static constexpr bool thread_safe = true;
149 
/*!
 * \brief Indicates if the expression is vectorizable using the
 * given vector mode. Always false for this operator.
 * \tparam V The vector mode
 */
155  template <vector_mode_t V>
156  static constexpr bool vectorizable = false;
157 
/*!
 * \brief Indicates if the operator can be computed on GPU.
 * Always false for this operator.
 */
161  template <typename E>
162  static constexpr bool gpu_computable = false;
163 
/*!
 * \brief Estimate the complexity of the operator.
 * \return A fixed estimate of the cost of one application of the operator
 */
static constexpr int complexity() {
    constexpr int estimated_cost = 4;
    return estimated_cost;
}
171 
177  static T apply(const T& v) {
178  auto x = 0.5 * v;
179 
180  T z;
181  if (x >= 0) {
182  if (x < 1.7) {
183  z = (1.5 * x / (1 + x));
184  } else if (x < 3) {
185  z = (0.935409070603099 + 0.0458812946797165 * (x - 1.7));
186  } else {
187  z = 0.99505475368673;
188  }
189  } else {
190  auto xx = -x;
191  if (xx < 1.7) {
192  z = (1.5 * xx / (1 + xx));
193  } else if (xx < 3) {
194  z = (0.935409070603099 + 0.0458812946797165 * (xx - 1.7));
195  } else {
196  z = 0.99505475368673;
197  }
198  z = -z;
199  }
200 
201  return 0.5 * (z + 1.0);
202  }
203 
208  static std::string desc() noexcept {
209  return "fast_sigmoid";
210  }
211 };
212 
213 } //end of namespace etl
static constexpr bool thread_safe
Indicates if the operator is thread safe or not.
Definition: sigmoid.hpp:22
static constexpr int complexity()
Estimate the complexity of operator.
Definition: sigmoid.hpp:43
static constexpr bool gpu_computable
Indicates if the operator can be computed on GPU.
Definition: sigmoid.hpp:37
Unary operation computing a fast sigmoid approximation.
Definition: sigmoid.hpp:146
EGBLAS wrappers for the sigmoid operation.
decltype(auto) select_smart_gpu_compute(X &x, Y &y)
Compute the expression into a representation that is GPU up to date and possibly store this represent...
Definition: helpers.hpp:434
static constexpr T apply(const T &x)
Apply the unary operator on x.
Definition: sigmoid.hpp:58
static T apply(const T &v)
Apply the unary operator on v.
Definition: sigmoid.hpp:177
Unary operation computing the logistic sigmoid.
Definition: sigmoid.hpp:20
SSE3 is the max vectorization available.
Convolution implementations with NVidia cuDNN library.
static constexpr bool linear
Indicates if the operator is linear.
Definition: sigmoid.hpp:21
static std::string desc() noexcept
Returns a textual representation of the operator.
Definition: sigmoid.hpp:136
constexpr bool intel_compiler
Indicates if the project is compiled with the Intel compiler.
Definition: config.hpp:225
Root namespace for the ETL library.
Definition: adapter.hpp:15
static constexpr int complexity()
Estimate the complexity of operator.
Definition: sigmoid.hpp:168
constexpr bool cudnn_enabled
Indicates if the NVIDIA CUDNN library is available for ETL.
Definition: config.hpp:114
static auto gpu_compute_hint(const X &x, Y &y) noexcept
Compute the result of the operation using the GPU.
Definition: sigmoid.hpp:86
decltype(auto) force_temporary_gpu_dim_only(E &&expr)
Force a temporary out of the expression, without copying its content.
Definition: temporary.hpp:223
static Y & gpu_compute(const X &x, Y &y) noexcept
Compute the result of the operation using the GPU.
Definition: sigmoid.hpp:110
constexpr size_t size(const E &expr) noexcept
Returns the size of the given ETL expression.
Definition: helpers.hpp:108
auto exp(E &&value) -> detail::unary_helper< E, exp_unary_op >
Apply exponential on each value of the given expression.
Definition: function_expression_builder.hpp:154
AVX is the max vectorization available.
typename V::template vec_type< T > vec_type
Definition: sigmoid.hpp:51
static constexpr bool vectorizable
Indicates if the expression is vectorizable using the given vector mode.
Definition: sigmoid.hpp:30
static vec_type< V > load(const vec_type< V > &x) noexcept
Compute several applications of the operator at a time.
Definition: sigmoid.hpp:69
decltype(auto) smart_gpu_compute_hint(E &expr, Y &y)
Compute the expression into a representation that is GPU up to date.
Definition: helpers.hpp:368
static std::string desc() noexcept
Returns a textual representation of the operator.
Definition: sigmoid.hpp:208