Expression Templates Library (ETL)
pow.hpp
1 //=======================================================================
2 // Copyright (c) 2014-2023 Baptiste Wicht
3 // Distributed under the terms of the MIT License.
4 // (See accompanying file LICENSE or copy at
5 // http://opensource.org/licenses/MIT)
6 //=======================================================================
7 
8 #pragma once
9 
10 #include "etl/impl/egblas/pow.hpp"
12 
13 namespace etl {
14 
18 template <typename T, typename E>
19 struct pow_binary_op {
20  static constexpr bool linear = true;
21  static constexpr bool thread_safe = true;
22  static constexpr bool desc_func = true;
23 
29  template <vector_mode_t V>
30  static constexpr bool vectorizable =
31  (V == vector_mode_t::SSE3 && is_single_precision_t<T>) || (V == vector_mode_t::AVX && is_single_precision_t<T>) || (intel_compiler && !is_complex_t<T>);
32 
36  template <typename L, typename R>
37  static constexpr bool gpu_computable = (is_single_precision_t<T> && impl::egblas::has_spow_yx) || (is_double_precision_t<T> && impl::egblas::has_dpow_yx)
38  || (is_complex_single_t<T> && impl::egblas::has_cpow_yx) || (is_complex_double_t<T> && impl::egblas::has_zpow_yx);
39 
44  static constexpr int complexity() {
45  return 16;
46  }
47 
51  template <typename V = default_vec>
52  using vec_type = typename V::template vec_type<T>;
53 
60  static constexpr T apply(const T& x, E value) noexcept {
61  return std::pow(x, value);
62  }
63 
71  template <typename V = default_vec>
72  static ETL_STRONG_INLINE(vec_type<V>) load(const vec_type<V>& x, const vec_type<V>& y) noexcept {
73  // Use pow(x, y) = exp(y * log(x))
74  auto t1 = V::log(x);
75  auto t2 = V::mul(y, t1);
76  return V::exp(t2);
77  }
78 
86  template <typename X, typename Y, typename YY>
87  static auto gpu_compute_hint([[maybe_unused]] const X& x, [[maybe_unused]] const Y& y, YY& yy) noexcept {
88  decltype(auto) t1 = smart_gpu_compute_hint(x, yy);
89 
90  auto t2 = force_temporary_gpu(t1);
91 
92 #ifdef ETL_CUDA
93  T power_cpu(y.value);
94  auto power_gpu = impl::cuda::cuda_allocate_only<T>(1);
95  cuda_check(cudaMemcpy(power_gpu.get(), &power_cpu, 1 * sizeof(T), cudaMemcpyHostToDevice));
96 
97  T alpha(1.0);
98  impl::egblas::pow_yx(etl::size(yy), alpha, power_gpu.get(), 0, t2.gpu_memory(), 1);
99 #endif
100 
101  return t2;
102  }
103 
110  template <typename X, typename Y, typename YY>
111  static YY& gpu_compute(const X& x, [[maybe_unused]] const Y& y, YY& yy) noexcept {
112  smart_gpu_compute(x, yy);
113 
114 #ifdef ETL_CUDA
115  T power_cpu(y.value);
116  auto power_gpu = impl::cuda::cuda_allocate_only<T>(1);
117  cuda_check(cudaMemcpy(power_gpu.get(), &power_cpu, 1 * sizeof(T), cudaMemcpyHostToDevice));
118 
119  T alpha(1.0);
120  impl::egblas::pow_yx(etl::size(yy), alpha, power_gpu.get(), 0, yy.gpu_memory(), 1);
121 #endif
122 
123  yy.validate_gpu();
124  yy.invalidate_cpu();
125 
126  return yy;
127  }
128 
133  static std::string desc() noexcept {
134  return "pow";
135  }
136 };
137 
141 template <typename T, typename E>
/*! \brief Indicates if the operator is linear or not */
static constexpr bool linear = true;

/*! \brief Indicates if the operator is thread safe or not */
static constexpr bool thread_safe = true;

/*! \brief Indicates if the description must be printed as function */
static constexpr bool desc_func = true;
146 
152  template <vector_mode_t V>
153  static constexpr bool vectorizable = false;
154 
/*!
 * \brief Indicates if the operator can be computed on GPU.
 *
 * Always false, whatever the operand types.
 */
template <typename L, typename R>
static constexpr bool gpu_computable = false;
160 
/*!
 * \brief Estimate the complexity of operator
 * \return An estimation of the complexity of the operator
 */
static constexpr int complexity() {
    return 16;
}
168 
175  static constexpr T apply(const T& x, E value) noexcept {
176  return std::pow(x, value);
177  }
178 
/*!
 * \brief Returns a textual representation of the operator
 * \return a string representing the operator
 */
static std::string desc() noexcept {
    return "pow_precise";
}
186 };
187 
191 template <typename T, typename E>
/*! \brief Indicates if the operator is linear or not */
static constexpr bool linear = true;

/*! \brief Indicates if the operator is thread safe or not */
static constexpr bool thread_safe = true;

/*! \brief Indicates if the description must be printed as function */
static constexpr bool desc_func = true;
196 
202  template <vector_mode_t V>
203  static constexpr bool vectorizable = false;
204 
208  template <typename L, typename R>
209  static constexpr bool gpu_computable = (is_single_precision_t<T> && impl::egblas::has_spow_yx) || (is_double_precision_t<T> && impl::egblas::has_dpow_yx)
210  || (is_complex_single_t<T> && impl::egblas::has_cpow_yx) || (is_complex_double_t<T> && impl::egblas::has_zpow_yx);
211 
/*!
 * \brief Estimate the complexity of operator
 * \return An estimation of the complexity of the operator
 */
static constexpr int complexity() {
    return 16;
}
219 
/*!
 * \brief Apply the operator to a value and an integer exponent.
 *
 * The power is computed by repeated multiplication, |value| times. A
 * negative exponent yields the reciprocal of the corresponding positive
 * power. When T is itself an integral type that reciprocal is an integer
 * division.
 *
 * \param x The value to raise
 * \param value The exponent, expected to be of an integral type
 * \return x multiplied by itself |value| times, inverted if value is negative
 */
static constexpr T apply(const T& x, E value) noexcept {
    T r(1);

    // The counter has the type of the exponent: comparing an unsigned counter
    // with a negative exponent would convert the exponent to a huge unsigned
    // bound and the loop would (practically) never terminate.
    if (value < E(0)) {
        // Count upwards from the exponent so that even the most negative
        // value of E never has to be negated
        for (E i = value; i < E(0); ++i) {
            r *= x;
        }

        return T(1) / r;
    }

    for (E i = E(0); i < value; ++i) {
        r *= x;
    }

    return r;
}
235 
/*!
 * \brief Compute the result of the operation using the GPU.
 *
 * The sub-expression is evaluated on the GPU, forced into a temporary and,
 * when ETL_CUDA is defined, the egblas pow_yx kernel is run on that temporary.
 *
 * \param x The expression to raise
 * \param y The scalar operand; only its `value` member is read
 * \param yy The expression passed as hint to smart_gpu_compute_hint and
 *           whose size gives the number of elements to process
 * \return The temporary holding the result
 */
template <typename X, typename Y, typename YY>
static auto gpu_compute_hint(const X& x, [[maybe_unused]] const Y& y, YY& yy) noexcept {
    decltype(auto) computed = smart_gpu_compute_hint(x, yy);

    auto result = force_temporary_gpu(computed);

#ifdef ETL_CUDA
    // The kernel reads the exponent from device memory: upload it as a one-element buffer
    T exponent_host(y.value);
    auto exponent_device = impl::cuda::cuda_allocate_only<T>(1);
    cuda_check(cudaMemcpy(exponent_device.get(), &exponent_host, 1 * sizeof(T), cudaMemcpyHostToDevice));

    // The exponent buffer is passed with an increment of 0 and the temporary
    // with an increment of 1 (presumably BLAS-style strides, so that the
    // single exponent is reused for every element -- confirm in egblas)
    T alpha(1.0);
    impl::egblas::pow_yx(etl::size(yy), alpha, exponent_device.get(), 0, result.gpu_memory(), 1);
#endif

    return result;
}
260 
/*!
 * \brief Compute the result of the operation using the GPU.
 *
 * The sub-expression is first computed into yy with smart_gpu_compute and,
 * when ETL_CUDA is defined, the egblas pow_yx kernel is run on the GPU memory
 * of yy. Afterwards yy is flagged as valid on the GPU and invalid on the CPU.
 *
 * \param x The expression to raise
 * \param y The scalar operand; only its `value` member is read
 * \param yy The expression receiving the result
 * \return yy
 */
template <typename X, typename Y, typename YY>
static YY& gpu_compute(const X& x, [[maybe_unused]] const Y& y, YY& yy) noexcept {
    smart_gpu_compute(x, yy);

#ifdef ETL_CUDA
    // The kernel reads the exponent from device memory: upload it as a one-element buffer
    T exponent_host(y.value);
    auto exponent_device = impl::cuda::cuda_allocate_only<T>(1);
    cuda_check(cudaMemcpy(exponent_device.get(), &exponent_host, 1 * sizeof(T), cudaMemcpyHostToDevice));

    // Exponent buffer with an increment of 0, destination with an increment of 1
    T alpha(1.0);
    impl::egblas::pow_yx(etl::size(yy), alpha, exponent_device.get(), 0, yy.gpu_memory(), 1);
#endif

    yy.validate_gpu();
    yy.invalidate_cpu();

    return yy;
}
285 
/*!
 * \brief Returns a textual representation of the operator
 * \return a string representing the operator
 */
static std::string desc() noexcept {
    return "pow";
}
293 };
294 
295 } //end of namespace etl
static constexpr int complexity()
Estimate the complexity of operator.
Definition: pow.hpp:44
static constexpr T apply(const T &x, E value) noexcept
Apply the binary operator to x and the exponent value.
Definition: pow.hpp:175
static constexpr T apply(const T &x, E value) noexcept
Apply the binary operator to x and the exponent value.
Definition: pow.hpp:60
static ETL_STRONG_INLINE(vec_type< V >) load(const vec_type< V > &x, const vec_type< V > &y) noexcept
Compute several applications of the operator at a time.
Binary operator for scalar power.
Definition: pow.hpp:19
static constexpr bool desc_func
Indicates if the description must be printed as function.
Definition: pow.hpp:22
static YY & gpu_compute(const X &x, [[maybe_unused]] const Y &y, YY &yy) noexcept
Compute the result of the operation using the GPU.
Definition: pow.hpp:111
static constexpr bool vectorizable
Indicates if the expression is vectorizable using the given vector mode.
Definition: pow.hpp:30
SSE3 is the max vectorization available.
static constexpr int complexity()
Estimate the complexity of operator.
Definition: pow.hpp:165
static constexpr T apply(const T &x, E value) noexcept
Apply the binary operator to x and the exponent value.
Definition: pow.hpp:226
static auto gpu_compute_hint(const X &x, [[maybe_unused]] const Y &y, YY &yy) noexcept
Compute the result of the operation using the GPU.
Definition: pow.hpp:244
constexpr bool intel_compiler
Indicates if the project is compiled with the Intel compiler.
Definition: config.hpp:225
Binary operator for scalar power with an integer as the exponent.
Definition: pow.hpp:192
auto load(size_t x) const noexcept
Load several elements of the expression at once.
Definition: dyn_matrix_view.hpp:143
Root namespace for the ETL library.
Definition: adapter.hpp:15
static std::string desc() noexcept
Returns a textual representation of the operator.
Definition: pow.hpp:133
Binary operator for scalar power with stable precision.
Definition: pow.hpp:142
typename V::template vec_type< T > vec_type
Definition: pow.hpp:52
static constexpr bool linear
Indicates if the operator is linear or not.
Definition: pow.hpp:20
EGBLAS wrappers for the pow operation.
static constexpr bool thread_safe
Indicates if the operator is thread safe or not.
Definition: pow.hpp:21
constexpr size_t size(const E &expr) noexcept
Returns the size of the given ETL expression.
Definition: helpers.hpp:108
auto exp(E &&value) -> detail::unary_helper< E, exp_unary_op >
Apply exponential on each value of the given expression.
Definition: function_expression_builder.hpp:154
AVX is the max vectorization available.
decltype(auto) force_temporary_gpu(E &&expr)
Force a temporary out of the expression.
Definition: temporary.hpp:196
EGBLAS wrappers for the pow_yx operation.
static constexpr bool gpu_computable
Indicates if the operator can be computed on GPU.
Definition: pow.hpp:37
static YY & gpu_compute(const X &x, [[maybe_unused]] const Y &y, YY &yy) noexcept
Compute the result of the operation using the GPU.
Definition: pow.hpp:268
static std::string desc() noexcept
Returns a textual representation of the operator.
Definition: pow.hpp:183
decltype(auto) smart_gpu_compute_hint(E &expr, Y &y)
Compute the expression into a representation that is GPU up to date.
Definition: helpers.hpp:368
auto log(E &&value) -> detail::unary_helper< E, log_unary_op >
Apply logarithm (base e) on each value of the given expression.
Definition: function_expression_builder.hpp:64
static std::string desc() noexcept
Returns a textual representation of the operator.
Definition: pow.hpp:290
decltype(auto) smart_gpu_compute(X &x, Y &y)
Compute the expression into a representation that is GPU up to date and store this representation in ...
Definition: helpers.hpp:397
static auto gpu_compute_hint([[maybe_unused]] const X &x, [[maybe_unused]] const Y &y, YY &yy) noexcept
Compute the result of the operation using the GPU.
Definition: pow.hpp:87
static constexpr int complexity()
Estimate the complexity of operator.
Definition: pow.hpp:216