wichtounet/etl/op_2binary_2pow_8hpp_source.html

 //=======================================================================
 // Copyright (c) 2014-2023 Baptiste Wicht
 // Distributed under the terms of the MIT License.
 // (See accompanying file LICENSE or copy at
 //  http://opensource.org/licenses/MIT)
 //=======================================================================

 #pragma once

 #include <type_traits>

 #include "etl/impl/egblas/pow.hpp"
 #include "etl/impl/egblas/pow_yx.hpp"

 namespace etl {

 template <typename T, typename E>
 struct pow_binary_op {
     static constexpr bool linear      = true;
     static constexpr bool thread_safe = true;
     static constexpr bool desc_func   = true;

     template <vector_mode_t V>
     static constexpr bool vectorizable =
         (V == vector_mode_t::SSE3 && is_single_precision_t<T>) || (V == vector_mode_t::AVX && is_single_precision_t<T>) || (intel_compiler && !is_complex_t<T>);

     template <typename L, typename R>
     static constexpr bool gpu_computable = (is_single_precision_t<T> && impl::egblas::has_spow_yx) || (is_double_precision_t<T> && impl::egblas::has_dpow_yx)
                                            || (is_complex_single_t<T> && impl::egblas::has_cpow_yx) || (is_complex_double_t<T> && impl::egblas::has_zpow_yx);

     static constexpr int complexity() {
         return 16;
     }

     template <typename V = default_vec>
     using vec_type = typename V::template vec_type<T>;

     static constexpr T apply(const T& x, E value) noexcept {
         return std::pow(x, value);
     }

     template <typename V = default_vec>
     static ETL_STRONG_INLINE(vec_type<V>) load(const vec_type<V>& x, const vec_type<V>& y) noexcept {
         // Use pow(x, y) = exp(y * log(x))
         auto t1 = V::log(x);
         auto t2 = V::mul(y, t1);
         return V::exp(t2);
     }

     template <typename X, typename Y, typename YY>
     static auto gpu_compute_hint([[maybe_unused]] const X& x, [[maybe_unused]] const Y& y, YY& yy) noexcept {
         decltype(auto) t1 = smart_gpu_compute_hint(x, yy);

         auto t2 = force_temporary_gpu(t1);

 #ifdef ETL_CUDA
         T power_cpu(y.value);
         auto power_gpu = impl::cuda::cuda_allocate_only<T>(1);
         cuda_check(cudaMemcpy(power_gpu.get(), &power_cpu, 1 * sizeof(T), cudaMemcpyHostToDevice));

         T alpha(1.0);
         impl::egblas::pow_yx(etl::size(yy), alpha, power_gpu.get(), 0, t2.gpu_memory(), 1);
 #endif

         return t2;
     }

     template <typename X, typename Y, typename YY>
     static YY& gpu_compute(const X& x, [[maybe_unused]] const Y& y, YY& yy) noexcept {
         smart_gpu_compute(x, yy);

 #ifdef ETL_CUDA
         T power_cpu(y.value);
         auto power_gpu = impl::cuda::cuda_allocate_only<T>(1);
         cuda_check(cudaMemcpy(power_gpu.get(), &power_cpu, 1 * sizeof(T), cudaMemcpyHostToDevice));

         T alpha(1.0);
         impl::egblas::pow_yx(etl::size(yy), alpha, power_gpu.get(), 0, yy.gpu_memory(), 1);
 #endif

         yy.validate_gpu();
         yy.invalidate_cpu();

         return yy;
     }

     static std::string desc() noexcept {
         return "pow";
     }
 };

 /*!
  * \brief Binary operator for scalar power with stable precision.
  *
  * This operator is never vectorized and never computed on GPU. Every
  * element goes through std::pow.
  *
  * \tparam T The type of the values the operator is applied on
  * \tparam E The type of the exponent
  */
 template <typename T, typename E>
 struct precise_pow_binary_op {
     static constexpr bool linear      = true; ///< Indicates if the operator is linear or not
     static constexpr bool thread_safe = true; ///< Indicates if the operator is thread safe or not
     static constexpr bool desc_func   = true; ///< Indicates if the description must be printed as function

     /*!
      * \brief Indicates if the expression is vectorizable using the
      * given vector mode (never the case for this operator)
      * \tparam V The vector mode
      */
     template <vector_mode_t V>
     static constexpr bool vectorizable = false;

     /*!
      * \brief Indicates if the operator can be computed on GPU
      * (never the case for this operator)
      */
     template <typename L, typename R>
     static constexpr bool gpu_computable = false;

     /*!
      * \brief Estimate the complexity of the operator
      * \return An estimation of the complexity of the operator
      */
     static constexpr int complexity() {
         return 16;
     }

     /*!
      * \brief Apply the binary operator on x and value
      * \param x The base
      * \param value The exponent
      * \return The result of std::pow(x, value)
      */
     static constexpr T apply(const T& x, E value) noexcept {
         return std::pow(x, value);
     }

     /*!
      * \brief Returns a textual representation of the operator
      * \return a string representing the operator
      */
     static std::string desc() noexcept {
         return std::string("pow_precise");
     }
 };

 /*!
  * \brief Binary operator for scalar power with an integer as the exponent.
  *
  * On CPU, the power is computed by repeated multiplication. On GPU,
  * the exponent is handed over to the egblas pow_yx kernel.
  *
  * \tparam T The type of the values the operator is applied on
  * \tparam E The type of the exponent
  */
 template <typename T, typename E>
 struct integer_pow_binary_op {
     static constexpr bool linear      = true; ///< Indicates if the operator is linear or not
     static constexpr bool thread_safe = true; ///< Indicates if the operator is thread safe or not
     static constexpr bool desc_func   = true; ///< Indicates if the description must be printed as function

     /*!
      * \brief Indicates if the expression is vectorizable using the
      * given vector mode (never the case for this operator)
      * \tparam V The vector mode
      */
     template <vector_mode_t V>
     static constexpr bool vectorizable = false;

     /*!
      * \brief Indicates if the operator can be computed on GPU.
      *
      * This requires the impl::egblas::has_*pow_yx flag matching T
      * (s, d, c or z) to be set.
      */
     template <typename L, typename R>
     static constexpr bool gpu_computable = (is_single_precision_t<T> && impl::egblas::has_spow_yx) || (is_double_precision_t<T> && impl::egblas::has_dpow_yx)
                                            || (is_complex_single_t<T> && impl::egblas::has_cpow_yx) || (is_complex_double_t<T> && impl::egblas::has_zpow_yx);

     /*!
      * \brief Estimate the complexity of the operator
      * \return An estimation of the complexity of the operator
      */
     static constexpr int complexity() {
         return 16;
     }

     /*!
      * \brief Apply the binary operator on x and value
      *
      * The power is computed by multiplying x with itself |value| times.
      * A negative exponent (only possible when E is signed) yields
      * 1 / x^|value|. When T is an integral type, this division is an
      * integer division and x must not be zero.
      *
      * \param x The base
      * \param value The exponent
      * \return x raised to the power value
      */
     static constexpr T apply(const T& x, E value) noexcept {
         T r(1);

         // The counters below have type E: comparing an unsigned counter
         // against a negative signed exponent would silently convert the
         // exponent to a huge unsigned value.
         if constexpr (std::is_signed_v<E>) {
             if (value < E(0)) {
                 // Count upwards towards zero so that the most negative
                 // exponent never has to be negated
                 for (E i = value; i < E(0); ++i) {
                     r *= x;
                 }

                 return T(1) / r;
             }
         }

         for (E i = E(0); i < value; ++i) {
             r *= x;
         }

         return r;
     }

     /*!
      * \brief Compute the result of the operation using the GPU
      *
      * The left-hand side is computed through smart_gpu_compute_hint
      * and forced into a temporary. With ETL_CUDA, the exponent is
      * copied into a one-element device buffer and pow_yx is invoked on
      * the temporary. Without ETL_CUDA, the temporary is returned as is.
      *
      * \param x The expression holding the bases
      * \param y The scalar holding the exponent (its value member is read)
      * \param yy The expression passed as hint and used for the size
      * \return The temporary holding the result
      */
     template <typename X, typename Y, typename YY>
     static auto gpu_compute_hint(const X& x, [[maybe_unused]] const Y& y, YY& yy) noexcept {
         decltype(auto) t1 = smart_gpu_compute_hint(x, yy);

         auto t2 = force_temporary_gpu(t1);

 #ifdef ETL_CUDA
         // Upload the scalar exponent, converted to T, to the device
         T power_cpu(y.value);
         auto power_gpu = impl::cuda::cuda_allocate_only<T>(1);
         cuda_check(cudaMemcpy(power_gpu.get(), &power_cpu, 1 * sizeof(T), cudaMemcpyHostToDevice));

         // NOTE(review): the 0 after the exponent pointer is presumably its
         // stride, reusing the single value for every element - confirm
         T alpha(1.0);
         impl::egblas::pow_yx(etl::size(yy), alpha, power_gpu.get(), 0, t2.gpu_memory(), 1);
 #endif

         return t2;
     }

     /*!
      * \brief Compute the result of the operation using the GPU
      *
      * The left-hand side is computed into yy through smart_gpu_compute.
      * With ETL_CUDA, pow_yx is then applied on the GPU memory of yy.
      * In every case, yy is finally marked as valid on GPU and invalid
      * on CPU.
      *
      * \param x The expression holding the bases
      * \param y The scalar holding the exponent (its value member is read)
      * \param yy The expression into which to store the result
      * \return yy
      */
     template <typename X, typename Y, typename YY>
     static YY& gpu_compute(const X& x, [[maybe_unused]] const Y& y, YY& yy) noexcept {
         smart_gpu_compute(x, yy);

 #ifdef ETL_CUDA
         // Upload the scalar exponent, converted to T, to the device
         T power_cpu(y.value);
         auto power_gpu = impl::cuda::cuda_allocate_only<T>(1);
         cuda_check(cudaMemcpy(power_gpu.get(), &power_cpu, 1 * sizeof(T), cudaMemcpyHostToDevice));

         T alpha(1.0);
         impl::egblas::pow_yx(etl::size(yy), alpha, power_gpu.get(), 0, yy.gpu_memory(), 1);
 #endif

         yy.validate_gpu();
         yy.invalidate_cpu();

         return yy;
     }

     /*!
      * \brief Returns a textual representation of the operator
      * \return a string representing the operator
      */
     static std::string desc() noexcept {
         return "pow";
     }
 };

 } //end of namespace etl
etl::pow_binary_op::complexity
static constexpr int complexity()
Estimate the complexity of operator.
Definition: pow.hpp:44

etl::precise_pow_binary_op::apply
static constexpr T apply(const T &x, E value) noexcept
Apply the binary operator on lhs and rhs.
Definition: pow.hpp:175

etl::pow_binary_op::apply
static constexpr T apply(const T &x, E value) noexcept
Apply the binary operator on lhs and rhs.
Definition: pow.hpp:60

etl::pow_binary_op::load
static ETL_STRONG_INLINE(vec_type< V >) load(const vec_type< V > &x, const vec_type< V > &y) noexcept
Compute several applications of the operator at a time.

etl::pow_binary_op
Binary operator for scalar power.
Definition: pow.hpp:19

etl::pow_binary_op::desc_func
static constexpr bool desc_func
Indicates if the description must be printed as function.
Definition: pow.hpp:22

etl::pow_binary_op::gpu_compute
static YY & gpu_compute(const X &x, [[maybe_unused]] const Y &y, YY &yy) noexcept
Compute the result of the operation using the GPU.
Definition: pow.hpp:111

etl::pow_binary_op::vectorizable
static constexpr bool vectorizable
Indicates if the expression is vectorizable using the given vector mode.
Definition: pow.hpp:30

etl::vector_mode_t::SSE3
SSE3 is the max vectorization available.

etl::precise_pow_binary_op::complexity
static constexpr int complexity()
Estimate the complexity of operator.
Definition: pow.hpp:165

etl::integer_pow_binary_op::apply
static constexpr T apply(const T &x, E value) noexcept
Apply the binary operator on lhs and rhs.
Definition: pow.hpp:226

etl::integer_pow_binary_op::gpu_compute_hint
static auto gpu_compute_hint(const X &x, [[maybe_unused]] const Y &y, YY &yy) noexcept
Compute the result of the operation using the GPU.
Definition: pow.hpp:244

etl::intel_compiler
constexpr bool intel_compiler
Indicates if the project is compiled with the Intel compiler.
Definition: config.hpp:225

etl::integer_pow_binary_op
Binary operator for scalar power with an integer as the exponent.
Definition: pow.hpp:192

etl::load
auto load(size_t x) const noexcept
Load several elements of the expression at once.
Definition: dyn_matrix_view.hpp:143

etl
Root namespace for the ETL library.
Definition: adapter.hpp:15

etl::pow_binary_op::desc
static std::string desc() noexcept
Returns a textual representation of the operator.
Definition: pow.hpp:133

etl::precise_pow_binary_op
Binary operator for scalar power with stable precision.
Definition: pow.hpp:142

etl::pow_binary_op::vec_type
typename V::template vec_type< T > vec_type
Definition: pow.hpp:52

etl::pow_binary_op::linear
static constexpr bool linear
Indicates if the operator is linear or not.
Definition: pow.hpp:20

pow.hpp
EGBLAS wrappers for the pow operation.

etl::pow_binary_op::thread_safe
static constexpr bool thread_safe
Indicates if the operator is thread safe or not.
Definition: pow.hpp:21

etl::size
constexpr size_t size(const E &expr) noexcept
Returns the size of the given ETL expression.
Definition: helpers.hpp:108

etl::exp
auto exp(E &&value) -> detail::unary_helper< E, exp_unary_op >
Apply exponential on each value of the given expression.
Definition: function_expression_builder.hpp:154

etl::vector_mode_t::AVX
AVX is the max vectorization available.

etl::force_temporary_gpu
decltype(auto) force_temporary_gpu(E &&expr)
Force a temporary out of the expression.
Definition: temporary.hpp:196

pow_yx.hpp
EGBLAS wrappers for the pow_yx operation.

etl::pow_binary_op::gpu_computable
static constexpr bool gpu_computable
Indicates if the operator can be computed on GPU.
Definition: pow.hpp:37

etl::integer_pow_binary_op::gpu_compute
static YY & gpu_compute(const X &x, [[maybe_unused]] const Y &y, YY &yy) noexcept
Compute the result of the operation using the GPU.
Definition: pow.hpp:268

etl::precise_pow_binary_op::desc
static std::string desc() noexcept
Returns a textual representation of the operator.
Definition: pow.hpp:183

etl::smart_gpu_compute_hint
decltype(auto) smart_gpu_compute_hint(E &expr, Y &y)
Compute the expression into a representation that is GPU up to date.
Definition: helpers.hpp:368

etl::log
auto log(E &&value) -> detail::unary_helper< E, log_unary_op >
Apply logarithm (base e) on each value of the given expression.
Definition: function_expression_builder.hpp:64

etl::integer_pow_binary_op::desc
static std::string desc() noexcept
Returns a textual representation of the operator.
Definition: pow.hpp:290

etl::smart_gpu_compute
decltype(auto) smart_gpu_compute(X &x, Y &y)
Compute the expression into a representation that is GPU up to date and store this representation in ...
Definition: helpers.hpp:397

etl::pow_binary_op::gpu_compute_hint
static auto gpu_compute_hint([[maybe_unused]] const X &x, [[maybe_unused]] const Y &y, YY &yy) noexcept
Compute the result of the operation using the GPU.
Definition: pow.hpp:87

etl::integer_pow_binary_op::complexity
static constexpr int complexity()
Estimate the complexity of operator.
Definition: pow.hpp:216