18 template <
typename T,
typename E>
29 template <vector_mode_t V>
// Trait flag: indicates whether the operator can be computed on the GPU.
// It is true only when the value type T falls in a scalar category for which
// the matching EGBLAS pow_yx flag is set:
//   single precision -> has_spow_yx, double precision -> has_dpow_yx,
//   complex single   -> has_cpow_yx, complex double   -> has_zpow_yx.
// The template parameters L and R do not appear in the condition.
36 template <
typename L,
typename R>
37 static constexpr
bool gpu_computable = (is_single_precision_t<T> && impl::egblas::has_spow_yx) || (is_double_precision_t<T> && impl::egblas::has_dpow_yx)
38 || (is_complex_single_t<T> && impl::egblas::has_cpow_yx) || (is_complex_double_t<T> && impl::egblas::has_zpow_yx);
51 template <
typename V = default_vec>
60 static constexpr T
apply(
const T& x, E value) noexcept {
61 return std::pow(x, value);
71 template <
typename V = default_vec>
75 auto t2 = V::mul(y, t1);
86 template <
typename X,
typename Y,
typename YY>
87 static auto gpu_compute_hint([[maybe_unused]]
const X& x, [[maybe_unused]]
const Y& y, YY& yy) noexcept {
94 auto power_gpu = impl::cuda::cuda_allocate_only<T>(1);
95 cuda_check(cudaMemcpy(power_gpu.get(), &power_cpu, 1 *
sizeof(T), cudaMemcpyHostToDevice));
98 impl::egblas::pow_yx(
etl::size(yy), alpha, power_gpu.get(), 0, t2.gpu_memory(), 1);
110 template <
typename X,
typename Y,
typename YY>
111 static YY&
gpu_compute(
const X& x, [[maybe_unused]]
const Y& y, YY& yy) noexcept {
115 T power_cpu(y.value);
116 auto power_gpu = impl::cuda::cuda_allocate_only<T>(1);
117 cuda_check(cudaMemcpy(power_gpu.get(), &power_cpu, 1 *
sizeof(T), cudaMemcpyHostToDevice));
120 impl::egblas::pow_yx(
etl::size(yy), alpha, power_gpu.get(), 0, yy.gpu_memory(), 1);
133 static std::string
desc() noexcept {
141 template <
typename T,
typename E>
// Operator trait flags (enclosing struct header is not visible in this listing;
// presumably the stable-precision pow operator -- TODO confirm).
// linear: indicates if the operator is linear or not (true here).
143 static constexpr
bool linear =
true;
// thread_safe: indicates if the operator is thread safe or not (true here).
144 static constexpr
bool thread_safe =
true;
// desc_func: indicates if the description must be printed as a function (true here).
145 static constexpr
bool desc_func =
true;
// Indicates if the expression is vectorizable using the given vector mode V.
// Hard-coded to false, so this operator is never reported as vectorizable,
// whatever V is.
152 template <vector_mode_t V>
153 static constexpr
bool vectorizable =
false;
// Indicates if the operator can be computed on GPU.
// Hard-coded to false for every L and R, in contrast with the other
// gpu_computable declarations in this file, which depend on EGBLAS pow_yx support.
158 template <
typename L,
typename R>
159 static constexpr
bool gpu_computable =
false;
175 static constexpr T
apply(
const T& x, E value) noexcept {
176 return std::pow(x, value);
183 static std::string
desc() noexcept {
184 return "pow_precise";
191 template <
typename T,
typename E>
// Operator trait flags (enclosing struct header is not visible in this listing;
// presumably the integer-exponent pow operator -- TODO confirm).
// linear: indicates if the operator is linear or not (true here).
193 static constexpr
bool linear =
true;
// thread_safe: indicates if the operator is thread safe or not (true here).
194 static constexpr
bool thread_safe =
true;
// desc_func: indicates if the description must be printed as a function (true here).
195 static constexpr
bool desc_func =
true;
// Indicates if the expression is vectorizable using the given vector mode V.
// Hard-coded to false, so this operator is never reported as vectorizable,
// whatever V is.
202 template <vector_mode_t V>
203 static constexpr
bool vectorizable =
false;
// Trait flag: indicates whether the operator can be computed on the GPU.
// It is true only when the value type T falls in a scalar category for which
// the matching EGBLAS pow_yx flag is set:
//   single precision -> has_spow_yx, double precision -> has_dpow_yx,
//   complex single   -> has_cpow_yx, complex double   -> has_zpow_yx.
// The template parameters L and R do not appear in the condition.
208 template <
typename L,
typename R>
209 static constexpr
bool gpu_computable = (is_single_precision_t<T> && impl::egblas::has_spow_yx) || (is_double_precision_t<T> && impl::egblas::has_dpow_yx)
210 || (is_complex_single_t<T> && impl::egblas::has_cpow_yx) || (is_complex_double_t<T> && impl::egblas::has_zpow_yx);
226 static constexpr T
apply(
const T& x, E value) noexcept {
229 for (
size_t i = 0; i < value; ++i) {
243 template <
typename X,
typename Y,
typename YY>
250 T power_cpu(y.value);
251 auto power_gpu = impl::cuda::cuda_allocate_only<T>(1);
252 cuda_check(cudaMemcpy(power_gpu.get(), &power_cpu, 1 *
sizeof(T), cudaMemcpyHostToDevice));
255 impl::egblas::pow_yx(
etl::size(yy), alpha, power_gpu.get(), 0, t2.gpu_memory(), 1);
267 template <
typename X,
typename Y,
typename YY>
268 static YY&
gpu_compute(
const X& x, [[maybe_unused]]
const Y& y, YY& yy) noexcept {
272 T power_cpu(y.value);
273 auto power_gpu = impl::cuda::cuda_allocate_only<T>(1);
274 cuda_check(cudaMemcpy(power_gpu.get(), &power_cpu, 1 *
sizeof(T), cudaMemcpyHostToDevice));
277 impl::egblas::pow_yx(
etl::size(yy), alpha, power_gpu.get(), 0, yy.gpu_memory(), 1);
290 static std::string
desc() noexcept {
static constexpr int complexity()
Estimate the complexity of operator.
Definition: pow.hpp:44
static constexpr T apply(const T &x, E value) noexcept
Apply the binary operator on lhs and rhs.
Definition: pow.hpp:175
static constexpr T apply(const T &x, E value) noexcept
Apply the binary operator on lhs and rhs.
Definition: pow.hpp:60
static ETL_STRONG_INLINE(vec_type< V >) load(const vec_type< V > &x
Compute several applications of the operator at a time.
Binary operator for scalar power.
Definition: pow.hpp:19
static constexpr bool desc_func
Indicates if the description must be printed as function.
Definition: pow.hpp:22
static YY & gpu_compute(const X &x, [[maybe_unused]] const Y &y, YY &yy) noexcept
Compute the result of the operation using the GPU.
Definition: pow.hpp:111
static constexpr bool vectorizable
Indicates if the expression is vectorizable using the given vector mode.
Definition: pow.hpp:30
SSE3 is the max vectorization available.
static constexpr int complexity()
Estimate the complexity of operator.
Definition: pow.hpp:165
static constexpr T apply(const T &x, E value) noexcept
Apply the binary operator on lhs and rhs.
Definition: pow.hpp:226
static auto gpu_compute_hint(const X &x, [[maybe_unused]] const Y &y, YY &yy) noexcept
Compute the result of the operation using the GPU.
Definition: pow.hpp:244
constexpr bool intel_compiler
Indicates if the project is compiled with the Intel compiler.
Definition: config.hpp:225
Binary operator for scalar power with an integer as the exponent.
Definition: pow.hpp:192
auto load(size_t x) const noexcept
Load several elements of the expression at once.
Definition: dyn_matrix_view.hpp:143
Root namespace for the ETL library.
Definition: adapter.hpp:15
static std::string desc() noexcept
Returns a textual representation of the operator.
Definition: pow.hpp:133
Binary operator for scalar power with stable precision.
Definition: pow.hpp:142
typename V::template vec_type< T > vec_type
Definition: pow.hpp:52
static constexpr bool linear
Indicates if the operator is linear or not.
Definition: pow.hpp:20
EGBLAS wrappers for the pow operation.
static constexpr bool thread_safe
Indicates if the operator is thread safe or not.
Definition: pow.hpp:21
constexpr size_t size(const E &expr) noexcept
Returns the size of the given ETL expression.
Definition: helpers.hpp:108
auto exp(E &&value) -> detail::unary_helper< E, exp_unary_op >
Apply exponential on each value of the given expression.
Definition: function_expression_builder.hpp:154
AVX is the max vectorization available.
decltype(auto) force_temporary_gpu(E &&expr)
Force a temporary out of the expression.
Definition: temporary.hpp:196
EGBLAS wrappers for the pow_yx operation.
static constexpr bool gpu_computable
Indicates if the operator can be computed on GPU.
Definition: pow.hpp:37
static YY & gpu_compute(const X &x, [[maybe_unused]] const Y &y, YY &yy) noexcept
Compute the result of the operation using the GPU.
Definition: pow.hpp:268
static std::string desc() noexcept
Returns a textual representation of the operator.
Definition: pow.hpp:183
decltype(auto) smart_gpu_compute_hint(E &expr, Y &y)
Compute the expression into a representation that is GPU up to date.
Definition: helpers.hpp:368
auto log(E &&value) -> detail::unary_helper< E, log_unary_op >
Apply logarithm (base e) on each value of the given expression.
Definition: function_expression_builder.hpp:64
static std::string desc() noexcept
Returns a textual representation of the operator.
Definition: pow.hpp:290
decltype(auto) smart_gpu_compute(X &x, Y &y)
Compute the expression into a representation that is GPU up to date and store this representation in ...
Definition: helpers.hpp:397
static auto gpu_compute_hint([[maybe_unused]] const X &x, [[maybe_unused]] const Y &y, YY &yy) noexcept
Compute the result of the operation using the GPU.
Definition: pow.hpp:87
static constexpr int complexity()
Estimate the complexity of operator.
Definition: pow.hpp:216