46 if (
cublas_enabled && is_gpu_computable<E> && is_floating<E> && !no_gpu) {
50 if (
vec_enabled && all_vectorizable<vector_mode, E>) {
57 #ifdef ETL_MANUAL_SELECT 73 std::cerr <<
"Forced selection to VEC sum implementation, but not possible for this expression" << std::endl;
81 std::cerr <<
"Forced selection to CUBLAS sum implementation, but not possible for this expression" << std::endl;
89 std::cerr <<
"Forced selection to BLAS sum implementation, but not possible for this expression" << std::endl;
114 template <
typename E>
116 return select_default_sum_impl<E>(
false);
128 template <
typename E>
130 constexpr_select
const auto impl = select_sum_impl<E>();
135 return impl::vec::sum(e);
140 return impl::blas::sum(e);
144 if constexpr (gpu_computable_single_precision<E> || gpu_computable_double_precision<E>) {
146 return impl::cublas::sum(e);
148 cpp_unreachable(
"CUBLAS called on invalid types");
154 return impl::standard::sum(e);
166 template <
typename E>
168 constexpr_select
const auto impl = select_sum_impl<E>();
173 return impl::vec::asum(e);
178 return impl::blas::asum(e);
182 if constexpr (gpu_computable_single_precision<E> || gpu_computable_double_precision<E>) {
184 return impl::cublas::asum(e);
186 cpp_unreachable(
"CUBLAS called on invalid types");
192 return impl::standard::asum(e);
static value_t< E > apply(const E &e)
Apply the functor to e.
Definition: sum.hpp:167
Absolute Sum operation implementation.
Definition: sum.hpp:162
BLAS implementation of the "sum" reduction.
constexpr bool vec_enabled
Indicates if vectorization is available in any format.
Definition: config.hpp:220
sum_impl
Enumeration describing the different implementations of sum.
Definition: sum_impl.hpp:20
static value_t< E > apply(const E &e)
Apply the functor to e.
Definition: sum.hpp:129
Definition: expression_builder.hpp:699
Standard implementation of the "sum" reduction.
Traits to get information about ETL types.
Definition: tmp.hpp:68
context & local_context()
Return the configuration context of the current thread.
Definition: context.hpp:50
constexpr bool cublas_enabled
Indicates if the NVIDIA CUBLAS library is available for ETL.
Definition: config.hpp:99
Unified vectorized implementation of the "sum" reduction.
bool cpu
Force CPU evaluation.
Definition: context.hpp:29
Sum operation implementation.
Definition: sum.hpp:124
CUBLAS implementation of the "sum" reduction.
constexpr etl::sum_impl select_default_sum_impl(bool no_gpu)
Select the default sum implementation for an expression of type E.
Definition: sum.hpp:42
constexpr bool cblas_enabled
Indicates if a BLAS library is available for ETL.
Definition: config.hpp:76
constexpr etl::sum_impl select_sum_impl()
Select the sum implementation for an expression of type E.
Definition: sum.hpp:115
typename decay_traits< E >::value_type value_t
Traits to extract the value type out of an ETL type.
Definition: tmp.hpp:81
void inc_counter([[maybe_unused]] const char *name)
Increase the given counter.
Definition: counters.hpp:25