wichtounet/etl/sum_8hpp_source.html

 //=======================================================================
 // Copyright (c) 2014-2023 Baptiste Wicht
 // Distributed under the terms of the MIT License.
 // (See accompanying file LICENSE or copy at
 //  http://opensource.org/licenses/MIT)
 //=======================================================================

 #pragma once

 //Include the implementations
 #include "etl/impl/std/sum.hpp"
 #include "etl/impl/vec/sum.hpp"
 #include "etl/impl/blas/sum.hpp"
 #include "etl/impl/cublas/sum.hpp"

 namespace etl::detail {

 /*!
  * \brief Select the default sum implementation for an expression of type E.
  *
  * CUBLAS is preferred when it is enabled, E is GPU computable, E is
  * floating point and the GPU has not been ruled out. Otherwise VEC is used
  * when vectorization is enabled and E is vectorizable for vector_mode.
  * STD is the fallback. BLAS is never returned by this function.
  *
  * \param no_gpu If true, the CUBLAS implementation is not considered
  * \return The implementation to use
  */
 template <typename E>
 constexpr etl::sum_impl select_default_sum_impl(bool no_gpu) {
     //Note: the constexpr operands are known at compile time, so the
     //compiler can simplify these conditions a lot

     const bool gpu_eligible = cublas_enabled && is_gpu_computable<E> && is_floating<E> && !no_gpu;

     if (gpu_eligible) {
         return etl::sum_impl::CUBLAS;
     }

     //No GPU: choose between the vectorized and the standard implementation
     return (vec_enabled && all_vectorizable<vector_mode, E>) ? etl::sum_impl::VEC : etl::sum_impl::STD;
 }

 #ifdef ETL_MANUAL_SELECT

 /*!
  * \brief Select the sum implementation for an expression of type E,
  * taking into account an implementation forced in the local context.
  *
  * When no implementation is forced, the default selection is used. When
  * VEC, CUBLAS or BLAS is forced but cannot be used for E, a message is
  * written to std::cerr and the default selection is used instead. Any
  * other forced implementation is returned as is.
  *
  * \return The implementation to use
  */
 template <typename E>
 etl::sum_impl select_sum_impl() {
     //Nothing forced: rely on the default selection
     if (!local_context().sum_selector.forced) {
         return select_default_sum_impl<E>(local_context().cpu);
     }

     const auto requested = local_context().sum_selector.impl;

     //Stays null as long as the forced implementation can be honoured
     const char* refusal = nullptr;

     if (requested == etl::sum_impl::VEC) {
         //VEC cannot always be used
         if (!(vec_enabled && decay_traits<E>::template vectorizable<vector_mode>)) {                           //COVERAGE_EXCLUDE_LINE
             refusal = "Forced selection to VEC sum implementation, but not possible for this expression";      //COVERAGE_EXCLUDE_LINE
         }                                                                                                      //COVERAGE_EXCLUDE_LINE
     } else if (requested == etl::sum_impl::CUBLAS) {
         //CUBLAS needs a GPU computable floating point expression and is disabled when CPU is forced
         if (!(cublas_enabled && is_gpu_computable<E> && is_floating<E> && !local_context().cpu)) {             //COVERAGE_EXCLUDE_LINE
             refusal = "Forced selection to CUBLAS sum implementation, but not possible for this expression";   //COVERAGE_EXCLUDE_LINE
         }                                                                                                      //COVERAGE_EXCLUDE_LINE
     } else if (requested == etl::sum_impl::BLAS) {
         //BLAS needs a DMA floating point expression
         if (!(cblas_enabled && is_dma<E> && is_floating<E>)) {                                                 //COVERAGE_EXCLUDE_LINE
             refusal = "Forced selection to BLAS sum implementation, but not possible for this expression";     //COVERAGE_EXCLUDE_LINE
         }                                                                                                      //COVERAGE_EXCLUDE_LINE
     }

     //The forced implementation is not usable: warn and fall back to the default selection
     if (refusal != nullptr) {                                   //COVERAGE_EXCLUDE_LINE
         std::cerr << refusal << std::endl;                      //COVERAGE_EXCLUDE_LINE
         return select_default_sum_impl<E>(local_context().cpu); //COVERAGE_EXCLUDE_LINE
     }                                                           //COVERAGE_EXCLUDE_LINE

     //In other cases, simply use the forced impl
     return requested;
 }

 #else

 /*!
  * \brief Select the sum implementation for an expression of type E.
  *
  * This variant forwards to the default selection and never rules out
  * the GPU.
  *
  * \return The implementation to use
  */
 template <typename E>
 constexpr etl::sum_impl select_sum_impl() {
     //The GPU implementation is always a candidate here
     constexpr bool no_gpu = false;

     return select_default_sum_impl<E>(no_gpu);
 }

 #endif

 /*!
  * \brief Sum operation implementation
  */
 struct sum_impl {
     /*!
      * \brief Apply the functor to e
      *
      * Dispatches to the vec, blas, cublas or standard sum depending on the
      * result of select_sum_impl<E>() and increments the matching counter.
      *
      * \param e The expression to sum
      * \return The value returned by the selected sum implementation
      */
     template <typename E>
     static value_t<E> apply(const E& e) {
         //NOTE(review): constexpr_select is defined elsewhere; presumably it makes the
         //selection a compile-time constant when manual selection is off - confirm
         constexpr_select const auto selected = select_sum_impl<E>();

         if constexpr_select (selected == etl::sum_impl::VEC) {
             inc_counter("impl:vec");
             return impl::vec::sum(e);
         } else if constexpr_select (selected == etl::sum_impl::BLAS) {
             inc_counter("impl:blas");
             return impl::blas::sum(e);
         } else if constexpr_select (selected == etl::sum_impl::CUBLAS) {
             //Only call the CUBLAS kernel for single or double precision GPU computable expressions
             if constexpr (!gpu_computable_single_precision<E> && !gpu_computable_double_precision<E>) {
                 cpp_unreachable("CUBLAS called on invalid types");
                 return value_t<E>(0);
             } else {
                 inc_counter("impl:cublas");
                 return impl::cublas::sum(e);
             }
         } else {
             //Every other selection goes to the standard implementation
             inc_counter("impl:std");
             return impl::standard::sum(e);
         }
     }
 };

 /*!
  * \brief Absolute Sum operation implementation
  */
 struct asum_impl {
     /*!
      * \brief Apply the functor to e
      *
      * Dispatches to the vec, blas, cublas or standard asum depending on the
      * result of select_sum_impl<E>() (the same selector as the plain sum)
      * and increments the matching counter.
      *
      * \param e The expression to reduce
      * \return The value returned by the selected asum implementation
      */
     template <typename E>
     static value_t<E> apply(const E& e) {
         //NOTE(review): constexpr_select is defined elsewhere; presumably it makes the
         //selection a compile-time constant when manual selection is off - confirm
         constexpr_select const auto selected = select_sum_impl<E>();

         if constexpr_select (selected == etl::sum_impl::VEC) {
             inc_counter("impl:vec");
             return impl::vec::asum(e);
         } else if constexpr_select (selected == etl::sum_impl::BLAS) {
             inc_counter("impl:blas");
             return impl::blas::asum(e);
         } else if constexpr_select (selected == etl::sum_impl::CUBLAS) {
             //Only call the CUBLAS kernel for single or double precision GPU computable expressions
             if constexpr (!gpu_computable_single_precision<E> && !gpu_computable_double_precision<E>) {
                 cpp_unreachable("CUBLAS called on invalid types");
                 return value_t<E>(0);
             } else {
                 inc_counter("impl:cublas");
                 return impl::cublas::asum(e);
             }
         } else {
             //Every other selection goes to the standard implementation
             inc_counter("impl:std");
             return impl::standard::asum(e);
         }
     }
 };

 } //end of namespace etl::detail
etl::detail::asum_impl::apply
static value_t< E > apply(const E &e)
Apply the functor to e.
Definition: sum.hpp:167

etl::detail::asum_impl
Absolute Sum operation implementation.
Definition: sum.hpp:162

etl::batch_softmax_impl::STD
Standard implementation.

sum.hpp
BLAS implementation of the "sum" reduction.

etl::vec_enabled
constexpr bool vec_enabled
Indicates if vectorization is available in any format.
Definition: config.hpp:220

etl::bias_add_impl::VEC
VEC implementation.

etl::sum_impl
sum_impl
Enumeration describing the different implementations of sum.
Definition: sum_impl.hpp:20

etl::dot_impl::BLAS
BLAS implementation.

etl::detail::sum_impl::apply
static value_t< E > apply(const E &e)
Apply the functor to e.
Definition: sum.hpp:129

etl::detail
Definition: expression_builder.hpp:699

sum.hpp
Standard implementation of the "sum" reduction.

etl::etl_traits
Traits to get information about ETL types.
Definition: tmp.hpp:68

etl::dot_impl::CUBLAS
CUBLAS implementation.

etl::local_context
context & local_context()
Return the configuration context of the current thread.
Definition: context.hpp:50

etl::cublas_enabled
constexpr bool cublas_enabled
Indicates if the NVIDIA CUBLAS library is available for ETL.
Definition: config.hpp:99

sum.hpp
Unified vectorized implementation of the "sum" reduction.

etl::context::cpu
bool cpu
Force CPU evaluation.
Definition: context.hpp:29

etl::detail::sum_impl
Sum operation implementation.
Definition: sum.hpp:124

sum.hpp
CUBLAS implementation of the "sum" reduction.

etl::detail::select_default_sum_impl
constexpr etl::sum_impl select_default_sum_impl(bool no_gpu)
Select the sum implementation for an expression of type E.
Definition: sum.hpp:42

etl::cblas_enabled
constexpr bool cblas_enabled
Indicates if a BLAS library is available for ETL.
Definition: config.hpp:76

etl::detail::select_sum_impl
constexpr etl::sum_impl select_sum_impl()
Select the sum implementation for an expression of type E.
Definition: sum.hpp:115

etl::value_t
typename decay_traits< E >::value_type value_t
Traits to extract the value type out of an ETL type.
Definition: tmp.hpp:81

etl::inc_counter
void inc_counter([[maybe_unused]] const char *name)
Increase the given counter.
Definition: counters.hpp:25