wichtounet/etl/gemm__expr_8hpp_source.html

 //=======================================================================
 // Copyright (c) 2014-2023 Baptiste Wicht
 // Distributed under the terms of the MIT License.
 // (See accompanying file LICENSE or copy at
 //  http://opensource.org/licenses/MIT)
 //=======================================================================

 #pragma once

 #include "etl/expr/base_temporary_expr.hpp"

 //The implementations
 #include "etl/impl/std/gemm.hpp"
 #include "etl/impl/std/strassen_mmul.hpp"
 #include "etl/impl/blas/gemm.hpp"
 #include "etl/impl/vec/gemm.hpp"
 #include "etl/impl/vec/gemm_conv.hpp"
 #include "etl/impl/cublas/gemm.hpp"

 namespace etl {

 /*!
  * \brief Temporary expression representing the matrix-matrix multiplication alpha * A * B.
  *
  * When Strassen is false, assign_to dispatches to one of the STD / VEC / BLAS / CUBLAS
  * kernels through apply_raw. When Strassen is true, assign_to calls
  * etl::impl::standard::strassen_mm_mul instead.
  *
  * \tparam A The type of the left-hand side expression
  * \tparam B The type of the right-hand side expression
  * \tparam Strassen Indicates if assign_to must use the Strassen implementation
  */
 template <etl_expr A, etl_expr B, bool Strassen>
 struct gemm_expr : base_temporary_expr_bin<gemm_expr<A, B, Strassen>, A, B> {
     using value_type  = value_t<A>;                              ///< The value type of the expression (taken from A)
     using this_type   = gemm_expr<A, B, Strassen>;               ///< The type of this expression
     using base_type   = base_temporary_expr_bin<this_type, A, B>; ///< The base type
     using left_traits = decay_traits<A>;                         ///< The traits of the left-hand side expression

     /*!
      * \brief The storage order of the expression, taken from the left-hand side expression
      */
     static constexpr auto storage_order = left_traits::storage_order;

     /*!
      * \brief True when CUBLAS is enabled and A and B are homogeneous
      */
     static constexpr bool gpu_computable = cublas_enabled && all_homogeneous<A, B>;

     const value_type alpha; ///< The scaling factor forwarded to the kernels in apply_raw

     /*!
      * \brief Construct a new expression with alpha = 1
      * \param a The left-hand side sub expression
      * \param b The right-hand side sub expression
      */
     explicit gemm_expr(A a, B b) : base_type(a, b), alpha(1) {
         //Nothing else to init
     }

     /*!
      * \brief Construct a new expression with an explicit scaling factor
      * \param a The left-hand side sub expression
      * \param b The right-hand side sub expression
      * \param alpha The scaling factor
      */
     explicit gemm_expr(A a, B b, value_type alpha) : base_type(a, b), alpha(alpha) {
         //Nothing else to init
     }

     /*!
      * \brief Assert that the dimensions of a, b and c are compatible for c = a * b
      *
      * When A, B and C are all fast, the check is a static_assert on the compile-time
      * dimensions; otherwise it is a cpp_assert on the runtime dimensions.
      *
      * \param a The left-hand side matrix
      * \param b The right-hand side matrix
      * \param c The output matrix
      */
     template <etl_expr C>
     static void check([[maybe_unused]] const A& a, [[maybe_unused]] const B& b, [[maybe_unused]] const C& c) {
         if constexpr (all_fast<A, B, C>) {
             static_assert(dim<1, A>() == dim<0, B>()         //interior dimensions
                               && dim<0, A>() == dim<0, C>()  //exterior dimension 1
                               && dim<1, B>() == dim<1, C>(), //exterior dimension 2
                           "Invalid sizes for multiplication");
         } else {
             cpp_assert(dim<1>(a) == dim<0>(b)         //interior dimensions
                            && dim<0>(a) == dim<0>(c)  //exterior dimension 1
                            && dim<1>(b) == dim<1>(c), //exterior dimension 2
                        "Invalid sizes for multiplication");
         }
     }

     // Assignment functions

     /*!
      * \brief Select an implementation of GEMM without looking at the local context
      *
      * The order of preference is CUBLAS, then BLAS, then VEC, and finally STD.
      * CUBLAS, BLAS and VEC all require AA, BB and C to be homogeneous.
      *
      * \param no_gpu If true, CUBLAS is never selected
      * \return The selected implementation
      */
     template <typename AA, typename BB, typename C>
     static constexpr gemm_impl select_default_gemm_impl(bool no_gpu) {
         //Note: these booleans are known at compile time, so the conditions below simplify a lot
         constexpr bool blas   = cblas_enabled;
         constexpr bool cublas = cublas_enabled;
         constexpr bool homo   = all_homogeneous<AA, BB, C>;

         if (cublas && homo && !no_gpu) {
             return gemm_impl::CUBLAS;
         } else if (blas && homo) {
             return gemm_impl::BLAS;
         }

         if (vec_enabled && vectorize_impl && homo && all_vectorizable_t<vector_mode, AA, BB, C>) {
             return gemm_impl::VEC;
         }

         return gemm_impl::STD;
     }

 #ifdef ETL_MANUAL_SELECT

     /*!
      * \brief Select the implementation of GEMM, honouring the local context
      *
      * The default is computed with select_default_gemm_impl, with the GPU disabled
      * when local_context().cpu is set. If an implementation is forced through
      * local_context().gemm_selector, it is returned when usable for these types;
      * otherwise a message is printed on std::cerr and the default is returned.
      *
      * \return The selected implementation
      */
     template <typename AA, typename BB, typename C>
     static inline gemm_impl select_gemm_impl() {
         auto def = select_default_gemm_impl<AA, BB, C>(local_context().cpu);

         if (local_context().gemm_selector.forced) {
             auto forced = local_context().gemm_selector.impl;

             switch (forced) {
                 //CUBLAS cannot always be used
                 case gemm_impl::CUBLAS:
                     if (!cublas_enabled || !all_homogeneous<AA, BB, C> || local_context().cpu) { //COVERAGE_EXCLUDE_LINE
                         std::cerr << "Forced selection to CUBLAS gemm implementation, but not possible for this expression"
                                   << std::endl; //COVERAGE_EXCLUDE_LINE
                         return def;             //COVERAGE_EXCLUDE_LINE
                     }                           //COVERAGE_EXCLUDE_LINE

                     return forced;

                 //BLAS cannot always be used
                 case gemm_impl::BLAS:
                     if (!cblas_enabled || !all_homogeneous<AA, BB, C>) {                                                                //COVERAGE_EXCLUDE_LINE
                         std::cerr << "Forced selection to BLAS gemm implementation, but not possible for this expression" << std::endl; //COVERAGE_EXCLUDE_LINE
                         return def;                                                                                                     //COVERAGE_EXCLUDE_LINE
                     }                                                                                                                   //COVERAGE_EXCLUDE_LINE

                     return forced;

                 //VEC cannot always be used
                 case gemm_impl::VEC:
                     if (!vec_enabled || !vectorize_impl || !all_vectorizable_t<vector_mode, AA, BB, C> || !all_homogeneous<AA, BB, C>) {                  //COVERAGE_EXCLUDE_LINE
                         std::cerr << "Forced selection to VEC gemm implementation, but not possible for this expression" << std::endl; //COVERAGE_EXCLUDE_LINE
                         return def;                                                                                                    //COVERAGE_EXCLUDE_LINE
                     }                                                                                                                  //COVERAGE_EXCLUDE_LINE

                     return forced;

                 //In other cases, simply use the forced impl
                 default:
                     return forced;
             }
         }

         return def;
     }

 #else

     /*!
      * \brief Select the implementation of GEMM
      *
      * Without ETL_MANUAL_SELECT, this is the default selection with the GPU allowed.
      *
      * \return The selected implementation
      */
     template <typename AA, typename BB, typename C>
     static constexpr gemm_impl select_gemm_impl() {
         return select_default_gemm_impl<AA, BB, C>(false);
     }

 #endif

     /*!
      * \brief Compute c = alpha * a * b with the selected implementation
      *
      * The dispatch is done on two axes: whether AA and/or BB are transpose
      * expressions, and which implementation was selected. The STD kernel always
      * receives the expressions as they are. For the other kernels, a transposed
      * operand is generally unwrapped with .a() and the matching _nt / _tn / _tt
      * kernel variant is called.
      *
      * NOTE(review): in the transposed/transposed case, the VEC branch forwards the
      * transpose expressions themselves to vec::gemm, unlike the BLAS and CUBLAS
      * branches which unwrap both operands and call gemm_tt. Confirm this is intended.
      *
      * \param a The left-hand side expression
      * \param b The right-hand side expression
      * \param c The output expression
      */
     template <typename AA, typename BB, typename C>
     void apply_raw(AA&& a, BB&& b, C&& c) const {
         constexpr_select auto impl = select_gemm_impl<AA, BB, C>();

         // clang-format off

         // Case 1: trans(A) * trans(B)
         if constexpr (is_transpose_expr<AA>&& is_transpose_expr<BB>) {
             if constexpr_select(impl == gemm_impl::STD) {
                 inc_counter("impl:std");
                 etl::impl::standard::mm_mul(smart_forward(a), smart_forward(b), c, alpha);
             } else if constexpr_select(impl == gemm_impl::VEC) {
                 inc_counter("impl:vec");
                 etl::impl::vec::gemm(smart_forward(a), smart_forward(b), c, alpha);
             } else if constexpr_select(impl == gemm_impl::BLAS) {
                 inc_counter("impl:blas");
                 etl::impl::blas::gemm_tt(smart_forward(a.a()), smart_forward(b.a()), c, alpha);
             } else if constexpr_select(impl == gemm_impl::CUBLAS) {
                 inc_counter("impl:cublas");
                 etl::impl::cublas::gemm_tt(smart_forward_gpu(a.a()), smart_forward_gpu(b.a()), c, alpha);
             } else {
                 cpp_unreachable("invalid selection of gemm");
             }
         // Case 2: A * trans(B)
         } else if constexpr (!is_transpose_expr<AA> && is_transpose_expr<BB>) {
             if constexpr_select(impl == gemm_impl::STD) {
                 inc_counter("impl:std");
                 etl::impl::standard::mm_mul(smart_forward(a), smart_forward(b), c, alpha);
             } else if constexpr_select(impl == gemm_impl::VEC) {
                 inc_counter("impl:vec");
                 etl::impl::vec::gemm_nt(smart_forward(a), smart_forward(b.a()), c, alpha);
             } else if constexpr_select(impl == gemm_impl::BLAS) {
                 inc_counter("impl:blas");
                 etl::impl::blas::gemm_nt(smart_forward(a), smart_forward(b.a()), c, alpha);
             } else if constexpr_select(impl == gemm_impl::CUBLAS) {
                 inc_counter("impl:cublas");
                 etl::impl::cublas::gemm_nt(smart_forward_gpu(a), smart_forward_gpu(b.a()), c, alpha);
             } else {
                 cpp_unreachable("Invalid selection of gemm");
             }
         // Case 3: trans(A) * B
         } else if constexpr (is_transpose_expr<AA> && !is_transpose_expr<BB>) {
             if constexpr_select(impl == gemm_impl::STD) {
                 inc_counter("impl:std");
                 etl::impl::standard::mm_mul(smart_forward(a), smart_forward(b), c, alpha);
             } else if constexpr_select(impl == gemm_impl::VEC) {
                 inc_counter("impl:vec");
                 etl::impl::vec::gemm_tn(smart_forward(a.a()), smart_forward(b), c, alpha);
             } else if constexpr_select(impl == gemm_impl::BLAS) {
                 inc_counter("impl:blas");
                 etl::impl::blas::gemm_tn(smart_forward(a.a()), smart_forward(b), c, alpha);
             } else if constexpr_select(impl == gemm_impl::CUBLAS) {
                 inc_counter("impl:cublas");
                 etl::impl::cublas::gemm_tn(smart_forward_gpu(a.a()), smart_forward_gpu(b), c, alpha);
             } else {
                 cpp_unreachable("Invalid selection of gemm");
             }
         // Case 4: A * B
         } else /*if constexpr (!is_transpose_expr<AA> && !is_transpose_expr<BB>)*/ {
             if constexpr_select(impl == gemm_impl::STD) {
                 inc_counter("impl:std");
                 etl::impl::standard::mm_mul(smart_forward(a), smart_forward(b), c, alpha);
             } else if constexpr_select(impl == gemm_impl::VEC) {
                 inc_counter("impl:vec");
                 etl::impl::vec::gemm(smart_forward(a), smart_forward(b), c, alpha);
             } else if constexpr_select(impl == gemm_impl::BLAS) {
                 inc_counter("impl:blas");
                 etl::impl::blas::gemm(smart_forward(a), smart_forward(b), c, alpha);
             } else if constexpr_select(impl == gemm_impl::CUBLAS) {
                 inc_counter("impl:cublas");
                 etl::impl::cublas::gemm(smart_forward_gpu(a), smart_forward_gpu(b), c, alpha);
             } else {
                 cpp_unreachable("Invalid selection of gemm");
             }
         }

         // clang-format on
     }

     /*!
      * \brief Evaluate the multiplication into c
      *
      * The dimensions are validated with check() first.
      *
      * NOTE(review): on the Strassen path, alpha is not forwarded to strassen_mm_mul.
      *
      * \param c The output expression
      */
     template <etl_expr C>
     void assign_to(C&& c) const {
         inc_counter("temp:assign");

         auto& a = this->a();
         auto& b = this->b();

         check(a, b, c);

         if constexpr (!Strassen) {
             apply_raw(a, b, c);
         } else {
             etl::impl::standard::strassen_mm_mul(smart_forward(a), smart_forward(b), c);
         }
     }

     /*!
      * \brief Add to the given left-hand-side expression (delegates to std_add_evaluate)
      * \param lhs The expression to which to add
      */
     template <etl_expr L>
     void assign_add_to(L&& lhs) const {
         std_add_evaluate(*this, lhs);
     }

     /*!
      * \brief Subtract from the given left-hand-side expression (delegates to std_sub_evaluate)
      * \param lhs The expression from which to subtract
      */
     template <etl_expr L>
     void assign_sub_to(L&& lhs) const {
         std_sub_evaluate(*this, lhs);
     }

     /*!
      * \brief Multiply the given left-hand-side expression (delegates to std_mul_evaluate)
      * \param lhs The expression to multiply
      */
     template <etl_expr L>
     void assign_mul_to(L&& lhs) const {
         std_mul_evaluate(*this, lhs);
     }

     /*!
      * \brief Divide the given left-hand-side expression (delegates to std_div_evaluate)
      * \param lhs The expression to divide
      */
     template <etl_expr L>
     void assign_div_to(L&& lhs) const {
         std_div_evaluate(*this, lhs);
     }

     /*!
      * \brief Modulo the given left-hand-side expression (delegates to std_mod_evaluate)
      * \param lhs The expression to modulo
      */
     template <etl_expr L>
     void assign_mod_to(L&& lhs) const {
         std_mod_evaluate(*this, lhs);
     }

     /*!
      * \brief Print a representation of the expression, in the form "a * b", on the given stream
      * \param os The output stream
      * \param expr The expression to print
      * \return the output stream
      */
     friend std::ostream& operator<<(std::ostream& os, const gemm_expr& expr) {
         return os << expr._a << " * " << expr._b;
     }
 };

 /*!
  * \brief Traits specialization for gemm_expr
  *
  * The result of the multiplication is a 2D expression whose first dimension is
  * the first dimension of A and whose second dimension is the second dimension of B.
  */
 template <typename A, typename B, bool Strassen>
 struct etl_traits<etl::gemm_expr<A, B, Strassen>> {
     using expr_t       = etl::gemm_expr<A, B, Strassen>; ///< The expression type
     using left_expr_t  = std::decay_t<A>;                ///< The left sub expression type
     using right_expr_t = std::decay_t<B>;                ///< The right sub expression type
     using left_traits  = etl_traits<left_expr_t>;        ///< The left sub traits
     using right_traits = etl_traits<right_expr_t>;       ///< The right sub traits
     using value_type   = value_t<A>;                     ///< The value type of the expression

     // Nature of the expression
     static constexpr bool is_etl         = true;
     static constexpr bool is_temporary   = true;
     static constexpr bool is_direct      = true;
     static constexpr bool is_thread_safe = true;
     static constexpr bool is_aligned     = true;

     // What the expression is not
     static constexpr bool is_transformer = false;
     static constexpr bool is_view        = false;
     static constexpr bool is_magic_view  = false;
     static constexpr bool is_linear      = false;
     static constexpr bool is_value       = false;
     static constexpr bool is_generator   = false;
     static constexpr bool is_padded      = false;

     // Properties derived from the sub expressions
     static constexpr bool is_fast        = left_traits::is_fast && right_traits::is_fast;
     static constexpr order storage_order = left_traits::storage_order;
     static constexpr bool gpu_computable = is_gpu_t<value_type> && cuda_enabled;

     /*!
      * \brief Indicates if the expression is vectorizable using the given vector mode
      * \tparam V The vector mode
      */
     template <vector_mode_t V>
     static constexpr bool vectorizable = true;

     /*!
      * \brief Returns the DDth dimension of the expression
      * \return the first dimension of A when DD is 0, the second dimension of B otherwise
      */
     template <size_t DD>
     static constexpr size_t dim() {
         if (DD == 0) {
             return decay_traits<A>::template dim<0>();
         }

         return decay_traits<B>::template dim<1>();
     }

     /*!
      * \brief Returns the dth dimension of the expression
      * \param e The expression
      * \param d The dimension to get
      * \return the first dimension of e._a when d is 0, the second dimension of e._b otherwise
      */
     static size_t dim(const expr_t& e, size_t d) {
         return d == 0 ? etl::dim(e._a, 0) : etl::dim(e._b, 1);
     }

     /*!
      * \brief Returns the size of the expression
      * \param e The expression
      * \return the product of the two dimensions of the result
      */
     static size_t size(const expr_t& e) {
         const size_t rows    = etl::dim(e._a, 0);
         const size_t columns = etl::dim(e._b, 1);

         return rows * columns;
     }

     /*!
      * \brief Returns the size of the expression, from the compile-time dimensions
      * \return the product of the two dimensions of the result
      */
     static constexpr size_t size() {
         constexpr size_t rows    = decay_traits<A>::template dim<0>();
         constexpr size_t columns = decay_traits<B>::template dim<1>();

         return rows * columns;
     }

     /*!
      * \brief Returns the number of dimensions of the expression
      * \return always 2
      */
     static constexpr size_t dimensions() {
         return 2;
     }

     /*!
      * \brief Estimate the complexity of computation
      * \return always -1
      */
     static constexpr int complexity() noexcept {
         return -1;
     }
 };

 // Operators

 /*!
  * \brief Builds an expression representing the matrix-matrix multiplication of a and b
  * \param a The left-hand side matrix
  * \param b The right-hand side matrix
  * \return a non-Strassen gemm_expr over a and b
  */
 template <etl_2d A, etl_2d B>
 auto operator*(A&& a, B&& b) {
     using product_t = gemm_expr<detail::build_type<A>, detail::build_type<B>, false>;

     return product_t{a, b};
 }

 /*!
  * \brief Multiply two matrices together
  * \param a The left-hand side matrix
  * \param b The right-hand side matrix
  * \return a non-Strassen gemm_expr over a and b
  */
 template <etl_2d A, etl_2d B>
 auto mul(A&& a, B&& b) {
     using product_t = gemm_expr<detail::build_type<A>, detail::build_type<B>, false>;

     return product_t{a, b};
 }

 // alpha * gemm operators

 /*!
  * \brief Scale a matrix-matrix multiplication expression by a scalar
  *
  * The scalar is folded into the scaling factor of the expression instead of
  * building a new scalar-multiplication expression. The factor already carried by
  * gemm is preserved, so that k * (j * (a * b)) is scaled by k * j and not only by k.
  *
  * \param alpha The scalar factor
  * \param gemm The multiplication expression to scale
  * \return a gemm_expr over the same operands, scaled by alpha * gemm.alpha
  */
 template <etl_2d A, etl_2d B>
 gemm_expr<A, B, false> operator*(value_t<A> alpha, gemm_expr<A, B, false>&& gemm) {
     // Computed in a named variable so that integral promotion of small value types
     // cannot trigger a narrowing error in the brace-initialization below
     const value_t<A> combined_alpha = alpha * gemm.alpha;

     return gemm_expr<A, B, false>{gemm.a(), gemm.b(), combined_alpha};
 }

 /*!
  * \brief Scale a matrix-matrix multiplication expression by a scalar
  *
  * The scalar is folded into the scaling factor of the expression instead of
  * building a new scalar-multiplication expression. The factor already carried by
  * gemm is preserved, so that mul(k, mul(j, mul(a, b))) is scaled by k * j and not only by k.
  *
  * \param alpha The scalar factor
  * \param gemm The multiplication expression to scale
  * \return a gemm_expr over the same operands, scaled by alpha * gemm.alpha
  */
 template <etl_2d A, etl_2d B>
 gemm_expr<A, B, false> mul(value_t<A> alpha, gemm_expr<A, B, false>&& gemm) {
     // Computed in a named variable so that integral promotion of small value types
     // cannot trigger a narrowing error in the brace-initialization below
     const value_t<A> combined_alpha = alpha * gemm.alpha;

     return gemm_expr<A, B, false>{gemm.a(), gemm.b(), combined_alpha};
 }

 // Variant with three parameters

 /*!
  * \brief Multiply two matrices together and store the result in c
  * \param a The left-hand side matrix
  * \param b The right-hand side matrix
  * \param c The matrix receiving the result of a * b
  * \return c, after the assignment (the return type is deduced with plain auto, hence by value)
  */
 template <etl_2d A, etl_2d B, etl_2d C>
 auto mul(A&& a, B&& b, C&& c) {
     c = mul(a, b);
     return c;
 }

 // Strassen variants

 /*!
  * \brief Multiply two matrices together using Strassen
  * \param a The left-hand side matrix
  * \param b The right-hand side matrix
  * \return a gemm_expr over a and b with the Strassen flag set
  */
 template <etl_2d A, etl_2d B>
 auto strassen_mul(A&& a, B&& b) {
     using product_t = gemm_expr<detail::build_type<A>, detail::build_type<B>, true>;

     return product_t{a, b};
 }

 /*!
  * \brief Multiply two matrices together using Strassen and store the result in c
  * \param a The left-hand side matrix
  * \param b The right-hand side matrix
  * \param c The matrix receiving the result of a * b
  * \return c, after the assignment (the return type is deduced with plain auto, hence by value)
  */
 template <etl_2d A, etl_2d B, etl_2d C>
 auto strassen_mul(A&& a, B&& b, C&& c) {
     // Build the Strassen expression: calling mul() here would silently
     // evaluate with the regular GEMM dispatch instead
     c = strassen_mul(a, b);
     return c;
 }

 } //end of namespace etl
etl::gemm_expr::assign_to
void assign_to(C &&c) const
Assign to a matrix of the same storage order.
Definition: gemm_expr.hpp:257

etl::gemm_expr
A matrix-matrix multiplication (GEMM) expression.
Definition: gemm_expr.hpp:27

etl::gemm_impl
gemm_impl
Enumeration describing the different matrix-matrix multiplication implementations.
Definition: gemm_impl.hpp:21

etl::base_temporary_expr_bin::_b
B _b
The sub expression reference.
Definition: base_temporary_expr.hpp:534

etl::mul
auto mul(A &&a, B &&b)
Multiply two matrices together.
Definition: gemm_expr.hpp:442

etl::batch_softmax_impl::STD
Standard implementation.

etl::etl_traits< etl::gemm_expr< A, B, Strassen > >::dim
static constexpr size_t dim()
Returns the DDth dimension of the expression.
Definition: gemm_expr.hpp:370

etl::gemm_expr::assign_mul_to
void assign_mul_to(L &&lhs) const
Multiply the given left-hand-side expression.
Definition: gemm_expr.hpp:295

etl::is_magic_view
constexpr bool is_magic_view
Traits indicating if the given ETL type is a magic view expression.
Definition: traits.hpp:311

etl::etl_traits< etl::gemm_expr< A, B, Strassen > >::dimensions
static constexpr size_t dimensions()
Returns the number of dimensions of the expression.
Definition: gemm_expr.hpp:409

etl::gemm_expr::assign_mod_to
void assign_mod_to(L &&lhs) const
Modulo the given left-hand-side expression.
Definition: gemm_expr.hpp:313

etl::base_temporary_expr_bin::_a
A _a
The sub expression reference.
Definition: base_temporary_expr.hpp:533

etl::gemm_expr::assign_add_to
void assign_add_to(L &&lhs) const
Add to the given left-hand-side expression.
Definition: gemm_expr.hpp:277

etl::etl_traits< etl::gemm_expr< A, B, Strassen > >::size
static size_t size(const expr_t &e)
Returns the size of the expression.
Definition: gemm_expr.hpp:393

etl::strassen_mul
auto strassen_mul(A &&a, B &&b)
Multiply two matrices together using strassen.
Definition: gemm_expr.hpp:494

etl::vectorize_impl
constexpr bool vectorize_impl
Indicates if the implementations can be automatically vectorized by ETL.
Definition: config.hpp:35

etl::vec_enabled
constexpr bool vec_enabled
Indicates if vectorization is available in any format.
Definition: config.hpp:220

etl::order
order
Storage order of a matrix.
Definition: order.hpp:15

etl::cuda_enabled
constexpr bool cuda_enabled
Indicates if CUDA is available.
Definition: config.hpp:94

etl::gemm_expr::storage_order
static constexpr auto storage_order
The sub storage order.
Definition: gemm_expr.hpp:33

etl::base_temporary_expr_bin
Abstract base class for temporary binary expression.
Definition: base_temporary_expr.hpp:529

etl::bias_add_impl::VEC
VEC implementation.

etl::operator*
auto operator*(LE &&lhs, RE rhs)
Builds an expression representing the multiplication of lhs and rhs (scalar)
Definition: binary_expression_builder.hpp:149

etl::gemm_expr::select_gemm_impl
static constexpr gemm_impl select_gemm_impl()
Select the best implementation of GEMM.
Definition: gemm_expr.hpp:165

etl::dot_impl::BLAS
BLAS implementation.

etl::base_temporary_expr_bin< gemm_expr< A, B, Strassen >, A, B >::b
std::add_lvalue_reference_t< B > b()
Returns the sub expression.
Definition: base_temporary_expr.hpp:593

etl::gemm_expr::apply_raw
void apply_raw(AA &&a, BB &&b, C &&c) const
Compute C = alpha * op(A) * op(B), where op is the identity or a transposition depending on the operand types.
Definition: gemm_expr.hpp:178

etl::gemm_expr::check
static void check([[maybe_unused]] const A &a, [[maybe_unused]] const B &b, [[maybe_unused]] const C &c)
Assert for the validity of the matrix-matrix multiplication operation.
Definition: gemm_expr.hpp:66

etl::is_fast
constexpr bool is_fast
Traits to test if the given ETL expression type is fast (sizes known at compile-time) ...
Definition: traits.hpp:588

etl::etl_traits
Traits to get information about ETL types.
Definition: tmp.hpp:68

etl::dot_impl::CUBLAS
CUBLAS implementation.

etl
Root namespace for the ETL library.
Definition: adapter.hpp:15

etl::local_context
context & local_context()
Return the configuration context of the current thread.
Definition: context.hpp:50

etl::etl_traits< etl::gemm_expr< A, B, Strassen > >::value_type
value_t< A > value_type
The value type of the expression.
Definition: gemm_expr.hpp:339

etl::dim
auto dim(E &&value, size_t i) -> detail::identity_helper< E, dim_view< detail::build_identity_type< E >, D >>
Return a view representing the ith Dth dimension.
Definition: view_expression_builder.hpp:25

etl::gemm_expr::assign_div_to
void assign_div_to(L &&lhs) const
Divide the given left-hand-side expression.
Definition: gemm_expr.hpp:304

etl::gemm_expr::gemm_expr
gemm_expr(A a, B b)
Construct a new expression.
Definition: gemm_expr.hpp:47

etl::cublas_enabled
constexpr bool cublas_enabled
Indicates if the NVIDIA CUBLAS library is available for ETL.
Definition: config.hpp:99

etl::gemm_expr::gemm_expr
gemm_expr(A a, B b, value_type alpha)
Construct a new expression.
Definition: gemm_expr.hpp:55

etl::detail::build_type
std::conditional_t< is_etl_value< T >, const std::decay_t< T > &, std::decay_t< T > > build_type
Helper to build the type for a sub expression.
Definition: expression_helpers.hpp:24

etl::context::cpu
bool cpu
Force CPU evaluation.
Definition: context.hpp:29

etl::etl_traits< etl::gemm_expr< A, B, Strassen > >::size
static constexpr size_t size()
Returns the size of the expression.
Definition: gemm_expr.hpp:401

etl::std_mod_evaluate
void std_mod_evaluate(Expr &&expr, Result &&result)
Compound modulo evaluation of the expr into result.
Definition: evaluator.hpp:1271

etl::gemm_expr::assign_sub_to
void assign_sub_to(L &&lhs) const
Sub from the given left-hand-side expression.
Definition: gemm_expr.hpp:286

etl::std_mul_evaluate
void std_mul_evaluate(Expr &&expr, Result &&result)
Compound multiply evaluation of the expr into result.
Definition: evaluator.hpp:1233

etl::is_transformer
constexpr bool is_transformer
Traits indicating if the given ETL type is a transformer expression.
Definition: traits.hpp:297

etl::etl_traits< etl::gemm_expr< A, B, Strassen > >::left_expr_t
std::decay_t< A > left_expr_t
The left sub expression type.
Definition: gemm_expr.hpp:335

etl::smart_forward_gpu
decltype(auto) smart_forward_gpu(E &expr)
Smart forwarding for a temporary expression that will be computed in GPU.
Definition: helpers.hpp:343

etl::etl_traits< etl::gemm_expr< A, B, Strassen > >::complexity
static constexpr int complexity() noexcept
Estimate the complexity of computation.
Definition: gemm_expr.hpp:417

etl::is_view
constexpr bool is_view
Traits indicating if the given ETL type is a view expression.
Definition: traits.hpp:304

etl::etl_traits< etl::gemm_expr< A, B, Strassen > >::right_expr_t
std::decay_t< B > right_expr_t
The right sub expression type.
Definition: gemm_expr.hpp:336

etl::gemm_expr::value_type
value_t< A > value_type
The type of value of the expression.
Definition: gemm_expr.hpp:28

etl::etl_traits::is_fast
static constexpr bool is_fast
Indicates if T is a fast structure.
Definition: traits_base.hpp:25

etl::gemm_expr::operator<<
friend std::ostream & operator<<(std::ostream &os, const gemm_expr &expr)
Print a representation of the expression on the given stream.
Definition: gemm_expr.hpp:323

etl::std_sub_evaluate
void std_sub_evaluate(Expr &&expr, Result &&result)
Compound subtract evaluation of the expr into result.
Definition: evaluator.hpp:1214

etl::smart_forward
decltype(auto) smart_forward(E &expr)
Smart forwarding for a temporary expression.
Definition: helpers.hpp:323

etl::cblas_enabled
constexpr bool cblas_enabled
Indicates if a BLAS library is available for ETL.
Definition: config.hpp:76

etl::is_thread_safe
constexpr bool is_thread_safe
Traits to test if the given ETL expression type is thread safe.
Definition: traits.hpp:687

etl::value_t
typename decay_traits< E >::value_type value_t
Traits to extract the value type out of an ETL type.
Definition: tmp.hpp:81

etl::std_div_evaluate
void std_div_evaluate(Expr &&expr, Result &&result)
Compound divide evaluation of the expr into result.
Definition: evaluator.hpp:1252

etl::gemm_expr::gpu_computable
static constexpr bool gpu_computable
Indicates if the temporary expression can be directly evaluated using only GPU.
Definition: gemm_expr.hpp:39

etl::inc_counter
void inc_counter([[maybe_unused]] const char *name)
Increase the given counter.
Definition: counters.hpp:25

etl::gemm_expr::select_default_gemm_impl
static constexpr gemm_impl select_default_gemm_impl(bool no_gpu)
Select an implementation of GEMM, not considering local context.
Definition: gemm_expr.hpp:87

etl::base_temporary_expr_bin< gemm_expr< A, B, Strassen >, A, B >::a
std::add_lvalue_reference_t< A > a()
Returns the sub expression.
Definition: base_temporary_expr.hpp:577

etl::gemm_expr::alpha
const value_type alpha
The alpha multiplicator.
Definition: gemm_expr.hpp:41

etl::etl_traits< etl::gemm_expr< A, B, Strassen > >::dim
static size_t dim(const expr_t &e, size_t d)
Returns the dth dimension of the expression.
Definition: gemm_expr.hpp:380

etl::std_add_evaluate
void std_add_evaluate(Expr &&expr, Result &&result)
Compound add evaluation of the expr into result.
Definition: evaluator.hpp:1195