Expression Templates Library (ETL)
gevm_expr.hpp
1 //=======================================================================
2 // Copyright (c) 2014-2023 Baptiste Wicht
3 // Distributed under the terms of the MIT License.
4 // (See accompanying file LICENSE or copy at
5 // http://opensource.org/licenses/MIT)
6 //=======================================================================
7 
8 #pragma once
9 
10 #include "etl/expr/base_temporary_expr.hpp"
11 
12 //The implementations
13 #include "etl/impl/std/gemm.hpp"
14 #include "etl/impl/blas/gemm.hpp"
15 #include "etl/impl/vec/gevm.hpp"
16 #include "etl/impl/vec/gemm_conv.hpp"
17 #include "etl/impl/cublas/gemm.hpp"
18 
19 namespace etl {
20 
/*!
 * \brief Temporary expression representing the product a * b of a left
 * operand a and a right operand b (printed as "a * b").
 *
 * For an output c, check() requires dim<0>(a) == dim<0>(b) and
 * dim<1>(b) == dim<0>(c).
 *
 * NOTE(review): this listing skips original lines 27-30; the aliases
 * base_type and left_traits used below are presumably declared there --
 * confirm against the real header.
 *
 * \tparam A The type of the left operand
 * \tparam B The type of the right operand
 */
25 template <etl_expr A, etl_expr B>
26 struct gevm_expr : base_temporary_expr_bin<gevm_expr<A, B>, A, B> {
31 
    /*!
     * \brief Storage order of the expression, taken from left_traits.
     */
32  static constexpr auto storage_order = left_traits::storage_order;
33 
    /*!
     * \brief True when CUBLAS is enabled and all_homogeneous<A, B> holds.
     */
38  static constexpr bool gpu_computable = cublas_enabled && all_homogeneous<A, B>;
39 
    /*!
     * \brief Construct the expression from its two operands, which are
     * handed to the base class.
     * \param a The left operand
     * \param b The right operand
     */
44  explicit gevm_expr(A a, B b) : base_type(a, b) {
45  //Nothing else to init
46  }
47 
    /*!
     * \brief Validate the operand and output sizes of the multiplication.
     *
     * When A, B and C are all fast, the sizes are verified at compile time
     * with static_assert; otherwise they are verified with cpp_assert on the
     * given objects.
     *
     * \param a The left operand
     * \param b The right operand
     * \param c The output expression
     */
54  template <etl_expr C>
55  static void check([[maybe_unused]] const A& a, [[maybe_unused]] const B& b, [[maybe_unused]] const C& c) {
56  if constexpr (all_fast<A, B, C>) {
57  static_assert(dim<0, A>() == dim<0, B>() //first dimension of a must match first dimension of b
58  && dim<1, B>() == dim<0, C>(), //second dimension of b must match first dimension of c
59  "Invalid sizes for multiplication");
60  } else {
61  cpp_assert(dim<0>(a) == dim<0>(b) //first dimension of a must match first dimension of b
62  && dim<1>(b) == dim<0>(c), //second dimension of b must match first dimension of c
63  "Invalid sizes for multiplication");
64  }
65  }
66 
67  // Assignment functions
68 
    /*!
     * \brief Select the default implementation, without consulting the
     * local context.
     *
     * Candidates are tried in this order: CUBLAS, VEC, BLAS, then STD as the
     * unconditional fallback. Every non-STD candidate requires
     * all_homogeneous<A, B, C>.
     *
     * \param no_gpu When true, CUBLAS is never selected
     * \tparam C The type of the output expression
     * \return The selected implementation
     */
73  template <etl_expr C>
74  static constexpr gemm_impl select_default_gevm_impl(bool no_gpu) {
75  constexpr bool vec_possible = vectorize_impl && all_vectorizable_t<vector_mode, A, B, C> && vec_enabled;
76  constexpr bool homo = all_homogeneous<A, B, C>;
77 
78  if (cublas_enabled && homo && !no_gpu) {
79  return gemm_impl::CUBLAS;
80  }
81 
82  if (vec_possible && homo) {
83  return gemm_impl::VEC;
84  }
85 
86  if (cblas_enabled && homo) {
87  return gemm_impl::BLAS;
88  }
89 
90  return gemm_impl::STD;
91  }
92 
93 #ifdef ETL_MANUAL_SELECT
94 
    /*!
     * \brief Select the implementation, honouring a selection forced through
     * local_context().gemm_selector.
     *
     * A forced CUBLAS, BLAS or VEC selection is only honoured when its
     * requirements are met; otherwise a message is written to std::cerr and
     * the default selection is returned. Any other forced value is returned
     * unchanged. Without a forced selection, the default selection is used
     * with local_context().cpu passed as the no_gpu flag.
     *
     * \tparam C The type of the output expression
     * \return The selected implementation
     */
99  template <etl_expr C>
100  static inline gemm_impl select_gevm_impl() {
101  if (local_context().gemm_selector.forced) {
102  auto forced = local_context().gemm_selector.impl;
103 
104  switch (forced) {
105  //CUBLAS cannot always be used
106  case gemm_impl::CUBLAS:
107  if (!cublas_enabled || !all_homogeneous<A, B, C> || local_context().cpu) { //COVERAGE_EXCLUDE_LINE
108  std::cerr << "Forced selection to CUBLAS gevm implementation, but not possible for this expression"
109  << std::endl; //COVERAGE_EXCLUDE_LINE
110  return select_default_gevm_impl<C>(local_context().cpu); //COVERAGE_EXCLUDE_LINE
111  } //COVERAGE_EXCLUDE_LINE
112 
113  return forced;
114 
115  //BLAS cannot always be used
116  case gemm_impl::BLAS:
117  if (!cblas_enabled || !all_homogeneous<A, B, C>) { //COVERAGE_EXCLUDE_LINE
118  std::cerr << "Forced selection to BLAS gevm implementation, but not possible for this expression" << std::endl; //COVERAGE_EXCLUDE_LINE
119  return select_default_gevm_impl<C>(local_context().cpu); //COVERAGE_EXCLUDE_LINE
120  } //COVERAGE_EXCLUDE_LINE
121 
122  return forced;
123 
124  //VEC cannot always be used
125  case gemm_impl::VEC:
126  if (!vec_enabled || !vectorize_impl || !all_vectorizable<vector_mode, A, B, C> || !all_homogeneous<A, B, C>) { //COVERAGE_EXCLUDE_LINE
127  std::cerr << "Forced selection to VEC gevm implementation, but not possible for this expression" << std::endl; //COVERAGE_EXCLUDE_LINE
128  return select_default_gevm_impl<C>(local_context().cpu); //COVERAGE_EXCLUDE_LINE
129  } //COVERAGE_EXCLUDE_LINE
130 
131  return forced;
132 
133  //In other cases, simply use the forced impl
134  default:
135  return forced;
136  }
137  }
138 
139  return select_default_gevm_impl<C>(local_context().cpu);
140  }
141 
142 #else
143 
    /*!
     * \brief Select the implementation when manual selection is compiled
     * out: always the default selection, with the GPU allowed
     * (no_gpu = false).
     * \tparam C The type of the output expression
     * \return The selected implementation
     */
149  template <etl_expr C>
150  static constexpr gemm_impl select_gevm_impl() {
151  return select_default_gevm_impl<C>(false);
152  }
153 
154 #endif
155 
    /*!
     * \brief Compute the product of a and b into c with the selected
     * implementation.
     *
     * When BB is a transpose expression, the BLAS, VEC and CUBLAS paths call
     * the gevm_t variants on b.a() instead of on b itself; the STD path
     * calls vm_mul on b in both cases. Each path first increments the
     * counter named after the implementation used.
     *
     * NOTE(review): b.a() is presumably the expression being transposed, and
     * constexpr_select is presumably a macro tied to ETL_MANUAL_SELECT --
     * confirm both in the ETL headers.
     *
     * \param a The left operand
     * \param b The right operand
     * \param c The output expression
     */
162  template <etl_expr AA, etl_expr BB, etl_expr C>
163  static void apply_raw(AA&& a, BB&& b, C&& c) {
164  constexpr_select auto impl = select_gevm_impl<C>();
165 
166  // clang-format off
167  if constexpr (is_transpose_expr<BB>) {
168  if constexpr_select (impl == gemm_impl::STD) {
169  inc_counter("impl:std");
170  etl::impl::standard::vm_mul(smart_forward(a), smart_forward(b), c);
171  } else if constexpr_select (impl == gemm_impl::BLAS) {
172  inc_counter("impl:blas");
173  etl::impl::blas::gevm_t(smart_forward(a), smart_forward(b.a()), c);
174  } else if constexpr_select (impl == gemm_impl::VEC) {
175  inc_counter("impl:vec");
176  etl::impl::vec::gevm_t(smart_forward(a), smart_forward(b.a()), c);
177  } else if constexpr_select (impl == gemm_impl::CUBLAS) {
178  inc_counter("impl:cublas");
179  etl::impl::cublas::gevm_t(smart_forward_gpu(a), smart_forward_gpu(b.a()), c);
180  } else {
181  cpp_unreachable("Invalid selection for gevm");
182  }
183  } else {
184  if constexpr_select (impl == gemm_impl::STD) {
185  inc_counter("impl:std");
186  etl::impl::standard::vm_mul(smart_forward(a), smart_forward(b), c);
187  } else if constexpr_select (impl == gemm_impl::BLAS) {
188  inc_counter("impl:blas");
189  etl::impl::blas::gevm(smart_forward(a), smart_forward(b), c);
190  } else if constexpr_select (impl == gemm_impl::VEC) {
191  inc_counter("impl:vec");
192  etl::impl::vec::gevm(smart_forward(a), smart_forward(b), c);
193  } else if constexpr_select (impl == gemm_impl::CUBLAS) {
194  inc_counter("impl:cublas");
195  etl::impl::cublas::gevm(smart_forward_gpu(a), smart_forward_gpu(b), c);
196  } else {
197  cpp_unreachable("Invalid selection for gevm");
198  }
199  }
200  // clang-format on
201  }
202 
    /*!
     * \brief Evaluate the expression into c: increment the "temp:assign"
     * counter, validate the sizes with check(), then compute with
     * apply_raw().
     * \param c The output expression
     */
207  template <etl_expr C>
208  void assign_to(C&& c) const {
209  inc_counter("temp:assign");
210 
211  check(this->a(), this->b(), c);
212 
213  apply_raw(this->a(), this->b(), c);
214  }
215 
    /*!
     * \brief Add to the given left-hand side, delegating to std_add_evaluate.
     * \param lhs The expression to which the result is added
     */
220  template <etl_expr L>
221  void assign_add_to(L&& lhs) const {
222  std_add_evaluate(*this, lhs);
223  }
224 
    /*!
     * \brief Subtract from the given left-hand side, delegating to
     * std_sub_evaluate.
     * \param lhs The expression from which the result is subtracted
     */
229  template <etl_expr L>
230  void assign_sub_to(L&& lhs) const {
231  std_sub_evaluate(*this, lhs);
232  }
233 
    /*!
     * \brief Multiply the given left-hand side, delegating to
     * std_mul_evaluate.
     * \param lhs The expression that is multiplied by the result
     */
238  template <etl_expr L>
239  void assign_mul_to(L&& lhs) const {
240  std_mul_evaluate(*this, lhs);
241  }
242 
    /*!
     * \brief Divide the given left-hand side, delegating to std_div_evaluate.
     * \param lhs The expression that is divided by the result
     */
247  template <etl_expr L>
248  void assign_div_to(L&& lhs) const {
249  std_div_evaluate(*this, lhs);
250  }
251 
    /*!
     * \brief Apply modulo to the given left-hand side, delegating to
     * std_mod_evaluate.
     * \param lhs The expression on which the modulo is applied
     */
256  template <etl_expr L>
257  void assign_mod_to(L&& lhs) const {
258  std_mod_evaluate(*this, lhs);
259  }
260 
    /*!
     * \brief Print the expression on the given stream as "<a> * <b>".
     * \param os The output stream
     * \param expr The expression to print
     * \return The output stream
     */
267  friend std::ostream& operator<<(std::ostream& os, const gevm_expr& expr) {
268  return os << expr._a << " * " << expr._b;
269  }
270 };
271 
/*!
 * \brief Traits specialization for etl::gevm_expr<A, B>.
 *
 * The expression is reported as one-dimensional; every dimension and size
 * query resolves to the second dimension of the right operand.
 *
 * NOTE(review): this listing skips original lines 278 and 281-283; expr_t,
 * left_traits, right_traits and value_type used below are presumably
 * declared there -- confirm against the real header.
 *
 * \tparam A The type of the left operand
 * \tparam B The type of the right operand
 */
276 template <etl_expr A, etl_expr B>
277 struct etl_traits<etl::gevm_expr<A, B>> {
279  using left_expr_t = std::decay_t<A>; ///< The decayed type of the left operand
280  using right_expr_t = std::decay_t<B>; ///< The decayed type of the right operand
284 
    // Classification flags for this expression type. Only is_fast and
    // gpu_computable depend on the operands; all others are fixed.
285  static constexpr bool is_etl = true;
286  static constexpr bool is_transformer = false;
287  static constexpr bool is_view = false;
288  static constexpr bool is_magic_view = false;
    // Fast only when both operands are fast
289  static constexpr bool is_fast = left_traits::is_fast && right_traits::is_fast;
290  static constexpr bool is_linear = false;
291  static constexpr bool is_thread_safe = true;
292  static constexpr bool is_value = false;
293  static constexpr bool is_direct = true;
294  static constexpr bool is_generator = false;
295  static constexpr bool is_padded = false;
296  static constexpr bool is_aligned = true;
297  static constexpr bool is_temporary = true;
    // Requires CUBLAS and a value type accepted by is_gpu_t
298  static constexpr bool gpu_computable = is_gpu_t<value_type> && cublas_enabled;
    // Same storage order as the left operand
299  static constexpr order storage_order = left_traits::storage_order;
300 
    /*!
     * \brief Reported as vectorizable for every vector mode V.
     */
306  template <vector_mode_t V>
307  static constexpr bool vectorizable = true;
308 
    /*!
     * \brief Compile-time dimension of the expression.
     *
     * DD is not used: the result is always the second dimension of B.
     *
     * \return dim<1>() of B
     */
313  template <size_t DD>
314  static constexpr size_t dim() {
315  return decay_traits<B>::template dim<1>();
316  }
317 
    /*!
     * \brief Runtime dimension of the expression.
     * \param e The expression
     * \param d The requested dimension (unused; the result is always
     * dimension 1 of the right operand)
     * \return etl::dim(e._b, 1)
     */
324  static size_t dim(const expr_t& e, [[maybe_unused]] size_t d) {
325  return etl::dim(e._b, 1);
326  }
327 
    /*!
     * \brief Runtime size of the expression.
     * \param e The expression
     * \return etl::dim(e._b, 1), the same value as the single dimension
     */
333  static size_t size(const expr_t& e) {
334  return etl::dim(e._b, 1);
335  }
336 
    /*!
     * \brief Compile-time size of the expression.
     * \return dim<1>() of B
     */
341  static constexpr size_t size() {
342  return decay_traits<B>::template dim<1>();
343  }
344 
    /*!
     * \brief Number of dimensions of the expression.
     * \return Always 1
     */
349  static constexpr size_t dimensions() {
350  return 1;
351  }
352 
    /*!
     * \brief Estimated complexity of the computation.
     * \return Always -1 (NOTE(review): presumably a sentinel for
     * "not estimated" -- confirm with the ETL evaluator)
     */
357  static constexpr int complexity() noexcept {
358  return -1;
359  }
360 };
361 
/*!
 * \brief Build the expression for the product of a 1D expression and a
 * 2D expression.
 *
 * The listing showed this function with an empty body, which would make the
 * deduced return type void. The return statement builds the gevm_expr from
 * both operands.
 *
 * \param a The left (1D) operand
 * \param b The right (2D) operand
 * \return A gevm_expr representing a * b
 */
368 template <etl_1d A, etl_2d B>
369 auto operator*(A&& a, B&& b) {
370  return gevm_expr<detail::build_type<A>, detail::build_type<B>>{a, b};
371 }
372 
/*!
 * \brief Build the expression for the product of a 1D expression and a
 * 2D expression.
 *
 * The listing showed this function with an empty body, which would make the
 * deduced return type void and the assignment in the three-argument
 * overload ill-formed. The return statement builds the gevm_expr from both
 * operands.
 *
 * \param a The left (1D) operand
 * \param b The right (2D) operand
 * \return A gevm_expr representing a * b
 */
379 template <etl_1d A, etl_2d B>
380 auto mul(A&& a, B&& b) {
381  return gevm_expr<detail::build_type<A>, detail::build_type<B>>{a, b};
382 }
383 
/*!
 * \brief Multiply a 1D expression by a 2D expression and store the result
 * in c.
 *
 * The return type is deduced with plain auto, so the returned object is a
 * decayed copy of c rather than a reference to it.
 *
 * \param a The left (1D) operand
 * \param b The right (2D) operand
 * \param c The (1D) expression receiving the result
 * \return c, by value
 */
391 template <etl_1d A, etl_2d B, etl_1d C>
392 auto mul(A&& a, B&& b, C&& c) {
393  c = mul(a, b);
394  return c;
395 }
396 
397 } //end of namespace etl
static constexpr size_t size()
Returns the size of the expression.
Definition: gevm_expr.hpp:341
gemm_impl
Enumeration describing the different matrix-matrix multiplication implementations.
Definition: gemm_impl.hpp:21
B _b
The sub expression reference.
Definition: base_temporary_expr.hpp:534
auto mul(A &&a, B &&b)
Multiply two matrices together.
Definition: gemm_expr.hpp:442
Standard implementation.
static constexpr size_t dim()
Returns the DDth dimension of the expression.
Definition: gevm_expr.hpp:314
constexpr bool is_magic_view
Traits indicating if the given ETL type is a magic view expression.
Definition: traits.hpp:311
static void apply_raw(AA &&a, BB &&b, C &&c)
Compute c = a * b with the selected implementation (a transposed b is handled by dedicated kernels).
Definition: gevm_expr.hpp:163
A _a
The sub expression reference.
Definition: base_temporary_expr.hpp:533
void assign_sub_to(L &&lhs) const
Sub from the given left-hand-side expression.
Definition: gevm_expr.hpp:230
constexpr bool vectorize_impl
Indicates if the implementations can be automatically vectorized by ETL.
Definition: config.hpp:35
constexpr bool vec_enabled
Indicates if vectorization is available in any format.
Definition: config.hpp:220
order
Storage order of a matrix.
Definition: order.hpp:15
gevm_expr(A a, B b)
Construct a new expression.
Definition: gevm_expr.hpp:44
static size_t dim(const expr_t &e, [[maybe_unused]] size_t d)
Returns the dth dimension of the expression.
Definition: gevm_expr.hpp:324
Abstract base class for temporary binary expression.
Definition: base_temporary_expr.hpp:529
VEC implementation.
void assign_mod_to(L &&lhs) const
Modulo the given left-hand-side expression.
Definition: gevm_expr.hpp:257
auto operator*(LE &&lhs, RE rhs)
Builds an expression representing the multiplication of lhs and rhs (scalar)
Definition: binary_expression_builder.hpp:149
static constexpr size_t dimensions()
Returns the number of dimensions of the expression.
Definition: gevm_expr.hpp:349
friend std::ostream & operator<<(std::ostream &os, const gevm_expr &expr)
Print a representation of the expression on the given stream.
Definition: gevm_expr.hpp:267
BLAS implementation.
std::add_lvalue_reference_t< B > b()
Returns the sub expression.
Definition: base_temporary_expr.hpp:593
constexpr bool is_fast
Traits to test if the given ETL expression type is fast (sizes known at compile-time) ...
Definition: traits.hpp:588
static constexpr gemm_impl select_default_gevm_impl(bool no_gpu)
Select an implementation of GEVM, not considering local context.
Definition: gevm_expr.hpp:74
Traits to get information about ETL types.
Definition: tmp.hpp:68
BLAS implementation.
Root namespace for the ETL library.
Definition: adapter.hpp:15
void assign_div_to(L &&lhs) const
Divide the given left-hand-side expression.
Definition: gevm_expr.hpp:248
context & local_context()
Return the configuration context of the current thread.
Definition: context.hpp:50
std::decay_t< B > right_expr_t
The right sub expression type.
Definition: gevm_expr.hpp:280
auto dim(E &&value, size_t i) -> detail::identity_helper< E, dim_view< detail::build_identity_type< E >, D >>
Return a view representing the ith Dth dimension.
Definition: view_expression_builder.hpp:25
constexpr bool cublas_enabled
Indicates if the NVIDIA CUBLAS library is available for ETL.
Definition: config.hpp:99
std::conditional_t< is_etl_value< T >, const std::decay_t< T > &, std::decay_t< T > > build_type
Helper to build the type for a sub expression.
Definition: expression_helpers.hpp:24
bool cpu
Force CPU evaluation.
Definition: context.hpp:29
static constexpr int complexity() noexcept
Estimate the complexity of computation.
Definition: gevm_expr.hpp:357
static constexpr auto storage_order
The sub storage order.
Definition: gevm_expr.hpp:32
static constexpr gemm_impl select_gevm_impl()
Select the best implementation of GEVM.
Definition: gevm_expr.hpp:150
void std_mod_evaluate(Expr &&expr, Result &&result)
Compound modulo evaluation of the expr into result.
Definition: evaluator.hpp:1271
void assign_add_to(L &&lhs) const
Add to the given left-hand-side expression.
Definition: gevm_expr.hpp:221
static void check([[maybe_unused]] const A &a, [[maybe_unused]] const B &b, [[maybe_unused]] const C &c)
Assert for the validity of the vector-matrix multiplication operation.
Definition: gevm_expr.hpp:55
void assign_mul_to(L &&lhs) const
Multiply the given left-hand-side expression.
Definition: gevm_expr.hpp:239
std::decay_t< A > left_expr_t
The left sub expression type.
Definition: gevm_expr.hpp:279
void std_mul_evaluate(Expr &&expr, Result &&result)
Compound multiply evaluation of the expr into result.
Definition: evaluator.hpp:1233
static constexpr bool gpu_computable
Indicates if the temporary expression can be directly evaluated using only GPU.
Definition: gevm_expr.hpp:38
constexpr bool is_transformer
Traits indicating if the given ETL type is a transformer expression.
Definition: traits.hpp:297
decltype(auto) smart_forward_gpu(E &expr)
Smart forwarding for a temporary expression that will be computed in GPU.
Definition: helpers.hpp:343
constexpr bool is_view
Traits indicating if the given ETL type is a view expression.
Definition: traits.hpp:304
static constexpr bool is_fast
Indicates if T is a fast structure.
Definition: traits_base.hpp:25
void std_sub_evaluate(Expr &&expr, Result &&result)
Compound subtract evaluation of the expr into result.
Definition: evaluator.hpp:1214
decltype(auto) smart_forward(E &expr)
Smart forwarding for a temporary expression.
Definition: helpers.hpp:323
value_t< A > value_type
The type of value of the expression.
Definition: gevm_expr.hpp:27
constexpr bool cblas_enabled
Indicates if a BLAS library is available for ETL.
Definition: config.hpp:76
value_t< A > value_type
The value type of the expression.
Definition: gevm_expr.hpp:283
constexpr bool is_thread_safe
Traits to test if the given ETL expression type is thread safe.
Definition: traits.hpp:687
typename decay_traits< E >::value_type value_t
Traits to extract the value type out of an ETL type.
Definition: tmp.hpp:81
void std_div_evaluate(Expr &&expr, Result &&result)
Compound divide evaluation of the expr into result.
Definition: evaluator.hpp:1252
void inc_counter([[maybe_unused]] const char *name)
Increase the given counter.
Definition: counters.hpp:25
std::add_lvalue_reference_t< A > a()
Returns the sub expression.
Definition: base_temporary_expr.hpp:577
void assign_to(C &&c) const
Assign to a matrix of the same storage order.
Definition: gevm_expr.hpp:208
void std_add_evaluate(Expr &&expr, Result &&result)
Compound add evaluation of the expr into result.
Definition: evaluator.hpp:1195
A vector-matrix multiplication expression.
Definition: gevm_expr.hpp:26
static size_t size(const expr_t &e)
Returns the size of the expression.
Definition: gevm_expr.hpp:333