Expression Templates Library (ETL)
batch_outer_product_expr.hpp
1 //=======================================================================
2 // Copyright (c) 2014-2023 Baptiste Wicht
3 // Distributed under the terms of the MIT License.
4 // (See accompanying file LICENSE or copy at
5 // http://opensource.org/licenses/MIT)
6 //=======================================================================
7 
8 #pragma once
9 
10 #include "etl/expr/base_temporary_expr.hpp"
11 
12 //Include the implementations
13 #include "etl/impl/std/outer.hpp"
14 #include "etl/impl/blas/outer.hpp"
16 #include "etl/impl/vec/outer.hpp"
17 
18 namespace etl {
19 
24 template <etl_expr A, etl_expr B>
25 struct batch_outer_product_expr : base_temporary_expr_bin<batch_outer_product_expr<A, B>, A, B> {
30 
31  static constexpr auto storage_order = left_traits::storage_order;
32 
37  static constexpr bool gpu_computable = cublas_enabled;
38 
43  explicit batch_outer_product_expr(A a, B b) : base_type(a, b) {
44  //Nothing else to init
45  }
46 
47  // Assignment functions
48 
57  template <typename C>
58  static constexpr etl::outer_impl select_default_batch_outer_impl(bool no_gpu) {
59  if (cublas_enabled && !no_gpu) {
61  }
62 
63  if (cblas_enabled) {
64  return etl::outer_impl::BLAS;
65  }
66 
67  if (vec_enabled) {
68  return etl::outer_impl::VEC;
69  }
70 
71  return etl::outer_impl::STD;
72  }
73 
74 #ifdef ETL_MANUAL_SELECT
75 
81  template <typename C>
83  if (local_context().outer_selector.forced) {
84  auto forced = local_context().outer_selector.impl;
85 
86  switch (forced) {
87  //BLAS cannot always be used
88  case outer_impl::BLAS:
89  if (!cblas_enabled) {
90  std::cerr << "Forced selection to BLAS outer implementation, but not possible for this expression" << std::endl;
91  return select_default_batch_outer_impl<C>(local_context().cpu);
92  }
93 
94  return forced;
95 
96  //CUBLAS cannot always be used
97  case outer_impl::CUBLAS:
98  if (!cublas_enabled || local_context().cpu) {
99  std::cerr << "Forced selection to CUBLAS outer implementation, but not possible for this expression" << std::endl;
100  return select_default_batch_outer_impl<C>(local_context().cpu);
101  }
102 
103  return forced;
104 
105  //VEC cannot always be used
106  case outer_impl::VEC:
107  if (!vec_enabled) {
108  std::cerr << "Forced selection to VEC outer implementation, but not possible for this expression" << std::endl;
109  return select_default_batch_outer_impl<C>(local_context().cpu);
110  }
111 
112  return forced;
113 
114  //In other cases, simply use the forced impl
115  default:
116  return forced;
117  }
118  }
119 
120  return select_default_batch_outer_impl<C>(local_context().cpu);
121  }
122 
123 #else
124 
131  template <typename C>
133  return select_default_batch_outer_impl<C>(false);
134  }
135 
136 #endif
137 
142  template <etl_expr C>
143  void assign_to(C&& c) const {
144  inc_counter("temp:assign");
145 
146  auto& a = this->a();
147  auto& b = this->b();
148 
149  constexpr_select auto impl = select_batch_outer_impl<C>();
150 
151  if
152  constexpr_select(impl == etl::outer_impl::STD) {
153  inc_counter("impl:std");
154  etl::impl::standard::batch_outer(smart_forward(a), smart_forward(b), c);
155  }
156  else if
157  constexpr_select(impl == etl::outer_impl::BLAS) {
158  inc_counter("impl:blas");
159  etl::impl::blas::batch_outer(smart_forward(a), smart_forward(b), c);
160  }
161  else if
162  constexpr_select(impl == etl::outer_impl::CUBLAS) {
163  inc_counter("impl:cublas");
164  etl::impl::cublas::batch_outer(smart_forward_gpu(a), smart_forward_gpu(b), c);
165  }
166  else if
167  constexpr_select(impl == etl::outer_impl::VEC) {
168  inc_counter("impl:vec");
169  etl::impl::vec::batch_outer(smart_forward(a), smart_forward(b), c);
170  }
171  else {
172  cpp_unreachable("Invalid batch_outer selection");
173  }
174  }
175 
180  template <typename L>
181  void assign_add_to(L&& lhs) const {
182  std_add_evaluate(*this, lhs);
183  }
184 
189  template <typename L>
190  void assign_sub_to(L&& lhs) const {
191  std_sub_evaluate(*this, lhs);
192  }
193 
198  template <typename L>
199  void assign_mul_to(L&& lhs) const {
200  std_mul_evaluate(*this, lhs);
201  }
202 
207  template <typename L>
208  void assign_div_to(L&& lhs) const {
209  std_div_evaluate(*this, lhs);
210  }
211 
216  template <typename L>
217  void assign_mod_to(L&& lhs) const {
218  std_mod_evaluate(*this, lhs);
219  }
220 
227  friend std::ostream& operator<<(std::ostream& os, const batch_outer_product_expr& expr) {
228  return os << "batch_outer(" << expr._a << ", " << expr._b << ")";
229  }
230 };
231 
236 template <typename A, typename B>
239  using left_expr_t = std::decay_t<A>;
240  using right_expr_t = std::decay_t<B>;
244 
245  static constexpr bool is_etl = true;
246  static constexpr bool is_transformer = false;
247  static constexpr bool is_view = false;
248  static constexpr bool is_magic_view = false;
249  static constexpr bool is_fast = left_traits::is_fast && right_traits::is_fast;
250  static constexpr bool is_linear = false;
251  static constexpr bool is_thread_safe = true;
252  static constexpr bool is_value = false;
253  static constexpr bool is_direct = true;
254  static constexpr bool is_generator = false;
255  static constexpr bool is_padded = false;
256  static constexpr bool is_aligned = true;
257  static constexpr bool is_temporary = true;
258  static constexpr order storage_order = left_traits::storage_order;
259  static constexpr bool gpu_computable = is_gpu_t<value_type> && cuda_enabled;
260 
266  template <vector_mode_t V>
267  static constexpr bool vectorizable = true;
268 
273  template <size_t DD>
274  static constexpr size_t dim() {
275  return DD == 0 ? decay_traits<A>::template dim<1>() : decay_traits<B>::template dim<1>();
276  }
277 
284  static size_t dim(const expr_t& e, size_t d) {
285  if (d == 0) {
286  return etl::dim(e._a, 1);
287  } else {
288  return etl::dim(e._b, 1);
289  }
290  }
291 
297  static size_t size(const expr_t& e) {
298  return etl::dim(e._a, 1) * etl::dim(e._b, 1);
299  }
300 
305  static constexpr size_t size() {
306  return decay_traits<A>::template dim<1>() * decay_traits<B>::template dim<1>();
307  }
308 
313  static constexpr size_t dimensions() {
314  return 2;
315  }
316 
321  static constexpr int complexity() noexcept {
322  return -1;
323  }
324 };
325 
332 template <typename A, typename B>
335 }
336 
/*!
 * \brief Batch Outer product multiplication of two matrices, stored in c
 *
 * The two-argument batch_outer(a, b) expression is assigned to c.
 *
 * \param a The left hand side matrix
 * \param b The right hand side matrix
 * \param c The expression receiving the result
 * \return c, returned by value: the return type is deduced with a plain
 * `auto`, which never deduces a reference.
 */
template <typename A, typename B, typename C>
auto batch_outer(A&& a, B&& b, C&& c) {
    c = batch_outer(a, b);
    return c;
}
349 
350 } //end of namespace etl
static size_t size(const expr_t &e)
Returns the size of the expression.
Definition: batch_outer_product_expr.hpp:297
BLAS implementation of the outer product.
static constexpr etl::outer_impl select_default_batch_outer_impl(bool no_gpu)
Select the batch outer product implementation for an expression of type A and B.
Definition: batch_outer_product_expr.hpp:58
static constexpr size_t dim()
Returns the DDth dimension of the expression.
Definition: batch_outer_product_expr.hpp:274
static constexpr int complexity() noexcept
Estimate the complexity of computation.
Definition: batch_outer_product_expr.hpp:321
B _b
The sub expression reference.
Definition: base_temporary_expr.hpp:534
void assign_mod_to(L &&lhs) const
Modulo the given left-hand-side expression.
Definition: batch_outer_product_expr.hpp:217
Standard implementation.
outer_impl
Enumeration describing the different implementations of outer product.
Definition: outer_impl.hpp:21
A batch outer product expression.
Definition: batch_outer_product_expr.hpp:25
constexpr bool is_magic_view
Traits indicating if the given ETL type is a magic view expression.
Definition: traits.hpp:311
A _a
The sub expression reference.
Definition: base_temporary_expr.hpp:533
static constexpr bool gpu_computable
Indicates if the temporary expression can be directly evaluated using only GPU.
Definition: batch_outer_product_expr.hpp:37
static constexpr etl::outer_impl select_batch_outer_impl()
Select the batch_outer product implementation for an expression of type A and B.
Definition: batch_outer_product_expr.hpp:132
void assign_sub_to(L &&lhs) const
Sub from the given left-hand-side expression.
Definition: batch_outer_product_expr.hpp:190
constexpr bool vec_enabled
Indicates if vectorization is available in any format.
Definition: config.hpp:220
order
Storage order of a matrix.
Definition: order.hpp:15
friend std::ostream & operator<<(std::ostream &os, const batch_outer_product_expr &expr)
Print a representation of the expression on the given stream.
Definition: batch_outer_product_expr.hpp:227
constexpr bool cuda_enabled
Indicates if CUDA is available.
Definition: config.hpp:94
BLAS implementation of the outer product.
Abstract base class for temporary binary expression.
Definition: base_temporary_expr.hpp:529
VEC implementation.
void assign_add_to(L &&lhs) const
Add to the given left-hand-side expression.
Definition: batch_outer_product_expr.hpp:181
batch_outer_product_expr< detail::build_type< A >, detail::build_type< B > > batch_outer(A &&a, B &&b)
Batch Outer product multiplication of two matrices.
Definition: batch_outer_product_expr.hpp:333
BLAS implementation.
std::add_lvalue_reference_t< B > b()
Returns the sub expression.
Definition: base_temporary_expr.hpp:593
std::decay_t< A > left_expr_t
The left sub expression type.
Definition: batch_outer_product_expr.hpp:239
static constexpr size_t size()
Returns the size of the expression.
Definition: batch_outer_product_expr.hpp:305
value_t< A > value_type
The value type of the expression.
Definition: batch_outer_product_expr.hpp:243
constexpr bool is_fast
Traits to test if the given ETL expression type is fast (sizes known at compile-time) ...
Definition: traits.hpp:588
Standard implementation of the outer product.
Traits to get information about ETL types.
Definition: tmp.hpp:68
BLAS implementation.
Root namespace for the ETL library.
Definition: adapter.hpp:15
void assign_to(C &&c) const
Assign to a matrix of the same storage order.
Definition: batch_outer_product_expr.hpp:143
context & local_context()
Return the configuration context of the current thread.
Definition: context.hpp:50
static constexpr size_t dimensions()
Returns the number of dimensions of the expression.
Definition: batch_outer_product_expr.hpp:313
auto dim(E &&value, size_t i) -> detail::identity_helper< E, dim_view< detail::build_identity_type< E >, D >>
Return a view representing the ith Dth dimension.
Definition: view_expression_builder.hpp:25
constexpr bool cublas_enabled
Indicates if the NVIDIA CUBLAS library is available for ETL.
Definition: config.hpp:99
std::decay_t< B > right_expr_t
The right sub expression type.
Definition: batch_outer_product_expr.hpp:240
std::conditional_t< is_etl_value< T >, const std::decay_t< T > &, std::decay_t< T > > build_type
Helper to build the type for a sub expression.
Definition: expression_helpers.hpp:24
Standard implementation of the outer product.
bool cpu
Force CPU evaluation.
Definition: context.hpp:29
void std_mod_evaluate(Expr &&expr, Result &&result)
Compound modulo evaluation of the expr into result.
Definition: evaluator.hpp:1271
value_t< A > value_type
The type of value of the expression.
Definition: batch_outer_product_expr.hpp:26
void std_mul_evaluate(Expr &&expr, Result &&result)
Compound multiply evaluation of the expr into result.
Definition: evaluator.hpp:1233
constexpr bool is_transformer
Traits indicating if the given ETL type is a transformer expression.
Definition: traits.hpp:297
static size_t dim(const expr_t &e, size_t d)
Returns the dth dimension of the expression.
Definition: batch_outer_product_expr.hpp:284
decltype(auto) smart_forward_gpu(E &expr)
Smart forwarding for a temporary expression that will be computed in GPU.
Definition: helpers.hpp:343
static constexpr auto storage_order
The sub storage order.
Definition: batch_outer_product_expr.hpp:31
constexpr bool is_view
Traits indicating if the given ETL type is a view expression.
Definition: traits.hpp:304
batch_outer_product_expr(A a, B b)
Construct a new expression.
Definition: batch_outer_product_expr.hpp:43
void assign_div_to(L &&lhs) const
Divide the given left-hand-side expression.
Definition: batch_outer_product_expr.hpp:208
static constexpr bool is_fast
Indicates if T is a fast structure.
Definition: traits_base.hpp:25
void std_sub_evaluate(Expr &&expr, Result &&result)
Compound subtract evaluation of the expr into result.
Definition: evaluator.hpp:1214
decltype(auto) smart_forward(E &expr)
Smart forwarding for a temporary expression.
Definition: helpers.hpp:323
constexpr bool cblas_enabled
Indicates if a BLAS library is available for ETL.
Definition: config.hpp:76
constexpr bool is_thread_safe
Traits to test if the given ETL expression type is thread safe.
Definition: traits.hpp:687
void assign_mul_to(L &&lhs) const
Multiply the given left-hand-side expression.
Definition: batch_outer_product_expr.hpp:199
typename decay_traits< E >::value_type value_t
Traits to extract the value type out of an ETL type.
Definition: tmp.hpp:81
void std_div_evaluate(Expr &&expr, Result &&result)
Compound divide evaluation of the expr into result.
Definition: evaluator.hpp:1252
void inc_counter([[maybe_unused]] const char *name)
Increase the given counter.
Definition: counters.hpp:25
std::add_lvalue_reference_t< A > a()
Returns the sub expression.
Definition: base_temporary_expr.hpp:577
void std_add_evaluate(Expr &&expr, Result &&result)
Compound add evaluation of the expr into result.
Definition: evaluator.hpp:1195