Expression Templates Library (ETL)
transpose_front_expr.hpp
1 //=======================================================================
2 // Copyright (c) 2014-2023 Baptiste Wicht
3 // Distributed under the terms of the MIT License.
4 // (See accompanying file LICENSE or copy at
5 // http://opensource.org/licenses/MIT)
6 //=======================================================================
7 
8 #pragma once
9 
10 #include "etl/expr/base_temporary_expr.hpp"
11 
12 //Get the implementations
14 
15 namespace etl {
16 
21 template <etl_expr A>
22 struct transpose_front_expr : base_temporary_expr_un<transpose_front_expr<A>, A> {
27 
/// Storage order of this expression, taken as-is from the traits of the sub expression.
28  static constexpr auto storage_order = sub_traits::storage_order;
29 
/// Indicates if the temporary expression can be directly evaluated using only GPU.
/// True only when all_row_major<A>, all_floating<A> and impl::egblas::has_stranspose_front all hold.
34  static constexpr bool gpu_computable = all_row_major<A> && all_floating<A> && impl::egblas::has_stranspose_front;
35 
/// Construct a new expression from the sub expression `a`.
/// The sub expression is simply handed to the base class constructor.
40  explicit transpose_front_expr(A a) : base_type(a) {
41  //Nothing else to init
42  }
43 
49  template <etl_expr C>
50  static void check([[maybe_unused]] const A& a, [[maybe_unused]] const C& c) {
51  if constexpr (all_fast<A, C>) {
52  static_assert(etl::dim<0, A>() == etl::dim<1, C>(), "Invalid dimensions for front transposition");
53  static_assert(etl::dim<1, A>() == etl::dim<0, C>(), "Invalid dimensions for front transposition");
54  static_assert(etl::dim<2, A>() == etl::dim<2, C>(), "Invalid dimensions for front transposition");
55  } else {
56  cpp_assert(etl::dim<0>(a) == etl::dim<1>(c), "Invalid dimensions for front transposition");
57  cpp_assert(etl::dim<1>(a) == etl::dim<0>(c), "Invalid dimensions for front transposition");
58  cpp_assert(etl::dim<2>(a) == etl::dim<2>(c), "Invalid dimensions for front transposition");
59  }
60  }
61 
62  // Assignment functions
63 
68  template <etl_expr C>
69  void assign_to(C&& lhs) const {
70  auto& a = this->a();
71 
72  check(a, lhs);
73 
74  const auto B = etl::dim<0>(a);
75  const auto K = etl::dim<1>(a);
76 
77  if constexpr (all_row_major<A, C> && all_floating<A, C> && impl::egblas::has_stranspose_front) {
78  decltype(auto) t1 = smart_forward_gpu(a);
79  t1.ensure_gpu_up_to_date();
80 
81  lhs.ensure_gpu_allocated();
82 
83  impl::egblas::transpose_front(B, K, etl::size(a) / (B * K), t1.gpu_memory(), lhs.gpu_memory());
84 
85  lhs.validate_gpu();
86  lhs.invalidate_cpu();
87  } else {
88  auto batch_fun_b = [&](const size_t first, const size_t last) {
89  for (size_t b = first; b < last; ++b) {
90  for (size_t k = 0; k < K; ++k) {
91  lhs(k)(b) = a(b)(k);
92  }
93  }
94  };
95 
96  // Ideally, this should be optimized to not use hyper thread
97  // for large containers, but the threshold is tricky to define
98  engine_dispatch_1d_serial(batch_fun_b, 0, B, 8UL);
99 
100  a.ensure_cpu_up_to_date();
101 
102  lhs.validate_cpu();
103  lhs.invalidate_gpu();
104  }
105  }
106 
/// Add to the given left-hand-side expression.
/// Delegates to std_add_evaluate with this expression and lhs.
111  template <etl_expr L>
112  void assign_add_to(L&& lhs) const {
113  std_add_evaluate(*this, lhs);
114  }
115 
/// Sub from the given left-hand-side expression.
/// Delegates to std_sub_evaluate with this expression and lhs.
120  template <etl_expr L>
121  void assign_sub_to(L&& lhs) const {
122  std_sub_evaluate(*this, lhs);
123  }
124 
/// Multiply the given left-hand-side expression.
/// Delegates to std_mul_evaluate with this expression and lhs.
129  template <etl_expr L>
130  void assign_mul_to(L&& lhs) const {
131  std_mul_evaluate(*this, lhs);
132  }
133 
/// Divide the given left-hand-side expression.
/// Delegates to std_div_evaluate with this expression and lhs.
138  template <etl_expr L>
139  void assign_div_to(L&& lhs) const {
140  std_div_evaluate(*this, lhs);
141  }
142 
/// Modulo the given left-hand-side expression.
/// Delegates to std_mod_evaluate with this expression and lhs.
147  template <etl_expr L>
148  void assign_mod_to(L&& lhs) const {
149  std_mod_evaluate(*this, lhs);
150  }
151 
158  friend std::ostream& operator<<(std::ostream& os, const transpose_front_expr& expr) {
159  return os << "trans_front(" << expr._a << ")";
160  }
161 };
162 
167 template <typename A>
170  using sub_expr_t = std::decay_t<A>;
173 
/// Compile-time flags describing the front-transposition expression.
/// Every flag is a fixed constant except is_fast and storage_order, which are
/// forwarded from the sub expression traits, and gpu_computable, which is
/// derived from the value type and from cuda_enabled.
174  static constexpr bool is_etl = true;
175  static constexpr bool is_transformer = false;
176  static constexpr bool is_view = false;
177  static constexpr bool is_magic_view = false;
178  static constexpr bool is_fast = sub_traits::is_fast;
179  static constexpr bool is_linear = true;
180  static constexpr bool is_thread_safe = true;
181  static constexpr bool is_value = false;
182  static constexpr bool is_direct = true;
183  static constexpr bool is_generator = false;
184  static constexpr bool is_padded = false;
185  static constexpr bool is_aligned = true;
186  static constexpr bool is_temporary = true;
187  static constexpr bool gpu_computable = is_gpu_t<value_type> && cuda_enabled;
188  static constexpr order storage_order = sub_traits::storage_order;
189 
/// Vectorization flag; it is true for every vector mode V.
195  template <vector_mode_t V>
196  static constexpr bool vectorizable = true;
197 
202  template <size_t DD>
203  static constexpr size_t dim() {
204  if (DD == 0) {
205  return sub_traits::template dim<1>();
206  } else if (DD == 1) {
207  return sub_traits::template dim<0>();
208  } else {
209  return sub_traits::template dim<DD>();
210  }
211  }
212 
219  static size_t dim(const expr_t& e, size_t d) {
220  if (d == 0) {
221  return etl::dim<1>(e._a);
222  } else if (d == 1) {
223  return etl::dim<0>(e._a);
224  } else {
225  return etl::dim(e._a, d);
226  }
227  }
228 
/// Returns the size of the expression `e`.
/// This is the size of its sub expression, as reported by etl::size.
234  static size_t size(const expr_t& e) {
235  return etl::size(e._a);
236  }
237 
/// Returns the size of the expression, forwarded from the sub expression traits.
242  static constexpr size_t size() {
243  return sub_traits::size();
244  }
245 
/// Returns the number of dimensions of the expression, forwarded from the sub expression traits.
250  static constexpr size_t dimensions() {
251  return sub_traits::dimensions();
252  }
253 
/// Estimate the complexity of computation. Always returns -1 for this expression.
258  static constexpr int complexity() noexcept {
259  return -1;
260  }
261 };
262 
263 } //end of namespace etl
static constexpr bool gpu_computable
Indicates if the temporary expression can be directly evaluated using only GPU.
Definition: transpose_front_expr.hpp:34
std::add_lvalue_reference_t< A > a()
Returns the sub expression.
Definition: base_temporary_expr.hpp:489
void assign_to(C &&lhs) const
Assign to a matrix of the same storage order.
Definition: transpose_front_expr.hpp:69
void engine_dispatch_1d_serial(Functor &&functor, size_t first, size_t last, size_t threshold, [[maybe_unused]] size_t n_threads=etl::threads)
Dispatch the elements of a range to a functor in a parallel manner, using the global thread engine...
Definition: parallel_support.hpp:734
constexpr bool is_magic_view
Traits indicating if the given ETL type is a magic view expression.
Definition: traits.hpp:311
static constexpr auto storage_order
The sub storage order.
Definition: transpose_front_expr.hpp:28
static size_t dim(const expr_t &e, size_t d)
Returns the dth dimension of the expression.
Definition: transpose_front_expr.hpp:219
order
Storage order of a matrix.
Definition: order.hpp:15
constexpr bool cuda_enabled
Indicates if CUDA is available.
Definition: config.hpp:94
EGBLAS wrappers for the transpose_front operation.
friend std::ostream & operator<<(std::ostream &os, const transpose_front_expr &expr)
Print a representation of the expression on the given stream.
Definition: transpose_front_expr.hpp:158
A _a
The sub expression reference.
Definition: base_temporary_expr.hpp:447
static constexpr int complexity() noexcept
Estimate the complexity of computation.
Definition: transpose_front_expr.hpp:258
static void check([[maybe_unused]] const A &a, [[maybe_unused]] const C &c)
Validate the transposition dimensions.
Definition: transpose_front_expr.hpp:50
constexpr bool is_fast
Traits to test if the given ETL expression type is fast (sizes known at compile-time) ...
Definition: traits.hpp:588
Traits to get information about ETL types.
Definition: tmp.hpp:68
Root namespace for the ETL library.
Definition: adapter.hpp:15
static constexpr size_t dimensions()
Return the number of dimensions of the expression.
Definition: traits_base.hpp:31
auto dim(E &&value, size_t i) -> detail::identity_helper< E, dim_view< detail::build_identity_type< E >, D >>
Return a view representing the ith Dth dimension.
Definition: view_expression_builder.hpp:25
static constexpr size_t dimensions()
Returns the number of dimensions of the expression.
Definition: transpose_front_expr.hpp:250
void assign_add_to(L &&lhs) const
Add to the given left-hand-side expression.
Definition: transpose_front_expr.hpp:112
void assign_div_to(L &&lhs) const
Divide the given left-hand-side expression.
Definition: transpose_front_expr.hpp:139
auto transpose_front(const E &value)
Returns the transpose of the given expression.
Definition: expression_builder.hpp:568
void assign_sub_to(L &&lhs) const
Sub from the given left-hand-side expression.
Definition: transpose_front_expr.hpp:121
void std_mod_evaluate(Expr &&expr, Result &&result)
Compound modulo evaluation of the expr into result.
Definition: evaluator.hpp:1271
void std_mul_evaluate(Expr &&expr, Result &&result)
Compound multiply evaluation of the expr into result.
Definition: evaluator.hpp:1233
constexpr bool is_transformer
Traits indicating if the given ETL type is a transformer expression.
Definition: traits.hpp:297
A transposition expression for the first layers.
Definition: transpose_front_expr.hpp:22
std::decay_t< A > sub_expr_t
The sub expression type.
Definition: transpose_front_expr.hpp:170
decltype(auto) smart_forward_gpu(E &expr)
Smart forwarding for a temporary expression that will be computed in GPU.
Definition: helpers.hpp:343
constexpr size_t size(const E &expr) noexcept
Returns the size of the given ETL expression.
Definition: helpers.hpp:108
value_t< A > value_type
The type of value of the expression.
Definition: transpose_front_expr.hpp:23
constexpr bool is_view
Traits indicating if the given ETL type is a view expression.
Definition: traits.hpp:304
value_t< A > value_type
The value type of the expression.
Definition: transpose_front_expr.hpp:172
static constexpr bool is_fast
Indicates if T is a fast structure.
Definition: traits_base.hpp:25
void std_sub_evaluate(Expr &&expr, Result &&result)
Compound subtract evaluation of the expr into result.
Definition: evaluator.hpp:1214
void assign_mod_to(L &&lhs) const
Modulo the given left-hand-side expression.
Definition: transpose_front_expr.hpp:148
Abstract base class for temporary unary expression.
Definition: base_temporary_expr.hpp:443
static constexpr size_t size()
Returns the size of the expression.
Definition: transpose_front_expr.hpp:242
transpose_front_expr(A a)
Construct a new expression.
Definition: transpose_front_expr.hpp:40
static size_t size(const expr_t &e)
Returns the size of the expression.
Definition: transpose_front_expr.hpp:234
constexpr bool is_thread_safe
Traits to test if the given ETL expression type is thread safe.
Definition: traits.hpp:687
void assign_mul_to(L &&lhs) const
Multiply the given left-hand-side expression.
Definition: transpose_front_expr.hpp:130
typename decay_traits< E >::value_type value_t
Traits to extract the value type out of an ETL type.
Definition: tmp.hpp:81
void std_div_evaluate(Expr &&expr, Result &&result)
Compound divide evaluation of the expr into result.
Definition: evaluator.hpp:1252
static constexpr size_t dim()
Returns the DDth dimension of the expression.
Definition: transpose_front_expr.hpp:203
void std_add_evaluate(Expr &&expr, Result &&result)
Compound add evaluation of the expr into result.
Definition: evaluator.hpp:1195