Expression Templates Library (ETL)
bias_batch_var_2d_expr.hpp
1 //=======================================================================
2 // Copyright (c) 2014-2023 Baptiste Wicht
3 // Distributed under the terms of the MIT License.
4 // (See accompanying file LICENSE or copy at
5 // http://opensource.org/licenses/MIT)
6 //=======================================================================
7 
8 #pragma once
9 
10 #include "etl/expr/base_temporary_expr.hpp"
11 
13 
14 namespace etl {
15 
20 template <etl_2d A, etl_1d B>
21 struct bias_batch_var_2d_expr : base_temporary_expr_bin<bias_batch_var_2d_expr<A, B>, A, B> {
26 
27  static constexpr auto storage_order = sub_traits::storage_order;
28 
33  static constexpr bool gpu_computable =
34  (impl::egblas::has_sbias_batch_var && all_row_major<A> && all_single_precision<A>)
35  || (impl::egblas::has_dbias_batch_var && all_row_major<A> && all_double_precision<A>);
36 
41  explicit bias_batch_var_2d_expr(A a, B b) : base_type(a, b) {
42  //Nothing else to init
43  }
44 
50  template <etl_1d C>
51  static void check([[maybe_unused]] const A& a, [[maybe_unused]] const B& b, [[maybe_unused]] const C& c) {
52  if constexpr (all_fast<A, B, C>) {
53  static_assert(etl::dim<1, A>() == etl::dim<0, C>(), "Invalid dimensions for bias_batch_var_2d");
54  static_assert(etl::dim<0, B>() == etl::dim<0, C>(), "Invalid dimensions for bias_batch_var_2d");
55  } else {
56  cpp_assert(etl::dim<1>(a) == etl::dim<0>(c), "Invalid dimensions for bias_batch_var_2d");
57  cpp_assert(etl::dim<0>(b) == etl::dim<0>(c), "Invalid dimensions for bias_batch_var_2d");
58  }
59  }
60 
61  // Assignment functions
62 
67  template <etl_expr L>
68  void assign_to(L&& lhs) const {
69  inc_counter("temp:assign");
70 
71  auto& a = this->a();
72  auto& b = this->b();
73 
74  using T = value_t<A>;
75 
76  check(a, b, lhs);
77 
78  const auto N = etl::dim<0>(a);
79  const auto K = etl::dim<1>(a);
80 
81  if constexpr (impl::egblas::has_sbias_batch_var && all_row_major<A> && all_floating<A, L>) {
82  decltype(auto) t1 = smart_forward_gpu(a);
83  decltype(auto) t2 = smart_forward_gpu(b);
84 
85  t1.ensure_gpu_up_to_date();
86  t2.ensure_gpu_up_to_date();
87 
88  lhs.ensure_gpu_allocated();
89 
90  impl::egblas::bias_batch_var(N, K, t1.gpu_memory(), 1, t2.gpu_memory(), 1, lhs.gpu_memory(), 1);
91 
92  lhs.validate_gpu();
93  lhs.invalidate_cpu();
94  } else {
95  standard_evaluator::pre_assign_rhs(a);
96  standard_evaluator::pre_assign_rhs(b);
97 
98  auto batch_fun_k = [&](const size_t first, const size_t last) {
99  for (size_t k = first; k < last; ++k) {
100  T mean(0);
101 
102  for (size_t bb = 0; bb < N; ++bb) {
103  mean += (a(bb, k) - b(k)) * (a(bb, k) - b(k));
104  }
105 
106  lhs(k) = mean / N;
107  }
108  };
109 
110  engine_dispatch_1d_serial(batch_fun_k, 0, K, 4UL);
111 
112  lhs.validate_cpu();
113  lhs.invalidate_gpu();
114  }
115  }
116 
121  template <etl_expr L>
122  void assign_add_to(L&& lhs) const {
123  auto& a = this->a();
124  auto& b = this->b();
125 
126  standard_evaluator::pre_assign_rhs(a);
127  standard_evaluator::pre_assign_rhs(b);
128 
129  const auto N = etl::size(a) / etl::size(lhs);
130  const auto K = etl::size(lhs);
131 
132  using T = value_t<A>;
133 
134  check(a, b, lhs);
135 
136  auto batch_fun_k = [&](const size_t first, const size_t last) {
137  for (size_t k = first; k < last; ++k) {
138  T mean(0);
139 
140  for (size_t bb = 0; bb < N; ++bb) {
141  mean += (a(bb, k) - b(k)) * (a(bb, k) - b(k));
142  }
143 
144  lhs(k) += mean / static_cast<T>(N);
145  }
146  };
147 
148  engine_dispatch_1d_serial(batch_fun_k, 0, K, 4UL);
149  }
150 
155  template <etl_expr L>
156  void assign_sub_to(L&& lhs) const {
157  auto& a = this->a();
158  auto& b = this->b();
159 
160  standard_evaluator::pre_assign_rhs(a);
161  standard_evaluator::pre_assign_rhs(b);
162 
163  const auto N = etl::size(a) / etl::size(lhs);
164  const auto K = etl::size(lhs);
165 
166  using T = value_t<A>;
167 
168  check(a, b, lhs);
169 
170  auto batch_fun_k = [&](const size_t first, const size_t last) {
171  for (size_t k = first; k < last; ++k) {
172  T mean(0);
173 
174  for (size_t bb = 0; bb < N; ++bb) {
175  mean += (a(bb, k) - b(k)) * (a(bb, k) - b(k));
176  }
177 
178  lhs(k) -= mean / static_cast<T>(N);
179  }
180  };
181 
182  engine_dispatch_1d_serial(batch_fun_k, 0, K, 4UL);
183  }
184 
189  template <etl_expr L>
190  void assign_mul_to(L&& lhs) const {
191  auto& a = this->a();
192  auto& b = this->b();
193 
194  standard_evaluator::pre_assign_rhs(a);
195  standard_evaluator::pre_assign_rhs(b);
196 
197  const auto N = etl::size(a) / etl::size(lhs);
198  const auto K = etl::size(lhs);
199 
200  using T = value_t<A>;
201 
202  check(a, b, lhs);
203 
204  auto batch_fun_k = [&](const size_t first, const size_t last) {
205  for (size_t k = first; k < last; ++k) {
206  T mean(0);
207 
208  for (size_t bb = 0; bb < N; ++bb) {
209  mean += (a(bb, k) - b(k)) * (a(bb, k) - b(k));
210  }
211 
212  lhs(k) *= mean / static_cast<T>(N);
213  }
214  };
215 
216  engine_dispatch_1d_serial(batch_fun_k, 0, K, 4UL);
217  }
218 
223  template <etl_expr L>
224  void assign_div_to(L&& lhs) const {
225  auto& a = this->a();
226  auto& b = this->b();
227 
228  standard_evaluator::pre_assign_rhs(a);
229  standard_evaluator::pre_assign_rhs(b);
230 
231  const auto N = etl::size(a) / etl::size(lhs);
232  const auto K = etl::size(lhs);
233 
234  using T = value_t<A>;
235 
236  check(a, b, lhs);
237 
238  auto batch_fun_k = [&](const size_t first, const size_t last) {
239  for (size_t k = first; k < last; ++k) {
240  T mean(0);
241 
242  for (size_t bb = 0; bb < N; ++bb) {
243  mean += (a(bb, k) - b(k)) * (a(bb, k) - b(k));
244  }
245 
246  lhs(k) /= mean / static_cast<T>(N);
247  }
248  };
249 
250  engine_dispatch_1d_serial(batch_fun_k, 0, K, 4UL);
251  }
252 
257  template <etl_expr L>
258  void assign_mod_to(L&& lhs) const {
259  auto& a = this->a();
260  auto& b = this->b();
261 
262  standard_evaluator::pre_assign_rhs(a);
263  standard_evaluator::pre_assign_rhs(b);
264 
265  const auto N = etl::size(a) / etl::size(lhs);
266  const auto K = etl::size(lhs);
267 
268  using T = value_t<A>;
269 
270  check(a, b, lhs);
271 
272  auto batch_fun_k = [&](const size_t first, const size_t last) {
273  for (size_t k = first; k < last; ++k) {
274  T mean(0);
275 
276  for (size_t bb = 0; bb < N; ++bb) {
277  mean += (a(bb, k) - b(k)) * (a(bb, k) - b(k));
278  }
279 
280  lhs(k) %= mean / static_cast<T>(N);
281  }
282  };
283 
284  engine_dispatch_1d_serial(batch_fun_k, 0, K, 4UL);
285  }
286 
293  friend std::ostream& operator<<(std::ostream& os, const bias_batch_var_2d_expr& expr) {
294  return os << "bias_batch_var_2d(" << expr._a << ")";
295  }
296 };
297 
302 template <etl_2d A, etl_1d B>
305  using sub_expr_t = std::decay_t<A>;
308 
309  static constexpr bool is_etl = true;
310  static constexpr bool is_transformer = false;
311  static constexpr bool is_view = false;
312  static constexpr bool is_magic_view = false;
313  static constexpr bool is_fast = sub_traits::is_fast;
314  static constexpr bool is_linear = false;
315  static constexpr bool is_thread_safe = true;
316  static constexpr bool is_value = false;
317  static constexpr bool is_direct = true;
318  static constexpr bool is_generator = false;
319  static constexpr bool is_padded = false;
320  static constexpr bool is_aligned = true;
321  static constexpr bool is_temporary = true;
322  static constexpr order storage_order = sub_traits::storage_order;
323  static constexpr bool gpu_computable = is_gpu_t<value_type> && cuda_enabled;
324 
330  template <vector_mode_t V>
331  static constexpr bool vectorizable = true;
332 
337  template <size_t DD>
338  static constexpr size_t dim() requires(DD == 0) {
339  return decay_traits<A>::template dim<1>();
340  }
341 
348  static size_t dim(const expr_t& e, [[maybe_unused]] size_t d) {
349  cpp_assert(d == 0, "Invalid dimensions access");
350  return etl::dim<1>(e._a);
351  }
352 
358  static size_t size(const expr_t& e) {
359  return etl::dim<1>(e._a);
360  }
361 
366  static constexpr size_t size() {
367  return decay_traits<A>::template dim<1>();
368  }
369 
374  static constexpr size_t dimensions() {
375  return 1;
376  }
377 
382  static constexpr int complexity() noexcept {
383  return -1;
384  }
385 };
386 
392 template <etl_2d A, etl_1d B>
395 }
396 
397 } //end of namespace etl
value_t< E > mean(E &&values)
Returns the mean of all the values contained in the given expression.
Definition: expression_builder.hpp:650
constexpr int complexity([[maybe_unused]] const E &expr) noexcept
Return the complexity of the expression.
Definition: helpers.hpp:38
void engine_dispatch_1d_serial(Functor &&functor, size_t first, size_t last, size_t threshold, [[maybe_unused]] size_t n_threads=etl::threads)
Dispatch the elements of a range to a functor in a parallel manner, using the global thread engine...
Definition: parallel_support.hpp:734
constexpr bool is_magic_view
Traits indicating if the given ETL type is a magic view expression.
Definition: traits.hpp:311
A _a
The sub expression reference.
Definition: base_temporary_expr.hpp:533
void assign_add_to(L &&lhs) const
Add to the given left-hand-side expression.
Definition: bias_batch_var_2d_expr.hpp:122
order
Storage order of a matrix.
Definition: order.hpp:15
void assign_mul_to(L &&lhs) const
Multiply the given left-hand-side expression.
Definition: bias_batch_var_2d_expr.hpp:190
friend std::ostream & operator<<(std::ostream &os, const bias_batch_var_2d_expr &expr)
Print a representation of the expression on the given stream.
Definition: bias_batch_var_2d_expr.hpp:293
constexpr bool cuda_enabled
Indicates if CUDA is available.
Definition: config.hpp:94
static constexpr bool gpu_computable
Indicates if the temporary expression can be directly evaluated using only GPU.
Definition: bias_batch_var_2d_expr.hpp:33
EGBLAS wrappers for the bias_batch_sum operation.
Abstract base class for temporary binary expression.
Definition: base_temporary_expr.hpp:529
std::add_lvalue_reference_t< B > b()
Returns the sub expression.
Definition: base_temporary_expr.hpp:593
constexpr bool is_fast
Traits to test if the given ETL expression type is fast (sizes known at compile-time) ...
Definition: traits.hpp:588
value_t< A > value_type
The type of value of the expression.
Definition: bias_batch_var_2d_expr.hpp:22
Traits to get information about ETL types.
Definition: tmp.hpp:68
Root namespace for the ETL library.
Definition: adapter.hpp:15
auto dim(E &&value, size_t i) -> detail::identity_helper< E, dim_view< detail::build_identity_type< E >, D >>
Return a view representing the ith Dth dimension.
Definition: view_expression_builder.hpp:25
std::conditional_t< is_etl_value< T >, const std::decay_t< T > &, std::decay_t< T > > build_type
Helper to build the type for a sub expression.
Definition: expression_helpers.hpp:24
static void check([[maybe_unused]] const A &a, [[maybe_unused]] const B &b, [[maybe_unused]] const C &c)
Validate the dimensions of the bias_batch_var_2d operands against the result.
Definition: bias_batch_var_2d_expr.hpp:51
bias_batch_var_2d_expr(A a, B b)
Construct a new expression.
Definition: bias_batch_var_2d_expr.hpp:41
std::decay_t< A > sub_expr_t
The sub expression type.
Definition: bias_batch_var_2d_expr.hpp:305
bias_batch_var_2d_expr< detail::build_type< A >, detail::build_type< B > > bias_batch_var_2d(const A &a, const B &b)
Returns the bias_batch_var_2d expression built from the given expressions a and b.
Definition: bias_batch_var_2d_expr.hpp:393
void assign_mod_to(L &&lhs) const
Modulo the given left-hand-side expression.
Definition: bias_batch_var_2d_expr.hpp:258
constexpr bool is_transformer
Traits indicating if the given ETL type is a transformer expression.
Definition: traits.hpp:297
void assign_to(L &&lhs) const
Assign to a matrix of the same storage order.
Definition: bias_batch_var_2d_expr.hpp:68
decltype(auto) smart_forward_gpu(E &expr)
Smart forwarding for a temporary expression that will be computed in GPU.
Definition: helpers.hpp:343
void assign_div_to(L &&lhs) const
Divide the given left-hand-side expression.
Definition: bias_batch_var_2d_expr.hpp:224
constexpr size_t size(const E &expr) noexcept
Returns the size of the given ETL expression.
Definition: helpers.hpp:108
requires(D > 0) struct dyn_base
Matrix with run-time fixed dimensions.
Definition: dyn_base.hpp:113
constexpr bool is_view
Traits indicating if the given ETL type is a view expression.
Definition: traits.hpp:304
void assign_sub_to(L &&lhs) const
Sub from the given left-hand-side expression.
Definition: bias_batch_var_2d_expr.hpp:156
static constexpr bool is_fast
Indicates if T is a fast structure.
Definition: traits_base.hpp:25
static constexpr auto storage_order
The sub storage order.
Definition: bias_batch_var_2d_expr.hpp:27
constexpr bool is_thread_safe
Traits to test if the given ETL expression type is thread safe.
Definition: traits.hpp:687
A batch variance expression: for each column k of a 2D input a, the mean over the rows of (a(n, k) - b(k))^2.
Definition: bias_batch_var_2d_expr.hpp:21
typename decay_traits< E >::value_type value_t
Traits to extract the value type out of an ETL type.
Definition: tmp.hpp:81
void inc_counter([[maybe_unused]] const char *name)
Increase the given counter.
Definition: counters.hpp:25
std::add_lvalue_reference_t< A > a()
Returns the sub expression.
Definition: base_temporary_expr.hpp:577
value_t< A > value_type
The value type of the expression.
Definition: bias_batch_var_2d_expr.hpp:307