Expression Templates Library (ETL)
bias_batch_mean_2d_expr.hpp
1 //=======================================================================
2 // Copyright (c) 2014-2023 Baptiste Wicht
3 // Distributed under the terms of the MIT License.
4 // (See accompanying file LICENSE or copy at
5 // http://opensource.org/licenses/MIT)
6 //=======================================================================
7 
8 #pragma once
9 
10 #include "etl/expr/base_temporary_expr.hpp"
11 
14 
15 namespace etl {
16 
21 template <etl_2d A, bool Mean>
22 struct bias_batch_mean_2d_expr : base_temporary_expr_un<bias_batch_mean_2d_expr<A, Mean>, A> {
27 
28  static constexpr auto storage_order = sub_traits::storage_order;
29 
34  static constexpr bool gpu_computable = (!Mean && cudnn_enabled && is_floating<A>)
35  || (impl::egblas::has_sbias_batch_sum && all_row_major<A> && all_single_precision<A>)
36  || (impl::egblas::has_dbias_batch_sum && all_row_major<A> && all_double_precision<A>);
37 
42  explicit bias_batch_mean_2d_expr(A a) : base_type(a) {
43  //Nothing else to init
44  }
45 
51  template <etl_1d C>
52  static void check([[maybe_unused]] const A& a, [[maybe_unused]] const C& c) {
53  if constexpr (all_fast<A, C>) {
54  static_assert(etl::dim<1, A>() == etl::dim<0, C>(), "Invalid dimensions for bias_batch_mean_2d");
55  } else {
56  cpp_assert(etl::dim<1>(a) == etl::dim<0>(c), "Invalid dimensions for bias_batch_mean_2d");
57  }
58  }
59 
60  // Assignment functions
61 
66  template <etl_expr L>
67  void assign_to(L&& lhs) const {
68  inc_counter("temp:assign");
69 
70  auto& a = this->a();
71 
72  using T = value_t<A>;
73 
74  check(a, lhs);
75 
76  if constexpr (!Mean && impl::egblas::has_sbias_batch_sum && all_row_major<A> && all_floating<A, L>) {
77  const auto N = etl::dim<0>(a);
78  const auto K = etl::dim<1>(a);
79 
80  decltype(auto) t1 = smart_forward_gpu(a);
81  t1.ensure_gpu_up_to_date();
82 
83  lhs.ensure_gpu_allocated();
84 
85  impl::egblas::bias_batch_sum(N, K, t1.gpu_memory(), 1, lhs.gpu_memory(), 1);
86 
87  lhs.validate_gpu();
88  lhs.invalidate_cpu();
89  } else if constexpr (Mean && impl::egblas::has_sbias_batch_mean && all_row_major<A> && all_floating<A, L>) {
90  const auto N = etl::dim<0>(a);
91  const auto K = etl::dim<1>(a);
92 
93  decltype(auto) t1 = smart_forward_gpu(a);
94  t1.ensure_gpu_up_to_date();
95 
96  lhs.ensure_gpu_allocated();
97 
98  impl::egblas::bias_batch_mean(N, K, t1.gpu_memory(), 1, lhs.gpu_memory(), 1);
99 
100  lhs.validate_gpu();
101  lhs.invalidate_cpu();
102  } else if constexpr (!Mean && cudnn_enabled && all_floating<A, L>) {
103  impl::cudnn::bias_batch_mean_2d(smart_forward_gpu(a), lhs);
104  } else {
105  const auto N = etl::dim<0>(a);
106  const auto K = etl::dim<1>(a);
107 
108  standard_evaluator::pre_assign_rhs(a);
109 
110  auto batch_fun_k = [&](const size_t first, const size_t last) {
111  for (size_t k = first; k < last; ++k) {
112  T mean(0);
113 
114  for (size_t b = 0; b < N; ++b) {
115  mean += a(b, k);
116  }
117 
118  if constexpr (Mean) {
119  lhs(k) = mean / static_cast<T>(N);
120  } else {
121  lhs(k) = mean;
122  }
123  }
124  };
125 
126  engine_dispatch_1d_serial(batch_fun_k, 0, K, 4UL);
127  }
128  }
129 
134  template <etl_expr L>
135  void assign_add_to(L&& lhs) const {
136  if constexpr (all_floating<A, L> && ((!Mean && cudnn_enabled) || (all_row_major<A, L> && impl::egblas::has_sbias_batch_sum))) {
137  std_add_evaluate(*this, lhs);
138  } else {
139  auto& a = this->a();
140 
141  standard_evaluator::pre_assign_rhs(a);
142 
143  const auto N = etl::size(a) / etl::size(lhs);
144  const auto K = etl::size(lhs);
145 
146  using T = value_t<A>;
147 
148  check(a, lhs);
149 
150  auto batch_fun_k = [&](const size_t first, const size_t last) {
151  for (size_t k = first; k < last; ++k) {
152  T mean(0);
153 
154  for (size_t b = 0; b < N; ++b) {
155  mean += a(b, k);
156  }
157 
158  if constexpr (Mean) {
159  lhs(k) += mean / static_cast<T>(N);
160  } else {
161  lhs(k) += mean;
162  }
163  }
164  };
165 
166  engine_dispatch_1d_serial(batch_fun_k, 0, K, 4UL);
167  }
168  }
169 
174  template <etl_expr L>
175  void assign_sub_to(L&& lhs) const {
176  if constexpr (all_floating<A, L> && ((!Mean && cudnn_enabled) || (all_row_major<A, L> && impl::egblas::has_sbias_batch_sum))) {
177  std_sub_evaluate(*this, lhs);
178  } else {
179  auto& a = this->a();
180 
181  standard_evaluator::pre_assign_rhs(a);
182 
183  const auto N = etl::size(a) / etl::size(lhs);
184  const auto K = etl::size(lhs);
185 
186  using T = value_t<A>;
187 
188  check(a, lhs);
189 
190  auto batch_fun_k = [&](const size_t first, const size_t last) {
191  for (size_t k = first; k < last; ++k) {
192  T mean(0);
193 
194  for (size_t b = 0; b < N; ++b) {
195  mean += a(b, k);
196  }
197 
198  if constexpr (Mean) {
199  lhs(k) -= mean / static_cast<T>(N);
200  } else {
201  lhs(k) -= mean;
202  }
203  }
204  };
205 
206  engine_dispatch_1d_serial(batch_fun_k, 0, K, 4UL);
207  }
208  }
209 
214  template <etl_expr L>
215  void assign_mul_to(L&& lhs) const {
216  if constexpr (all_floating<A, L> && ((!Mean && cudnn_enabled) || (all_row_major<A, L> && impl::egblas::has_sbias_batch_sum))) {
217  std_mul_evaluate(*this, lhs);
218  } else {
219  auto& a = this->a();
220 
221  standard_evaluator::pre_assign_rhs(a);
222 
223  const auto N = etl::size(a) / etl::size(lhs);
224  const auto K = etl::size(lhs);
225 
226  using T = value_t<A>;
227 
228  check(a, lhs);
229 
230  auto batch_fun_k = [&](const size_t first, const size_t last) {
231  for (size_t k = first; k < last; ++k) {
232  T mean(0);
233 
234  for (size_t b = 0; b < N; ++b) {
235  mean += a(b, k);
236  }
237 
238  if constexpr (Mean) {
239  lhs(k) *= mean / static_cast<T>(N);
240  } else {
241  lhs(k) *= mean;
242  }
243  }
244  };
245 
246  engine_dispatch_1d_serial(batch_fun_k, 0, K, 4UL);
247  }
248  }
249 
254  template <etl_expr L>
255  void assign_div_to(L&& lhs) const {
256  if constexpr (all_floating<A, L> && ((!Mean && cudnn_enabled) || (all_row_major<A, L> && impl::egblas::has_sbias_batch_sum))) {
257  std_div_evaluate(*this, lhs);
258  } else {
259  auto& a = this->a();
260 
261  standard_evaluator::pre_assign_rhs(a);
262 
263  const auto N = etl::size(a) / etl::size(lhs);
264  const auto K = etl::size(lhs);
265 
266  using T = value_t<A>;
267 
268  check(a, lhs);
269 
270  auto batch_fun_k = [&](const size_t first, const size_t last) {
271  for (size_t k = first; k < last; ++k) {
272  T mean(0);
273 
274  for (size_t b = 0; b < N; ++b) {
275  mean += a(b, k);
276  }
277 
278  if constexpr (Mean) {
279  lhs(k) /= mean / static_cast<T>(N);
280  } else {
281  lhs(k) /= mean;
282  }
283  }
284  };
285 
286  engine_dispatch_1d_serial(batch_fun_k, 0, K, 4UL);
287  }
288  }
289 
294  template <etl_expr L>
295  void assign_mod_to(L&& lhs) const {
296  if constexpr (all_floating<A, L> && ((!Mean && cudnn_enabled) || (all_row_major<A, L> && impl::egblas::has_sbias_batch_sum))) {
297  std_mod_evaluate(*this, lhs);
298  } else {
299  auto& a = this->a();
300 
301  standard_evaluator::pre_assign_rhs(a);
302 
303  const auto N = etl::size(a) / etl::size(lhs);
304  const auto K = etl::size(lhs);
305 
306  using T = value_t<A>;
307 
308  check(a, lhs);
309 
310  auto batch_fun_k = [&](const size_t first, const size_t last) {
311  for (size_t k = first; k < last; ++k) {
312  T mean(0);
313 
314  for (size_t b = 0; b < N; ++b) {
315  mean += a(b, k);
316  }
317 
318  if constexpr (Mean) {
319  lhs(k) %= mean / static_cast<T>(N);
320  } else {
321  lhs(k) %= mean;
322  }
323  }
324  };
325 
326  engine_dispatch_1d_serial(batch_fun_k, 0, K, 4UL);
327  }
328  }
329 
336  friend std::ostream& operator<<(std::ostream& os, const bias_batch_mean_2d_expr& expr) {
337  if (Mean) {
338  return os << "bias_batch_mean_2d(" << expr._a << ")";
339  } else {
340  return os << "bias_batch_sum_2d(" << expr._a << ")";
341  }
342  }
343 };
344 
349 template <typename A, bool Mean>
352  using sub_expr_t = std::decay_t<A>;
355 
// Compile-time property flags of the enclosing traits struct. Its declaration
// line is not visible in this listing; presumably it is the etl_traits
// specialization for bias_batch_mean_2d_expr -- TODO confirm against the header.
356  static constexpr bool is_etl = true;
// Not a transformer expression
357  static constexpr bool is_transformer = false;
// Not a view expression
358  static constexpr bool is_view = false;
// Not a magic view expression
359  static constexpr bool is_magic_view = false;
// Fast (sizes known at compile time) exactly when the sub expression is fast
360  static constexpr bool is_fast = sub_traits::is_fast;
361  static constexpr bool is_linear = false;
// Declared thread safe
362  static constexpr bool is_thread_safe = true;
363  static constexpr bool is_value = false;
364  static constexpr bool is_direct = true;
365  static constexpr bool is_generator = false;
366  static constexpr bool is_padded = false;
367  static constexpr bool is_aligned = true;
// Flagged as a temporary expression
368  static constexpr bool is_temporary = true;
// Same storage order as the sub expression
369  static constexpr order storage_order = sub_traits::storage_order;
// Requires is_gpu_t<value_type> and CUDA support (cuda_enabled)
370  static constexpr bool gpu_computable = is_gpu_t<value_type> && cuda_enabled;
371 
// Vectorization flag: declared true for every vector mode V.
377  template <vector_mode_t V>
378  static constexpr bool vectorizable = true;
379 
384  template <size_t DD>
385  static constexpr size_t dim() requires(DD == 0) {
386  return decay_traits<A>::template dim<1>();
387  }
388 
395  static size_t dim(const expr_t& e, [[maybe_unused]] size_t d) {
396  cpp_assert(d == 0, "Invalid dimensions access");
397 
398  return etl::dim<1>(e._a);
399  }
400 
406  static size_t size(const expr_t& e) {
407  return etl::dim<1>(e._a);
408  }
409 
414  static constexpr size_t size() {
415  return decay_traits<A>::template dim<1>();
416  }
417 
/*!
 * \brief Returns the number of dimensions of the expression
 * \return 1, the result being a one-dimensional expression
 */
static constexpr size_t dimensions() {
    constexpr size_t rank = 1;

    return rank;
}
425 
/*!
 * \brief Returns the complexity of the expression
 * \return -1, the fixed value reported for this expression
 */
static constexpr int complexity() noexcept {
    constexpr int value = -1;

    return value;
}
433 };
434 
440 template <etl_2d E>
443 }
444 
450 template <etl_2d E>
452  return bias_batch_mean_2d_expr<detail::build_type<E>, false>{value};
453 }
454 
455 } //end of namespace etl
value_t< E > mean(E &&values)
Returns the mean of all the values contained in the given expression.
Definition: expression_builder.hpp:650
void assign_mod_to(L &&lhs) const
Modulo the given left-hand-side expression.
Definition: bias_batch_mean_2d_expr.hpp:295
void assign_sub_to(L &&lhs) const
Sub from the given left-hand-side expression.
Definition: bias_batch_mean_2d_expr.hpp:175
constexpr int complexity([[maybe_unused]] const E &expr) noexcept
Return the complexity of the expression.
Definition: helpers.hpp:38
std::add_lvalue_reference_t< A > a()
Returns the sub expression.
Definition: base_temporary_expr.hpp:489
void engine_dispatch_1d_serial(Functor &&functor, size_t first, size_t last, size_t threshold, [[maybe_unused]] size_t n_threads=etl::threads)
Dispatch the elements of a range to a functor in a parallel manner, using the global thread engine...
Definition: parallel_support.hpp:734
constexpr bool is_magic_view
Traits indicating if the given ETL type is a magic view expression.
Definition: traits.hpp:311
order
Storage order of a matrix.
Definition: order.hpp:15
constexpr bool cuda_enabled
Indicates if CUDA is available.
Definition: config.hpp:94
EGBLAS wrappers for the bias_batch_sum operation.
A _a
The sub expression reference.
Definition: base_temporary_expr.hpp:447
constexpr bool is_fast
Traits to test if the given ETL expression type is fast (sizes known at compile-time) ...
Definition: traits.hpp:588
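An expression computing, for each column of a 2D expression, the mean (or the sum) of its values over the first (batch) dimension.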
Definition: bias_batch_mean_2d_expr.hpp:22
Traits to get information about ETL types.
Definition: tmp.hpp:68
Root namespace for the ETL library.
Definition: adapter.hpp:15
bias_batch_mean implementations with NVidia cuDNN library
void assign_to(L &&lhs) const
Assign to a matrix of the same storage order.
Definition: bias_batch_mean_2d_expr.hpp:67
auto dim(E &&value, size_t i) -> detail::identity_helper< E, dim_view< detail::build_identity_type< E >, D >>
Return a view representing the ith Dth dimension.
Definition: view_expression_builder.hpp:25
void assign_add_to(L &&lhs) const
Add to the given left-hand-side expression.
Definition: bias_batch_mean_2d_expr.hpp:135
constexpr bool cudnn_enabled
Indicates if the NVIDIA CUDNN library is available for ETL.
Definition: config.hpp:114
void assign_mul_to(L &&lhs) const
Multiply the given left-hand-side expression.
Definition: bias_batch_mean_2d_expr.hpp:215
static constexpr bool gpu_computable
Indicates if the temporary expression can be directly evaluated using only GPU.
Definition: bias_batch_mean_2d_expr.hpp:34
void std_mod_evaluate(Expr &&expr, Result &&result)
Compound modulo evaluation of the expr into result.
Definition: evaluator.hpp:1271
bias_batch_mean_2d_expr< detail::build_type< E >, false > bias_batch_sum_2d(const E &value)
Returns, for each column of the given 2D expression, the sum of its values over the first (batch) dimension.
Definition: bias_batch_mean_2d_expr.hpp:451
static constexpr auto storage_order
The sub storage order.
Definition: bias_batch_mean_2d_expr.hpp:28
void std_mul_evaluate(Expr &&expr, Result &&result)
Compound multiply evaluation of the expr into result.
Definition: evaluator.hpp:1233
constexpr bool is_transformer
Traits indicating if the given ETL type is a transformer expression.
Definition: traits.hpp:297
decltype(auto) smart_forward_gpu(E &expr)
Smart forwarding for a temporary expression that will be computed in GPU.
Definition: helpers.hpp:343
constexpr size_t size(const E &expr) noexcept
Returns the size of the given ETL expression.
Definition: helpers.hpp:108
requires(D > 0) struct dyn_base
Matrix with run-time fixed dimensions.
Definition: dyn_base.hpp:113
constexpr bool is_view
Traits indicating if the given ETL type is a view expression.
Definition: traits.hpp:304
static constexpr bool is_fast
Indicates if T is a fast structure.
Definition: traits_base.hpp:25
static void check([[maybe_unused]] const A &a, [[maybe_unused]] const C &c)
Validate the dimensions of the bias_batch_mean_2d operation.
Definition: bias_batch_mean_2d_expr.hpp:52
void std_sub_evaluate(Expr &&expr, Result &&result)
Compound subtract evaluation of the expr into result.
Definition: evaluator.hpp:1214
Abstract base class for temporary unary expression.
Definition: base_temporary_expr.hpp:443
bias_batch_mean_2d_expr< detail::build_type< E >, true > bias_batch_mean_2d(const E &value)
Returns, for each column of the given 2D expression, the mean of its values over the first (batch) dimension.
Definition: bias_batch_mean_2d_expr.hpp:441
constexpr bool is_thread_safe
Traits to test if the given ETL expression type is thread safe.
Definition: traits.hpp:687
typename decay_traits< E >::value_type value_t
Traits to extract the value type out of an ETL type.
Definition: tmp.hpp:81
void std_div_evaluate(Expr &&expr, Result &&result)
Compound divide evaluation of the expr into result.
Definition: evaluator.hpp:1252
bias_batch_mean_2d_expr(A a)
Construct a new expression.
Definition: bias_batch_mean_2d_expr.hpp:42
value_t< A > value_type
The value type of the expression.
Definition: bias_batch_mean_2d_expr.hpp:354
void inc_counter([[maybe_unused]] const char *name)
Increase the given counter.
Definition: counters.hpp:25
std::decay_t< A > sub_expr_t
The sub expression type.
Definition: bias_batch_mean_2d_expr.hpp:352
friend std::ostream & operator<<(std::ostream &os, const bias_batch_mean_2d_expr &expr)
Print a representation of the expression on the given stream.
Definition: bias_batch_mean_2d_expr.hpp:336
void assign_div_to(L &&lhs) const
Divide the given left-hand-side expression.
Definition: bias_batch_mean_2d_expr.hpp:255
void std_add_evaluate(Expr &&expr, Result &&result)
Compound add evaluation of the expr into result.
Definition: evaluator.hpp:1195
value_t< A > value_type
The type of value of the expression.
Definition: bias_batch_mean_2d_expr.hpp:23