Expression Templates Library (ETL)
bias_batch_mean_4d_expr.hpp
1 //=======================================================================
2 // Copyright (c) 2014-2023 Baptiste Wicht
3 // Distributed under the terms of the MIT License.
4 // (See accompanying file LICENSE or copy at
5 // http://opensource.org/licenses/MIT)
6 //=======================================================================
7 
8 #pragma once
9 
10 #include "etl/expr/base_temporary_expr.hpp"
11 
14 
15 namespace etl {
16 
21 template <etl_4d A, bool Mean>
22 struct bias_batch_mean_4d_expr : base_temporary_expr_un<bias_batch_mean_4d_expr<A, Mean>, A> {
27 
// The sub storage order: forwarded unchanged from the traits of the sub expression.
28  static constexpr auto storage_order = sub_traits::storage_order;
29 
// Indicates if the temporary expression can be directly evaluated using only GPU.
// True when one of the following holds:
//  - sum variant (!Mean) with cuDNN enabled and a floating point sub expression
//  - EGBLAS single-precision batch-sum kernel, row-major, single-precision sub expression
//  - EGBLAS double-precision batch-sum kernel, row-major, double-precision sub expression
// NOTE(review): the EGBLAS terms test has_*bias_batch_sum4 even when Mean is true,
// whereas assign_to's Mean path tests has_sbias_batch_mean4 -- confirm this is intended.
34  static constexpr bool gpu_computable = (!Mean && cudnn_enabled && is_floating<A>)
35  || (impl::egblas::has_sbias_batch_sum4 && all_row_major<A> && all_single_precision<A>)
36  || (impl::egblas::has_dbias_batch_sum4 && all_row_major<A> && all_double_precision<A>);
37 
42  explicit bias_batch_mean_4d_expr(A a) : base_type(a) {
43  //Nothing else to init
44  }
45 
51  template <etl_1d C>
52  static void check([[maybe_unused]] const A& a, [[maybe_unused]] const C& c) {
53  if constexpr (all_fast<A, C>) {
54  static_assert(etl::dim<1, A>() == etl::dim<0, C>(), "Invalid dimensions for bias_batch_mean_4d");
55  } else {
56  cpp_assert(etl::dim<1>(a) == etl::dim<0>(c), "Invalid dimensions for bias_batch_mean_4d");
57  }
58  }
59 
60  // Assignment functions
61 
66  template <etl_1d L>
67  void assign_to(L&& lhs) const {
68  inc_counter("temp:assign");
69 
70  auto& a = this->a();
71 
72  using T = value_t<A>;
73 
74  check(a, lhs);
75 
76  if constexpr (Mean && impl::egblas::has_sbias_batch_mean4 && all_row_major<A> && all_floating<A, L>) {
77  const auto N = etl::dim<0>(a);
78  const auto K = etl::dim<1>(a);
79  const auto W = etl::dim<2>(a);
80  const auto H = etl::dim<3>(a);
81 
82  decltype(auto) t1 = smart_forward_gpu(a);
83  t1.ensure_gpu_up_to_date();
84 
85  lhs.ensure_gpu_allocated();
86 
87  impl::egblas::bias_batch_mean4(N, K, W, H, t1.gpu_memory(), lhs.gpu_memory());
88 
89  lhs.validate_gpu();
90  lhs.invalidate_cpu();
91  } else if constexpr (!Mean && impl::egblas::has_sbias_batch_sum4 && all_row_major<A> && all_floating<A, L>) {
92  const auto N = etl::dim<0>(a);
93  const auto K = etl::dim<1>(a);
94  const auto W = etl::dim<2>(a);
95  const auto H = etl::dim<3>(a);
96 
97  decltype(auto) t1 = smart_forward_gpu(a);
98  t1.ensure_gpu_up_to_date();
99 
100  lhs.ensure_gpu_allocated();
101 
102  impl::egblas::bias_batch_sum4(N, K, W, H, t1.gpu_memory(), lhs.gpu_memory());
103 
104  lhs.validate_gpu();
105  lhs.invalidate_cpu();
106  } else if constexpr (!Mean && cudnn_enabled && all_floating<A, L>) {
107  impl::cudnn::bias_batch_mean_4d(smart_forward_gpu(a), lhs);
108  } else {
109  const auto N = etl::size(a) / etl::size(lhs);
110  const auto K = etl::size(lhs);
111 
112  standard_evaluator::pre_assign_rhs(a);
113 
114  a.ensure_cpu_up_to_date();
115 
116  auto batch_fun_k = [&](const size_t first, const size_t last) {
117  CPU_SECTION {
118  for (size_t k = first; k < last; ++k) {
119  T mean(0);
120 
121  for (size_t b = 0; b < etl::dim<0>(a); ++b) {
122  mean += sum(a(b)(k));
123  }
124 
125  if constexpr (Mean) {
126  lhs(k) = mean / static_cast<T>(N);
127  } else {
128  lhs(k) = mean;
129  }
130  }
131  }
132  };
133 
134  engine_dispatch_1d_serial(batch_fun_k, 0, K, 2UL);
135 
136  lhs.validate_cpu();
137  lhs.invalidate_gpu();
138  }
139  }
140 
145  template <etl_1d L>
146  void assign_add_to(L&& lhs) const {
147  if constexpr (all_floating<A, L> && ((!Mean && cudnn_enabled) || (all_row_major<A, L> && impl::egblas::has_sbias_batch_sum4))) {
148  std_add_evaluate(*this, lhs);
149  } else {
150  auto& a = this->a();
151 
152  standard_evaluator::pre_assign_rhs(a);
153 
154  a.ensure_cpu_up_to_date();
155  a.ensure_gpu_up_to_date();
156 
157  const auto N = etl::size(a) / etl::size(lhs);
158  const auto K = etl::size(lhs);
159 
160  using T = value_t<A>;
161 
162  check(a, lhs);
163 
164  lhs.ensure_cpu_up_to_date();
165 
166  auto batch_fun_k = [&](const size_t first, const size_t last) {
167  CPU_SECTION {
168  for (size_t k = first; k < last; ++k) {
169  T mean(0);
170 
171  for (size_t b = 0; b < etl::dim<0>(a); ++b) {
172  mean += sum(a(b)(k));
173  }
174 
175  if constexpr (Mean) {
176  lhs(k) += mean / static_cast<T>(N);
177  } else {
178  lhs(k) += mean;
179  }
180  }
181  }
182  };
183 
184  engine_dispatch_1d_serial(batch_fun_k, 0, K, 2UL);
185 
186  lhs.validate_cpu();
187  lhs.invalidate_gpu();
188  }
189  }
190 
195  template <etl_1d L>
196  void assign_sub_to(L&& lhs) const {
197  if constexpr (all_floating<A, L> && ((!Mean && cudnn_enabled) || (all_row_major<A, L> && impl::egblas::has_sbias_batch_sum4))) {
198  std_sub_evaluate(*this, lhs);
199  } else {
200  auto& a = this->a();
201 
202  standard_evaluator::pre_assign_rhs(a);
203 
204  a.ensure_cpu_up_to_date();
205  a.ensure_gpu_up_to_date();
206 
207  [[maybe_unused]] const auto N = etl::size(a) / etl::size(lhs);
208  const auto K = etl::size(lhs);
209 
210  using T = value_t<A>;
211 
212  check(a, lhs);
213 
214  lhs.ensure_cpu_up_to_date();
215 
216  auto batch_fun_k = [&](const size_t first, const size_t last) {
217  CPU_SECTION {
218  for (size_t k = first; k < last; ++k) {
219  T mean(0);
220 
221  for (size_t b = 0; b < etl::dim<0>(a); ++b) {
222  mean += sum(a(b)(k));
223  }
224 
225  if constexpr (Mean) {
226  lhs(k) -= mean / static_cast<T>(N);
227  } else {
228  lhs(k) -= mean;
229  }
230  }
231  }
232  };
233 
234  engine_dispatch_1d_serial(batch_fun_k, 0, K, 2UL);
235 
236  lhs.validate_cpu();
237  lhs.invalidate_gpu();
238  }
239  }
240 
245  template <etl_1d L>
246  void assign_mul_to(L&& lhs) const {
247  if constexpr (all_floating<A, L> && ((!Mean && cudnn_enabled) || (all_row_major<A, L> && impl::egblas::has_sbias_batch_sum4))) {
248  std_mul_evaluate(*this, lhs);
249  } else {
250  auto& a = this->a();
251 
252  standard_evaluator::pre_assign_rhs(a);
253 
254  a.ensure_cpu_up_to_date();
255  a.ensure_gpu_up_to_date();
256 
257  [[maybe_unused]] const auto N = etl::size(a) / etl::size(lhs);
258  const auto K = etl::size(lhs);
259 
260  using T = value_t<A>;
261 
262  check(a, lhs);
263 
264  lhs.ensure_cpu_up_to_date();
265 
266  auto batch_fun_k = [&](const size_t first, const size_t last) {
267  CPU_SECTION {
268  for (size_t k = first; k < last; ++k) {
269  T mean(0);
270 
271  for (size_t b = 0; b < etl::dim<0>(a); ++b) {
272  mean += sum(a(b)(k));
273  }
274 
275  if constexpr (Mean) {
276  lhs(k) *= mean / static_cast<T>(N);
277  } else {
278  lhs(k) *= mean;
279  }
280  }
281  }
282  };
283 
284  engine_dispatch_1d_serial(batch_fun_k, 0, K, 2UL);
285 
286  lhs.validate_cpu();
287  lhs.invalidate_gpu();
288  }
289  }
290 
295  template <etl_1d L>
296  void assign_div_to(L&& lhs) const {
297  if constexpr (all_floating<A, L> && ((!Mean && cudnn_enabled) || (all_row_major<A, L> && impl::egblas::has_sbias_batch_sum4))) {
298  std_div_evaluate(*this, lhs);
299  } else {
300  auto& a = this->a();
301 
302  standard_evaluator::pre_assign_rhs(a);
303 
304  a.ensure_cpu_up_to_date();
305  a.ensure_gpu_up_to_date();
306 
307  [[maybe_unused]] const auto N = etl::size(a) / etl::size(lhs);
308  const auto K = etl::size(lhs);
309 
310  using T = value_t<A>;
311 
312  check(a, lhs);
313 
314  lhs.ensure_cpu_up_to_date();
315 
316  auto batch_fun_k = [&](const size_t first, const size_t last) {
317  CPU_SECTION {
318  for (size_t k = first; k < last; ++k) {
319  T mean(0);
320 
321  for (size_t b = 0; b < etl::dim<0>(a); ++b) {
322  mean += sum(a(b)(k));
323  }
324 
325  if constexpr (Mean) {
326  lhs(k) /= mean / static_cast<T>(N);
327  } else {
328  lhs(k) /= mean;
329  }
330  }
331  }
332  };
333 
334  engine_dispatch_1d_serial(batch_fun_k, 0, K, 2UL);
335 
336  lhs.validate_cpu();
337  lhs.invalidate_gpu();
338  }
339  }
340 
345  template <etl_1d L>
346  void assign_mod_to(L&& lhs) const {
347  if constexpr (all_floating<A, L> && ((!Mean && cudnn_enabled) || (all_row_major<A, L> && impl::egblas::has_sbias_batch_sum4))) {
348  std_mod_evaluate(*this, lhs);
349  } else {
350  auto& a = this->a();
351 
352  standard_evaluator::pre_assign_rhs(a);
353 
354  a.ensure_cpu_up_to_date();
355  a.ensure_gpu_up_to_date();
356 
357  [[maybe_unused]] const auto N = etl::size(a) / etl::size(lhs);
358  const auto K = etl::size(lhs);
359 
360  using T = value_t<A>;
361 
362  check(a, lhs);
363 
364  lhs.ensure_cpu_up_to_date();
365 
366  auto batch_fun_k = [&](const size_t first, const size_t last) {
367  CPU_SECTION {
368  for (size_t k = first; k < last; ++k) {
369  T mean(0);
370 
371  for (size_t b = 0; b < etl::dim<0>(a); ++b) {
372  mean += sum(a(b)(k));
373  }
374 
375  if constexpr (Mean) {
376  lhs(k) %= mean / static_cast<T>(N);
377  } else {
378  lhs(k) %= mean;
379  }
380  }
381  }
382  };
383 
384  engine_dispatch_1d_serial(batch_fun_k, 0, K, 2UL);
385 
386  lhs.validate_cpu();
387  lhs.invalidate_gpu();
388  }
389  }
390 
397  friend std::ostream& operator<<(std::ostream& os, const bias_batch_mean_4d_expr& expr) {
398  if (Mean) {
399  return os << "bias_batch_mean_4d(" << expr._a << ")";
400  } else {
401  return os << "bias_batch_sum_4d(" << expr._a << ")";
402  }
403  }
404 };
405 
410 template <typename A, bool Mean>
// Type aliases and compile-time flags of the traits for this expression.
// NOTE(review): the enclosing struct header is not visible in this listing;
// presumably this is the etl_traits specialization for bias_batch_mean_4d_expr<A, Mean> -- confirm.
413  using sub_expr_t = std::decay_t<A>; // The sub expression type
416 
417  static constexpr bool is_etl = true;
418  static constexpr bool is_transformer = false;
419  static constexpr bool is_view = false;
420  static constexpr bool is_magic_view = false;
421  static constexpr bool is_fast = sub_traits::is_fast; // Forwarded from the sub expression traits
422  static constexpr bool is_linear = false;
423  static constexpr bool is_thread_safe = true;
424  static constexpr bool is_value = false;
425  static constexpr bool is_direct = true;
426  static constexpr bool is_generator = false;
427  static constexpr bool is_padded = false;
428  static constexpr bool is_aligned = true;
429  static constexpr bool is_temporary = true;
430  static constexpr bool gpu_computable = is_gpu_t<value_type> && cuda_enabled; // Needs both a GPU value type and CUDA
431  static constexpr order storage_order = sub_traits::storage_order; // Forwarded from the sub expression traits
432 
// Reported as vectorizable for every vector mode V.
438  template <vector_mode_t V>
439  static constexpr bool vectorizable = true;
440 
445  template <size_t DD>
446  static constexpr size_t dim() {
447  static_assert(DD == 0, "Invalid dimensions access");
448  return decay_traits<A>::template dim<1>();
449  }
450 
457  static size_t dim(const expr_t& e, [[maybe_unused]] size_t d) {
458  cpp_assert(d == 0, "Invalid dimensions access");
459 
460  return etl::dim<1>(e._a);
461  }
462 
468  static size_t size(const expr_t& e) {
469  return etl::dim<1>(e._a);
470  }
471 
476  static constexpr size_t size() {
477  return decay_traits<A>::template dim<1>();
478  }
479 
/*!
 * \brief Returns the number of dimensions of the expression
 * \return always 1
 */
static constexpr size_t dimensions() {
    constexpr size_t rank = 1;
    return rank;
}
487 
/*!
 * \brief Estimate the complexity of computation
 * \return always -1
 */
static constexpr int complexity() noexcept {
    constexpr int estimate = -1;
    return estimate;
}
495 };
496 
502 template <etl_4d E>
505 }
506 
512 template <etl_4d E>
514  return bias_batch_mean_4d_expr<detail::build_type<E>, false>{value};
515 }
516 
517 } //end of namespace etl
value_t< E > mean(E &&values)
Returns the mean of all the values contained in the given expression.
Definition: expression_builder.hpp:650
void assign_mod_to(L &&lhs) const
Modulo the given left-hand-side expression.
Definition: bias_batch_mean_4d_expr.hpp:346
bias_batch_mean_4d_expr< detail::build_type< E >, false > bias_batch_sum_4d(const E &value)
Returns the batch sum of the given 4D expression, with one value per index of its second dimension.
Definition: bias_batch_mean_4d_expr.hpp:513
value_t< A > value_type
The value type of the expression.
Definition: bias_batch_mean_4d_expr.hpp:415
void assign_to(L &&lhs) const
Assign to a matrix of the same storage order.
Definition: bias_batch_mean_4d_expr.hpp:67
std::add_lvalue_reference_t< A > a()
Returns the sub expression.
Definition: base_temporary_expr.hpp:489
static constexpr auto storage_order
The sub storage order.
Definition: bias_batch_mean_4d_expr.hpp:28
value_t< A > value_type
The type of value of the expression.
Definition: bias_batch_mean_4d_expr.hpp:23
void engine_dispatch_1d_serial(Functor &&functor, size_t first, size_t last, size_t threshold, [[maybe_unused]] size_t n_threads=etl::threads)
Dispatch the elements of a range to a functor in a parallel manner, using the global thread engine...
Definition: parallel_support.hpp:734
static constexpr size_t size()
Returns the size of the expression.
Definition: bias_batch_mean_4d_expr.hpp:476
constexpr bool is_magic_view
Traits indicating if the given ETL type is a magic view expression.
Definition: traits.hpp:311
static constexpr size_t dimensions()
Returns the number of dimensions of the expression.
Definition: bias_batch_mean_4d_expr.hpp:484
order
Storage order of a matrix.
Definition: order.hpp:15
static void check([[maybe_unused]] const A &a, [[maybe_unused]] const C &c)
Validate that dimension 1 of the 4D input matches dimension 0 of the 1D output.
Definition: bias_batch_mean_4d_expr.hpp:52
constexpr bool cuda_enabled
Indicates if CUDA is available.
Definition: config.hpp:94
EGBLAS wrappers for the bias_batch_sum operation.
std::decay_t< A > sub_expr_t
The sub expression type.
Definition: bias_batch_mean_4d_expr.hpp:413
static constexpr bool gpu_computable
Indicates if the temporary expression can be directly evaluated using only GPU.
Definition: bias_batch_mean_4d_expr.hpp:34
A 4D bias batch mean (or sum) expression.
Definition: bias_batch_mean_4d_expr.hpp:22
A _a
The sub expression reference.
Definition: base_temporary_expr.hpp:447
static size_t size(const expr_t &e)
Returns the size of the expression.
Definition: bias_batch_mean_4d_expr.hpp:468
constexpr bool is_fast
Traits to test if the given ETL expression type is fast (sizes known at compile-time) ...
Definition: traits.hpp:588
void assign_add_to(L &&lhs) const
Add to the given left-hand-side expression.
Definition: bias_batch_mean_4d_expr.hpp:146
static size_t dim(const expr_t &e, [[maybe_unused]] size_t d)
Returns the dth dimension of the expression.
Definition: bias_batch_mean_4d_expr.hpp:457
Traits to get information about ETL types.
Definition: tmp.hpp:68
Root namespace for the ETL library.
Definition: adapter.hpp:15
friend std::ostream & operator<<(std::ostream &os, const bias_batch_mean_4d_expr &expr)
Print a representation of the expression on the given stream.
Definition: bias_batch_mean_4d_expr.hpp:397
bias_batch_mean implementations with NVidia cuDNN library
constexpr bool cudnn_enabled
Indicates if the NVIDIA CUDNN library is available for ETL.
Definition: config.hpp:114
void assign_mul_to(L &&lhs) const
Multiply the given left-hand-side expression.
Definition: bias_batch_mean_4d_expr.hpp:246
static constexpr size_t dim()
Returns the DDth dimension of the expression.
Definition: bias_batch_mean_4d_expr.hpp:446
void std_mod_evaluate(Expr &&expr, Result &&result)
Compound modulo evaluation of the expr into result.
Definition: evaluator.hpp:1271
void std_mul_evaluate(Expr &&expr, Result &&result)
Compound multiply evaluation of the expr into result.
Definition: evaluator.hpp:1233
bias_batch_mean_4d_expr(A a)
Construct a new expression.
Definition: bias_batch_mean_4d_expr.hpp:42
constexpr bool is_transformer
Traits indicating if the given ETL type is a transformer expression.
Definition: traits.hpp:297
value_t< E > sum(E &&values)
Returns the sum of all the values contained in the given expression.
Definition: expression_builder.hpp:624
decltype(auto) smart_forward_gpu(E &expr)
Smart forwarding for a temporary expression that will be computed in GPU.
Definition: helpers.hpp:343
constexpr size_t size(const E &expr) noexcept
Returns the size of the given ETL expression.
Definition: helpers.hpp:108
void assign_div_to(L &&lhs) const
Divide the given left-hand-side expression.
Definition: bias_batch_mean_4d_expr.hpp:296
constexpr bool is_view
Traits indicating if the given ETL type is a view expression.
Definition: traits.hpp:304
static constexpr bool is_fast
Indicates if T is a fast structure.
Definition: traits_base.hpp:25
void std_sub_evaluate(Expr &&expr, Result &&result)
Compound subtract evaluation of the expr into result.
Definition: evaluator.hpp:1214
Abstract base class for temporary unary expression.
Definition: base_temporary_expr.hpp:443
void assign_sub_to(L &&lhs) const
Sub from the given left-hand-side expression.
Definition: bias_batch_mean_4d_expr.hpp:196
constexpr bool is_thread_safe
Traits to test if the given ETL expression type is thread safe.
Definition: traits.hpp:687
typename decay_traits< E >::value_type value_t
Traits to extract the value type out of an ETL type.
Definition: tmp.hpp:81
void std_div_evaluate(Expr &&expr, Result &&result)
Compound divide evaluation of the expr into result.
Definition: evaluator.hpp:1252
bias_batch_mean_4d_expr< detail::build_type< E >, true > bias_batch_mean_4d(const E &value)
Returns the batch mean of the given 4D expression, with one value per index of its second dimension.
Definition: bias_batch_mean_4d_expr.hpp:503
void inc_counter([[maybe_unused]] const char *name)
Increase the given counter.
Definition: counters.hpp:25
void std_add_evaluate(Expr &&expr, Result &&result)
Compound add evaluation of the expr into result.
Definition: evaluator.hpp:1195
static constexpr int complexity() noexcept
Estimate the complexity of computation.
Definition: bias_batch_mean_4d_expr.hpp:492