Expression Templates Library (ETL)
bias_add_4d_expr.hpp
1 //=======================================================================
2 // Copyright (c) 2014-2023 Baptiste Wicht
3 // Distributed under the terms of the MIT License.
4 // (See accompanying file LICENSE or copy at
5 // http://opensource.org/licenses/MIT)
6 //=======================================================================
7 
8 #pragma once
9 
10 #include "etl/expr/base_temporary_expr.hpp"
11 
12 // Include the implementations
17 
18 namespace etl {
19 
24 template <etl_4d A, etl_1d B>
25 struct bias_add_4d_expr : base_temporary_expr_bin<bias_add_4d_expr<A, B>, A, B> {
30 
/*!
 * \brief The sub storage order, taken from the traits of the sub expression
 */
31  static constexpr auto storage_order = sub_traits::storage_order;
32 
/*!
 * \brief Indicates if the temporary expression can be directly evaluated
 * using only GPU.
 *
 * This requires cuDNN to be enabled and A and B to be all floating and
 * all homogeneous.
 */
37  static constexpr bool gpu_computable = cudnn_enabled && all_floating<A, B> && all_homogeneous<A, B>;
38 
/*!
 * \brief Construct a new expression
 * \param a The first sub expression, handed over to the base class
 * \param b The second sub expression, handed over to the base class
 */
43  explicit bias_add_4d_expr(A a, B b) : base_type(a, b) {
44  //Nothing else to init
45  }
46 
52  template <etl_4d C>
53  static void check([[maybe_unused]] const A& a, [[maybe_unused]] const B& b, [[maybe_unused]] const C& c) {
54  if constexpr (all_fast<A, B, C>) {
55  static_assert(etl::dim<1, A>() == etl::dim<0, B>(), "Invalid dimensions for bias_add");
56 
57  static_assert(etl::dim<0, A>() == etl::dim<0, C>(), "Invalid dimensions for bias_add");
58  static_assert(etl::dim<1, A>() == etl::dim<1, C>(), "Invalid dimensions for bias_add");
59  static_assert(etl::dim<2, A>() == etl::dim<2, C>(), "Invalid dimensions for bias_add");
60  static_assert(etl::dim<3, A>() == etl::dim<3, C>(), "Invalid dimensions for bias_add");
61  } else {
62  cpp_assert(etl::dim<1>(a) == etl::dim<0>(b), "Invalid dimensions for bias_add");
63 
64  cpp_assert(etl::dim<0>(a) == etl::dim<0>(c), "Invalid dimensions for bias_add");
65  cpp_assert(etl::dim<1>(a) == etl::dim<1>(c), "Invalid dimensions for bias_add");
66  cpp_assert(etl::dim<2>(a) == etl::dim<2>(c), "Invalid dimensions for bias_add");
67  cpp_assert(etl::dim<3>(a) == etl::dim<3>(c), "Invalid dimensions for bias_add");
68  }
69  }
70 
71  // Assignment functions
72 
/*!
 * \brief Assign the result of the bias_add to the given left-hand-side
 * expression.
 *
 * If the temporary has already been evaluated, its result is simply copied
 * into lhs. Otherwise, the dimensions are validated and the computation is
 * dispatched to the implementation returned by select_impl<L>().
 *
 * \param lhs The expression to which assign
 */
77  template <etl_4d L>
78  void assign_to(L&& lhs) const {
79  inc_counter("temp:assign");
80 
// Shortcut: reuse the result if the temporary was already evaluated
81  if (this->is_evaluated()) {
82  lhs = this->result();
83  return;
84  }
85 
86  auto& a = this->a();
87  auto& b = this->b();
88 
89  check(a, b, lhs);
90 
// Pick the implementation for this combination of input and output types
91  constexpr_select auto impl = select_impl<L>();
92 
93  if
94  constexpr_select(impl == bias_add_impl::VEC) {
95  inc_counter("impl:vec");
96  impl::vec::bias_add_4d(smart_forward(a), smart_forward(b), lhs);
97  }
98  else if
99  constexpr_select(impl == bias_add_impl::STD) {
100  inc_counter("impl:std");
101  impl::standard::bias_add_4d(smart_forward(a), smart_forward(b), lhs);
102  }
103  else if
104  constexpr_select(impl == bias_add_impl::EGBLAS) {
105  inc_counter("impl:egblas");
106 
107  decltype(auto) e_x = smart_forward_gpu(a);
108  decltype(auto) e_b = smart_forward_gpu(b);
109  auto& e_y = lhs;
110 
// The inputs must be up to date on the GPU and the output must have GPU
// memory before the kernel is called on the raw GPU pointers
111  e_x.ensure_gpu_up_to_date();
112  e_b.ensure_gpu_up_to_date();
113  e_y.ensure_gpu_allocated();
114 
// The four dimensions of a are passed, followed by each GPU pointer with an
// argument of 1 (presumably the increment -- TODO confirm against the
// EGBLAS wrapper)
115  impl::egblas::bias_add_4d(etl::dim<0>(a), etl::dim<1>(a), etl::dim<2>(a), etl::dim<3>(a), e_x.gpu_memory(), 1, e_b.gpu_memory(), 1,
116  e_y.gpu_memory(), 1);
117 
// NOTE(review): going by the names, this marks the GPU copy of the output
// as the valid one and the CPU copy as stale -- confirm
118  e_y.validate_gpu();
119  e_y.invalidate_cpu();
120  }
121  else if
122  constexpr_select(impl == bias_add_impl::CUDNN) {
123  inc_counter("impl:cudnn");
124  impl::cudnn::bias_add_4d(smart_forward_gpu(a), smart_forward_gpu(b), lhs);
125  }
126  else {
// Every enumerator that the selection is expected to return is handled above
127  cpp_unreachable("Invalid bias_add selection");
128  }
129  }
130 
/*!
 * \brief Add to the given left-hand-side expression
 *
 * Delegates to std_add_evaluate with this expression and lhs.
 *
 * \param lhs The expression to which assign
 */
135  template <etl_4d L>
136  void assign_add_to(L&& lhs) const {
137  std_add_evaluate(*this, lhs);
138  }
139 
/*!
 * \brief Sub from the given left-hand-side expression
 *
 * Delegates to std_sub_evaluate with this expression and lhs.
 *
 * \param lhs The expression to which assign
 */
144  template <etl_4d L>
145  void assign_sub_to(L&& lhs) const {
146  std_sub_evaluate(*this, lhs);
147  }
148 
/*!
 * \brief Multiply the given left-hand-side expression
 *
 * Delegates to std_mul_evaluate with this expression and lhs.
 *
 * \param lhs The expression to which assign
 */
153  template <etl_4d L>
154  void assign_mul_to(L&& lhs) const {
155  std_mul_evaluate(*this, lhs);
156  }
157 
/*!
 * \brief Divide the given left-hand-side expression
 *
 * Delegates to std_div_evaluate with this expression and lhs.
 *
 * \param lhs The expression to which assign
 */
162  template <etl_4d L>
163  void assign_div_to(L&& lhs) const {
164  std_div_evaluate(*this, lhs);
165  }
166 
/*!
 * \brief Modulo the given left-hand-side expression
 *
 * Delegates to std_mod_evaluate with this expression and lhs.
 *
 * \param lhs The expression to which assign
 */
171  template <etl_4d L>
172  void assign_mod_to(L&& lhs) const {
173  std_mod_evaluate(*this, lhs);
174  }
175 
182  friend std::ostream& operator<<(std::ostream& os, const bias_add_4d_expr& expr) {
183  return os << "bias_add(" << expr._a << "," << expr._b << ")";
184  }
185 
186 private:
196  template <typename C>
197  static constexpr etl::bias_add_impl select_default_impl(bool no_gpu) {
198  constexpr bool homo = all_homogeneous<A, B, C>;
199  constexpr bool vec_possible = vec_enabled && vectorize_impl && all_vectorizable<vector_mode, A, B, C> && homo;
200  constexpr bool cudnn_possible = cudnn_enabled && all_floating<A, B, C> && homo;
201 
202  if (homo && is_single_precision<A> && impl::egblas::has_sbias_add_4d) {
204  }
205 
206  if (homo && is_double_precision<A> && impl::egblas::has_dbias_add_4d) {
208  }
209 
210  if (cudnn_possible && !no_gpu) {
212  }
213 
214  if (vec_possible) {
216  }
217 
219  }
220 
221 #ifdef ETL_MANUAL_SELECT
222 
228  template <typename C>
229  static etl::bias_add_impl select_impl() {
230  auto def = select_default_impl<C>(local_context().cpu);
231 
232  if (local_context().bias_add_selector.forced) {
233  auto forced = local_context().bias_add_selector.impl;
234 
235  switch (forced) {
236  // EGBLAS cannot always be used
238  if (!all_homogeneous<
239  A, B,
240  C> || !((is_single_precision<A> && impl::egblas::has_sbias_add_4d) || (is_double_precision<A> && impl::egblas::has_sbias_add_4d))
241  || local_context().cpu) {
242  std::cerr << "Forced selection to EGBLAS bias_add implementation, but not possible for this expression" << std::endl;
243  return def;
244  }
245 
246  return forced;
247 
248  //CUDNN cannot always be used
250  if (!cudnn_enabled || !all_floating<A, B, C> || !all_homogeneous<A, B, C> || local_context().cpu) {
251  std::cerr << "Forced selection to cUDNN bias_add implementation, but not possible for this expression" << std::endl;
252  return def;
253  }
254 
255  return forced;
256 
257  //VEC cannot always be used
258  case bias_add_impl::VEC:
259  if (!vec_enabled || !vectorize_impl || !all_vectorizable<vector_mode, A, B, C> || !all_homogeneous<A, B, C>) {
260  std::cerr << "Forced selection to VEC bias_add implementation, but not possible for this expression" << std::endl;
261  return def;
262  }
263 
264  return forced;
265 
266  //In other cases, simply use the forced impl
267  default:
268  return forced;
269  }
270  }
271 
272  return def;
273  }
274 
275 #else
276 
/*!
 * \brief Select the implementation of bias_add for the given output type
 *
 * This version is used when ETL_MANUAL_SELECT is not defined: no selection
 * can be forced, so this is the default selection with no_gpu set to false.
 *
 * \tparam C The type of the output expression
 * \return The implementation to use
 */
284  template <typename C>
285  static constexpr etl::bias_add_impl select_impl() {
286  return select_default_impl<C>(false);
287  }
288 
289 #endif
290 };
291 
/*!
 * \brief Traits of the bias_add_4d expression
 *
 * NOTE(review): listing lines 298-299 (the struct header and, presumably,
 * the expr_t alias) and 301-302 (presumably the sub_traits and value_type
 * aliases) are missing from this listing; the members below rely on them.
 *
 * \tparam A The type of the first sub expression
 * \tparam B The type of the second sub expression
 */
297 template <typename A, typename B>
// The sub expression type (A without reference and cv qualifiers)
300  using sub_expr_t = std::decay_t<A>;
303 
// Static properties of the expression
304  static constexpr bool is_etl = true;
305  static constexpr bool is_transformer = false;
306  static constexpr bool is_view = false;
307  static constexpr bool is_magic_view = false;
// Fast (sizes known at compile-time) only if both sub expressions are fast
308  static constexpr bool is_fast = all_fast<A, B>;
309  static constexpr bool is_linear = true;
310  static constexpr bool is_thread_safe = true;
311  static constexpr bool is_value = false;
312  static constexpr bool is_direct = true;
313  static constexpr bool is_generator = false;
314  static constexpr bool is_padded = false;
315  static constexpr bool is_aligned = true;
316  static constexpr bool is_temporary = true;
// GPU computable only with CUDA enabled and a GPU value type
317  static constexpr bool gpu_computable = is_gpu_t<value_type> && cuda_enabled;
// Same storage order as the sub expression
318  static constexpr order storage_order = sub_traits::storage_order;
319 
/*!
 * \brief Indicates if the expression is vectorizable using the given
 * vector mode (always true here)
 * \tparam V The vector mode
 */
325  template <vector_mode_t V>
326  static constexpr bool vectorizable = true;
327 
/*!
 * \brief Returns the DDth dimension of the expression
 *
 * The dimensions are those of the first sub expression.
 *
 * \return the DDth dimension of the expression
 */
332  template <size_t DD>
333  static constexpr size_t dim() {
334  return sub_traits::template dim<DD>();
335  }
336 
/*!
 * \brief Returns the dth dimension of the expression
 * \param e The expression to get the dimension from
 * \param d The dimension to get
 * \return the dth dimension of the first sub expression of e
 */
343  static size_t dim(const expr_t& e, size_t d) {
344  return sub_traits::dim(e._a, d);
345  }
346 
/*!
 * \brief Returns the size of the expression
 * \param e The expression to get the size from
 * \return the size of the first sub expression of e
 */
352  static size_t size(const expr_t& e) {
353  return sub_traits::size(e._a);
354  }
355 
/*!
 * \brief Returns the size of the expression, taken from the traits of the
 * sub expression
 * \return the size of the expression
 */
360  static constexpr size_t size() {
361  return sub_traits::size();
362  }
363 
/*!
 * \brief Returns the number of dimensions of the expression
 * \return the number of dimensions of the expression, always 4
 */
368  static constexpr size_t dimensions() {
369  return 4;
370  }
371 
/*!
 * \brief Estimate the complexity of computation
 * \return always -1 (NOTE(review): presumably meaning that no estimate is
 * available -- confirm)
 */
376  static constexpr int complexity() noexcept {
377  return -1;
378  }
379 };
380 
/*!
 * \brief Returns the result of adding the bias [K] to the 4D matrix
 * [N1, K, N2, N3]
 *
 * NOTE(review): listing lines 388-389 (the signature and the body of the
 * function) are missing from this listing. According to the index at the
 * end of this file, the signature is
 * bias_add_4d_expr<detail::build_type<E>, detail::build_type<B>> bias_add_4d(const E& x, const B& biases).
 *
 * \tparam E The type of the 4D input expression
 * \tparam B The type of the 1D bias expression
 */
387 template <etl_4d E, etl_1d B>
390 }
391 
392 } //end of namespace etl
static size_t dim(const expr_t &e, size_t d)
Returns the dth dimension of the expression.
Definition: bias_add_4d_expr.hpp:343
constexpr bool all_homogeneous
Traits to test if all the given ETL expression types are homogeneous.
Definition: traits.hpp:741
static constexpr size_t size()
Returns the size of the expression.
Definition: bias_add_4d_expr.hpp:360
void assign_add_to(L &&lhs) const
Add to the given left-hand-side expression.
Definition: bias_add_4d_expr.hpp:136
B _b
The sub expression reference.
Definition: base_temporary_expr.hpp:534
Standard implementation.
constexpr bool is_magic_view
Traits indicating if the given ETL type is a magic view expression.
Definition: traits.hpp:311
A _a
The sub expression reference.
Definition: base_temporary_expr.hpp:533
friend std::ostream & operator<<(std::ostream &os, const bias_add_4d_expr &expr)
Print a representation of the expression on the given stream.
Definition: bias_add_4d_expr.hpp:182
Convolution implementations with NVidia cuDNN library.
void assign_sub_to(L &&lhs) const
Sub from the given left-hand-side expression.
Definition: bias_add_4d_expr.hpp:145
A 4D bias_add expression.
Definition: bias_add_4d_expr.hpp:25
constexpr bool vectorize_impl
Indicates if the implementations can be automatically vectorized by ETL.
Definition: config.hpp:35
constexpr bool vec_enabled
Indicates if vectorization is available in any format.
Definition: config.hpp:220
order
Storage order of a matrix.
Definition: order.hpp:15
constexpr bool cuda_enabled
Indicates if CUDA is available.
Definition: config.hpp:94
Abstract base class for temporary binary expression.
Definition: base_temporary_expr.hpp:529
bias_add_impl
Enumeration describing the different implementations of bias_add.
Definition: bias_add_impl.hpp:21
VEC implementation.
std::decay_t< A > sub_expr_t
The sub expression type.
Definition: bias_add_4d_expr.hpp:300
Standard implementation of the outer product.
std::add_lvalue_reference_t< B > b()
Returns the sub expression.
Definition: base_temporary_expr.hpp:593
static constexpr bool gpu_computable
Indicates if the temporary expression can be directly evaluated using only GPU.
Definition: bias_add_4d_expr.hpp:37
constexpr bool is_fast
Traits to test if the given ETL expression type is fast (sizes known at compile-time) ...
Definition: traits.hpp:588
bool is_evaluated() const noexcept
Indicates if the temporary has been evaluated.
Definition: base_temporary_expr.hpp:140
value_t< A > value_type
The type of value of the expression.
Definition: bias_add_4d_expr.hpp:26
static void check([[maybe_unused]] const A &a, [[maybe_unused]] const B &b, [[maybe_unused]] const C &c)
Validate the bias_add dimensions.
Definition: bias_add_4d_expr.hpp:53
Traits to get information about ETL types.
Definition: tmp.hpp:68
Root namespace for the ETL library.
Definition: adapter.hpp:15
context & local_context()
Return the configuration context of the current thread.
Definition: context.hpp:50
static constexpr int complexity() noexcept
Estimate the complexity of computation.
Definition: bias_add_4d_expr.hpp:376
GPU implementation.
void assign_to(L &&lhs) const
Assign to a matrix of the same storage order.
Definition: bias_add_4d_expr.hpp:78
auto dim(E &&value, size_t i) -> detail::identity_helper< E, dim_view< detail::build_identity_type< E >, D >>
Return a view representing the ith Dth dimension.
Definition: view_expression_builder.hpp:25
EGBLAS wrappers for the bias_add_2d operation.
value_t< A > value_type
The value type of the expression.
Definition: bias_add_4d_expr.hpp:302
std::conditional_t< is_etl_value< T >, const std::decay_t< T > &, std::decay_t< T > > build_type
Helper to build the type for a sub expression.
Definition: expression_helpers.hpp:24
constexpr bool cudnn_enabled
Indicates if the NVIDIA CUDNN library is available for ETL.
Definition: config.hpp:114
bool cpu
Force CPU evaluation.
Definition: context.hpp:29
GPU implementation.
void std_mod_evaluate(Expr &&expr, Result &&result)
Compound modulo evaluation of the expr into result.
Definition: evaluator.hpp:1271
Standard implementation of the bias_add computation.
void std_mul_evaluate(Expr &&expr, Result &&result)
Compound multiply evaluation of the expr into result.
Definition: evaluator.hpp:1233
static constexpr size_t dim()
Returns the DDth dimension of the expression.
Definition: bias_add_4d_expr.hpp:333
void assign_div_to(L &&lhs) const
Divide the given left-hand-side expression.
Definition: bias_add_4d_expr.hpp:163
constexpr bool is_transformer
Traits indicating if the given ETL type is a transformer expression.
Definition: traits.hpp:297
decltype(auto) smart_forward_gpu(E &expr)
Smart forwarding for a temporary expression that will be computed in GPU.
Definition: helpers.hpp:343
void assign_mod_to(L &&lhs) const
Modulo the given left-hand-side expression.
Definition: bias_add_4d_expr.hpp:172
constexpr bool is_view
Traits indicating if the given ETL type is a view expression.
Definition: traits.hpp:304
static size_t size(const expr_t &e)
Returns the size of the expression.
Definition: bias_add_4d_expr.hpp:352
bias_add_4d_expr< detail::build_type< E >, detail::build_type< B > > bias_add_4d(const E &x, const B &biases)
Returns the result of adding the bias [K] to the 4D matrix [N1, K, N2, N3].
Definition: bias_add_4d_expr.hpp:388
static constexpr auto storage_order
The sub storage order.
Definition: bias_add_4d_expr.hpp:31
bias_add_4d_expr(A a, B b)
Construct a new expression.
Definition: bias_add_4d_expr.hpp:43
void std_sub_evaluate(Expr &&expr, Result &&result)
Compound subtract evaluation of the expr into result.
Definition: evaluator.hpp:1214
decltype(auto) smart_forward(E &expr)
Smart forwarding for a temporary expression.
Definition: helpers.hpp:323
void assign_mul_to(L &&lhs) const
Multiply the given left-hand-side expression.
Definition: bias_add_4d_expr.hpp:154
constexpr bool is_thread_safe
Traits to test if the given ETL expression type is thread safe.
Definition: traits.hpp:687
static constexpr size_t dimensions()
Returns the number of dimensions of the expression.
Definition: bias_add_4d_expr.hpp:368
typename decay_traits< E >::value_type value_t
Traits to extract the value type out of an ETL type.
Definition: tmp.hpp:81
void std_div_evaluate(Expr &&expr, Result &&result)
Compound divide evaluation of the expr into result.
Definition: evaluator.hpp:1252
void inc_counter([[maybe_unused]] const char *name)
Increase the given counter.
Definition: counters.hpp:25
std::add_lvalue_reference_t< A > a()
Returns the sub expression.
Definition: base_temporary_expr.hpp:577
void std_add_evaluate(Expr &&expr, Result &&result)
Compound add evaluation of the expr into result.
Definition: evaluator.hpp:1195