Expression Templates Library (ETL)
conv_2d_full_multi_expr.hpp
1 //=======================================================================
2 // Copyright (c) 2014-2023 Baptiste Wicht
3 // Distributed under the terms of the MIT License.
4 // (See accompanying file LICENSE or copy at
5 // http://opensource.org/licenses/MIT)
6 //=======================================================================
7 
8 #pragma once
9 
10 #include "etl/expr/base_temporary_expr.hpp"
11 
12 //Get the implementations
13 #include "etl/impl/conv.hpp"
14 
15 namespace etl {
16 
/*!
 * \brief Temporary binary expression for the 'full' 2D convolution of a
 *        2D expression with a 3D expression.
 * \tparam A The type of the left (2D) sub expression
 * \tparam B The type of the right (3D) sub expression
 * \tparam Flipped Selects, at evaluation time, the *_flipped variants of the
 *         implementations instead of the regular ones
 */
21 template <etl_2d A, etl_3d B, bool Flipped>
22 struct conv_2d_full_multi_expr : base_temporary_expr_bin<conv_2d_full_multi_expr<A, B, Flipped>, A, B> {
27 
 // NOTE(review): listing lines 23-26 are absent from this extract; base_type,
 // left_traits and value_type used below are presumably declared there.

 /*!
  * \brief The storage order of the expression, taken from the left sub expression
  */
28  static constexpr auto storage_order = left_traits::storage_order;
29 
 /*!
  * \brief True only when CUDNN is enabled and impl::cudnn::conv_possible_
  *        holds for A and B.
  */
34  static constexpr bool gpu_computable = cudnn_enabled && impl::cudnn::conv_possible_<A, B>;
35 
40  explicit conv_2d_full_multi_expr(A a, B b) : base_type(a, b) {
41  //Nothing else to init
42  }
43 
44  // Assignment functions
45 
49  template <etl_2d I, etl_3d K, etl_3d C>
50  static void check([[maybe_unused]] const I& input, [[maybe_unused]] const K& kernel, [[maybe_unused]] const C& conv) {
51  if constexpr (all_fast<A, B, C>) {
52  static_assert(etl::dim<0, C>() == etl::dim<0, K>(), "Invalid dimensions for conv2_full_multi");
53  static_assert(etl::dim<1, C>() == etl::dim<0, I>() + etl::dim<1, K>() - 1, "Invalid dimensions for conv2_full_multi");
54  static_assert(etl::dim<2, C>() == etl::dim<1, I>() + etl::dim<2, K>() - 1, "Invalid dimensions for conv2_full_multi");
55  } else {
56  cpp_assert(etl::dim(conv, 0) == etl::dim(kernel, 0), "Invalid dimensions for conv2_full_multi");
57  cpp_assert(etl::dim(conv, 1) == etl::dim(input, 1) + etl::dim(kernel, 1) - 1, "Invalid dimensions for conv2_full_multi");
58  cpp_assert(etl::dim(conv, 2) == etl::dim(input, 2) + etl::dim(kernel, 2) - 1, "Invalid dimensions for conv2_full_multi");
59  }
60  }
61 
/*!
 * \brief Select the default implementation for this convolution, based on
 *        the storage orders of A, B and C and on whether the vectorized
 *        implementation is possible for them.
 *
 * NOTE(review): listing lines 71, 78, 82 and 85 (the function signature and
 * the statement of each branch) are absent from this extract, so the value
 * chosen in each case cannot be read from here.
 *
 * \tparam C The type of the output expression
 */
70  template <etl_3d C>
72  constexpr order input_order = decay_traits<A>::storage_order;
73  constexpr order kernel_order = decay_traits<B>::storage_order;
74  constexpr order output_order = decay_traits<C>::storage_order;
75 
76  //Only the standard implementation is able to handle column major
 // A single column-major operand (input, kernel or output) is enough to take this branch
77  if (input_order == order::ColumnMajor || kernel_order == order::ColumnMajor || output_order == order::ColumnMajor) {
79  }
80 
 // Reached only when every operand is not column-major
81  if (impl::vec::conv2_possible<vector_mode, A, B, C>) {
83  }
84 
86  }
87 
88 #ifdef ETL_MANUAL_SELECT
89 
/*!
 * \brief Select the implementation, honouring a selection forced through
 *        the conv_multi_selector of the local context.
 *
 * When nothing is forced, or when the forced implementation is reported as
 * not possible, the result of select_default_impl is returned.
 *
 * NOTE(review): listing lines 96, 102 and 112 (the function signature and
 * the two case labels of the switch) are absent from this extract.
 *
 * \tparam C The type of the output expression
 */
95  template <etl_3d C>
97  if (local_context().conv_multi_selector.forced) {
98  auto forced = local_context().conv_multi_selector.impl;
99 
100  switch (forced) {
101  //CUDNN cannot always be used
 // Without CUDNN support, warn on std::cerr and fall back to the default selection
103  if (!cudnn_enabled) { // COVERAGE_EXCLUDE_LINE
104  std::cerr << "Forced selection to CUDNN conv implementation, but not possible for this expression"
105  << std::endl; // COVERAGE_EXCLUDE_LINE
106  return select_default_impl<C>(); // COVERAGE_EXCLUDE_LINE
107  } // COVERAGE_EXCLUDE_LINE
108 
109  return forced;
110 
111  //VEC cannot always be used
 // Same fallback when the vectorized implementation is not possible for A, B and C
113  if (!impl::vec::conv2_possible<vector_mode, A, B, C>) {
114  std::cerr << "Forced selection to VEC conv2_full_multi implementation, but not possible for this expression" << std::endl;
115  return select_default_impl<C>(); // COVERAGE_EXCLUDE_LINE
116  }
117 
118  return forced;
119 
120  // Although it may be suboptimal the forced selection can
121  // always be achieved
122  default:
123  return forced;
124  }
125  }
126 
 // Nothing was forced: use the default selection
127  return select_default_impl<C>();
128  }
129 
130 #else
131 
137  template <etl_3d C>
138  static constexpr etl::conv_multi_impl select_impl() {
139  return select_default_impl<C>();
140  }
141 
142 #endif
143 
148  template <etl_3d C>
149  void assign_to(C&& conv) const {
150  inc_counter("temp:assign");
151 
152  auto& input = this->a();
153  auto& kernel = this->b();
154 
155  check(input, kernel, conv);
156 
157  constexpr_select auto impl = select_impl<C>();
158 
159  if constexpr (Flipped) {
160  if
161  constexpr_select(impl == etl::conv_multi_impl::VEC) {
162  inc_counter("impl:vec");
163  impl::vec::conv2_full_multi_flipped(smart_forward(input), smart_forward(kernel), conv);
164  }
165  else if
166  constexpr_select(impl == etl::conv_multi_impl::STD) {
167  inc_counter("impl:std");
168  impl::standard::conv2_full_multi_flipped(smart_forward(input), smart_forward(kernel), conv);
169  }
170  else if
171  constexpr_select(impl == etl::conv_multi_impl::FFT_STD) {
172  inc_counter("impl:fft_std");
173  impl::standard::conv2_full_multi_flipped_fft(smart_forward(input), smart_forward(kernel), conv);
174  }
175  else if
176  constexpr_select(impl == etl::conv_multi_impl::FFT_MKL) {
177  inc_counter("impl:fft_mkl");
178  impl::blas::conv2_full_multi_flipped(smart_forward(input), smart_forward(kernel), conv);
179  }
180  else if
181  constexpr_select(impl == etl::conv_multi_impl::FFT_CUFFT) {
182  inc_counter("impl:fft_cufft");
183  impl::cufft::conv2_full_multi_flipped(smart_forward_gpu(input), smart_forward_gpu(kernel), conv);
184  }
185  else {
186  cpp_unreachable("Invalid conv implementation selection");
187  }
188  } else {
189  if
190  constexpr_select(impl == etl::conv_multi_impl::VEC) {
191  inc_counter("impl:vec");
192  impl::vec::conv2_full_multi(smart_forward(input), smart_forward(kernel), conv);
193  }
194  else if
195  constexpr_select(impl == etl::conv_multi_impl::STD) {
196  inc_counter("impl:std");
197  impl::standard::conv2_full_multi(smart_forward(input), smart_forward(kernel), conv);
198  }
199  else if
200  constexpr_select(impl == etl::conv_multi_impl::FFT_STD) {
201  inc_counter("impl:fft_std");
202  impl::standard::conv2_full_multi_fft(smart_forward(input), smart_forward(kernel), conv);
203  }
204  else if
205  constexpr_select(impl == etl::conv_multi_impl::FFT_MKL) {
206  inc_counter("impl:fft_mkl");
207  impl::blas::conv2_full_multi(smart_forward(input), smart_forward(kernel), conv);
208  }
209  else if
210  constexpr_select(impl == etl::conv_multi_impl::FFT_CUFFT) {
211  inc_counter("impl:fft_cufft");
212  impl::cufft::conv2_full_multi(smart_forward_gpu(input), smart_forward_gpu(kernel), conv);
213  }
214  else {
215  cpp_unreachable("Invalid conv implementation selection");
216  }
217  }
218  }
219 
224  template <typename L>
225  void assign_add_to(L&& lhs) const {
226  std_add_evaluate(*this, lhs);
227  }
228 
233  template <typename L>
234  void assign_sub_to(L&& lhs) const {
235  std_sub_evaluate(*this, lhs);
236  }
237 
242  template <typename L>
243  void assign_mul_to(L&& lhs) const {
244  std_mul_evaluate(*this, lhs);
245  }
246 
251  template <typename L>
252  void assign_div_to(L&& lhs) const {
253  std_div_evaluate(*this, lhs);
254  }
255 
260  template <typename L>
261  void assign_mod_to(L&& lhs) const {
262  std_mod_evaluate(*this, lhs);
263  }
264 
271  friend std::ostream& operator<<(std::ostream& os, const conv_2d_full_multi_expr& expr) {
272  return os << "conv2_full_multi(" << expr._a << ", " << expr._b << ")";
273  }
274 };
275 
/*!
 * \brief Traits specialization for conv_2d_full_multi_expr.
 * \tparam A The type of the left (2D) sub expression
 * \tparam B The type of the right (3D) sub expression
 * \tparam Flipped The flipped flag of the described expression
 */
280 template <etl_2d A, etl_3d B, bool Flipped>
281 struct etl_traits<etl::conv_2d_full_multi_expr<A, B, Flipped>> {
 // NOTE(review): listing lines 282 and 285-287 are absent from this extract;
 // expr_t, left_traits and value_type used below are presumably declared there.
283  using left_expr_t = std::decay_t<A>; ///< The left sub expression type
284  using right_expr_t = std::decay_t<B>; ///< The right sub expression type
288 
289  static constexpr bool is_etl = true; ///< This is an ETL expression
290  static constexpr bool is_transformer = false; ///< Not a transformer
291  static constexpr bool is_view = false; ///< Not a view
292  static constexpr bool is_magic_view = false; ///< Not a magic view
293  static constexpr bool is_fast = all_fast<A, B>; ///< Fast only when both sub expressions are
294  static constexpr bool is_linear = false; ///< Not linear
295  static constexpr bool is_thread_safe = true; ///< Declared thread safe
296  static constexpr bool is_value = false; ///< Not a value type
297  static constexpr bool is_direct = true; ///< Declared as direct
298  static constexpr bool is_generator = false; ///< Not a generator
299  static constexpr bool is_padded = false; ///< Not padded
300  static constexpr bool is_aligned = true; ///< Declared as aligned
301  static constexpr bool is_temporary = true; ///< This is a temporary expression
 // NOTE(review): this condition (CUDA enabled and GPU-capable value type) differs from
 // conv_2d_full_multi_expr::gpu_computable, which depends on CUDNN - confirm this is intended.
302  static constexpr bool gpu_computable = is_gpu_t<value_type> && cuda_enabled;
303  static constexpr order storage_order = left_traits::storage_order; ///< Storage order of the left sub expression
304 
 /*!
  * \brief Vectorization flag; true for every vector mode V
  * \tparam V The vector mode
  */
310  template <vector_mode_t V>
311  static constexpr bool vectorizable = true;
312 
317  template <size_t DD>
318  static constexpr size_t dim() {
319  return DD == 0 ? etl::dim<0, B>() : DD == 1 ? etl::dim<0, A>() + etl::dim<1, B>() - 1 : etl::dim<1, A>() + etl::dim<2, B>() - 1;
320  }
321 
328  static size_t dim(const expr_t& e, size_t d) {
329  if (d == 0) {
330  return etl::dim(e._b, 0);
331  } else if (d == 1) {
332  return etl::dim(e._a, 0) + etl::dim(e._b, 1) - 1;
333  } else {
334  cpp_assert(d == 2, "Invalid access to conv_2d_full_multi dimension");
335 
336  return etl::dim(e._a, 1) + etl::dim(e._b, 2) - 1;
337  }
338  }
339 
345  static size_t size(const expr_t& e) {
346  return (etl::dim(e._b, 0)) * (etl::dim(e._a, 0) + etl::dim(e._b, 1) - 1) * (etl::dim(e._a, 1) + etl::dim(e._b, 2) - 1);
347  }
348 
353  static constexpr size_t size() {
354  return (etl::dim<0, B>()) * (etl::dim<0, A>() + etl::dim<1, B>() - 1) * (etl::dim<1, A>() + etl::dim<2, B>() - 1);
355  }
356 
/*!
 * \brief Returns the number of dimensions of the expression.
 * \return Always 3
 */
static constexpr size_t dimensions() {
    return 3;
}
364 
/*!
 * \brief Estimate the complexity of the computation.
 * \return Always -1
 */
static constexpr int complexity() noexcept {
    return -1;
}
372 };
373 
385 template <etl_2d A, etl_3d B>
387  return conv_2d_full_multi_expr<detail::build_type<A>, detail::build_type<B>, false>{a, b};
388 }
389 
402 template <etl_2d A, etl_3d B, etl_3d C>
403 auto conv_2d_full_multi(A&& a, B&& b, C&& c) {
404  c = conv_2d_full_multi(a, b);
405 
406  return c;
407 }
408 
420 template <etl_2d A, etl_3d B>
422  return conv_2d_full_multi_expr<detail::build_type<A>, detail::build_type<B>, true>{a, b};
423 }
424 
437 template <etl_2d A, etl_3d B, etl_3d C>
438 auto conv_2d_full_multi_flipped(A&& a, B&& b, C&& c) {
440 
441  return c;
442 }
443 
444 } //end of namespace etl
FFT reduction (with MKL impl)
static constexpr size_t size()
Returns the size of the expression.
Definition: conv_2d_full_multi_expr.hpp:353
static size_t size(const expr_t &e)
Returns the size of the expression.
Definition: conv_2d_full_multi_expr.hpp:345
B _b
The sub expression reference.
Definition: base_temporary_expr.hpp:534
conv_2d_full_multi_expr< detail::build_type< A >, detail::build_type< B >, false > conv_2d_full_multi(A &&a, B &&b)
Creates an expression representing the 'full' 2D convolution of a and b.
Definition: conv_2d_full_multi_expr.hpp:386
static size_t dim(const expr_t &e, size_t d)
Returns the dth dimension of the expression.
Definition: conv_2d_full_multi_expr.hpp:328
Standard implementation.
void assign_sub_to(L &&lhs) const
Sub from the given left-hand-side expression.
Definition: conv_2d_full_multi_expr.hpp:234
void assign_add_to(L &&lhs) const
Add to the given left-hand-side expression.
Definition: conv_2d_full_multi_expr.hpp:225
static constexpr etl::conv_multi_impl select_default_impl()
Select the implementation of the conv multi of I and K in C.
Definition: conv_2d_full_multi_expr.hpp:71
constexpr bool is_magic_view
Traits indicating if the given ETL type is a magic view expression.
Definition: traits.hpp:311
A _a
The sub expression reference.
Definition: base_temporary_expr.hpp:533
order
Storage order of a matrix.
Definition: order.hpp:15
constexpr bool cuda_enabled
Indicates if CUDA is available.
Definition: config.hpp:94
Abstract base class for temporary binary expression.
Definition: base_temporary_expr.hpp:529
VEC implementation.
static constexpr size_t dim()
Returns the DDth dimension of the expression.
Definition: conv_2d_full_multi_expr.hpp:318
std::add_lvalue_reference_t< B > b()
Returns the sub expression.
Definition: base_temporary_expr.hpp:593
constexpr bool is_fast
Traits to test if the given ETL expression type is fast (sizes known at compile-time) ...
Definition: traits.hpp:588
Traits to get information about ETL types.
Definition: tmp.hpp:68
Root namespace for the ETL library.
Definition: adapter.hpp:15
context & local_context()
Return the configuration context of the current thread.
Definition: context.hpp:50
static constexpr etl::conv_multi_impl select_impl()
Select the implementation of the conv of I and K in C.
Definition: conv_2d_full_multi_expr.hpp:138
FFT reduction (with STD impl)
conv_multi_impl
Enumeration describing the different multiple convolution implementations.
Definition: conv_impl.hpp:47
auto dim(E &&value, size_t i) -> detail::identity_helper< E, dim_view< detail::build_identity_type< E >, D >>
Return a view representing the ith Dth dimension.
Definition: view_expression_builder.hpp:25
value_t< A > value_type
The type of value of the expression.
Definition: conv_2d_full_multi_expr.hpp:23
void assign_div_to(L &&lhs) const
Divide the given left-hand-side expression.
Definition: conv_2d_full_multi_expr.hpp:252
std::conditional_t< is_etl_value< T >, const std::decay_t< T > &, std::decay_t< T > > build_type
Helper to build the type for a sub expression.
Definition: expression_helpers.hpp:24
constexpr bool cudnn_enabled
Indicates if the NVIDIA CUDNN library is available for ETL.
Definition: config.hpp:114
GPU implementation.
A 'full' 2D multi-kernel convolution expression.
Definition: conv_2d_full_multi_expr.hpp:22
void std_mod_evaluate(Expr &&expr, Result &&result)
Compound modulo evaluation of the expr into result.
Definition: evaluator.hpp:1271
static constexpr size_t dimensions()
Returns the number of dimensions of the expression.
Definition: conv_2d_full_multi_expr.hpp:361
void std_mul_evaluate(Expr &&expr, Result &&result)
Compound multiply evaluation of the expr into result.
Definition: evaluator.hpp:1233
void assign_to(C &&conv) const
Assign to a matrix of the same storage order.
Definition: conv_2d_full_multi_expr.hpp:149
constexpr bool is_transformer
Traits indicating if the given ETL type is a transformer expression.
Definition: traits.hpp:297
value_t< A > value_type
The value type of the expression.
Definition: conv_2d_full_multi_expr.hpp:287
Selector for the convolution implementations.
decltype(auto) smart_forward_gpu(E &expr)
Smart forwarding for a temporary expression that will be computed in GPU.
Definition: helpers.hpp:343
Column-Major storage.
std::decay_t< A > left_expr_t
The left sub expression type.
Definition: conv_2d_full_multi_expr.hpp:283
constexpr bool is_view
Traits indicating if the given ETL type is a view expression.
Definition: traits.hpp:304
conv_2d_full_multi_expr< detail::build_type< A >, detail::build_type< B >, true > conv_2d_full_multi_flipped(A &&a, B &&b)
Creates an expression representing the 'full' 2D convolution of a and flipped b.
Definition: conv_2d_full_multi_expr.hpp:421
void assign_mod_to(L &&lhs) const
Modulo the given left-hand-side expression.
Definition: conv_2d_full_multi_expr.hpp:261
std::decay_t< B > right_expr_t
The right sub expression type.
Definition: conv_2d_full_multi_expr.hpp:284
static constexpr bool gpu_computable
Indicates if the temporary expression can be directly evaluated using only GPU.
Definition: conv_2d_full_multi_expr.hpp:34
void std_sub_evaluate(Expr &&expr, Result &&result)
Compound subtract evaluation of the expr into result.
Definition: evaluator.hpp:1214
decltype(auto) smart_forward(E &expr)
Smart forwarding for a temporary expression.
Definition: helpers.hpp:323
constexpr bool is_thread_safe
Traits to test if the given ETL expression type is thread safe.
Definition: traits.hpp:687
conv_2d_full_multi_expr(A a, B b)
Construct a new expression.
Definition: conv_2d_full_multi_expr.hpp:40
friend std::ostream & operator<<(std::ostream &os, const conv_2d_full_multi_expr &expr)
Print a representation of the expression on the given stream.
Definition: conv_2d_full_multi_expr.hpp:271
typename decay_traits< E >::value_type value_t
Traits to extract the value type out of an ETL type.
Definition: tmp.hpp:81
void std_div_evaluate(Expr &&expr, Result &&result)
Compound divide evaluation of the expr into result.
Definition: evaluator.hpp:1252
void inc_counter([[maybe_unused]] const char *name)
Increase the given counter.
Definition: counters.hpp:25
std::add_lvalue_reference_t< A > a()
Returns the sub expression.
Definition: base_temporary_expr.hpp:577
static constexpr int complexity() noexcept
Estimate the complexity of computation.
Definition: conv_2d_full_multi_expr.hpp:369
static void check([[maybe_unused]] const I &input, [[maybe_unused]] const K &kernel, [[maybe_unused]] const C &conv)
Assert that the convolution is done on correct dimensions.
Definition: conv_2d_full_multi_expr.hpp:50
void std_add_evaluate(Expr &&expr, Result &&result)
Compound add evaluation of the expr into result.
Definition: evaluator.hpp:1195
void assign_mul_to(L &&lhs) const
Multiply the given left-hand-side expression.
Definition: conv_2d_full_multi_expr.hpp:243
static constexpr auto storage_order
The sub storage order.
Definition: conv_2d_full_multi_expr.hpp:28
FFT reduction (with CUFFT impl)