Expression Templates Library (ETL)
conv_normal_select.hpp
Go to the documentation of this file.
1 //=======================================================================
2 // Copyright (c) 2014-2023 Baptiste Wicht
3 // Distributed under the terms of the MIT License.
4 // (See accompanying file LICENSE or copy at
5 // http://opensource.org/licenses/MIT)
6 //=======================================================================
7 
13 #pragma once
14 
15 namespace etl::detail {
16 
27 template <conv_type TT, typename I, typename K, typename C>
29  //Note: since the constexpr values will be known at compile time, the
30  //conditions will be a lot simplified
31 
32  // First handle GPU
33  if (TT == conv_type::VALID) {
34  if (egblas_enabled && all_floating<I, K, C> && all_homogeneous<I, K, C> && impl::egblas::has_sconv1_valid && !no_gpu) {
36  }
37  }
38 
39  if (TT == conv_type::SAME) {
40  if (egblas_enabled && all_floating<I, K, C> && all_homogeneous<I, K, C> && impl::egblas::has_sconv1_same && !no_gpu) {
42  }
43  }
44 
45  if (TT == conv_type::FULL) {
46  if (egblas_enabled && all_floating<I, K, C> && all_homogeneous<I, K, C> && impl::egblas::has_sconv1_full && !no_gpu) {
48  }
49  }
50 
51  if (TT == conv_type::FULL) {
52  if (impl::cufft::conv1_possible<I, K, C> && !no_gpu) {
54  } else if (impl::blas::conv1_possible<I, K, C>) {
55  //TODO This should only be done for some sizes
57  }
58  }
59 
60  if (impl::vec::conv1_possible<vector_mode, I, K, C>) {
61  return etl::conv_impl::VEC;
62  } else {
63  return etl::conv_impl::STD;
64  }
65 }
66 
77 template <conv_type TT, typename I, typename K, typename C>
79  //Note: since the constexpr values will be known at compile time, the
80  //conditions will be a lot simplified
81 
82  constexpr order input_order = decay_traits<I>::storage_order;
83  constexpr order kernel_order = decay_traits<K>::storage_order;
84  constexpr order output_order = decay_traits<C>::storage_order;
85 
86  //Only the standard implementation is able to handle column major
87  if (input_order == order::ColumnMajor || kernel_order == order::ColumnMajor || output_order == order::ColumnMajor) {
88  return etl::conv_impl::STD;
89  }
90 
91  // Full has more options
92  if (TT == conv_type::FULL) {
93  if (impl::cudnn::conv_possible<I, K, C> && !no_gpu) {
94  return etl::conv_impl::CUDNN;
95  } else if (impl::cufft::conv2_possible<I, K, C> && !no_gpu) {
97  } else if (impl::blas::conv2_possible<I, K, C>) {
99  }
100  }
101 
102  if (impl::vec::conv2_possible<vector_mode, I, K, C>) {
103  return etl::conv_impl::VEC;
104  } else {
105  return etl::conv_impl::STD;
106  }
107 }
108 
119 template <conv_type TT, typename I, typename K, typename C>
120 constexpr etl::conv_impl select_default_conv_impl(bool no_gpu) {
121  //Note: since the constexpr values will be known at compile time, the
122  //conditions will be a lot simplified
123 
124  constexpr order input_order = decay_traits<I>::storage_order;
125  constexpr order kernel_order = decay_traits<K>::storage_order;
126  constexpr order output_order = decay_traits<C>::storage_order;
127 
128  //Only the standard implementation is able to handle column major
129  if (input_order == order::ColumnMajor || kernel_order == order::ColumnMajor || output_order == order::ColumnMajor) {
130  return etl::conv_impl::STD;
131  }
132 
133  if (impl::cudnn::conv_possible<I, K, C> && (TT == conv_type::VALID || TT == conv_type::FULL) && is_2d<I> && !no_gpu) {
134  return etl::conv_impl::CUDNN;
135  } else if (impl::vec::conv2_possible<vector_mode, I, K, C>) {
136  return etl::conv_impl::VEC;
137  } else {
138  return etl::conv_impl::STD;
139  }
140 }
141 
142 #ifdef ETL_MANUAL_SELECT
143 
151 template <conv_type TT, typename I, typename K, typename C>
153  auto default_impl = select_default_conv1_impl_new<TT, I, K, C>(local_context().cpu);
154 
155  //COVERAGE_EXCLUDE_BEGIN
156  if (local_context().conv_selector.forced) {
157  auto forced = local_context().conv_selector.impl;
158 
159  switch (forced) {
160  //MKL cannot always be used
161  case conv_impl::FFT_MKL:
162  if (!impl::blas::conv1_possible<I, K, C>) {
163  std::cerr << "Forced selection to MKL fft_conv implementation, but not possible for this expression" << std::endl;
164  return default_impl;
165  }
166 
167  return forced;
168 
169  //CUFFT cannot always be used
171  if (!impl::cufft::conv1_possible<I, K, C> || local_context().cpu) {
172  std::cerr << "Forced selection to CUFFT fft_conv implementation, but not possible for this expression" << std::endl;
173  return default_impl;
174  }
175 
176  return forced;
177 
178  //VEC cannot always be used
179  case conv_impl::VEC:
180  if (!impl::vec::conv1_possible<vector_mode, I, K, C>) {
181  std::cerr << "Forced selection to VEC conv1 implementation, but not possible for this expression" << std::endl;
182  return default_impl;
183  }
184 
185  return forced;
186 
187  //EGBLAS cannot always be used
188  case conv_impl::EGBLAS:
189  if (!egblas_enabled || !all_floating<I, K, C> || !all_homogeneous<I, K, C> || local_context().cpu) {
190  std::cerr << "Forced selection to EGBLAS conv1 implementation, but not possible for this expression" << std::endl;
191  return default_impl;
192  }
193 
194  return forced;
195 
196  //In other cases, simply use the forced impl
197  default:
198  return forced;
199  }
200  }
201  //COVERAGE_EXCLUDE_END
202 
203  return default_impl;
204 }
205 
213 template <conv_type TT, typename I, typename K, typename C>
215  auto default_impl = select_default_conv2_impl_new<TT, I, K, C>(local_context().cpu);
216 
217  //COVERAGE_EXCLUDE_BEGIN
218  if (local_context().conv_selector.forced) {
219  auto forced = local_context().conv_selector.impl;
220 
221  switch (forced) {
222  //VEC cannot always be used
223  case conv_impl::VEC:
224  if (!impl::vec::conv2_possible<vector_mode, I, K, C>) {
225  std::cerr << "Forced selection to VEC conv2 implementation, but not possible for this expression" << std::endl;
226  return default_impl;
227  }
228 
229  return forced;
230 
231  //CUDNN cannot always be used
232  case conv_impl::CUDNN:
233  if (!impl::cudnn::conv_possible<I, K, C> || local_context().cpu) {
234  std::cerr << "Forced selection to CUDNN conv implementation, but not possible for this expression" << std::endl;
235  return default_impl;
236  }
237 
238  return forced;
239 
240  //MKL cannot always be used
241  case conv_impl::FFT_MKL:
242  if (!impl::blas::conv2_possible<I, K, C>) {
243  std::cerr << "Forced selection to MKL conv implementation, but not possible for this expression" << std::endl;
244  return default_impl;
245  }
246 
247  return forced;
248 
249  //CUFFT cannot always be used
251  if (!impl::cufft::conv2_possible<I, K, C> || local_context().cpu) {
252  std::cerr << "Forced selection to CUFFT conv implementation, but not possible for this expression" << std::endl;
253  return default_impl;
254  }
255 
256  return forced;
257 
258  //In other cases, simply use the forced impl
259  default:
260  return forced;
261  }
262  }
263  //COVERAGE_EXCLUDE_END
264 
265  return default_impl;
266 }
267 
275 template <conv_type TT, typename I, typename K, typename C>
277  auto default_impl = select_default_conv_impl<TT, I, K, C>(local_context().cpu);
278 
279  //COVERAGE_EXCLUDE_BEGIN
280  if (local_context().conv_selector.forced) {
281  auto forced = local_context().conv_selector.impl;
282 
283  switch (forced) {
284  //MKL cannot always be used
285  case conv_impl::FFT_MKL:
286  if (!impl::blas::conv2_possible<I, K, C>) {
287  std::cerr << "Forced selection to MKL fft_conv implementation, but not possible for this expression" << std::endl;
288  return default_impl;
289  }
290 
291  return forced;
292 
293  //CUFFT cannot always be used
295  if (!impl::cufft::conv2_possible<I, K, C> || local_context().cpu) {
296  std::cerr << "Forced selection to CUFFT fft_conv implementation, but not possible for this expression" << std::endl;
297  return default_impl;
298  }
299 
300  return forced;
301 
302  //CUDNN cannot always be used
303  case conv_impl::CUDNN:
304  if (!impl::cudnn::conv_possible<I, K, C> || local_context().cpu) {
305  std::cerr << "Forced selection to CUDNN conv implementation, but not possible for this expression" << std::endl;
306  return default_impl;
307  }
308 
309  return forced;
310 
311  //VEC cannot always be used
312  case conv_impl::VEC:
313  if (!impl::vec::conv2_possible<vector_mode, I, K, C>) {
314  std::cerr << "Forced selection to VEC conv2 implementation, but not possible for this expression" << std::endl;
315  return default_impl;
316  }
317 
318  return forced;
319 
320  //In other cases, simply use the forced impl
321  default:
322  return forced;
323  }
324  }
325  //COVERAGE_EXCLUDE_END
326 
327  return default_impl;
328 }
329 
330 #else
331 
340 template <conv_type TT, typename I, typename K, typename C>
342  return select_default_conv1_impl_new<TT, I, K, C>(false);
343 }
344 
353 template <conv_type TT, typename I, typename K, typename C>
355  return select_default_conv2_impl_new<TT, I, K, C>(false);
356 }
357 
366 template <conv_type TT, typename I, typename K, typename C>
368  return select_default_conv_impl<TT, I, K, C>(false);
369 }
370 
371 #endif
372 
373 } //end of namespace etl::detail
constexpr etl::conv_impl select_conv1_impl_new()
Select the implementation of the conv of I and K in C.
Definition: conv_normal_select.hpp:341
FFT reduction (with MKL impl)
Standard implementation.
Same convolution.
constexpr etl::conv_impl select_default_conv2_impl_new(bool no_gpu)
Select the implementation of the conv of I and K in C.
Definition: conv_normal_select.hpp:78
order
Storage order of a matrix.
Definition: order.hpp:15
constexpr etl::conv_impl select_conv_impl()
Select the implementation of the conv of I and K in C.
Definition: conv_normal_select.hpp:367
VEC implementation.
Definition: expression_builder.hpp:699
Valid convolution.
constexpr etl::conv_impl select_conv2_impl_new()
Select the implementation of the conv of I and K in C.
Definition: conv_normal_select.hpp:354
Traits to get information about ETL types.
Definition: tmp.hpp:68
context & local_context()
Return the configuration context of the current thread.
Definition: context.hpp:50
constexpr bool egblas_enabled
Indicates if the EGBLAS library is available for ETL.
Definition: config.hpp:119
GPU implementation.
bool cpu
Force CPU evaluation.
Definition: context.hpp:29
GPU implementation.
conv_impl
Enumeration describing the different convolution implementations.
Definition: conv_impl.hpp:20
Full convolution.
constexpr etl::conv_impl select_default_conv_impl(bool no_gpu)
Select the implementation of the conv of I and K in C.
Definition: conv_normal_select.hpp:120
Column-Major storage.
constexpr etl::conv_impl select_default_conv1_impl_new(bool no_gpu)
Select the implementation of the conv of I and K in C.
Definition: conv_normal_select.hpp:28
FFT reduction (with CUFFT impl)