Expression Templates Library (ETL)
conv_4d.hpp
Go to the documentation of this file.
1 //=======================================================================
2 // Copyright (c) 2014-2023 Baptiste Wicht
3 // Distributed under the terms of the MIT License.
4 // (See accompanying file LICENSE or copy at
5 // http://opensource.org/licenses/MIT)
6 //=======================================================================
7 
13 namespace etl::detail {
14 
18 template <size_t S1, size_t S2, size_t P1, size_t P2>
26  template <typename I, typename K, typename C>
27  static void apply(const I& input, const K& kernel, C&& conv) {
28 #ifndef ETL_MANUAL_SELECT
29  if constexpr (impl::cudnn::conv_possible<I, K, C>) {
30  impl::cudnn::conv4_forward(smart_forward_gpu(input), smart_forward_gpu(kernel), conv, S1, S2, P1, P2);
31  } else {
32 #endif
33  auto impl = select_conv4_valid_impl<I, K, C>(etl::dim<2>(input), etl::dim<3>(input), etl::dim<2>(kernel), etl::dim<3>(kernel));
34 
35  if (impl == etl::conv4_impl::CUDNN) {
36  impl::cudnn::conv4_forward(smart_forward_gpu(input), smart_forward_gpu(kernel), conv, S1, S2, P1, P2);
37  } else if (impl == etl::conv4_impl::BLAS_VEC) {
38  impl::vec::blas_conv4_valid(smart_forward(input), smart_forward(kernel), conv, S1, S2, P1, P2);
39  } else if (impl == etl::conv4_impl::BLAS_MKL) {
40  impl::blas::blas_conv4_valid(smart_forward(input), smart_forward(kernel), conv, S1, S2, P1, P2);
41  } else if (impl == etl::conv4_impl::VEC) {
42  impl::vec::conv4_valid(smart_forward(input), smart_forward(kernel), conv, S1, S2, P1, P2);
43  } else if (impl == etl::conv4_impl::STD) {
44  impl::standard::conv4_valid(smart_forward(input), smart_forward(kernel), conv, S1, S2, P1, P2);
45  } else {
46  cpp_unreachable("Invalid conv implementation selection");
47  }
48 #ifndef ETL_MANUAL_SELECT
49  }
50 #endif
51  }
52 };
53 
57 template <size_t S1, size_t S2, size_t P1, size_t P2>
65  template <typename I, typename K, typename C>
66  static void apply(const I& input, const K& kernel, C&& conv) {
67 #ifndef ETL_MANUAL_SELECT
68  if constexpr (impl::cudnn::conv_possible<I, K, C>) {
69  impl::cudnn::conv4_forward_flipped(smart_forward_gpu(input), smart_forward_gpu(kernel), conv, S1, S2, P1, P2);
70  } else {
71 #endif
72  auto impl = select_conv4_valid_impl<I, K, C>(etl::dim<2>(input), etl::dim<3>(input), etl::dim<2>(kernel), etl::dim<3>(kernel));
73 
74  if (impl == etl::conv4_impl::CUDNN) {
75  impl::cudnn::conv4_forward_flipped(smart_forward_gpu(input), smart_forward_gpu(kernel), conv, S1, S2, P1, P2);
76  } else if (impl == etl::conv4_impl::BLAS_VEC) {
77  impl::vec::blas_conv4_valid_flipped(smart_forward(input), smart_forward(kernel), conv, S1, S2, P1, P2);
78  } else if (impl == etl::conv4_impl::BLAS_MKL) {
79  impl::blas::blas_conv4_valid_flipped(smart_forward(input), smart_forward(kernel), conv, S1, S2, P1, P2);
80  } else if (impl == etl::conv4_impl::VEC) {
81  impl::vec::conv4_valid_flipped(smart_forward(input), smart_forward(kernel), conv, S1, S2, P1, P2);
82  } else if (impl == etl::conv4_impl::STD) {
83  impl::standard::conv4_valid_flipped(smart_forward(input), smart_forward(kernel), conv, S1, S2, P1, P2);
84  } else {
85  cpp_unreachable("Invalid conv implementation selection");
86  }
87 #ifndef ETL_MANUAL_SELECT
88  }
89 #endif
90  }
91 };
92 
103  template <typename I, typename K, typename C>
104  static void apply(const I& input, const K& kernel, C&& conv, size_t s1, size_t s2, size_t p1, size_t p2) {
105 #ifndef ETL_MANUAL_SELECT
106  if constexpr (impl::cudnn::conv_possible<I, K, C>) {
107  impl::cudnn::conv4_forward(smart_forward_gpu(input), smart_forward_gpu(kernel), conv, s1, s2, p1, p2);
108  } else {
109 #endif
110  auto impl = select_conv4_valid_impl<I, K, C>(etl::dim<2>(input), etl::dim<3>(input), etl::dim<2>(kernel), etl::dim<3>(kernel));
111 
112  if (impl == etl::conv4_impl::CUDNN) {
113  impl::cudnn::conv4_forward(smart_forward_gpu(input), smart_forward_gpu(kernel), conv, s1, s2, p1, p2);
114  } else if (impl == etl::conv4_impl::BLAS_VEC) {
115  impl::vec::blas_conv4_valid(smart_forward(input), smart_forward(kernel), conv, s1, s2, p1, p2);
116  } else if (impl == etl::conv4_impl::BLAS_MKL) {
117  impl::blas::blas_conv4_valid(smart_forward(input), smart_forward(kernel), conv, s1, s2, p1, p2);
118  } else if (impl == etl::conv4_impl::VEC) {
119  impl::vec::conv4_valid(smart_forward(input), smart_forward(kernel), conv, s1, s2, p1, p2);
120  } else if (impl == etl::conv4_impl::STD) {
121  impl::standard::conv4_valid(smart_forward(input), smart_forward(kernel), conv, s1, s2, p1, p2);
122  } else {
123  cpp_unreachable("Invalid conv implementation selection");
124  }
125 #ifndef ETL_MANUAL_SELECT
126  }
127 #endif
128  }
129 };
130 
141  template <typename I, typename K, typename C>
142  static void apply(const I& input, const K& kernel, C&& conv, size_t s1, size_t s2, size_t p1, size_t p2) {
143 #ifndef ETL_MANUAL_SELECT
144  if constexpr (impl::cudnn::conv_possible<I, K, C>) {
145  impl::cudnn::conv4_forward_flipped(smart_forward_gpu(input), smart_forward_gpu(kernel), conv, s1, s2, p1, p2);
146  } else {
147 #endif
148  auto impl = select_conv4_valid_impl<I, K, C>(etl::dim<2>(input), etl::dim<3>(input), etl::dim<2>(kernel), etl::dim<3>(kernel));
149 
150  if (impl == etl::conv4_impl::CUDNN) {
151  impl::cudnn::conv4_forward_flipped(smart_forward_gpu(input), smart_forward_gpu(kernel), conv, s1, s2, p1, p2);
152  } else if (impl == etl::conv4_impl::BLAS_VEC) {
153  impl::vec::blas_conv4_valid_flipped(smart_forward(input), smart_forward(kernel), conv, s1, s2, p1, p2);
154  } else if (impl == etl::conv4_impl::BLAS_MKL) {
155  impl::blas::blas_conv4_valid_flipped(smart_forward(input), smart_forward(kernel), conv, s1, s2, p1, p2);
156  } else if (impl == etl::conv4_impl::VEC) {
157  impl::vec::conv4_valid_flipped(smart_forward(input), smart_forward(kernel), conv, s1, s2, p1, p2);
158  } else if (impl == etl::conv4_impl::STD) {
159  impl::standard::conv4_valid_flipped(smart_forward(input), smart_forward(kernel), conv, s1, s2, p1, p2);
160  } else {
161  cpp_unreachable("Invalid conv implementation selection");
162  }
163 #ifndef ETL_MANUAL_SELECT
164  }
165 #endif
166  }
167 };
168 
172 template <size_t S1, size_t S2, size_t P1, size_t P2>
180  template <typename I, typename K, typename C>
181  static void apply(const I& input, const K& kernel, C&& conv) {
182  auto impl = select_conv4_valid_filter_impl<I, K, C>(etl::dim<2>(input), etl::dim<3>(input), etl::dim<2>(kernel), etl::dim<3>(kernel));
183 
184  if (impl == etl::conv4_impl::BLAS_VEC) {
185  impl::vec::blas_conv4_valid_filter(smart_forward(input), smart_forward(kernel), conv, S1, S2, P1, P2);
186  } else if (impl == etl::conv4_impl::BLAS_MKL) {
187  impl::blas::blas_conv4_valid_filter(smart_forward(input), smart_forward(kernel), conv, S1, S2, P1, P2);
188  } else if (impl == etl::conv4_impl::VEC) {
189  impl::vec::conv4_valid_filter(smart_forward(input), smart_forward(kernel), conv, S1, S2, P1, P2);
190  } else if (impl == etl::conv4_impl::STD) {
191  impl::standard::conv4_valid_filter(smart_forward(input), smart_forward(kernel), conv, S1, S2, P1, P2);
192  } else {
193  cpp_unreachable("Invalid conv implementation selection");
194  }
195  }
196 };
197 
201 template <size_t S1, size_t S2, size_t P1, size_t P2>
209  template <typename I, typename K, typename C>
210  static void apply(const I& input, const K& kernel, C&& conv) {
211  auto impl = select_conv4_valid_filter_impl<I, K, C>(etl::dim<2>(input), etl::dim<3>(input), etl::dim<2>(kernel), etl::dim<3>(kernel));
212 
213  if (impl == etl::conv4_impl::BLAS_VEC) {
214  impl::vec::blas_conv4_valid_filter_flipped(smart_forward(input), smart_forward(kernel), conv, S1, S2, P1, P2);
215  } else if (impl == etl::conv4_impl::BLAS_MKL) {
216  impl::blas::blas_conv4_valid_filter_flipped(smart_forward(input), smart_forward(kernel), conv, S1, S2, P1, P2);
217  } else if (impl == etl::conv4_impl::VEC) {
218  impl::vec::conv4_valid_filter_flipped(smart_forward(input), smart_forward(kernel), conv, S1, S2, P1, P2);
219  } else if (impl == etl::conv4_impl::STD) {
220  impl::standard::conv4_valid_filter_flipped(smart_forward(input), smart_forward(kernel), conv, S1, S2, P1, P2);
221  } else {
222  cpp_unreachable("Invalid conv implementation selection");
223  }
224  }
225 };
226 
237  template <typename I, typename K, typename C>
238  static void apply(const I& input, const K& kernel, C&& conv, size_t s1, size_t s2, size_t p1, size_t p2) {
239  auto impl = select_conv4_valid_filter_impl<I, K, C>(etl::dim<2>(input), etl::dim<3>(input), etl::dim<2>(kernel), etl::dim<3>(kernel));
240 
241  if (impl == etl::conv4_impl::BLAS_VEC) {
242  impl::vec::blas_conv4_valid_filter(smart_forward(input), smart_forward(kernel), conv, s1, s2, p1, p2);
243  } else if (impl == etl::conv4_impl::BLAS_MKL) {
244  impl::blas::blas_conv4_valid_filter(smart_forward(input), smart_forward(kernel), conv, s1, s2, p1, p2);
245  } else if (impl == etl::conv4_impl::VEC) {
246  impl::vec::conv4_valid_filter(smart_forward(input), smart_forward(kernel), conv, s1, s2, p1, p2);
247  } else if (impl == etl::conv4_impl::STD) {
248  impl::standard::conv4_valid_filter(smart_forward(input), smart_forward(kernel), conv, s1, s2, p1, p2);
249  } else {
250  cpp_unreachable("Invalid conv implementation selection");
251  }
252  }
253 };
254 
265  template <typename I, typename K, typename C>
266  static void apply(const I& input, const K& kernel, C&& conv, size_t s1, size_t s2, size_t p1, size_t p2) {
267  auto impl = select_conv4_valid_filter_impl<I, K, C>(etl::dim<2>(input), etl::dim<3>(input), etl::dim<2>(kernel), etl::dim<3>(kernel));
268 
269  if (impl == etl::conv4_impl::BLAS_VEC) {
270  impl::vec::blas_conv4_valid_filter_flipped(smart_forward(input), smart_forward(kernel), conv, s1, s2, p1, p2);
271  } else if (impl == etl::conv4_impl::BLAS_MKL) {
272  impl::blas::blas_conv4_valid_filter_flipped(smart_forward(input), smart_forward(kernel), conv, s1, s2, p1, p2);
273  } else if (impl == etl::conv4_impl::VEC) {
274  impl::vec::conv4_valid_filter_flipped(smart_forward(input), smart_forward(kernel), conv, s1, s2, p1, p2);
275  } else if (impl == etl::conv4_impl::STD) {
276  impl::standard::conv4_valid_filter_flipped(smart_forward(input), smart_forward(kernel), conv, s1, s2, p1, p2);
277  } else {
278  cpp_unreachable("Invalid conv implementation selection");
279  }
280  }
281 };
282 
286 template <size_t S1, size_t S2, size_t P1, size_t P2>
294  template <typename I, typename K, typename C>
295  static void apply(const I& input, const K& kernel, C&& conv) {
296  auto impl = select_conv4_valid_back_impl<I, K, C>(etl::dim<2>(input), etl::dim<3>(input), etl::dim<2>(kernel), etl::dim<3>(kernel));
297 
298  if (impl == etl::conv4_impl::BLAS_VEC) {
299  impl::vec::blas_conv4_valid_back(smart_forward(input), smart_forward(kernel), conv, S1, S2, P1, P2);
300  } else if (impl == etl::conv4_impl::BLAS_MKL) {
301  impl::blas::blas_conv4_valid_back(smart_forward(input), smart_forward(kernel), conv, S1, S2, P1, P2);
302  } else if (impl == etl::conv4_impl::VEC) {
303  impl::vec::conv4_valid_back(smart_forward(input), smart_forward(kernel), conv, S1, S2, P1, P2);
304  } else if (impl == etl::conv4_impl::STD) {
305  impl::standard::conv4_valid_back(smart_forward(input), smart_forward(kernel), conv, S1, S2, P1, P2);
306  } else {
307  cpp_unreachable("Invalid conv implementation selection");
308  }
309  }
310 };
311 
315 template <size_t S1, size_t S2, size_t P1, size_t P2>
323  template <typename I, typename K, typename C>
324  static void apply(const I& input, const K& kernel, C&& conv) {
325  auto impl = select_conv4_valid_back_impl<I, K, C>(etl::dim<2>(input), etl::dim<3>(input), etl::dim<2>(kernel), etl::dim<3>(kernel));
326 
327  if (impl == etl::conv4_impl::BLAS_VEC) {
328  impl::vec::blas_conv4_valid_back_flipped(smart_forward(input), smart_forward(kernel), conv, S1, S2, P1, P2);
329  } else if (impl == etl::conv4_impl::BLAS_MKL) {
330  impl::blas::blas_conv4_valid_back_flipped(smart_forward(input), smart_forward(kernel), conv, S1, S2, P1, P2);
331  } else if (impl == etl::conv4_impl::VEC) {
332  impl::vec::conv4_valid_back_flipped(smart_forward(input), smart_forward(kernel), conv, S1, S2, P1, P2);
333  } else if (impl == etl::conv4_impl::STD) {
334  impl::standard::conv4_valid_back_flipped(smart_forward(input), smart_forward(kernel), conv, S1, S2, P1, P2);
335  } else {
336  cpp_unreachable("Invalid conv implementation selection");
337  }
338  }
339 };
340 
351  template <typename I, typename K, typename C>
352  static void apply(const I& input, const K& kernel, C&& conv, size_t s1, size_t s2, size_t p1, size_t p2) {
353  auto impl = select_conv4_valid_back_impl<I, K, C>(etl::dim<2>(input), etl::dim<3>(input), etl::dim<2>(kernel), etl::dim<3>(kernel));
354 
355  if (impl == etl::conv4_impl::BLAS_VEC) {
356  impl::vec::blas_conv4_valid_back(smart_forward(input), smart_forward(kernel), conv, s1, s2, p1, p2);
357  } else if (impl == etl::conv4_impl::BLAS_MKL) {
358  impl::blas::blas_conv4_valid_back(smart_forward(input), smart_forward(kernel), conv, s1, s2, p1, p2);
359  } else if (impl == etl::conv4_impl::VEC) {
360  impl::vec::conv4_valid_back(smart_forward(input), smart_forward(kernel), conv, s1, s2, p1, p2);
361  } else if (impl == etl::conv4_impl::STD) {
362  impl::standard::conv4_valid_back(smart_forward(input), smart_forward(kernel), conv, s1, s2, p1, p2);
363  } else {
364  cpp_unreachable("Invalid conv implementation selection");
365  }
366  }
367 };
368 
379  template <typename I, typename K, typename C>
380  static void apply(const I& input, const K& kernel, C&& conv, size_t s1, size_t s2, size_t p1, size_t p2) {
381  auto impl = select_conv4_valid_back_impl<I, K, C>(etl::dim<2>(input), etl::dim<3>(input), etl::dim<2>(kernel), etl::dim<3>(kernel));
382 
383  if (impl == etl::conv4_impl::BLAS_VEC) {
384  impl::vec::blas_conv4_valid_back_flipped(smart_forward(input), smart_forward(kernel), conv, s1, s2, p1, p2);
385  } else if (impl == etl::conv4_impl::BLAS_MKL) {
386  impl::blas::blas_conv4_valid_back_flipped(smart_forward(input), smart_forward(kernel), conv, s1, s2, p1, p2);
387  } else if (impl == etl::conv4_impl::VEC) {
388  impl::vec::conv4_valid_back_flipped(smart_forward(input), smart_forward(kernel), conv, s1, s2, p1, p2);
389  } else if (impl == etl::conv4_impl::STD) {
390  impl::standard::conv4_valid_back_flipped(smart_forward(input), smart_forward(kernel), conv, s1, s2, p1, p2);
391  } else {
392  cpp_unreachable("Invalid conv implementation selection");
393  }
394  }
395 };
396 
407  template <typename I, typename K, typename C>
408  static void apply(const I& input, const K& kernel, C&& conv) {
409 #ifndef ETL_MANUAL_SELECT
410  if constexpr (impl::cudnn::conv_possible<I, K, C>) {
411  impl::cudnn::conv4_backward_data_full(smart_forward_gpu(input), smart_forward_gpu(kernel), conv);
412  } else {
413 #endif
414  auto impl = select_conv4_full_impl<I, K, C>(etl::dim<2>(kernel), etl::dim<3>(kernel));
415 
416  if (impl == etl::conv4_impl::CUDNN) {
417  impl::cudnn::conv4_backward_data_full(smart_forward_gpu(input), smart_forward_gpu(kernel), conv);
418  } else if (impl == etl::conv4_impl::VEC) {
419  impl::vec::conv4_full(smart_forward(input), smart_forward(kernel), conv);
420  } else if (impl == etl::conv4_impl::FFT_STD) {
421  impl::standard::conv4_full_fft(smart_forward(input), smart_forward(kernel), conv);
422  } else if (impl == etl::conv4_impl::FFT_MKL) {
423  impl::blas::conv4_full(smart_forward(input), smart_forward(kernel), conv);
424  } else if (impl == etl::conv4_impl::FFT_CUFFT) {
425  impl::cufft::conv4_full(smart_forward_gpu(input), smart_forward_gpu(kernel), conv);
426  } else if (impl == etl::conv4_impl::STD) {
427  impl::standard::conv4_full(smart_forward(input), smart_forward(kernel), conv);
428  } else {
429  cpp_unreachable("Invalid conv implementation selection");
430  }
431 #ifndef ETL_MANUAL_SELECT
432  }
433 #endif
434  }
435 };
436 
447  template <typename I, typename K, typename C>
448  static void apply(const I& input, const K& kernel, C&& conv) {
449 #ifndef ETL_MANUAL_SELECT
450  if constexpr (impl::cudnn::conv_possible<I, K, C>) {
451  impl::cudnn::conv4_backward_data_full_flipped(smart_forward_gpu(input), smart_forward_gpu(kernel), conv);
452  } else {
453 #endif
454  auto impl = select_conv4_full_impl<I, K, C>(etl::dim<2>(kernel), etl::dim<3>(kernel));
455 
456  if (impl == etl::conv4_impl::CUDNN) {
457  impl::cudnn::conv4_backward_data_full_flipped(smart_forward_gpu(input), smart_forward_gpu(kernel), conv);
458  } else if (impl == etl::conv4_impl::VEC) {
459  impl::vec::conv4_full_flipped(smart_forward(input), smart_forward(kernel), conv);
460  } else if (impl == etl::conv4_impl::FFT_STD) {
461  impl::standard::conv4_full_fft_flipped(smart_forward(input), smart_forward(kernel), conv);
462  } else if (impl == etl::conv4_impl::FFT_MKL) {
463  impl::blas::conv4_full_flipped(smart_forward(input), smart_forward(kernel), conv);
464  } else if (impl == etl::conv4_impl::FFT_CUFFT) {
465  impl::cufft::conv4_full_flipped(smart_forward_gpu(input), smart_forward_gpu(kernel), conv);
466  } else if (impl == etl::conv4_impl::STD) {
467  impl::standard::conv4_full_flipped(smart_forward(input), smart_forward(kernel), conv);
468  } else {
469  cpp_unreachable("Invalid conv implementation selection");
470  }
471 #ifndef ETL_MANUAL_SELECT
472  }
473 #endif
474  }
475 };
476 
477 } //end of namespace etl::detail
FFT reduction (with MKL impl)
The functor impl for 4D valid_back conv (flipped variant, with s1/s2/p1/p2 passed at run time).
Definition: conv_4d.hpp:372
The functor impl for 4D full conv (flipped variant).
Definition: conv_4d.hpp:440
static void apply(const I &input, const K &kernel, C &&conv, size_t s1, size_t s2, size_t p1, size_t p2)
Apply the convolution.
Definition: conv_4d.hpp:142
Standard implementation.
static void apply(const I &input, const K &kernel, C &&conv, size_t s1, size_t s2, size_t p1, size_t p2)
Apply the convolution.
Definition: conv_4d.hpp:380
static void apply(const I &input, const K &kernel, C &&conv)
Apply the convolution.
Definition: conv_4d.hpp:408
The functor impl for 4D valid filter conv.
Definition: conv_4d.hpp:173
static void apply(const I &input, const K &kernel, C &&conv, size_t s1, size_t s2, size_t p1, size_t p2)
Apply the convolution.
Definition: conv_4d.hpp:238
The functor impl for 4D valid conv (with s1/s2/p1/p2 passed at run time).
Definition: conv_4d.hpp:96
VEC implementation.
The functor impl for 4D valid_back conv (with s1/s2/p1/p2 passed at run time).
Definition: conv_4d.hpp:344
Definition: expression_builder.hpp:699
static void apply(const I &input, const K &kernel, C &&conv)
Apply the convolution.
Definition: conv_4d.hpp:295
The functor impl for 4D valid_back conv (flipped variant).
Definition: conv_4d.hpp:316
FFT reduction (with STD impl)
The functor impl for 4D valid filter conv (flipped variant, with s1/s2/p1/p2 passed at run time).
Definition: conv_4d.hpp:258
static void apply(const I &input, const K &kernel, C &&conv, size_t s1, size_t s2, size_t p1, size_t p2)
Apply the convolution.
Definition: conv_4d.hpp:266
GPU implementation.
static void apply(const I &input, const K &kernel, C &&conv)
Apply the convolution.
Definition: conv_4d.hpp:324
static void apply(const I &input, const K &kernel, C &&conv)
Apply the convolution.
Definition: conv_4d.hpp:210
static void apply(const I &input, const K &kernel, C &&conv)
Apply the convolution.
Definition: conv_4d.hpp:448
The functor impl for 4D valid conv (flipped variant).
Definition: conv_4d.hpp:58
decltype(auto) smart_forward_gpu(E &expr)
Smart forwarding for a temporary expression that will be computed in GPU.
Definition: helpers.hpp:343
static void apply(const I &input, const K &kernel, C &&conv)
Apply the convolution.
Definition: conv_4d.hpp:27
The functor impl for 4D valid conv (flipped variant, with s1/s2/p1/p2 passed at run time).
Definition: conv_4d.hpp:134
decltype(auto) smart_forward(E &expr)
Smart forwarding for a temporary expression.
Definition: helpers.hpp:323
static void apply(const I &input, const K &kernel, C &&conv)
Apply the convolution.
Definition: conv_4d.hpp:66
The functor impl for 4D valid conv.
Definition: conv_4d.hpp:19
static void apply(const I &input, const K &kernel, C &&conv, size_t s1, size_t s2, size_t p1, size_t p2)
Apply the convolution.
Definition: conv_4d.hpp:352
The functor impl for 4D full conv.
Definition: conv_4d.hpp:400
The functor impl for 4D valid filter conv (flipped variant).
Definition: conv_4d.hpp:202
static void apply(const I &input, const K &kernel, C &&conv)
Apply the convolution.
Definition: conv_4d.hpp:181
static void apply(const I &input, const K &kernel, C &&conv, size_t s1, size_t s2, size_t p1, size_t p2)
Apply the convolution.
Definition: conv_4d.hpp:104
The functor impl for 4D valid filter conv (with s1/s2/p1/p2 passed at run time).
Definition: conv_4d.hpp:230
The functor impl for 4D valid_back conv.
Definition: conv_4d.hpp:287
FFT reduction (with CUFFT impl)