Expression Templates Library (ETL)
conv.hpp
Go to the documentation of this file.
1 //=======================================================================
2 // Copyright (c) 2014-2023 Baptiste Wicht
3 // Distributed under the terms of the MIT License.
4 // (See accompanying file LICENSE or copy at
5 // http://opensource.org/licenses/MIT)
6 //=======================================================================
7 
13 #pragma once
14 
15 #define ETL_EXPERIMENTAL_TENSOR_CORES
16 
17 #ifdef ETL_CUDNN_MODE
18 
19 #include "etl/impl/cublas/cuda.hpp"
20 #include "etl/impl/cudnn/cudnn.hpp"
21 
22 #endif
23 
24 namespace etl::impl::cudnn {
25 
34 template <typename I, typename K, typename C>
35 constexpr bool conv_possible = cudnn_enabled&& all_homogeneous<I, K, C>&& all_row_major<I, K, C>&& all_dma<I, K, C>;
36 
44 template <typename I, typename K>
45 constexpr bool conv_possible_ = cudnn_enabled&& all_homogeneous<I, K>&& all_row_major<I, K>&& all_dma<I, K>;
46 
47 #ifdef ETL_CUDNN_MODE
48 
59 template <typename I, typename K, typename C>
60 void conv2_valid_set(I&& input, K&& kernel, C&& conv, size_t s1, size_t s2, size_t p1, size_t p2, cudnnConvolutionMode_t mode) {
61  using type = std::remove_const_t<value_t<I>>;
62 
63  type alpha[] = {1.0f};
64  type beta[] = {0.0f};
65 
66  auto data_type = std::is_same_v<type, float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
67 
68  decltype(auto) handle = start_cudnn();
69 
70  // Prepare the tensors
71  auto input_tensor = create_tensor_wrapper(input);
72  auto output_tensor = create_tensor_wrapper(conv);
73  auto filter = create_filter_wrapper(kernel);
74 
75  // Prepare the convolution
76  cudnnConvolutionDescriptor_t convolution;
77  cudnn_check(cudnnCreateConvolutionDescriptor(&convolution));
78  cudnn_check(cudnnSetConvolution2dDescriptor(convolution, p1, p2, s1, s2, 1, 1, mode, data_type));
79 #ifdef ETL_EXPERIMENTAL_TENSOR_CORES
80  cudnn_check(cudnnSetConvolutionMathType(convolution, CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION));
81 #endif
82 
83  // Find the algorithm to use
84  cudnnConvolutionFwdAlgoPerf_t algo;
85  int algos = 0;
86  cudnn_check(cudnnGetConvolutionForwardAlgorithm_v7(handle.get(), *input_tensor, *filter, convolution, *output_tensor,
87  1, &algos, &algo));
88 
89  // Prepare the workspace
91  if (algo.memory) {
92  workspace = impl::cuda::cuda_allocate_only<type>(algo.memory);
93  }
94 
95  // Allocate GPU memory, if necessary
96 
97  input.ensure_gpu_up_to_date();
98  kernel.ensure_gpu_up_to_date();
99  conv.ensure_gpu_allocated();
100 
101  // Perform the convolution
102 
103  cudnn_check(cudnnConvolutionForward(handle.get(), alpha, *input_tensor, input.gpu_memory(), *filter, kernel.gpu_memory(), convolution, algo.algo,
104  workspace.get(), algo.memory, beta, *output_tensor, conv.gpu_memory()));
105 
106  conv.validate_gpu();
107  conv.invalidate_cpu();
108 
109  // Release the resources
110  cudnn_check(cudnnDestroyConvolutionDescriptor(convolution));
111 }
112 
123 template <typename I, typename K, typename C>
124 void conv2_valid([[maybe_unused]] I&& input,
125  [[maybe_unused]] K&& kernel,
126  [[maybe_unused]] C&& conv,
127  [[maybe_unused]] size_t s1,
128  [[maybe_unused]] size_t s2,
129  [[maybe_unused]] size_t p1,
130  [[maybe_unused]] size_t p2) {
131  if constexpr (conv_possible<I, K, C>) {
132  conv2_valid_set(input, kernel, conv, s1, s2, p1, p2, CUDNN_CONVOLUTION);
133  } else {
134  cpp_unreachable("CUDNN not available/enabled");
135  }
136 }
137 
149 template <typename I, typename K, typename C>
150 void conv2_valid_flipped([[maybe_unused]] I&& input,
151  [[maybe_unused]] K&& kernel,
152  [[maybe_unused]] C&& conv,
153  [[maybe_unused]] size_t s1,
154  [[maybe_unused]] size_t s2,
155  [[maybe_unused]] size_t p1,
156  [[maybe_unused]] size_t p2) {
157  if constexpr (conv_possible<I, K, C>) {
158  conv2_valid_set(input, kernel, conv, s1, s2, p1, p2, CUDNN_CROSS_CORRELATION);
159  } else {
160  cpp_unreachable("CUDNN not available/enabled");
161  }
162 }
163 
170 template <typename I, typename K, typename C>
171 void conv4_forward_set(I&& input, K&& kernel, C&& conv, size_t s1, size_t s2, size_t p1, size_t p2, cudnnConvolutionMode_t mode) {
172  using type = value_t<I>;
173 
174  type alpha[] = {1.0f};
175  type beta[] = {0.0f};
176 
177  auto data_type = std::is_same_v<type, float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
178 
179  decltype(auto) handle = start_cudnn();
180 
181  // Prepare the tensors
182  auto input_tensor = create_tensor_wrapper(input);
183  auto output_tensor = create_tensor_wrapper(conv);
184  auto filter = create_filter_wrapper(kernel);
185 
186  // Prepare the convolution
187  cudnnConvolutionDescriptor_t convolution;
188  cudnn_check(cudnnCreateConvolutionDescriptor(&convolution));
189  cudnn_check(cudnnSetConvolution2dDescriptor(convolution, p1, p2, s1, s2, 1, 1, mode, data_type));
190 #ifdef ETL_EXPERIMENTAL_TENSOR_CORES
191  cudnn_check(cudnnSetConvolutionMathType(convolution, CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION));
192 #endif
193 
194  // Find the algorithm to use
195  cudnnConvolutionFwdAlgoPerf_t algo;
196  int algos = 0;
197  cudnn_check(cudnnGetConvolutionForwardAlgorithm_v7(handle.get(), *input_tensor, *filter, convolution, *output_tensor,
198  1, &algos, &algo));
199 
200  // Prepare the workspace
202  if (algo.memory) {
203  workspace = impl::cuda::cuda_allocate_only<type>(algo.memory);
204  }
205 
206  // Allocate GPU memory, if necessary
207 
208  input.ensure_gpu_up_to_date();
209  kernel.ensure_gpu_up_to_date();
210  conv.ensure_gpu_allocated();
211 
212  // Perform the convolution
213 
214  cudnn_check(cudnnConvolutionForward(handle.get(), alpha, *input_tensor, input.gpu_memory(), *filter, kernel.gpu_memory(), convolution, algo.algo,
215  workspace.get(), algo.memory, beta, *output_tensor, conv.gpu_memory()));
216 
217  conv.validate_gpu();
218  conv.invalidate_cpu();
219 
220  // Release the resources
221  cudnn_check(cudnnDestroyConvolutionDescriptor(convolution));
222 }
223 
230 template <typename I, typename K, typename C>
231 void conv4_forward([[maybe_unused]] I&& input,
232  [[maybe_unused]] K&& kernel,
233  [[maybe_unused]] C&& conv,
234  [[maybe_unused]] size_t s1,
235  [[maybe_unused]] size_t s2,
236  [[maybe_unused]] size_t p1,
237  [[maybe_unused]] size_t p2) {
238  if constexpr (conv_possible<I, K, C>) {
239  conv4_forward_set(input, kernel, conv, s1, s2, p1, p2, CUDNN_CONVOLUTION);
240  } else {
241  cpp_unreachable("CUDNN not available/enabled");
242  }
243 }
244 
251 template <typename I, typename K, typename C>
252 void conv4_forward_flipped([[maybe_unused]] I&& input,
253  [[maybe_unused]] K&& kernel,
254  [[maybe_unused]] C&& conv,
255  [[maybe_unused]] size_t s1,
256  [[maybe_unused]] size_t s2,
257  [[maybe_unused]] size_t p1,
258  [[maybe_unused]] size_t p2) {
259  if constexpr (conv_possible<I, K, C>) {
260  conv4_forward_set(input, kernel, conv, s1, s2, p1, p2, CUDNN_CROSS_CORRELATION);
261  } else {
262  cpp_unreachable("CUDNN not available/enabled");
263  }
264 }
265 
274 template <typename I, typename K, typename C>
275 void conv4_backward_filter_set(I&& input, K&& kernel, C&& conv, size_t s1, size_t s2, size_t p1, size_t p2, cudnnConvolutionMode_t mode) {
276  using type = value_t<I>;
277 
278  type alpha[] = {1.0f};
279  type beta[] = {0.0f};
280 
281  auto data_type = std::is_same_v<type, float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
282 
283  decltype(auto) handle = start_cudnn();
284 
285  // Prepare the tensors
286  auto input_tensor = create_tensor_wrapper(input);
287  auto output_tensor = create_tensor_wrapper(kernel);
288  auto filter = create_filter_wrapper(conv);
289 
290  // Prepare the convolution
291  cudnnConvolutionDescriptor_t convolution;
292  cudnn_check(cudnnCreateConvolutionDescriptor(&convolution));
293  cudnn_check(cudnnSetConvolution2dDescriptor(convolution, p1, p2, s1, s2, 1, 1, mode, data_type));
294 #ifdef ETL_EXPERIMENTAL_TENSOR_CORES
295  cudnn_check(cudnnSetConvolutionMathType(convolution, CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION));
296 #endif
297 
298  // Find the algorithm to use
299  cudnnConvolutionBwdFilterAlgoPerf_t algo;
300  int algos = 0;
301  cudnn_check(cudnnGetConvolutionBackwardFilterAlgorithm_v7(handle.get(), *input_tensor, *output_tensor, convolution, *filter,
302  1, &algos, &algo));
303 
304  // Prepare the workspace
306  if (algo.memory) {
307  workspace = impl::cuda::cuda_allocate_only<type>(algo.memory);
308  }
309 
310  // Allocate GPU memory, if necessary
311 
312  input.ensure_gpu_up_to_date();
313  kernel.ensure_gpu_up_to_date();
314  conv.ensure_gpu_allocated();
315 
316  // Perform the convolution
317 
318  cudnn_check(cudnnConvolutionBackwardFilter(handle.get(), alpha, *input_tensor, input.gpu_memory(), *output_tensor, kernel.gpu_memory(), convolution,
319  algo.algo, workspace.get(), algo.memory, beta, *filter, conv.gpu_memory()));
320 
321  conv.validate_gpu();
322  conv.invalidate_cpu();
323 
324  // Release the resources
325  cudnn_check(cudnnDestroyConvolutionDescriptor(convolution));
326 }
327 
336 template <typename I, typename K, typename C>
337 void conv4_backward_filter([[maybe_unused]] I&& input,
338  [[maybe_unused]] K&& kernel,
339  [[maybe_unused]] C&& conv,
340  [[maybe_unused]] size_t s1,
341  [[maybe_unused]] size_t s2,
342  [[maybe_unused]] size_t p1,
343  [[maybe_unused]] size_t p2) {
344  if constexpr (conv_possible<I, K, C>) {
345  conv4_backward_filter_set(input, kernel, conv, s1, s2, p1, p2, CUDNN_CONVOLUTION);
346  } else {
347  cpp_unreachable("CUDNN not available/enabled");
348  }
349 }
350 
359 template <typename I, typename K, typename C>
360 void conv4_backward_filter_flipped([[maybe_unused]] I&& input,
361  [[maybe_unused]] K&& kernel,
362  [[maybe_unused]] C&& conv,
363  [[maybe_unused]] size_t s1,
364  [[maybe_unused]] size_t s2,
365  [[maybe_unused]] size_t p1,
366  [[maybe_unused]] size_t p2) {
367  if constexpr (conv_possible<I, K, C>) {
368  conv4_backward_filter_set(input, kernel, conv, s1, s2, p1, p2, CUDNN_CROSS_CORRELATION);
369  } else {
370  cpp_unreachable("CUDNN not available/enabled");
371  }
372 }
373 
380 template <typename I, typename K, typename C>
381 void conv2_full_set(I&& input, K&& kernel, C&& conv, cudnnConvolutionMode_t mode) {
382  using type = std::remove_const_t<value_t<I>>;
383 
384  type alpha[] = {1.0f};
385  type beta[] = {0.0f};
386 
387  auto data_type = std::is_same_v<type, float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
388 
389  decltype(auto) handle = start_cudnn();
390 
391  // Prepare the tensors
392  auto input_tensor = create_tensor_wrapper(input);
393  auto output_tensor = create_tensor_wrapper(conv);
394  auto filter = create_filter_wrapper(kernel);
395 
396  // Prepare the convolution
397  cudnnConvolutionDescriptor_t convolution;
398  cudnn_check(cudnnCreateConvolutionDescriptor(&convolution));
399  cudnn_check(cudnnSetConvolution2dDescriptor(convolution, 0, 0, 1, 1, 1, 1, mode, data_type));
400 #ifdef ETL_EXPERIMENTAL_TENSOR_CORES
401  cudnn_check(cudnnSetConvolutionMathType(convolution, CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION));
402 #endif
403 
404  // Find the algorithm to use
405  cudnnConvolutionBwdDataAlgoPerf_t algo;
406  int algos = 0;
407  cudnn_check(cudnnGetConvolutionBackwardDataAlgorithm_v7(handle.get(), *filter, *input_tensor, convolution, *output_tensor,
408  1, &algos, &algo));
409 
410  // Prepare the workspace
412  if (algo.memory) {
413  workspace = impl::cuda::cuda_allocate_only<type>(algo.memory);
414  }
415 
416  // Allocate GPU memory, if necessary
417 
418  input.ensure_gpu_up_to_date();
419  kernel.ensure_gpu_up_to_date();
420  conv.ensure_gpu_allocated();
421 
422  // Perform the convolution
423 
424  cudnn_check(cudnnConvolutionBackwardData(handle.get(), alpha, *filter, kernel.gpu_memory(), *input_tensor, input.gpu_memory(), convolution, algo.algo,
425  workspace.get(), algo.memory, beta, *output_tensor, conv.gpu_memory()));
426 
427  conv.validate_gpu();
428  conv.invalidate_cpu();
429 
430  // Release the resources
431  cudnn_check(cudnnDestroyConvolutionDescriptor(convolution));
432 }
433 
440 template <typename I, typename K, typename C>
441 void conv2_full([[maybe_unused]] I&& input, [[maybe_unused]] K&& kernel, [[maybe_unused]] C&& conv) {
442  if constexpr (conv_possible<I, K, C>) {
443  conv2_full_set(input, kernel, conv, CUDNN_CROSS_CORRELATION);
444  } else {
445  cpp_unreachable("CUDNN not available/enabled");
446  }
447 }
448 
455 template <typename I, typename K, typename C>
456 void conv2_full_flipped([[maybe_unused]] I&& input, [[maybe_unused]] K&& kernel, [[maybe_unused]] C&& conv) {
457  if constexpr (conv_possible<I, K, C>) {
458  conv2_full_set(input, kernel, conv, CUDNN_CONVOLUTION);
459  } else {
460  cpp_unreachable("CUDNN not available/enabled");
461  }
462 }
463 
470 template <typename I, typename K, typename C>
471 void conv2_valid_multi_set(I& input, K&& kernel, C&& conv, size_t s1, size_t s2, size_t p1, size_t p2, cudnnConvolutionMode_t mode) {
472  using type = std::remove_const_t<value_t<I>>;
473 
474  auto data_type = std::is_same_v<type, float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
475 
476  type alpha[] = {1.0f};
477  type beta[] = {0.0f};
478 
479  decltype(auto) handle = start_cudnn();
480 
481  // Prepare the input tensor
482  cudnnTensorDescriptor_t input_tensor;
483  cudnn_check(cudnnCreateTensorDescriptor(&input_tensor));
484  cudnn_check(cudnnSetTensor4dDescriptor(input_tensor, CUDNN_TENSOR_NCHW, data_type, 1, 1, etl::dim<0>(input), etl::dim<1>(input)));
485 
486  // Prepare the output tensor
487  cudnnTensorDescriptor_t output_tensor;
488  cudnn_check(cudnnCreateTensorDescriptor(&output_tensor));
489  cudnn_check(cudnnSetTensor4dDescriptor(output_tensor, CUDNN_TENSOR_NCHW, data_type, 1, etl::dim<0>(conv), etl::dim<1>(conv), etl::dim<2>(conv)));
490 
491  // Prepare the filter
492  cudnnFilterDescriptor_t filter;
493  cudnn_check(cudnnCreateFilterDescriptor(&filter));
494  cudnn_check(cudnnSetFilter4dDescriptor(filter, data_type, CUDNN_TENSOR_NCHW, etl::dim<0>(kernel), 1, etl::dim<1>(kernel), etl::dim<2>(kernel)));
495 
496  // Prepare the convolution
497  cudnnConvolutionDescriptor_t convolution;
498  cudnn_check(cudnnCreateConvolutionDescriptor(&convolution));
499  cudnn_check(cudnnSetConvolution2dDescriptor(convolution, p1, p2, s1, s2, 1, 1, mode, data_type));
500 #ifdef ETL_EXPERIMENTAL_TENSOR_CORES
501  cudnn_check(cudnnSetConvolutionMathType(convolution, CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION));
502 #endif
503 
504  // Find the algorithm to use
505  cudnnConvolutionFwdAlgoPerf_t algo;
506  int algos = 0;
507  cudnn_check(cudnnGetConvolutionForwardAlgorithm_v7(handle.get(), input_tensor, filter, convolution, output_tensor,
508  1, &algos, &algo));
509 
510  // Prepare the workspace
512  if (algo.memory) {
513  workspace = impl::cuda::cuda_allocate_only<type>(algo.memory);
514  }
515 
516  // Allocate GPU memory, if necessary
517 
518  input.ensure_gpu_up_to_date();
519  kernel.ensure_gpu_up_to_date();
520  conv.ensure_gpu_allocated();
521 
522  // Perform the convolution
523 
524  cudnn_check(cudnnConvolutionForward(handle.get(), alpha, input_tensor, input.gpu_memory(), filter, kernel.gpu_memory(), convolution, algo.algo,
525  workspace.get(), algo.memory, beta, output_tensor, conv.gpu_memory()));
526 
527  conv.validate_gpu();
528  conv.invalidate_cpu();
529 
530  // Release the resources
531  cudnn_check(cudnnDestroyConvolutionDescriptor(convolution));
532  cudnn_check(cudnnDestroyFilterDescriptor(filter));
533  cudnn_check(cudnnDestroyTensorDescriptor(output_tensor));
534  cudnn_check(cudnnDestroyTensorDescriptor(input_tensor));
535 }
536 
543 template <typename I, typename K, typename C>
544 void conv2_valid_multi([[maybe_unused]] I&& input,
545  [[maybe_unused]] K&& kernel,
546  [[maybe_unused]] C&& conv,
547  [[maybe_unused]] size_t s1,
548  [[maybe_unused]] size_t s2,
549  [[maybe_unused]] size_t p1,
550  [[maybe_unused]] size_t p2) {
551  if constexpr (conv_possible<I, K, C>) {
552  conv2_valid_multi_set(input, kernel, conv, s1, s2, p1, p2, CUDNN_CONVOLUTION);
553  } else {
554  cpp_unreachable("CUDNN not available/enabled");
555  }
556 }
557 
564 template <typename I, typename K, typename C>
565 void conv2_valid_multi_flipped([[maybe_unused]] I&& input,
566  [[maybe_unused]] K&& kernel,
567  [[maybe_unused]] C&& conv,
568  [[maybe_unused]] size_t s1,
569  [[maybe_unused]] size_t s2,
570  [[maybe_unused]] size_t p1,
571  [[maybe_unused]] size_t p2) {
572  if constexpr (conv_possible<I, K, C>) {
573  conv2_valid_multi_set(input, kernel, conv, s1, s2, p1, p2, CUDNN_CROSS_CORRELATION);
574  } else {
575  cpp_unreachable("CUDNN not available/enabled");
576  }
577 }
578 
585 template <typename I, typename K, typename C>
586 void conv4_backward_data_set(I&& input, K&& kernel, C&& conv, cudnnConvolutionMode_t mode, size_t s1, size_t s2, size_t p1, size_t p2) {
587  using type = value_t<I>;
588 
589  type alpha[] = {1.0f};
590  type beta[] = {0.0f};
591 
592  auto data_type = std::is_same_v<type, float> ? CUDNN_DATA_FLOAT : CUDNN_DATA_DOUBLE;
593 
594  decltype(auto) handle = start_cudnn();
595 
596  // Prepare the tensors
597  auto input_tensor = create_tensor_wrapper(input);
598  auto output_tensor = create_tensor_wrapper(conv);
599  auto filter = create_filter_wrapper(kernel);
600 
601  // Prepare the convolution
602  cudnnConvolutionDescriptor_t convolution;
603  cudnn_check(cudnnCreateConvolutionDescriptor(&convolution));
604  cudnn_check(cudnnSetConvolution2dDescriptor(convolution, p1, p2, s1, s2, 1, 1, mode, data_type));
605 #ifdef ETL_EXPERIMENTAL_TENSOR_CORES
606  cudnn_check(cudnnSetConvolutionMathType(convolution, CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION));
607 #endif
608 
609  // Find the algorithm to use
610  cudnnConvolutionBwdDataAlgoPerf_t algo;
611  int algos = 0;
612  cudnn_check(cudnnGetConvolutionBackwardDataAlgorithm_v7(handle.get(), *filter, *input_tensor, convolution, *output_tensor,
613  1, &algos, &algo));
614 
615  // Prepare the workspace
617  if (algo.memory) {
618  workspace = impl::cuda::cuda_allocate_only<type>(algo.memory);
619  }
620 
621  // Allocate GPU memory, if necessary
622 
623  input.ensure_gpu_up_to_date();
624  kernel.ensure_gpu_up_to_date();
625  conv.ensure_gpu_allocated();
626 
627  // Perform the convolution
628 
629  cudnn_check(cudnnConvolutionBackwardData(handle.get(), alpha, *filter, kernel.gpu_memory(), *input_tensor, input.gpu_memory(), convolution, algo.algo,
630  workspace.get(), algo.memory, beta, *output_tensor, conv.gpu_memory()));
631 
632  conv.validate_gpu();
633  conv.invalidate_cpu();
634 
635  // Release the resources
636  cudnn_check(cudnnDestroyConvolutionDescriptor(convolution));
637 }
638 
645 template <typename I, typename K, typename C>
646 void conv4_backward_data([[maybe_unused]] I&& input,
647  [[maybe_unused]] K&& kernel,
648  [[maybe_unused]] C&& conv,
649  [[maybe_unused]] size_t s1,
650  [[maybe_unused]] size_t s2,
651  [[maybe_unused]] size_t p1,
652  [[maybe_unused]] size_t p2) {
653  if constexpr (conv_possible<I, K, C>) {
654  conv4_backward_data_set(input, kernel, conv, CUDNN_CROSS_CORRELATION, s1, s2, p1, p2);
655  } else {
656  cpp_unreachable("CUDNN not available/enabled");
657  }
658 }
659 
666 template <typename I, typename K, typename C>
667 void conv4_backward_data_flipped([[maybe_unused]] I&& input,
668  [[maybe_unused]] K&& kernel,
669  [[maybe_unused]] C&& conv,
670  [[maybe_unused]] size_t s1,
671  [[maybe_unused]] size_t s2,
672  [[maybe_unused]] size_t p1,
673  [[maybe_unused]] size_t p2) {
674  if constexpr (conv_possible<I, K, C>) {
675  conv4_backward_data_set(input, kernel, conv, CUDNN_CONVOLUTION, s1, s2, p1, p2);
676  } else {
677  cpp_unreachable("CUDNN not available/enabled");
678  }
679 }
680 
687 template <typename I, typename K, typename C>
688 void conv4_backward_data_full([[maybe_unused]] I&& input, [[maybe_unused]] K&& kernel, [[maybe_unused]] C&& conv) {
689  if constexpr (conv_possible<I, K, C>) {
690  conv4_backward_data_set(input, kernel, conv, CUDNN_CROSS_CORRELATION, 1, 1, 0, 0);
691  } else {
692  cpp_unreachable("CUDNN not available/enabled");
693  }
694 }
695 
702 template <typename I, typename K, typename C>
703 void conv4_backward_data_full_flipped([[maybe_unused]] I&& input, [[maybe_unused]] K&& kernel, [[maybe_unused]] C&& conv) {
704  if constexpr (conv_possible<I, K, C>) {
705  conv4_backward_data_set(input, kernel, conv, CUDNN_CONVOLUTION, 1, 1, 0, 0);
706  } else {
707  cpp_unreachable("CUDNN not available/enabled");
708  }
709 }
710 
711 #else
712 
713 //COVERAGE_EXCLUDE_BEGIN
714 
725 template <typename I, typename K, typename C>
726 void conv2_valid([[maybe_unused]] I&& input,
727  [[maybe_unused]] K&& kernel,
728  [[maybe_unused]] C&& conv,
729  [[maybe_unused]] size_t s1,
730  [[maybe_unused]] size_t s2,
731  [[maybe_unused]] size_t p1,
732  [[maybe_unused]] size_t p2) {
733  cpp_unreachable("CUDNN not available/enabled");
734 }
735 
746 template <typename I, typename K, typename C>
747 void conv2_valid_flipped([[maybe_unused]] I&& input,
748  [[maybe_unused]] K&& kernel,
749  [[maybe_unused]] C&& conv,
750  [[maybe_unused]] size_t s1,
751  [[maybe_unused]] size_t s2,
752  [[maybe_unused]] size_t p1,
753  [[maybe_unused]] size_t p2) {
754  cpp_unreachable("CUDNN not available/enabled");
755 }
756 
763 template <typename I, typename K, typename C>
764 void conv4_forward([[maybe_unused]] I&& input,
765  [[maybe_unused]] K&& kernel,
766  [[maybe_unused]] C&& conv,
767  [[maybe_unused]] size_t s1,
768  [[maybe_unused]] size_t s2,
769  [[maybe_unused]] size_t p1,
770  [[maybe_unused]] size_t p2) {
771  cpp_unreachable("Unsupported feature called: cudnn conv4_valid");
772 }
773 
780 template <typename I, typename K, typename C>
781 void conv4_forward_flipped([[maybe_unused]] I&& input,
782  [[maybe_unused]] K&& kernel,
783  [[maybe_unused]] C&& conv,
784  [[maybe_unused]] size_t s1,
785  [[maybe_unused]] size_t s2,
786  [[maybe_unused]] size_t p1,
787  [[maybe_unused]] size_t p2) {
788  cpp_unreachable("Unsupported feature called: cudnn conv4_valid_flipped");
789 }
790 
799 template <typename I, typename K, typename C>
800 void conv4_backward_filter([[maybe_unused]] I&& input,
801  [[maybe_unused]] K&& kernel,
802  [[maybe_unused]] C&& conv,
803  [[maybe_unused]] size_t s1,
804  [[maybe_unused]] size_t s2,
805  [[maybe_unused]] size_t p1,
806  [[maybe_unused]] size_t p2) {
807  cpp_unreachable("Unsupported feature called: cudnn conv4_valid_filter");
808 }
809 
818 template <typename I, typename K, typename C>
819 void conv4_backward_filter_flipped([[maybe_unused]] I&& input,
820  [[maybe_unused]] K&& kernel,
821  [[maybe_unused]] C&& conv,
822  [[maybe_unused]] size_t s1,
823  [[maybe_unused]] size_t s2,
824  [[maybe_unused]] size_t p1,
825  [[maybe_unused]] size_t p2) {
826  cpp_unreachable("Unsupported feature called: cudnn conv4_backward_filter_flipped");
827 }
828 
835 template <typename I, typename K, typename C>
836 void conv2_full([[maybe_unused]] I&& input, [[maybe_unused]] K&& kernel, [[maybe_unused]] C&& conv) {
837  cpp_unreachable("Unsupported feature called: cudnn conv2_full");
838 }
839 
846 template <typename I, typename K, typename C>
847 void conv2_full_flipped([[maybe_unused]] I&& input, [[maybe_unused]] K&& kernel, [[maybe_unused]] C&& conv) {
848  cpp_unreachable("Unsupported feature called: cudnn conv2_full_flipped");
849 }
850 
857 template <typename I, typename K, typename C>
858 void conv4_backward_data_full([[maybe_unused]] I&& input, [[maybe_unused]] K&& kernel, [[maybe_unused]] C&& conv) {
859  cpp_unreachable("Unsupported feature called: cudnn conv4_full");
860 }
861 
868 template <typename I, typename K, typename C>
869 void conv4_backward_data_full_flipped([[maybe_unused]] I&& input, [[maybe_unused]] K&& kernel, [[maybe_unused]] C&& conv) {
870  cpp_unreachable("Unsupported feature called: cudnn conv4_ful_flippedl");
871 }
872 
879 template <typename I, typename K, typename C>
880 void conv2_valid_multi([[maybe_unused]] I&& input,
881  [[maybe_unused]] K&& kernel,
882  [[maybe_unused]] C&& conv,
883  [[maybe_unused]] size_t s1,
884  [[maybe_unused]] size_t s2,
885  [[maybe_unused]] size_t p1,
886  [[maybe_unused]] size_t p2) {
887  cpp_unreachable("Unsupported feature called: cudnn conv2_valid_multi");
888 }
889 
896 template <typename I, typename K, typename C>
897 void conv2_valid_multi_flipped([[maybe_unused]] I&& input,
898  [[maybe_unused]] K&& kernel,
899  [[maybe_unused]] C&& conv,
900  [[maybe_unused]] size_t s1,
901  [[maybe_unused]] size_t s2,
902  [[maybe_unused]] size_t p1,
903  [[maybe_unused]] size_t p2) {
904  cpp_unreachable("Unsupported feature called: cudnn conv2_valid_multi_flipped");
905 }
906 
913 template <typename I, typename K, typename C>
914 void conv4_backward_data([[maybe_unused]] I&& input,
915  [[maybe_unused]] K&& kernel,
916  [[maybe_unused]] C&& conv,
917  [[maybe_unused]] size_t s1,
918  [[maybe_unused]] size_t s2,
919  [[maybe_unused]] size_t p1,
920  [[maybe_unused]] size_t p2) {
921  cpp_unreachable("Unsupported feature called: cudnn conv4_backward_data");
922 }
923 
930 template <typename I, typename K, typename C>
931 void conv4_backward_data_flipped([[maybe_unused]] I&& input,
932  [[maybe_unused]] K&& kernel,
933  [[maybe_unused]] C&& conv,
934  [[maybe_unused]] size_t s1,
935  [[maybe_unused]] size_t s2,
936  [[maybe_unused]] size_t p1,
937  [[maybe_unused]] size_t p2) {
938  cpp_unreachable("Unsupported feature called: cudnn conv4_backward_data_flipped");
939 }
940 
941  //COVERAGE_EXCLUDE_END
942 
943 #endif
944 
945 } //end of namespace etl::impl::cudnn
void conv4_backward_data_full([[maybe_unused]] I &&input, [[maybe_unused]] K &&kernel, [[maybe_unused]] C &&conv)
cudnn implementation of a 4D 'full' convolution C = I * K
Definition: conv.hpp:858
void conv2_valid([[maybe_unused]] I &&input, [[maybe_unused]] K &&kernel, [[maybe_unused]] C &&conv, [[maybe_unused]] size_t s1, [[maybe_unused]] size_t s2, [[maybe_unused]] size_t p1, [[maybe_unused]] size_t p2)
CUDNN implementation of a 2D &#39;valid&#39; convolution C = I * K.
Definition: conv.hpp:726
Definition: bias_add.hpp:24
constexpr bool conv_possible_
Traits indicating if Convolution with CUDNN is possible for the given configuration.
Definition: conv.hpp:45
void conv4_forward([[maybe_unused]] I &&input, [[maybe_unused]] K &&kernel, [[maybe_unused]] C &&conv, [[maybe_unused]] size_t s1, [[maybe_unused]] size_t s2, [[maybe_unused]] size_t p1, [[maybe_unused]] size_t p2)
cudnn implementation of a 4D &#39;valid&#39; convolution C = I * K
Definition: conv.hpp:764
void conv2_valid_multi([[maybe_unused]] I &&input, [[maybe_unused]] K &&kernel, [[maybe_unused]] C &&conv, [[maybe_unused]] size_t s1, [[maybe_unused]] size_t s2, [[maybe_unused]] size_t p1, [[maybe_unused]] size_t p2)
CUDNN implementation of a 2D &#39;valid&#39; convolution C = I * K.
Definition: conv.hpp:880
void conv4_backward_filter_flipped([[maybe_unused]] I &&input, [[maybe_unused]] K &&kernel, [[maybe_unused]] C &&conv, [[maybe_unused]] size_t s1, [[maybe_unused]] size_t s2, [[maybe_unused]] size_t p1, [[maybe_unused]] size_t p2)
CUDNN implementation of a 4D &#39;valid&#39; convolution C = I * K, where the output are considered to be ker...
Definition: conv.hpp:819
void conv4_backward_data_full_flipped([[maybe_unused]] I &&input, [[maybe_unused]] K &&kernel, [[maybe_unused]] C &&conv)
cudnn implementation of a 4D 'full' convolution C = I * K, with flipped kernels
Definition: conv.hpp:869
constexpr bool cudnn_enabled
Indicates if the NVIDIA CUDNN library is available for ETL.
Definition: config.hpp:114
void conv4_backward_data([[maybe_unused]] I &&input, [[maybe_unused]] K &&kernel, [[maybe_unused]] C &&conv, [[maybe_unused]] size_t s1, [[maybe_unused]] size_t s2, [[maybe_unused]] size_t p1, [[maybe_unused]] size_t p2)
cudnn implementation of a 4D &#39;valid&#39; backward convolution C = I * K
Definition: conv.hpp:914
void conv4_backward_data_flipped([[maybe_unused]] I &&input, [[maybe_unused]] K &&kernel, [[maybe_unused]] C &&conv, [[maybe_unused]] size_t s1, [[maybe_unused]] size_t s2, [[maybe_unused]] size_t p1, [[maybe_unused]] size_t p2)
cudnn implementation of a 2D &#39;valid&#39; backward convolution C = I * K
Definition: conv.hpp:931
Wrapper for CUDA memory (when disabled CUDA support)
Definition: cuda_memory.hpp:233
cudnn_handle & start_cudnn()
Start cudnn and return a RTTI helper over a raw cudnn handle.
Definition: cudnn.hpp:75
void conv4_forward_flipped([[maybe_unused]] I &&input, [[maybe_unused]] K &&kernel, [[maybe_unused]] C &&conv, [[maybe_unused]] size_t s1, [[maybe_unused]] size_t s2, [[maybe_unused]] size_t p1, [[maybe_unused]] size_t p2)
cudnn implementation of a 4D &#39;valid&#39; convolution C = I * K, with flipped weights
Definition: conv.hpp:781
cudnn_wrapper< cudnnTensorDescriptor_t > create_tensor_wrapper(I &&input)
Create a CUDNN tensor for the given input matrix.
Definition: cudnn.hpp:246
void conv2_valid_flipped([[maybe_unused]] I &&input, [[maybe_unused]] K &&kernel, [[maybe_unused]] C &&conv, [[maybe_unused]] size_t s1, [[maybe_unused]] size_t s2, [[maybe_unused]] size_t p1, [[maybe_unused]] size_t p2)
CUDNN implementation of a 2D &#39;valid&#39; convolution C = I * K.
Definition: conv.hpp:747
Utility functions for cudnn.
constexpr bool conv_possible
Traits indicating if Convolution with CUDNN is possible for the given configuration.
Definition: conv.hpp:35
void conv2_valid_multi_flipped([[maybe_unused]] I &&input, [[maybe_unused]] K &&kernel, [[maybe_unused]] C &&conv, [[maybe_unused]] size_t s1, [[maybe_unused]] size_t s2, [[maybe_unused]] size_t p1, [[maybe_unused]] size_t p2)
CUDNN implementation of a 2D 'valid' convolution C = I * K, with multiple flipped kernels.
Definition: conv.hpp:897
typename decay_traits< E >::value_type value_t
Traits to extract the value type out of an ETL type.
Definition: tmp.hpp:81
void conv4_backward_filter([[maybe_unused]] I &&input, [[maybe_unused]] K &&kernel, [[maybe_unused]] C &&conv, [[maybe_unused]] size_t s1, [[maybe_unused]] size_t s2, [[maybe_unused]] size_t p1, [[maybe_unused]] size_t p2)
CUDNN implementation of a 4D &#39;valid&#39; convolution C = I * K, where the output are considered to be ker...
Definition: conv.hpp:800
cudnn_wrapper< cudnnFilterDescriptor_t > create_filter_wrapper(I &&kernel)
Create a CUDNN filter tensor for the given input matrix.
Definition: cudnn.hpp:452