Expression Templates Library (ETL)
fft.hpp
1 //=======================================================================
2 // Copyright (c) 2014-2023 Baptiste Wicht
3 // Distributed under the terms of the MIT License.
4 // (See accompanying file LICENSE or copy at
5 // http://opensource.org/licenses/MIT)
6 //=======================================================================
7 
8 #pragma once
9 
10 #include "etl/impl/std/fft.hpp"
11 #include "etl/impl/blas/fft.hpp"
12 #include "etl/impl/cufft/fft.hpp"
13 
14 namespace etl::detail {
15 
/*!
 * \brief Precision tag with four alternatives.
 *
 * NOTE(review): the letters presumably follow the BLAS naming convention
 * (single, double, complex single, complex double) -- confirm with the users
 * of this enumeration, none of which are visible in this file.
 */
enum class precision {
    S, ///< First enumerator (value 0)
    D, ///< Second enumerator (value 1)
    C, ///< Third enumerator (value 2)
    Z  ///< Fourth enumerator (value 3)
};
25 
34 constexpr fft_impl select_default_fft1_impl(bool no_gpu) {
35  //Note since these boolean will be known at compile time, the conditions will be a lot simplified
36  constexpr bool mkl = mkl_enabled;
37  constexpr bool cufft = cufft_enabled;
38 
39  if (cufft && !no_gpu) {
40  return fft_impl::CUFFT;
41  } else if (mkl) {
42  return fft_impl::MKL;
43  } else {
44  return fft_impl::STD;
45  }
46 }
47 
57 constexpr fft_impl select_default_fft1_many_impl(bool no_gpu) {
58  //Note since these boolean will be known at compile time, the conditions will be a lot simplified
59  constexpr bool mkl = mkl_enabled;
60  constexpr bool cufft = cufft_enabled;
61 
62  //Note: more testing would probably improve this selection
63 
64  if (cufft && !no_gpu) {
65  return fft_impl::CUFFT;
66  } else if (mkl) {
67  return fft_impl::MKL;
68  } else {
69  return fft_impl::STD;
70  }
71 }
72 
81 constexpr fft_impl select_default_ifft1_impl(bool no_gpu) {
82  //Note since these boolean will be known at compile time, the conditions will be a lot simplified
83  constexpr bool mkl = mkl_enabled;
84  constexpr bool cufft = cufft_enabled;
85 
86  if (cufft && !no_gpu) {
87  return fft_impl::CUFFT;
88  } else if (mkl) {
89  return fft_impl::MKL;
90  } else {
91  return fft_impl::STD;
92  }
93 }
94 
104 constexpr fft_impl select_default_fft2_impl(bool no_gpu) {
105  //Note since these boolean will be known at compile time, the conditions will be a lot simplified
106  constexpr bool mkl = mkl_enabled;
107  constexpr bool cufft = cufft_enabled;
108 
109  if (cufft && !no_gpu) {
110  return fft_impl::CUFFT;
111  } else if (mkl) {
112  return fft_impl::MKL;
113  } else {
114  return fft_impl::STD;
115  }
116 }
117 
128 constexpr fft_impl select_default_fft2_many_impl(bool no_gpu) {
129  //Note since these boolean will be known at compile time, the conditions will be a lot simplified
130  constexpr bool mkl = mkl_enabled;
131  constexpr bool cufft = cufft_enabled;
132 
133  //Note: more testing would probably improve this selection
134 
135  if (cufft && !no_gpu) {
136  return fft_impl::CUFFT;
137  } else if (mkl) {
138  return fft_impl::MKL;
139  } else {
140  return fft_impl::STD;
141  }
142 }
143 
144 #ifdef ETL_MANUAL_SELECT
145 
154 inline fft_impl select_forced_fft_impl(fft_impl def) {
155  //Note since these boolean will be known at compile time, the conditions will be a lot simplified
156  constexpr bool mkl = mkl_enabled;
157  constexpr bool cufft = cufft_enabled;
158 
159  if (local_context().fft_selector.forced) {
160  auto forced = local_context().fft_selector.impl;
161 
162  switch (forced) {
163  //MKL cannot always be used
164  case fft_impl::MKL:
165  if (!mkl) { //COVERAGE_EXCLUDE_LINE
166  std::cerr << "Forced selection to MKL fft implementation, but not possible for this expression" << std::endl; //COVERAGE_EXCLUDE_LINE
167  return def; //COVERAGE_EXCLUDE_LINE
168  } //COVERAGE_EXCLUDE_LINE
169 
170  return forced;
171 
172  //CUFFT cannot always be used
173  case fft_impl::CUFFT:
174  if (!cufft || local_context().cpu) { //COVERAGE_EXCLUDE_LINE
175  std::cerr << "Forced selection to CUFFT fft implementation, but not possible for this expression" << std::endl; //COVERAGE_EXCLUDE_LINE
176  return def; //COVERAGE_EXCLUDE_LINE
177  } //COVERAGE_EXCLUDE_LINE
178 
179  return forced;
180 
181  //In other cases, simply use the forced impl
182  default:
183  return forced;
184  }
185  }
186 
187  return def;
188 }
189 
195 inline fft_impl select_fft1_impl() {
196  return select_forced_fft_impl(select_default_fft1_impl(local_context().cpu));
197 }
198 
205 inline fft_impl select_fft1_many_impl() {
206  return select_forced_fft_impl(select_default_fft1_many_impl(local_context().cpu));
207 }
208 
214 inline fft_impl select_ifft1_impl() {
215  return select_forced_fft_impl(select_default_ifft1_impl(local_context().cpu));
216 }
217 
224 inline fft_impl select_fft2_impl() {
225  return select_forced_fft_impl(select_default_fft2_impl(local_context().cpu));
226 }
227 
235 inline fft_impl select_fft2_many_impl() {
236  return select_forced_fft_impl(select_default_fft2_many_impl(local_context().cpu));
237 }
238 
239 #else
240 
246 constexpr fft_impl select_fft1_impl() {
247  return (select_default_fft1_impl(false));
248 }
249 
256 constexpr fft_impl select_fft1_many_impl() {
257  return (select_default_fft1_many_impl(false));
258 }
259 
265 constexpr fft_impl select_ifft1_impl() {
266  return (select_default_ifft1_impl(false));
267 }
268 
275 constexpr fft_impl select_fft2_impl() {
276  return (select_default_fft2_impl(false));
277 }
278 
286 constexpr fft_impl select_fft2_many_impl() {
287  return (select_default_fft2_many_impl(false));
288 }
289 
290 #endif
291 
295 struct fft1_impl {
300  template <typename A>
301  static constexpr bool gpu_computable = cufft_enabled;
302 
308  template <typename A, typename C>
309  static void apply(A&& a, C&& c) {
310  constexpr_select auto impl = select_fft1_impl();
311 
312  if
313  constexpr_select(impl == fft_impl::STD) {
314  inc_counter("impl:std");
315  etl::impl::standard::fft1(smart_forward(a), c);
316  }
317  else if
318  constexpr_select(impl == fft_impl::MKL) {
319  inc_counter("impl:mkl");
320  etl::impl::blas::fft1(smart_forward(a), c);
321  }
322  else if
323  constexpr_select(impl == fft_impl::CUFFT) {
324  inc_counter("impl:cufft");
325  etl::impl::cufft::fft1(smart_forward_gpu(a), c);
326  }
327  }
328 };
329 
338  template <typename A>
339  static constexpr bool gpu_computable = cufft_enabled;
340 
346  template <typename C>
347  static void apply(C&& c) {
348  constexpr_select auto impl = select_fft1_impl();
349 
350  if
351  constexpr_select(impl == fft_impl::STD) {
352  inc_counter("impl:std");
353  etl::impl::standard::fft1(c, c);
354  }
355  else if
356  constexpr_select(impl == fft_impl::MKL) {
357  inc_counter("impl:mkl");
358  etl::impl::blas::fft1(c, c);
359  }
360  else if
361  constexpr_select(impl == fft_impl::CUFFT) {
362  inc_counter("impl:cufft");
363  etl::impl::cufft::inplace_fft1(c);
364  }
365  }
366 };
367 
371 struct ifft1_impl {
376  template <typename A>
377  static constexpr bool gpu_computable = cufft_enabled;
378 
384  template <typename A, typename C>
385  static void apply(A&& a, C&& c) {
386  constexpr_select auto impl = select_ifft1_impl();
387 
388  if
389  constexpr_select(impl == fft_impl::STD) {
390  inc_counter("impl:std");
391  etl::impl::standard::ifft1(smart_forward(a), c);
392  }
393  else if
394  constexpr_select(impl == fft_impl::MKL) {
395  inc_counter("impl:mkl");
396  etl::impl::blas::ifft1(smart_forward(a), c);
397  }
398  else if
399  constexpr_select(impl == fft_impl::CUFFT) {
400  inc_counter("impl:cufft");
401  etl::impl::cufft::ifft1(smart_forward_gpu(a), c);
402  }
403  }
404 };
405 
414  template <typename A>
415  static constexpr bool gpu_computable = cufft_enabled;
416 
422  template <typename C>
423  static void apply(C&& c) {
424  constexpr_select auto impl = select_ifft1_impl();
425 
426  if
427  constexpr_select(impl == fft_impl::STD) {
428  inc_counter("impl:std");
429  etl::impl::standard::ifft1(c, c);
430  }
431  else if
432  constexpr_select(impl == fft_impl::MKL) {
433  inc_counter("impl:mkl");
434  etl::impl::blas::ifft1(c, c);
435  }
436  else if
437  constexpr_select(impl == fft_impl::CUFFT) {
438  inc_counter("impl:cufft");
439  etl::impl::cufft::inplace_ifft1(c);
440  }
441  }
442 };
443 
452  template <typename A>
453  static constexpr bool gpu_computable = cufft_enabled;
454 
460  template <typename A, typename C>
461  static void apply(A&& a, C&& c) {
462  constexpr_select auto impl = select_ifft1_impl();
463 
464  if
465  constexpr_select(impl == fft_impl::STD) {
466  inc_counter("impl:std");
467  etl::impl::standard::ifft1_real(smart_forward(a), c);
468  }
469  else if
470  constexpr_select(impl == fft_impl::MKL) {
471  inc_counter("impl:mkl");
472  etl::impl::blas::ifft1_real(smart_forward(a), c);
473  }
474  else if
475  constexpr_select(impl == fft_impl::CUFFT) {
476  inc_counter("impl:cufft");
477  etl::impl::cufft::ifft1_real(smart_forward_gpu(a), c);
478  }
479  }
480 };
481 
485 struct fft2_impl {
490  template <typename A>
491  static constexpr bool gpu_computable = cufft_enabled;
492 
498  template <typename A, typename C>
499  static void apply(A&& a, C&& c) {
500  constexpr_select auto impl = select_fft2_impl();
501 
502  if
503  constexpr_select(impl == fft_impl::STD) {
504  inc_counter("impl:std");
505  etl::impl::standard::fft2(smart_forward(a), c);
506  }
507  else if
508  constexpr_select(impl == fft_impl::MKL) {
509  inc_counter("impl:mkl");
510  etl::impl::blas::fft2(smart_forward(a), c);
511  }
512  else if
513  constexpr_select(impl == fft_impl::CUFFT) {
514  inc_counter("impl:cufft");
515  etl::impl::cufft::fft2(smart_forward_gpu(a), c);
516  }
517  }
518 };
519 
528  template <typename A>
529  static constexpr bool gpu_computable = cufft_enabled;
530 
536  template <typename C>
537  static void apply(C&& c) {
538  constexpr_select auto impl = select_fft2_impl();
539 
540  if
541  constexpr_select(impl == fft_impl::STD) {
542  inc_counter("impl:std");
543  etl::impl::standard::fft2(c, c);
544  }
545  else if
546  constexpr_select(impl == fft_impl::MKL) {
547  inc_counter("impl:mkl");
548  etl::impl::blas::fft2(c, c);
549  }
550  else if
551  constexpr_select(impl == fft_impl::CUFFT) {
552  inc_counter("impl:cufft");
553  etl::impl::cufft::inplace_fft2(c);
554  }
555  }
556 };
557 
561 struct ifft2_impl {
566  template <typename A>
567  static constexpr bool gpu_computable = cufft_enabled;
568 
574  template <typename A, typename C>
575  static void apply(A&& a, C&& c) {
576  constexpr_select auto impl = select_fft2_impl();
577 
578  if
579  constexpr_select(impl == fft_impl::STD) {
580  inc_counter("impl:std");
581  etl::impl::standard::ifft2(smart_forward(a), c);
582  }
583  else if
584  constexpr_select(impl == fft_impl::MKL) {
585  inc_counter("impl:mkl");
586  etl::impl::blas::ifft2(smart_forward(a), c);
587  }
588  else if
589  constexpr_select(impl == fft_impl::CUFFT) {
590  inc_counter("impl:cufft");
591  etl::impl::cufft::ifft2(smart_forward_gpu(a), c);
592  }
593  }
594 };
595 
604  template <typename A>
605  static constexpr bool gpu_computable = cufft_enabled;
606 
612  template <typename C>
613  static void apply(C&& c) {
614  constexpr_select auto impl = select_fft2_impl();
615 
616  if
617  constexpr_select(impl == fft_impl::STD) {
618  inc_counter("impl:std");
619  etl::impl::standard::ifft2(c, c);
620  }
621  else if
622  constexpr_select(impl == fft_impl::MKL) {
623  inc_counter("impl:mkl");
624  etl::impl::blas::ifft2(c, c);
625  }
626  else if
627  constexpr_select(impl == fft_impl::CUFFT) {
628  inc_counter("impl:cufft");
629  etl::impl::cufft::inplace_ifft2(c);
630  }
631  }
632 };
633 
642  template <typename A>
643  static constexpr bool gpu_computable = cufft_enabled;
644 
650  template <typename A, typename C>
651  static void apply(A&& a, C&& c) {
652  constexpr_select auto impl = select_fft2_impl();
653 
654  if
655  constexpr_select(impl == fft_impl::STD) {
656  inc_counter("impl:std");
657  etl::impl::standard::ifft2_real(smart_forward(a), c);
658  }
659  else if
660  constexpr_select(impl == fft_impl::MKL) {
661  inc_counter("impl:mkl");
662  etl::impl::blas::ifft2_real(smart_forward(a), c);
663  }
664  else if
665  constexpr_select(impl == fft_impl::CUFFT) {
666  inc_counter("impl:cufft");
667  etl::impl::cufft::ifft2_real(smart_forward_gpu(a), c);
668  }
669  }
670 };
671 
680  template <typename A>
681  static constexpr bool gpu_computable = cufft_enabled;
682 
688  template <typename A, typename C>
689  static void apply(A&& a, C&& c) {
690  const auto impl = select_fft1_many_impl();
691 
692  if
693  constexpr_select(impl == fft_impl::STD) {
694  inc_counter("impl:std");
695  etl::impl::standard::fft1_many(smart_forward(a), c);
696  }
697  else if
698  constexpr_select(impl == fft_impl::MKL) {
699  inc_counter("impl:mkl");
700  etl::impl::blas::fft1_many(smart_forward(a), c);
701  }
702  else if
703  constexpr_select(impl == fft_impl::CUFFT) {
704  inc_counter("impl:cufft");
705  etl::impl::cufft::fft1_many(smart_forward_gpu(a), c);
706  }
707  }
708 };
709 
718  template <typename A>
719  static constexpr bool gpu_computable = cufft_enabled;
720 
726  template <typename C>
727  static void apply(C&& c) {
728  const auto impl = select_fft1_many_impl();
729 
730  if
731  constexpr_select(impl == fft_impl::STD) {
732  inc_counter("impl:std");
733  etl::impl::standard::fft1_many(c, c);
734  }
735  else if
736  constexpr_select(impl == fft_impl::MKL) {
737  inc_counter("impl:mkl");
738  etl::impl::blas::fft1_many(c, c);
739  }
740  else if
741  constexpr_select(impl == fft_impl::CUFFT) {
742  inc_counter("impl:cufft");
743  etl::impl::cufft::inplace_fft1_many(c);
744  }
745  }
746 };
747 
756  template <typename A>
757  static constexpr bool gpu_computable = cufft_enabled;
758 
764  template <typename A, typename C>
765  static void apply(A&& a, C&& c) {
766  const auto impl = select_fft2_many_impl();
767 
768  if
769  constexpr_select(impl == fft_impl::STD) {
770  inc_counter("impl:std");
771  etl::impl::standard::fft2_many(smart_forward(a), c);
772  }
773  else if
774  constexpr_select(impl == fft_impl::MKL) {
775  inc_counter("impl:mkl");
776  etl::impl::blas::fft2_many(smart_forward(a), c);
777  }
778  else if
779  constexpr_select(impl == fft_impl::CUFFT) {
780  inc_counter("impl:cufft");
781  etl::impl::cufft::fft2_many(smart_forward_gpu(a), c);
782  }
783  }
784 };
785 
794  template <typename A>
795  static constexpr bool gpu_computable = cufft_enabled;
796 
802  template <typename C>
803  static void apply(C&& c) {
804  const auto impl = select_fft2_many_impl();
805 
806  if
807  constexpr_select(impl == fft_impl::STD) {
808  inc_counter("impl:std");
809  etl::impl::standard::fft2_many(c, c);
810  }
811  else if
812  constexpr_select(impl == fft_impl::MKL) {
813  inc_counter("impl:mkl");
814  etl::impl::blas::fft2_many(c, c);
815  }
816  else if
817  constexpr_select(impl == fft_impl::CUFFT) {
818  inc_counter("impl:cufft");
819  etl::impl::cufft::inplace_fft2_many(c);
820  }
821  }
822 };
823 
832  template <typename A>
833  static constexpr bool gpu_computable = cufft_enabled;
834 
840  template <typename A, typename C>
841  static void apply(A&& a, C&& c) {
842  constexpr_select auto impl = select_fft1_many_impl();
843 
844  if
845  constexpr_select(impl == fft_impl::STD) {
846  inc_counter("impl:std");
847  etl::impl::standard::ifft1_many(smart_forward(a), c);
848  }
849  else if
850  constexpr_select(impl == fft_impl::MKL) {
851  inc_counter("impl:mkl");
852  etl::impl::blas::ifft1_many(smart_forward(a), c);
853  }
854  else if
855  constexpr_select(impl == fft_impl::CUFFT) {
856  inc_counter("impl:cufft");
857  etl::impl::cufft::ifft1_many(smart_forward_gpu(a), c);
858  }
859  }
860 };
861 
870  template <typename A>
871  static constexpr bool gpu_computable = cufft_enabled;
872 
878  template <typename C>
879  static void apply(C&& c) {
880  constexpr_select auto impl = select_fft1_many_impl();
881 
882  if
883  constexpr_select(impl == fft_impl::STD) {
884  inc_counter("impl:std");
885  etl::impl::standard::ifft1_many(c, c);
886  }
887  else if
888  constexpr_select(impl == fft_impl::MKL) {
889  inc_counter("impl:mkl");
890  etl::impl::blas::ifft1_many(c, c);
891  }
892  else if
893  constexpr_select(impl == fft_impl::CUFFT) {
894  inc_counter("impl:cufft");
895  etl::impl::cufft::inplace_ifft1_many(c);
896  }
897  }
898 };
899 
908  template <typename A>
909  static constexpr bool gpu_computable = cufft_enabled;
910 
916  template <typename A, typename C>
917  static void apply(A&& a, C&& c) {
918  constexpr_select auto impl = select_fft2_many_impl();
919 
920  if
921  constexpr_select(impl == fft_impl::STD) {
922  inc_counter("impl:std");
923  etl::impl::standard::ifft2_many(smart_forward(a), c);
924  }
925  else if
926  constexpr_select(impl == fft_impl::MKL) {
927  inc_counter("impl:mkl");
928  etl::impl::blas::ifft2_many(smart_forward(a), c);
929  }
930  else if
931  constexpr_select(impl == fft_impl::CUFFT) {
932  inc_counter("impl:cufft");
933  etl::impl::cufft::ifft2_many(smart_forward_gpu(a), c);
934  }
935  }
936 };
937 
946  template <typename A>
947  static constexpr bool gpu_computable = cufft_enabled;
948 
954  template <typename C>
955  static void apply(C&& c) {
956  constexpr_select auto impl = select_fft2_many_impl();
957 
958  if
959  constexpr_select(impl == fft_impl::STD) {
960  inc_counter("impl:std");
961  etl::impl::standard::ifft2_many(c, c);
962  }
963  else if
964  constexpr_select(impl == fft_impl::MKL) {
965  inc_counter("impl:mkl");
966  etl::impl::blas::ifft2_many(c, c);
967  }
968  else if
969  constexpr_select(impl == fft_impl::CUFFT) {
970  inc_counter("impl:cufft");
971  etl::impl::cufft::inplace_ifft2_many(c);
972  }
973  }
974 };
975 
976 } //end of namespace etl::detail
static void apply(A &&a, C &&c)
Apply the functor.
Definition: fft.hpp:689
Functor for Batched 2D FFT.
Definition: fft.hpp:789
Functor for 2D IFFT (real)
Definition: fft.hpp:637
Functor for 2D FFT.
Definition: fft.hpp:485
constexpr bool mkl_enabled
Indicates if the MKL library is available for ETL.
Definition: config.hpp:64
static void apply(A &&a, C &&c)
Apply the functor.
Definition: fft.hpp:461
Standard implementation.
Functor for Batched 2D IFFT.
Definition: fft.hpp:903
static void apply(A &&a, C &&c)
Apply the functor.
Definition: fft.hpp:765
static void apply(C &&c)
Apply the functor.
Definition: fft.hpp:613
Functor for 1D IFFT.
Definition: fft.hpp:409
static void apply(C &&c)
Apply the functor.
Definition: fft.hpp:537
D D
The number of dimensions.
Definition: dyn_matrix_view.hpp:24
Functor for Batched 1D IFFT.
Definition: fft.hpp:827
Functor for 2D FFT.
Definition: fft.hpp:523
Functor for 1D IFFT.
Definition: fft.hpp:371
Definition: expression_builder.hpp:699
fft_impl
The different FFT implementations.
Definition: fft_impl.hpp:20
static void apply(A &&a, C &&c)
Apply the functor.
Definition: fft.hpp:499
The Intel MKL implementation.
static void apply(A &&a, C &&c)
Apply the functor.
Definition: fft.hpp:651
context & local_context()
Return the configuration context of the current thread.
Definition: context.hpp:50
Functor for Batched 2D IFFT.
Definition: fft.hpp:941
static void apply(A &&a, C &&c)
Apply the functor.
Definition: fft.hpp:917
static void apply(C &&c)
Apply the functor.
Definition: fft.hpp:727
Functor for Batched 1D IFFT.
Definition: fft.hpp:865
Functor for Batched 2D FFT.
Definition: fft.hpp:751
static void apply(A &&a, C &&c)
Apply the functor.
Definition: fft.hpp:309
static void apply(C &&c)
Apply the functor.
Definition: fft.hpp:955
static void apply(A &&a, C &&c)
Apply the functor.
Definition: fft.hpp:385
decltype(auto) smart_forward_gpu(E &expr)
Smart forwarding for a temporary expression that will be computed in GPU.
Definition: helpers.hpp:343
Functor for 1D FFT.
Definition: fft.hpp:295
The NVidia CuFFT implementation.
static void apply(C &&c)
Apply the functor.
Definition: fft.hpp:879
Functor for Batched 1D FFT.
Definition: fft.hpp:675
decltype(auto) smart_forward(E &expr)
Smart forwarding for a temporary expression.
Definition: helpers.hpp:323
static void apply(C &&c)
Apply the functor.
Definition: fft.hpp:347
constexpr bool cufft_enabled
Indicates if the NVIDIA CUFFT library is available for ETL.
Definition: config.hpp:104
static void apply(A &&a, C &&c)
Apply the functor.
Definition: fft.hpp:575
Functor for 2D IFFT.
Definition: fft.hpp:599
Functor for Inplace 1D FFT.
Definition: fft.hpp:333
Functor for 2D IFFT.
Definition: fft.hpp:561
void inc_counter([[maybe_unused]] const char *name)
Increase the given counter.
Definition: counters.hpp:25
static void apply(C &&c)
Apply the functor.
Definition: fft.hpp:423
static void apply(A &&a, C &&c)
Apply the functor.
Definition: fft.hpp:841
Functor for Batched 1D FFT.
Definition: fft.hpp:713
Functor for 1D IFFT (real)
Definition: fft.hpp:447
static void apply(C &&c)
Apply the functor.
Definition: fft.hpp:803