Expression Templates Library (ETL)
sum.hpp
Go to the documentation of this file.
1 //=======================================================================
2 // Copyright (c) 2014-2023 Baptiste Wicht
3 // Distributed under the terms of the MIT License.
4 // (See accompanying file LICENSE or copy at
5 // http://opensource.org/licenses/MIT)
6 //=======================================================================
7 
23 #pragma once
24 
25 //Include the implementations
26 #include "etl/impl/std/sum.hpp"
27 #include "etl/impl/vec/sum.hpp"
28 #include "etl/impl/blas/sum.hpp"
29 #include "etl/impl/cublas/sum.hpp"
30 
31 namespace etl::detail {
32 
41 template <typename E>
42 constexpr etl::sum_impl select_default_sum_impl(bool no_gpu) {
43  //Note: since the constexpr values will be known at compile time, the
44  //conditions will be a lot simplified
45 
46  if (cublas_enabled && is_gpu_computable<E> && is_floating<E> && !no_gpu) {
47  return etl::sum_impl::CUBLAS;
48  }
49 
50  if (vec_enabled && all_vectorizable<vector_mode, E>) {
51  return etl::sum_impl::VEC;
52  }
53 
54  return etl::sum_impl::STD;
55 }
56 
57 #ifdef ETL_MANUAL_SELECT
58 
64 template <typename E>
66  if (local_context().sum_selector.forced) {
67  auto forced = local_context().sum_selector.impl;
68 
69  switch (forced) {
70  //VEC cannot always be used
71  case sum_impl::VEC:
72  if (!vec_enabled || !decay_traits<E>::template vectorizable<vector_mode>) { //COVERAGE_EXCLUDE_LINE
73  std::cerr << "Forced selection to VEC sum implementation, but not possible for this expression" << std::endl; //COVERAGE_EXCLUDE_LINE
74  return select_default_sum_impl<E>(local_context().cpu); //COVERAGE_EXCLUDE_LINE
75  } //COVERAGE_EXCLUDE_LINE
76 
77  return forced;
78 
79  case sum_impl::CUBLAS:
80  if (!cublas_enabled || !is_gpu_computable<E> || !is_floating<E> || local_context().cpu) { //COVERAGE_EXCLUDE_LINE
81  std::cerr << "Forced selection to CUBLAS sum implementation, but not possible for this expression" << std::endl; //COVERAGE_EXCLUDE_LINE
82  return select_default_sum_impl<E>(local_context().cpu); //COVERAGE_EXCLUDE_LINE
83  } //COVERAGE_EXCLUDE_LINE
84 
85  return forced;
86 
87  case sum_impl::BLAS:
88  if (!cblas_enabled || !is_dma<E> || !is_floating<E>) { //COVERAGE_EXCLUDE_LINE
89  std::cerr << "Forced selection to BLAS sum implementation, but not possible for this expression" << std::endl; //COVERAGE_EXCLUDE_LINE
90  return select_default_sum_impl<E>(local_context().cpu); //COVERAGE_EXCLUDE_LINE
91  } //COVERAGE_EXCLUDE_LINE
92 
93  return forced;
94 
95  //In other cases, simply use the forced impl
96  default:
97  return forced;
98  }
99  }
100 
101  return select_default_sum_impl<E>(local_context().cpu);
102 }
103 
104 #else
105 
114 template <typename E>
116  return select_default_sum_impl<E>(false);
117 }
118 
119 #endif
120 
124 struct sum_impl {
128  template <typename E>
129  static value_t<E> apply(const E& e) {
130  constexpr_select const auto impl = select_sum_impl<E>();
131 
132  if
133  constexpr_select(impl == etl::sum_impl::VEC) {
134  inc_counter("impl:vec");
135  return impl::vec::sum(e);
136  }
137  else if
138  constexpr_select(impl == etl::sum_impl::BLAS) {
139  inc_counter("impl:blas");
140  return impl::blas::sum(e);
141  }
142  else if
143  constexpr_select(impl == etl::sum_impl::CUBLAS) {
144  if constexpr (gpu_computable_single_precision<E> || gpu_computable_double_precision<E>) {
145  inc_counter("impl:cublas");
146  return impl::cublas::sum(e);
147  } else {
148  cpp_unreachable("CUBLAS called on invalid types");
149  return value_t<E>(0);
150  }
151  }
152  else {
153  inc_counter("impl:std");
154  return impl::standard::sum(e);
155  }
156  }
157 };
158 
162 struct asum_impl {
166  template <typename E>
167  static value_t<E> apply(const E& e) {
168  constexpr_select const auto impl = select_sum_impl<E>();
169 
170  if
171  constexpr_select(impl == etl::sum_impl::VEC) {
172  inc_counter("impl:vec");
173  return impl::vec::asum(e);
174  }
175  else if
176  constexpr_select(impl == etl::sum_impl::BLAS) {
177  inc_counter("impl:blas");
178  return impl::blas::asum(e);
179  }
180  else if
181  constexpr_select(impl == etl::sum_impl::CUBLAS) {
182  if constexpr (gpu_computable_single_precision<E> || gpu_computable_double_precision<E>) {
183  inc_counter("impl:cublas");
184  return impl::cublas::asum(e);
185  } else {
186  cpp_unreachable("CUBLAS called on invalid types");
187  return value_t<E>(0);
188  }
189  }
190  else {
191  inc_counter("impl:std");
192  return impl::standard::asum(e);
193  }
194  }
195 };
196 
197 } //end of namespace etl::detail
static value_t< E > apply(const E &e)
Apply the functor to e.
Definition: sum.hpp:167
Absolute Sum operation implementation.
Definition: sum.hpp:162
Standard implementation.
BLAS implementation of the "sum" reduction.
constexpr bool vec_enabled
Indicates if vectorization is available in any format.
Definition: config.hpp:220
VEC implementation.
sum_impl
Enumeration describing the different implementations of sum.
Definition: sum_impl.hpp:20
BLAS implementation.
static value_t< E > apply(const E &e)
Apply the functor to e.
Definition: sum.hpp:129
Definition: expression_builder.hpp:699
Standard implementation of the "sum" reduction.
Traits to get information about ETL types.
Definition: tmp.hpp:68
BLAS implementation.
context & local_context()
Return the configuration context of the current thread.
Definition: context.hpp:50
constexpr bool cublas_enabled
Indicates if the NVIDIA CUBLAS library is available for ETL.
Definition: config.hpp:99
Unified vectorized implementation of the "sum" reduction.
bool cpu
Force CPU evaluation.
Definition: context.hpp:29
Sum operation implementation.
Definition: sum.hpp:124
CUBLAS implementation of the "sum" reduction.
constexpr etl::sum_impl select_default_sum_impl(bool no_gpu)
Select the sum implementation for an expression of type E.
Definition: sum.hpp:42
constexpr bool cblas_enabled
Indicates if a BLAS library is available for ETL.
Definition: config.hpp:76
constexpr etl::sum_impl select_sum_impl()
Select the sum implementation for an expression of type E.
Definition: sum.hpp:115
typename decay_traits< E >::value_type value_t
Traits to extract the value type out of an ETL type.
Definition: tmp.hpp:81
void inc_counter([[maybe_unused]] const char *name)
Increase the given counter.
Definition: counters.hpp:25