Expression Templates Library (ETL)
pooling.hpp
1 //=======================================================================
2 // Copyright (c) 2014-2023 Baptiste Wicht
3 // Distributed under the terms of the MIT License.
4 // (See accompanying file LICENSE or copy at
5 // http://opensource.org/licenses/MIT)
6 //=======================================================================
7 
8 #pragma once
9 
10 // Include all the modules
11 
12 #include "etl/impl/max_pooling_derivative.hpp"
13 #include "etl/impl/avg_pooling_derivative.hpp"
14 
15 // Include the implementations
16 
17 #include "etl/impl/std/max_pooling.hpp"
18 #include "etl/impl/std/avg_pooling.hpp"
19 #include "etl/impl/cudnn/max_pooling.hpp"
20 
21 namespace etl::impl {
22 
33 template <etl_dma X, etl_dma Y>
34 constexpr etl::pool_impl select_default_pool_impl(bool no_gpu) {
35  if (cudnn_enabled && all_floating<X, Y> && !no_gpu) {
36  return etl::pool_impl::CUDNN;
37  }
38 
39  return etl::pool_impl::STD;
40 }
41 
42 #ifdef ETL_MANUAL_SELECT
43 
50 template <typename X, typename Y>
51 etl::pool_impl select_pool_impl() {
52  if (local_context().pool_selector.forced) {
53  auto forced = local_context().pool_selector.impl;
54 
55  switch (forced) {
56  // CUDNN cannot always be used
57  case pool_impl::CUDNN:
58  if (!cudnn_enabled || !all_floating<X, Y> || local_context().cpu) { //COVERAGE_EXCLUDE_LINE
59  std::cerr << "Forced selection to CUDNN pool implementation, but not possible for this expression" << std::endl; //COVERAGE_EXCLUDE_LINE
60  return select_default_pool_impl<X, Y>(local_context().cpu); //COVERAGE_EXCLUDE_LINE
61  } //COVERAGE_EXCLUDE_LINE
62 
63  return forced;
64 
65  //In other cases, simply use the forced impl
66  default:
67  return forced;
68  }
69  }
70 
71  return select_default_pool_impl<X, Y>(local_context().cpu);
72 }
73 
74 #else
75 
84 template <typename X, typename Y>
85 constexpr etl::pool_impl select_pool_impl() {
86  return select_default_pool_impl<X, Y>(false);
87 }
88 
89 #endif
90 
94 struct max_pool_2d {
99  template <typename A>
100  static constexpr bool gpu_computable = cudnn_enabled;
101 
117  template <size_t C1, size_t C2, size_t S1, size_t S2, size_t P1, size_t P2, typename X, typename Y>
118  static void apply(const X& x, Y&& y) {
119  constexpr_select const auto impl = select_pool_impl<X, Y>();
120 
121  if
122  constexpr_select(impl == pool_impl::STD) {
123  inc_counter("impl:std");
124  etl::impl::standard::max_pool_2d::apply<C1, C2, S1, S2, P1, P2>(smart_forward(x), y);
125  }
126  else if
127  constexpr_select(impl == pool_impl::CUDNN) {
128  inc_counter("impl:cudnn");
129  etl::impl::cudnn::max_pool_2d::apply(smart_forward_gpu(x), y, C1, C2, S1, S2, P1, P2);
130  }
131  else {
132  cpp_unreachable("Invalid selection for pooling");
133  }
134  }
135 
151  template <typename X, typename Y>
152  static void apply(const X& x, Y&& y, size_t c1, size_t c2, size_t s1, size_t s2, size_t p1, size_t p2) {
153  constexpr_select const auto impl = select_pool_impl<X, Y>();
154 
155  if
156  constexpr_select(impl == pool_impl::STD) {
157  inc_counter("impl:std");
158  etl::impl::standard::max_pool_2d::apply(smart_forward(x), y, c1, c2, s1, s2, p1, p2);
159  }
160  else if
161  constexpr_select(impl == pool_impl::CUDNN) {
162  inc_counter("impl:cudnn");
163  etl::impl::cudnn::max_pool_2d::apply(smart_forward_gpu(x), y, c1, c2, s1, s2, p1, p2);
164  }
165  else {
166  cpp_unreachable("Invalid selection for pooling");
167  }
168  }
169 };
170 
174 struct avg_pool_2d {
179  template <typename A>
180  static constexpr bool gpu_computable = cudnn_enabled;
181 
197  template <size_t C1, size_t C2, size_t S1, size_t S2, size_t P1, size_t P2, typename X, typename Y>
198  static void apply(const X& x, Y&& y) {
199  constexpr_select const auto impl = select_pool_impl<X, Y>();
200 
201  if
202  constexpr_select(impl == pool_impl::STD) {
203  inc_counter("impl:std");
204  etl::impl::standard::avg_pool_2d::apply<C1, C2, S1, S2, P1, P2>(smart_forward(x), y);
205  }
206  else if
207  constexpr_select(impl == pool_impl::CUDNN) {
208  inc_counter("impl:cudnn");
209  etl::impl::cudnn::avg_pool_2d::apply(smart_forward_gpu(x), y, C1, C2, S1, S2, P1, P2);
210  }
211  else {
212  cpp_unreachable("Invalid selection for pooling");
213  }
214  }
215 
231  template <typename X, typename Y>
232  static void apply(const X& x, Y&& y, size_t c1, size_t c2, size_t s1, size_t s2, size_t p1, size_t p2) {
233  constexpr_select const auto impl = select_pool_impl<X, Y>();
234 
235  if
236  constexpr_select(impl == pool_impl::STD) {
237  inc_counter("impl:std");
238  etl::impl::standard::avg_pool_2d::apply(smart_forward(x), y, c1, c2, s1, s2, p1, p2);
239  }
240  else if
241  constexpr_select(impl == pool_impl::CUDNN) {
242  inc_counter("impl:cudnn");
243  etl::impl::cudnn::avg_pool_2d::apply(smart_forward_gpu(x), y, c1, c2, s1, s2, p1, p2);
244  }
245  else {
246  cpp_unreachable("Invalid selection for pooling");
247  }
248  }
249 };
250 
254 struct max_pool_3d {
259  template <typename A>
260  static constexpr bool gpu_computable = cudnn_enabled;
261 
277  template <size_t C1, size_t C2, size_t C3, size_t S1, size_t S2, size_t S3, size_t P1, size_t P2, size_t P3, typename X, typename Y>
278  static void apply(const X& x, Y&& y) {
279  constexpr_select const auto impl = select_pool_impl<X, Y>();
280 
281  if
282  constexpr_select(impl == pool_impl::STD) {
283  inc_counter("impl:std");
284  etl::impl::standard::max_pool_3d::apply<C1, C2, C3, S1, S2, S3, P1, P2, P3>(smart_forward(x), y);
285  }
286  else if
287  constexpr_select(impl == pool_impl::CUDNN) {
288  inc_counter("impl:cudnn");
289  etl::impl::cudnn::max_pool_3d::apply(smart_forward_gpu(x), y, C1, C2, C3, S1, S2, S3, P1, P2, P3);
290  }
291  else {
292  cpp_unreachable("Invalid selection for pooling");
293  }
294  }
295 
311  template <typename X, typename Y>
312  static void apply(const X& x, Y&& y, size_t c1, size_t c2, size_t c3, size_t s1, size_t s2, size_t s3, size_t p1, size_t p2, size_t p3) {
313  constexpr_select const auto impl = select_pool_impl<X, Y>();
314 
315  if
316  constexpr_select(impl == pool_impl::STD) {
317  inc_counter("impl:std");
318  etl::impl::standard::max_pool_3d::apply(smart_forward(x), y, c1, c2, c3, s1, s2, s3, p1, p2, p3);
319  }
320  else if
321  constexpr_select(impl == pool_impl::CUDNN) {
322  inc_counter("impl:cudnn");
323  etl::impl::cudnn::max_pool_3d::apply(smart_forward_gpu(x), y, c1, c2, c3, s1, s2, s3, p1, p2, p3);
324  }
325  else {
326  cpp_unreachable("Invalid selection for pooling");
327  }
328  }
329 };
330 
334 struct avg_pool_3d {
339  template <typename A>
340  static constexpr bool gpu_computable = cudnn_enabled;
341 
357  template <size_t C1, size_t C2, size_t C3, size_t S1, size_t S2, size_t S3, size_t P1, size_t P2, size_t P3, typename X, typename Y>
358  static void apply(const X& x, Y&& y) {
359  constexpr_select const auto impl = select_pool_impl<X, Y>();
360 
361  if
362  constexpr_select(impl == pool_impl::STD) {
363  inc_counter("impl:std");
364  etl::impl::standard::avg_pool_3d::apply<C1, C2, C3, S1, S2, S3, P1, P2, P3>(smart_forward(x), y);
365  }
366  else if
367  constexpr_select(impl == pool_impl::CUDNN) {
368  inc_counter("impl:cudnn");
369  etl::impl::cudnn::avg_pool_3d::apply(smart_forward_gpu(x), y, C1, C2, C3, S1, S2, S3, P1, P2, P3);
370  }
371  else {
372  cpp_unreachable("Invalid selection for pooling");
373  }
374  }
375 
391  template <typename X, typename Y>
392  static void apply(const X& x, Y&& y, size_t c1, size_t c2, size_t c3, size_t s1, size_t s2, size_t s3, size_t p1, size_t p2, size_t p3) {
393  const auto impl = select_pool_impl<X, Y>();
394 
395  if
396  constexpr_select(impl == pool_impl::STD) {
397  inc_counter("impl:std");
398  etl::impl::standard::avg_pool_3d::apply(smart_forward(x), y, c1, c2, c3, s1, s2, s3, p1, p2, p3);
399  }
400  else if
401  constexpr_select(impl == pool_impl::CUDNN) {
402  inc_counter("impl:cudnn");
403  etl::impl::cudnn::avg_pool_3d::apply(smart_forward_gpu(x), y, c1, c2, c3, s1, s2, s3, p1, p2, p3);
404  }
405  else {
406  cpp_unreachable("Invalid selection for pooling");
407  }
408  }
409 };
410 
411 } //end of namespace etl::impl
static void apply(const A &sub, M &&m)
Apply the functor on sub and store the result in m.
Definition: avg_pooling.hpp:587
pool_impl
Enumeration describing the different implementations of pooling.
Definition: pool_impl.hpp:21
Standard implementation.
static void apply(const X &x, Y &&y, size_t c1, size_t c2, size_t s1, size_t s2, size_t p1, size_t p2)
Pool x into y.
Definition: pooling.hpp:232
Functor for 2D Average Pooling.
Definition: pooling.hpp:174
static void apply(const X &x, Y &&y)
Pool x into y.
Definition: pooling.hpp:198
static void apply(const X &x, Y &&y)
Pool x into y.
Definition: pooling.hpp:118
static void apply(const A &sub, M &&m)
Apply the functor on sub and store the result in m.
Definition: max_pooling.hpp:855
context & local_context()
Return the configuration context of the current thread.
Definition: context.hpp:50
Functor for 2D Max Pooling.
Definition: pooling.hpp:94
static void apply(const X &x, Y &&y, size_t c1, size_t c2, size_t c3, size_t s1, size_t s2, size_t s3, size_t p1, size_t p2, size_t p3)
Pool x into y.
Definition: pooling.hpp:392
constexpr bool cudnn_enabled
Indicates if the NVIDIA CUDNN library is available for ETL.
Definition: config.hpp:114
static void apply([[maybe_unused]] const X &x, [[maybe_unused]] Y &&y, [[maybe_unused]] size_t c1, [[maybe_unused]] size_t c2, [[maybe_unused]] size_t c3, [[maybe_unused]] size_t s1, [[maybe_unused]] size_t s2, [[maybe_unused]] size_t s3, [[maybe_unused]] size_t p1, [[maybe_unused]] size_t p2, [[maybe_unused]] size_t p3)
Apply the functor on sub and store the result in m.
Definition: max_pooling.hpp:249
bool cpu
Force CPU evaluation.
Definition: context.hpp:29
GPU implementation.
static constexpr bool gpu_computable
Indicates if the temporary expression can be directly evaluated using only GPU.
Definition: pooling.hpp:100
static void apply([[maybe_unused]] const X &x, [[maybe_unused]] Y &&y, [[maybe_unused]] size_t c1, [[maybe_unused]] size_t c2, [[maybe_unused]] size_t c3, [[maybe_unused]] size_t s1, [[maybe_unused]] size_t s2, [[maybe_unused]] size_t s3, [[maybe_unused]] size_t p1, [[maybe_unused]] size_t p2, [[maybe_unused]] size_t p3)
Apply the functor on sub and store the result in m.
Definition: max_pooling.hpp:276
static void apply(const X &x, Y &&y)
Pool x into y.
Definition: pooling.hpp:358
decltype(auto) smart_forward_gpu(E &expr)
Smart forwarding for a temporary expression that will be computed in GPU.
Definition: helpers.hpp:343
static void apply(const A &sub, M &&m)
Apply the functor on sub and store the result in m.
Definition: max_pooling.hpp:134
static void apply(const X &x, Y &&y, size_t c1, size_t c2, size_t s1, size_t s2, size_t p1, size_t p2)
Pool x into y.
Definition: pooling.hpp:152
Functor for 3D Average Pooling.
Definition: pooling.hpp:334
static void apply(const A &sub, M &&m)
Apply the functor on sub and store the result in m.
Definition: avg_pooling.hpp:127
decltype(auto) smart_forward(E &expr)
Smart forwarding for a temporary expression.
Definition: helpers.hpp:323
static void apply(const X &x, Y &&y)
Pool x into y.
Definition: pooling.hpp:278
static void apply([[maybe_unused]] const X &x, [[maybe_unused]] Y &&y, [[maybe_unused]] size_t c1, [[maybe_unused]] size_t c2, [[maybe_unused]] size_t s1, [[maybe_unused]] size_t s2, [[maybe_unused]] size_t p1, [[maybe_unused]] size_t p2)
Apply the functor on sub and store the result in m.
Definition: max_pooling.hpp:201
Definition: avg_pooling_derivative.hpp:10
void inc_counter([[maybe_unused]] const char *name)
Increase the given counter.
Definition: counters.hpp:25
static void apply(const X &x, Y &&y, size_t c1, size_t c2, size_t c3, size_t s1, size_t s2, size_t s3, size_t p1, size_t p2, size_t p3)
Pool x into y.
Definition: pooling.hpp:312
Functor for 3D Max Pooling.
Definition: pooling.hpp:254
static void apply([[maybe_unused]] const X &x, [[maybe_unused]] Y &&y, [[maybe_unused]] size_t c1, [[maybe_unused]] size_t c2, [[maybe_unused]] size_t s1, [[maybe_unused]] size_t s2, [[maybe_unused]] size_t p1, [[maybe_unused]] size_t p2)
Apply the functor on sub and store the result in m.
Definition: max_pooling.hpp:225