Expression Templates Library (ETL)
max_pooling.hpp
1 //=======================================================================
2 // Copyright (c) 2014-2023 Baptiste Wicht
3 // Distributed under the terms of the MIT License.
4 // (See accompanying file LICENSE or copy at
5 // http://opensource.org/licenses/MIT)
6 //=======================================================================
7 
8 #pragma once
9 
10 namespace etl::impl::standard {
11 
15 struct max_pool_2d {
28  template <typename A>
29  static auto pool_block_border(const A& sub, size_t j, size_t k, size_t c1, size_t c2, size_t s1, size_t s2, size_t p1, size_t p2) {
30  auto max = value_t<A>(0);
31 
32  const auto s_j = j * s1;
33  const auto s_k = k * s2;
34 
35  for (size_t jj = 0; jj < c1; ++jj) {
36  for (size_t kk = 0; kk < c2; ++kk) {
37  if (s_j + jj >= p1 && (s_j + jj) - p1 < etl::dim<0>(sub) && s_k + kk >= p2 && (s_k + kk) - p2 < etl::dim<1>(sub)) {
38  max = std::max(max, sub(s_j + jj - p1, s_k + kk - p2));
39  }
40  }
41  }
42 
43  return max;
44  }
45 
/*!
 * \brief Pool a block of the sub expression
 *
 * The block starts at (j * S1 - P1, k * S2 - P2) and covers C1 x C2
 * elements. No bounds checking is performed.
 *
 * \tparam C1 The first dimension pooling ratio
 * \tparam C2 The second dimension pooling ratio
 * \tparam S1 The first dimension stride
 * \tparam S2 The second dimension stride
 * \tparam P1 The first dimension padding
 * \tparam P2 The second dimension padding
 * \param sub The sub expression
 * \param j The first index of the block
 * \param k The second index of the block
 * \return the max of the block
 */
template <size_t C1, size_t C2, size_t S1, size_t S2, size_t P1, size_t P2, typename A>
static auto pool_block_2d(const A& sub, size_t j, size_t k) {
    const size_t row0 = j * S1 - P1;
    const size_t col0 = k * S2 - P2;

    // Seed the running maximum with the first element of the block
    auto best = sub(row0, col0);

    for (size_t dj = 0; dj < C1; ++dj) {
        const size_t row = row0 + dj;

        for (size_t dk = 0; dk < C2; ++dk) {
            best = std::max(best, sub(row, col0 + dk));
        }
    }

    return best;
}
71 
/*!
 * \brief Pool a block of the sub expression
 *
 * The block lives in the 2D slice n of sub, starts at (j * S1, k * S2)
 * and covers C1 x C2 elements. No bounds checking is performed.
 *
 * \tparam C1 The first dimension pooling ratio
 * \tparam C2 The second dimension pooling ratio
 * \tparam S1 The first dimension stride
 * \tparam S2 The second dimension stride
 * \param sub The sub expression
 * \param n The first index of the block
 * \param j The second index of the block
 * \param k The third index of the block
 * \return the max of the block
 */
template <size_t C1, size_t C2, size_t S1, size_t S2, typename A>
static auto pool_block_3d(const A& sub, size_t n, size_t j, size_t k) {
    const size_t row0 = j * S1;
    const size_t col0 = k * S2;

    auto best = sub(n, row0, col0);

    for (size_t dj = 0; dj < C1; ++dj) {
        const size_t row = row0 + dj;

        for (size_t dk = 0; dk < C2; ++dk) {
            best = std::max(best, sub(n, row, col0 + dk));
        }
    }

    return best;
}
97 
/*!
 * \brief Pool a block of the sub expression
 *
 * The block lives in the 2D slice (m, n) of sub, starts at
 * (j * S1, k * S2) and covers C1 x C2 elements. No bounds checking is
 * performed.
 *
 * \tparam C1 The first dimension pooling ratio
 * \tparam C2 The second dimension pooling ratio
 * \tparam S1 The first dimension stride
 * \tparam S2 The second dimension stride
 * \param sub The sub expression
 * \param m The first index of the block
 * \param n The second index of the block
 * \param j The third index of the block
 * \param k The fourth index of the block
 * \return the max of the block
 */
template <size_t C1, size_t C2, size_t S1, size_t S2, typename A>
static auto pool_block_4d(const A& sub, size_t m, size_t n, size_t j, size_t k) {
    const size_t row0 = j * S1;
    const size_t col0 = k * S2;

    auto best = sub(m, n, row0, col0);

    for (size_t dj = 0; dj < C1; ++dj) {
        const size_t row = row0 + dj;

        for (size_t dk = 0; dk < C2; ++dk) {
            best = std::max(best, sub(m, n, row, col0 + dk));
        }
    }

    return best;
}
123 
133  template <size_t C1, size_t C2, size_t S1, size_t S2, size_t P1, size_t P2, etl_2d A, typename M>
134  static void apply(const A& sub, M&& m) {
135  if (!P1 && !P2 && S1 == C1 && S2 == C2){
136  if (C1 == 2 && C2 == 2) {
137  for (size_t j = 0; j < etl::dim<0>(m); ++j) {
138  for (size_t k = 0; k < etl::dim<1>(m); ++k) {
139  m(j, k) = pool_block_2d_2x2(sub, j, k);
140  }
141  }
142  } else {
143  for (size_t j = 0; j < etl::dim<0>(m); ++j) {
144  for (size_t k = 0; k < etl::dim<1>(m); ++k) {
145  m(j, k) = pool_block_2d<C1, C2, C1, C2, 0, 0>(sub, j, k);
146  }
147  }
148  }
149  } else {
150  const size_t o1 = (etl::dim<0>(sub) - C1 + 2 * P1) / S1 + 1;
151  const size_t o2 = (etl::dim<1>(sub) - C2 + 2 * P2) / S2 + 1;
152 
153  if (P1 || P2) {
154  for (size_t i = 0; i < P1; ++i) {
155  for (size_t j = 0; j < o2; ++j) {
156  m(i, j) = pool_block_border(sub, i, j, C1, C2, S1, S2, P1, P2);
157  }
158  }
159 
160  for (size_t i = o1 - P1; i < o1; ++i) {
161  for (size_t j = 0; j < o2; ++j) {
162  m(i, j) = pool_block_border(sub, i, j, C1, C2, S1, S2, P1, P2);
163  }
164  }
165 
166  for (size_t j = 0; j < P2; ++j) {
167  for (size_t i = P1; i < o1 - P1; ++i) {
168  m(i, j) = pool_block_border(sub, i, j, C1, C2, S1, S2, P1, P2);
169  }
170  }
171 
172  for (size_t j = o2 - P2; j < o2; ++j) {
173  for (size_t i = P1; i < o1 - P1; ++i) {
174  m(i, j) = pool_block_border(sub, i, j, C1, C2, S1, S2, P1, P2);
175  }
176  }
177  }
178 
179  for (size_t j = P1; j < o1 - P1; ++j) {
180  for (size_t k = P1; k < o2 - P2; ++k) {
181  m(j, k) = pool_block_2d<C1, C2, S1, S2, P1, P2>(sub, j, k);
182  }
183  }
184  }
185  }
186 
/*!
 * \brief Pool a block of the sub expression
 *
 * The block starts at (j * s1 - p1, k * s2 - p2) and covers c1 x c2
 * elements. No bounds checking is performed.
 *
 * \param sub The sub expression
 * \param j The first index of the block
 * \param k The second index of the block
 * \param c1 The first dimension pooling ratio
 * \param c2 The second dimension pooling ratio
 * \param s1 The first dimension stride
 * \param s2 The second dimension stride
 * \param p1 The first dimension padding
 * \param p2 The second dimension padding
 * \return the max of the block
 */
template <typename A>
static auto pool_block_2d(const A& sub, size_t j, size_t k, size_t c1, size_t c2, size_t s1, size_t s2, size_t p1, size_t p2) {
    const size_t first_row = j * s1 - p1;
    const size_t first_col = k * s2 - p2;

    auto result = sub(first_row, first_col);

    for (size_t r = 0; r < c1; ++r) {
        const size_t row = first_row + r;

        for (size_t c = 0; c < c2; ++c) {
            const size_t col = first_col + c;

            result = std::max(result, sub(row, col));
        }
    }

    return result;
}
210 
/*!
 * \brief Pool a block of the sub expression
 *
 * The stride is equal to the pooling ratio: the block starts at
 * (j * c1, k * c2) and covers c1 x c2 elements. No bounds checking is
 * performed.
 *
 * \param sub The sub expression
 * \param j The first index of the block
 * \param k The second index of the block
 * \param c1 The first dimension pooling ratio
 * \param c2 The second dimension pooling ratio
 * \return the max of the block
 */
template <typename A>
static auto pool_block_2d(const A& sub, size_t j, size_t k, size_t c1, size_t c2) {
    const size_t first_row = j * c1;
    const size_t first_col = k * c2;

    auto result = sub(first_row, first_col);

    for (size_t r = 0; r < c1; ++r) {
        const size_t row = first_row + r;

        for (size_t c = 0; c < c2; ++c) {
            result = std::max(result, sub(row, first_col + c));
        }
    }

    return result;
}
234 
/*!
 * \brief Pool a 2x2 block of the sub expression
 *
 * The block starts at (2 * j, 2 * k). No bounds checking is performed.
 *
 * \param sub The sub expression
 * \param j The first index of the block
 * \param k The second index of the block
 * \return the max of the block
 */
template <typename A>
static auto pool_block_2d_2x2(const A& sub, size_t j, size_t k) {
    const size_t top  = j * 2;
    const size_t left = k * 2;

    // Reduce each row of the block, then reduce the two rows
    auto upper = std::max(sub(top, left), sub(top, left + 1));
    auto lower = std::max(sub(top + 1, left), sub(top + 1, left + 1));

    return std::max(upper, lower);
}
250 
/*!
 * \brief Pool a block of the sub expression
 *
 * The block lives in the 2D slice n of sub, starts at (j * s1, k * s2)
 * and covers c1 x c2 elements. No bounds checking is performed.
 *
 * \param sub The sub expression
 * \param n The first index of the block
 * \param j The second index of the block
 * \param k The third index of the block
 * \param c1 The first dimension pooling ratio
 * \param c2 The second dimension pooling ratio
 * \param s1 The first dimension stride
 * \param s2 The second dimension stride
 * \return the max of the block
 */
template <typename A>
static auto pool_block_3d(const A& sub, size_t n, size_t j, size_t k, size_t c1, size_t c2, size_t s1, size_t s2) {
    const size_t first_row = j * s1;
    const size_t first_col = k * s2;

    auto result = sub(n, first_row, first_col);

    for (size_t r = 0; r < c1; ++r) {
        const size_t row = first_row + r;

        for (size_t c = 0; c < c2; ++c) {
            result = std::max(result, sub(n, row, first_col + c));
        }
    }

    return result;
}
274 
/*!
 * \brief Pool a block of the sub expression
 *
 * The stride is equal to the pooling ratio: the block lives in the 2D
 * slice n of sub, starts at (j * c1, k * c2) and covers c1 x c2 elements.
 * No bounds checking is performed.
 *
 * \param sub The sub expression
 * \param n The first index of the block
 * \param j The second index of the block
 * \param k The third index of the block
 * \param c1 The first dimension pooling ratio
 * \param c2 The second dimension pooling ratio
 * \return the max of the block
 */
template <typename A>
static auto pool_block_3d(const A& sub, size_t n, size_t j, size_t k, size_t c1, size_t c2) {
    const size_t first_row = j * c1;
    const size_t first_col = k * c2;

    auto result = sub(n, first_row, first_col);

    for (size_t r = 0; r < c1; ++r) {
        const size_t row = first_row + r;

        for (size_t c = 0; c < c2; ++c) {
            result = std::max(result, sub(n, row, first_col + c));
        }
    }

    return result;
}
298 
/*!
 * \brief Pool a 2x2 block of the sub expression
 *
 * The block lives in the 2D slice n of sub and starts at (2 * j, 2 * k).
 * No bounds checking is performed.
 *
 * \param sub The sub expression
 * \param n The first index of the block
 * \param j The second index of the block
 * \param k The third index of the block
 * \return the max of the block
 */
template <typename A>
static auto pool_block_3d_2x2(const A& sub, size_t n, size_t j, size_t k) {
    const size_t top  = j * 2;
    const size_t left = k * 2;

    // Reduce each row of the block, then reduce the two rows
    auto upper = std::max(sub(n, top, left), sub(n, top, left + 1));
    auto lower = std::max(sub(n, top + 1, left), sub(n, top + 1, left + 1));

    return std::max(upper, lower);
}
314 
/*!
 * \brief Pool a block of the sub expression
 *
 * The block lives in the 2D slice (m, n) of sub, starts at
 * (j * s1, k * s2) and covers c1 x c2 elements. No bounds checking is
 * performed.
 *
 * \param sub The sub expression
 * \param m The first index of the block
 * \param n The second index of the block
 * \param j The third index of the block
 * \param k The fourth index of the block
 * \param c1 The first dimension pooling ratio
 * \param c2 The second dimension pooling ratio
 * \param s1 The first dimension stride
 * \param s2 The second dimension stride
 * \return the max of the block
 */
template <typename A>
static auto pool_block_4d(const A& sub, size_t m, size_t n, size_t j, size_t k, size_t c1, size_t c2, size_t s1, size_t s2) {
    const size_t first_row = j * s1;
    const size_t first_col = k * s2;

    auto result = sub(m, n, first_row, first_col);

    for (size_t r = 0; r < c1; ++r) {
        const size_t row = first_row + r;

        for (size_t c = 0; c < c2; ++c) {
            result = std::max(result, sub(m, n, row, first_col + c));
        }
    }

    return result;
}
338 
/*!
 * \brief Pool a block of the sub expression
 *
 * The stride is equal to the pooling ratio: the block lives in the 2D
 * slice (m, n) of sub, starts at (j * c1, k * c2) and covers c1 x c2
 * elements. No bounds checking is performed.
 *
 * \param sub The sub expression
 * \param m The first index of the block
 * \param n The second index of the block
 * \param j The third index of the block
 * \param k The fourth index of the block
 * \param c1 The first dimension pooling ratio
 * \param c2 The second dimension pooling ratio
 * \return the max of the block
 */
template <typename A>
static auto pool_block_4d(const A& sub, size_t m, size_t n, size_t j, size_t k, size_t c1, size_t c2) {
    // The origin of the block depends on the pooling ratios, it must
    // not be hard-coded for 2x2 pooling
    const size_t first_row = j * c1;
    const size_t first_col = k * c2;

    auto result = sub(m, n, first_row, first_col);

    for (size_t r = 0; r < c1; ++r) {
        const size_t row = first_row + r;

        for (size_t c = 0; c < c2; ++c) {
            result = std::max(result, sub(m, n, row, first_col + c));
        }
    }

    return result;
}
362 
/*!
 * \brief Pool a 2x2 block of the sub expression
 *
 * The block lives in the 2D slice (m, n) of sub and starts at
 * (2 * j, 2 * k). No bounds checking is performed.
 *
 * \param sub The sub expression
 * \param m The first index of the block
 * \param n The second index of the block
 * \param j The third index of the block
 * \param k The fourth index of the block
 * \return the max of the block
 */
template <typename A>
static auto pool_block_4d_2x2(const A& sub, size_t m, size_t n, size_t j, size_t k) {
    const size_t top  = j * 2;
    const size_t left = k * 2;

    // Reduce each row of the block, then reduce the two rows
    auto upper = std::max(sub(m, n, top, left), sub(m, n, top, left + 1));
    auto lower = std::max(sub(m, n, top + 1, left), sub(m, n, top + 1, left + 1));

    return std::max(upper, lower);
}
378 
386  template <etl_2d A, typename M>
387  static void apply(const A& sub, M&& m, size_t c1, size_t c2, size_t s1, size_t s2, size_t p1, size_t p2) {
388  if (!p1 && !p2 && s1 == c1 && s2 == c2){
389  if (c1 == 2 && c2 == 2) {
390  for (size_t j = p1; j < etl::dim<0>(m); ++j) {
391  for (size_t k = p2; k < etl::dim<1>(m); ++k) {
392  m(j, k) = pool_block_2d_2x2(sub, j, k);
393  }
394  }
395  } else {
396  for (size_t j = p1; j < etl::dim<0>(m); ++j) {
397  for (size_t k = p2; k < etl::dim<1>(m); ++k) {
398  m(j, k) = pool_block_2d(sub, j, k, c1, c2);
399  }
400  }
401  }
402 
403  } else {
404  const size_t o1 = (etl::dim<0>(sub) - c1 + 2 * p1) / s1 + 1;
405  const size_t o2 = (etl::dim<1>(sub) - c2 + 2 * p2) / s2 + 1;
406 
407  if (p1 || p2) {
408  for (size_t i = 0; i < p1; ++i) {
409  for (size_t j = 0; j < o2; ++j) {
410  m(i, j) = pool_block_border(sub, i, j, c1, c2, s1, s2, p1, p2);
411  }
412  }
413 
414  for (size_t i = o1 - p1; i < o1; ++i) {
415  for (size_t j = 0; j < o2; ++j) {
416  m(i, j) = pool_block_border(sub, i, j, c1, c2, s1, s2, p1, p2);
417  }
418  }
419 
420  for (size_t j = 0; j < p2; ++j) {
421  for (size_t i = p1; i < o1 - p1; ++i) {
422  m(i, j) = pool_block_border(sub, i, j, c1, c2, s1, s2, p1, p2);
423  }
424  }
425 
426  for (size_t j = o2 - p2; j < o2; ++j) {
427  for (size_t i = p1; i < o1 - p1; ++i) {
428  m(i, j) = pool_block_border(sub, i, j, c1, c2, s1, s2, p1, p2);
429  }
430  }
431  }
432 
433  for (size_t j = p1; j < o1 - p1; ++j) {
434  for (size_t k = p2; k < o2 - p2; ++k) {
435  m(j, k) = pool_block_2d(sub, j, k, c1, c2, s1, s2, p1, p2);
436  }
437  }
438  }
439  }
440 
441  /*
442  * 3D handling
443  *
444  * This is especially optimized because this is the most common
445  * case in machine learning. Moreover, this is also easy to
446  * parallelize and optimize
447  */
448 
456  template <size_t C1, size_t C2, size_t S1, size_t S2, size_t P1, size_t P2, etl_3d A, typename M>
457  static void apply(const A& sub, M&& m) {
458  const size_t N = etl::dim<0>(m);
459 
460  if (!P1 && !P2 && S1 == C1 && S2 == C2){
461  if (C1 == 2 && C2 == 2) {
462  auto batch_fun_n = [&](const size_t first, const size_t last) {
463  for (size_t n = first; n < last; ++n) {
464  for (size_t j = 0; j < etl::dim<1>(m); ++j) {
465  for (size_t k = 0; k < etl::dim<2>(m); ++k) {
466  m(n, j, k) = pool_block_3d_2x2(sub, n, j, k);
467  }
468  }
469  }
470  };
471 
472  engine_dispatch_1d_serial(batch_fun_n, 0, N, 2UL);
473  } else {
474  auto batch_fun_n = [&](const size_t first, const size_t last) {
475  for (size_t n = first; n < last; ++n) {
476  for (size_t j = 0; j < etl::dim<1>(m); ++j) {
477  for (size_t k = 0; k < etl::dim<2>(m); ++k) {
478  m(n, j, k) = pool_block_3d<C1, C2, C1, C2>(sub, n, j, k);
479  }
480  }
481  }
482  };
483 
484  engine_dispatch_1d_serial(batch_fun_n, 0, N, 2UL);
485  }
486  } else {
487  auto batch_fun_n = [&](const size_t first, const size_t last) {
488  if (last - first) {
489  if (cpp_likely(!P1 && !P2)) {
490  for (size_t n = first; n < last; ++n) {
491  for (size_t j = 0; j < etl::dim<1>(m); ++j) {
492  for (size_t k = 0; k < etl::dim<2>(m); ++k) {
493  m(n, j, k) = pool_block_3d<C1, C2, S1, S2>(sub, n, j, k);
494  }
495  }
496  }
497  } else {
498  // In the general case, we use the regular algorithm
499  for (size_t n = first; n < last; ++n) {
500  apply<C1, C2, S1, S2, P1, P2>(sub(n), m(n));
501  }
502  }
503  }
504  };
505 
506  engine_dispatch_1d_serial(batch_fun_n, 0, N, 2UL);
507  }
508  }
509 
517  template <etl_3d A, typename M>
518  static void apply(const A& sub, M&& m, size_t c1, size_t c2, size_t s1, size_t s2, size_t p1, size_t p2) {
519  const size_t N = etl::dim<0>(m);
520 
521  if (!p1 && !p2 && s1 == c1 && s2 == c2){
522  if (c1 == 2 && c2 == 2) {
523  auto batch_fun_n = [&](const size_t first, const size_t last) {
524  if (last - first) {
525  for (size_t n = first; n < last; ++n) {
526  for (size_t j = 0; j < etl::dim<1>(m); ++j) {
527  for (size_t k = 0; k < etl::dim<2>(m); ++k) {
528  m(n, j, k) = pool_block_3d_2x2(sub, n, j, k);
529  }
530  }
531  }
532  }
533  };
534 
535  engine_dispatch_1d_serial(batch_fun_n, 0, N, 2UL);
536  } else {
537  auto batch_fun_n = [&](const size_t first, const size_t last) {
538  if (last - first) {
539  for (size_t n = first; n < last; ++n) {
540  for (size_t j = 0; j < etl::dim<1>(m); ++j) {
541  for (size_t k = 0; k < etl::dim<2>(m); ++k) {
542  m(n, j, k) = pool_block_3d(sub, n, j, k, c1, c2);
543  }
544  }
545  }
546  }
547  };
548 
549  engine_dispatch_1d_serial(batch_fun_n, 0, N, 2UL);
550  }
551  } else {
552  // The general 2D kernel is ismply to call 2D general kernel
553 
554  auto batch_fun_n = [&](const size_t first, const size_t last) {
555  for (size_t n = first; n < last; ++n) {
556  apply(sub(n), m(n), c1, c2, s1, s2, p1, p2);
557  }
558  };
559 
560  engine_dispatch_1d_serial(batch_fun_n, 0, N, 2UL);
561  }
562  }
563 
564  /*
565  * 4D handling
566  *
567  * This is especially optimized because this is the most common
568  * case in machine learning. Moreover, this is also easy to
569  * parallelize and optimize
570  */
571 
579  template <size_t C1, size_t C2, size_t S1, size_t S2, size_t P1, size_t P2, etl_4d A, typename M>
580  static void apply(const A& sub, M&& m) {
581  const size_t N = etl::dim<0>(m);
582 
583  if (!P1 && !P2 && S1 == C1 && S2 == C2){
584  if (C1 == 2 && C2 == 2) {
585  auto batch_fun_n = [&](const size_t first, const size_t last) {
586  for (size_t mm = first; mm < last; ++mm) {
587  for (size_t n = 0; n < etl::dim<1>(m); ++n) {
588  for (size_t j = 0; j < etl::dim<2>(m); ++j) {
589  for (size_t k = 0; k < etl::dim<3>(m); ++k) {
590  m(mm, n, j, k) = pool_block_4d_2x2(sub, mm, n, j, k);
591  }
592  }
593  }
594  }
595  };
596 
597  engine_dispatch_1d_serial(batch_fun_n, 0, N, 2UL);
598  } else {
599  auto batch_fun_n = [&](const size_t first, const size_t last) {
600  for (size_t mm = first; mm < last; ++mm) {
601  for (size_t n = 0; n < etl::dim<1>(m); ++n) {
602  for (size_t j = 0; j < etl::dim<2>(m); ++j) {
603  for (size_t k = 0; k < etl::dim<3>(m); ++k) {
604  m(mm, n, j, k) = pool_block_4d<C1, C2, C1, C2>(sub, mm, n, j, k);
605  }
606  }
607  }
608  }
609  };
610 
611  engine_dispatch_1d_serial(batch_fun_n, 0, N, 2UL);
612  }
613  } else {
614  auto batch_fun_n = [&](const size_t first, const size_t last) {
615  if (cpp_likely(!P1 && !P2)) {
616  for (size_t mm = first; mm < last; ++mm) {
617  for (size_t n = 0; n < etl::dim<1>(m); ++n) {
618  for (size_t j = 0; j < etl::dim<2>(m); ++j) {
619  for (size_t k = 0; k < etl::dim<3>(m); ++k) {
620  m(mm, n, j, k) = pool_block_4d<C1, C2, S1, S2>(sub, mm, n, j, k);
621  }
622  }
623  }
624  }
625  } else {
626  for (size_t mm = first; mm < last; ++mm) {
627  for (size_t n = 0; n < etl::dim<1>(m); ++n) {
628  apply<C1, C2, S1, S2, P1, P2>(sub(mm)(n), m(mm)(n));
629  }
630  }
631  }
632  };
633 
634  engine_dispatch_1d_serial(batch_fun_n, 0, N, 2UL);
635  }
636  }
637 
645  template <etl_4d A, typename M>
646  static void apply(const A& sub, M&& m, size_t c1, size_t c2, size_t s1, size_t s2, size_t p1, size_t p2) {
647  const size_t N = etl::dim<0>(m);
648 
649  if (!p1 && !p2 && s1 == c1 && s2 == c2){
650  if (c1 == 2 && c2 == 2) {
651  auto batch_fun_n = [&](const size_t first, const size_t last) {
652  if (last - first) {
653  for (size_t mm = first; mm < last; ++mm) {
654  for (size_t n = 0; n < etl::dim<1>(m); ++n) {
655  for (size_t j = 0; j < etl::dim<2>(m); ++j) {
656  for (size_t k = 0; k < etl::dim<3>(m); ++k) {
657  m(mm, n, j, k) = pool_block_4d_2x2(sub, mm, n, j, k);
658  }
659  }
660  }
661  }
662  }
663  };
664 
665  engine_dispatch_1d_serial(batch_fun_n, 0, N, 2UL);
666  } else {
667  auto batch_fun_n = [&](const size_t first, const size_t last) {
668  if (last - first) {
669  for (size_t mm = first; mm < last; ++mm) {
670  for (size_t n = 0; n < etl::dim<1>(m); ++n) {
671  for (size_t j = 0; j < etl::dim<2>(m); ++j) {
672  for (size_t k = 0; k < etl::dim<3>(m); ++k) {
673  m(mm, n, j, k) = pool_block_4d(sub, mm, n, j, k, c1, c2);
674  }
675  }
676  }
677  }
678  }
679  };
680 
681  engine_dispatch_1d_serial(batch_fun_n, 0, N, 2UL);
682  }
683  } else {
684  auto batch_fun_n = [&](const size_t first, const size_t last) {
685  if (last - first) {
686  for (size_t mm = first; mm < last; ++mm) {
687  for (size_t n = 0; n < etl::dim<1>(m); ++n) {
688  apply(sub(mm)(n), m(mm)(n), c1, c2, s1, s2, p1, p2);
689  }
690  }
691  }
692  };
693 
694  engine_dispatch_1d_serial(batch_fun_n, 0, N, 2UL);
695  }
696  }
697 
698  // Deep handling
699 
709  template <size_t C1, size_t C2, size_t S1, size_t S2, size_t P1, size_t P2, etl_5d_and_plus A, typename M>
710  static void apply(const A& sub, M&& m) {
711  for (size_t i = 0; i < etl::dim<0>(sub); ++i) {
712  apply<C1, C2, S1, S2, P1, P2>(sub(i), m(i));
713  }
714  }
715 
723  template <etl_5d_and_plus A, typename M>
724  static void apply(const A& sub, M&& m, size_t c1, size_t c2, size_t s1, size_t s2, size_t p1, size_t p2) {
725  for (size_t i = 0; i < etl::dim<0>(sub); ++i) {
726  apply(sub(i), m(i), c1, c2, s1, s2, p1, p2);
727  }
728  }
729 };
730 
734 struct max_pool_3d {
751  template <typename A>
752  static auto pool_block_border(
753  const A& sub, size_t i, size_t j, size_t k, size_t c1, size_t c2, size_t c3, size_t s1, size_t s2, size_t s3, size_t p1, size_t p2, size_t p3) {
754  auto max = value_t<A>(0);
755 
756  const auto s_i = i * s1;
757  const auto s_j = j * s2;
758  const auto s_k = k * s3;
759 
760  for (size_t ii = 0; ii < c1; ++ii) {
761  for (size_t jj = 0; jj < c2; ++jj) {
762  for (size_t kk = 0; kk < c3; ++kk) {
763  if (s_i + ii >= p1 && (s_i + ii) - p1 < etl::dim<0>(sub) && s_j + jj >= p2 && (s_j + jj) - p2 < etl::dim<1>(sub) && s_k + kk >= p3
764  && (s_k + kk) - p3 < etl::dim<2>(sub)) {
765  max = std::max(max, sub(s_i + ii - p1, s_j + jj - p2, s_k + kk - p3));
766  }
767  }
768  }
769  }
770 
771  return max;
772  }
773 
/*!
 * \brief Pool a block of the sub expression
 *
 * The block starts at (i * S1 - P1, j * S2 - P2, k * S3 - P3) and covers
 * C1 x C2 x C3 elements. No bounds checking is performed.
 *
 * \tparam C1 The first dimension pooling ratio
 * \tparam C2 The second dimension pooling ratio
 * \tparam C3 The third dimension pooling ratio
 * \tparam S1 The first dimension stride
 * \tparam S2 The second dimension stride
 * \tparam S3 The third dimension stride
 * \tparam P1 The first dimension padding
 * \tparam P2 The second dimension padding
 * \tparam P3 The third dimension padding
 * \param sub The sub expression
 * \param i The first index of the block
 * \param j The second index of the block
 * \param k The third index of the block
 * \return the max of the block
 */
template <size_t C1, size_t C2, size_t C3, size_t S1, size_t S2, size_t S3, size_t P1, size_t P2, size_t P3, typename A>
static auto pool_block_3d(const A& sub, size_t i, size_t j, size_t k) {
    const size_t base_i = i * S1 - P1;
    const size_t base_j = j * S2 - P2;
    const size_t base_k = k * S3 - P3;

    auto best = sub(base_i, base_j, base_k);

    for (size_t di = 0; di < C1; ++di) {
        const size_t pi = base_i + di;

        for (size_t dj = 0; dj < C2; ++dj) {
            const size_t pj = base_j + dj;

            for (size_t dk = 0; dk < C3; ++dk) {
                best = std::max(best, sub(pi, pj, base_k + dk));
            }
        }
    }

    return best;
}
/*!
 * \brief Pool a block of the sub expression
 *
 * The block lives in the 3D slice n of sub, starts at
 * (i * S1, j * S2, k * S3) and covers C1 x C2 x C3 elements. No bounds
 * checking is performed.
 *
 * \tparam C1 The first dimension pooling ratio
 * \tparam C2 The second dimension pooling ratio
 * \tparam C3 The third dimension pooling ratio
 * \tparam S1 The first dimension stride
 * \tparam S2 The second dimension stride
 * \tparam S3 The third dimension stride
 * \param sub The sub expression
 * \param n The first index of the block
 * \param i The second index of the block
 * \param j The third index of the block
 * \param k The fourth index of the block
 * \return the max of the block
 */
template <size_t C1, size_t C2, size_t C3, size_t S1, size_t S2, size_t S3, typename A>
static auto pool_block_4d(const A& sub, size_t n, size_t i, size_t j, size_t k) {
    const size_t base_i = i * S1;
    const size_t base_j = j * S2;
    const size_t base_k = k * S3;

    auto best = sub(n, base_i, base_j, base_k);

    for (size_t di = 0; di < C1; ++di) {
        const size_t pi = base_i + di;

        for (size_t dj = 0; dj < C2; ++dj) {
            const size_t pj = base_j + dj;

            for (size_t dk = 0; dk < C3; ++dk) {
                best = std::max(best, sub(n, pi, pj, base_k + dk));
            }
        }
    }

    return best;
}
834 
843  template <size_t C1,
844  size_t C2,
845  size_t C3,
846  size_t S1,
847  size_t S2,
848  size_t S3,
849  size_t P1,
850  size_t P2,
851  size_t P3,
852  etl_3d A,
853  typename M
854  >
855  static void apply(const A& sub, M&& m) {
856  const size_t o1 = (etl::dim<0>(sub) - C1 + 2 * P1) / S1 + 1;
857  const size_t o2 = (etl::dim<1>(sub) - C2 + 2 * P2) / S2 + 1;
858  const size_t o3 = (etl::dim<2>(sub) - C3 + 2 * P3) / S3 + 1;
859 
860  if (P1 || P2 || P3) {
861  for (size_t i = 0; i < P1; ++i) {
862  for (size_t j = 0; j < o2; ++j) {
863  for (size_t k = 0; k < o3; ++k) {
864  m(i, j, k) = pool_block_border(sub, i, j, k, C1, C2, C3, S1, S2, S3, P1, P2, P3);
865  }
866  }
867  }
868 
869  for (size_t i = o1 - P1; i < o1; ++i) {
870  for (size_t j = 0; j < o2; ++j) {
871  for (size_t k = 0; k < o3; ++k) {
872  m(i, j, k) = pool_block_border(sub, i, j, k, C1, C2, C3, S1, S2, S3, P1, P2, P3);
873  }
874  }
875  }
876 
877  for (size_t j = 0; j < P2; ++j) {
878  for (size_t i = P1; i < o1 - P1; ++i) {
879  for (size_t k = 0; k < o3; ++k) {
880  m(i, j, k) = pool_block_border(sub, i, j, k, C1, C2, C3, S1, S2, S3, P1, P2, P3);
881  }
882  }
883  }
884 
885  for (size_t j = o2 - P2; j < o2; ++j) {
886  for (size_t i = P1; i < o1 - P1; ++i) {
887  for (size_t k = 0; k < o3; ++k) {
888  m(i, j, k) = pool_block_border(sub, i, j, k, C1, C2, C3, S1, S2, S3, P1, P2, P3);
889  }
890  }
891  }
892 
893  for (size_t k = 0; k < P3; ++k) {
894  for (size_t i = P1; i < o1 - P1; ++i) {
895  for (size_t j = P2; j < o2 - P2; ++j) {
896  m(i, j, k) = pool_block_border(sub, i, j, k, C1, C2, C3, S1, S2, S3, P1, P2, P3);
897  }
898  }
899  }
900 
901  for (size_t k = o3 - P3; k < o3; ++k) {
902  for (size_t i = P1; i < o1 - P1; ++i) {
903  for (size_t j = P2; j < o2 - P2; ++j) {
904  m(i, j, k) = pool_block_border(sub, i, j, k, C1, C2, C3, S1, S2, S3, P1, P2, P3);
905  }
906  }
907  }
908  }
909 
910  for (size_t i = P1; i < o1 - P1; ++i) {
911  for (size_t j = P2; j < o2 - P2; ++j) {
912  for (size_t k = P3; k < o3 - P3; ++k) {
913  m(i, j, k) = pool_block_3d<C1, C2, C3, S1, S2, S3, P1, P2, P3>(sub, i, j, k);
914  }
915  }
916  }
917  }
918 
/*!
 * \brief Pool a block of the sub expression
 *
 * The block starts at (i * s1 - p1, j * s2 - p2, k * s3 - p3) and covers
 * c1 x c2 x c3 elements. No bounds checking is performed.
 *
 * \param sub The sub expression
 * \param i The first index of the block
 * \param j The second index of the block
 * \param k The third index of the block
 * \param c1 The first dimension pooling ratio
 * \param c2 The second dimension pooling ratio
 * \param c3 The third dimension pooling ratio
 * \param s1 The first dimension stride
 * \param s2 The second dimension stride
 * \param s3 The third dimension stride
 * \param p1 The first dimension padding
 * \param p2 The second dimension padding
 * \param p3 The third dimension padding
 * \return the max of the block
 */
template <typename A>
static auto pool_block_3d(
    const A& sub, size_t i, size_t j, size_t k, size_t c1, size_t c2, size_t c3, size_t s1, size_t s2, size_t s3, size_t p1, size_t p2, size_t p3) {
    // Origin of the block inside sub
    const size_t base_i = i * s1 - p1;
    const size_t base_j = j * s2 - p2;
    const size_t base_k = k * s3 - p3;

    auto best = sub(base_i, base_j, base_k);

    for (size_t di = 0; di < c1; ++di) {
        const size_t pi = base_i + di;

        for (size_t dj = 0; dj < c2; ++dj) {
            const size_t pj = base_j + dj;

            for (size_t dk = 0; dk < c3; ++dk) {
                best = std::max(best, sub(pi, pj, base_k + dk));
            }
        }
    }

    return best;
}
944 
/*!
 * \brief Pool a block of the sub expression
 *
 * The block lives in the 3D slice n of sub, starts at
 * (i * s1, j * s2, k * s3) and covers c1 x c2 x c3 elements. No bounds
 * checking is performed.
 *
 * \param sub The sub expression
 * \param n The first index of the block
 * \param i The second index of the block
 * \param j The third index of the block
 * \param k The fourth index of the block
 * \param c1 The first dimension pooling ratio
 * \param c2 The second dimension pooling ratio
 * \param c3 The third dimension pooling ratio
 * \param s1 The first dimension stride
 * \param s2 The second dimension stride
 * \param s3 The third dimension stride
 * \return the max of the block
 */
template <typename A>
static auto pool_block_4d(const A& sub, size_t n, size_t i, size_t j, size_t k, size_t c1, size_t c2, size_t c3, size_t s1, size_t s2, size_t s3) {
    // Origin of the block inside the slice
    const size_t base_i = i * s1;
    const size_t base_j = j * s2;
    const size_t base_k = k * s3;

    auto best = sub(n, base_i, base_j, base_k);

    for (size_t di = 0; di < c1; ++di) {
        const size_t pi = base_i + di;

        for (size_t dj = 0; dj < c2; ++dj) {
            const size_t pj = base_j + dj;

            for (size_t dk = 0; dk < c3; ++dk) {
                best = std::max(best, sub(n, pi, pj, base_k + dk));
            }
        }
    }

    return best;
}
973 
982  template <etl_3d A, typename M>
983  static void apply(const A& sub, M&& m, size_t c1, size_t c2, size_t c3, size_t s1, size_t s2, size_t s3, size_t p1, size_t p2, size_t p3) {
984  const size_t o1 = (etl::dim<0>(sub) - c1 + 2 * p1) / s1 + 1;
985  const size_t o2 = (etl::dim<1>(sub) - c2 + 2 * p2) / s2 + 1;
986  const size_t o3 = (etl::dim<2>(sub) - c3 + 2 * p3) / s3 + 1;
987 
988  if (p1 || p2 || p3) {
989  for (size_t i = 0; i < p1; ++i) {
990  for (size_t j = 0; j < o2; ++j) {
991  for (size_t k = 0; k < o3; ++k) {
992  m(i, j, k) = pool_block_border(sub, i, j, k, c1, c2, c3, s1, s2, s3, p1, p2, p3);
993  }
994  }
995  }
996 
997  for (size_t i = o1 - p1; i < o1; ++i) {
998  for (size_t j = 0; j < o2; ++j) {
999  for (size_t k = 0; k < o3; ++k) {
1000  m(i, j, k) = pool_block_border(sub, i, j, k, c1, c2, c3, s1, s2, s3, p1, p2, p3);
1001  }
1002  }
1003  }
1004 
1005  for (size_t j = 0; j < p2; ++j) {
1006  for (size_t i = p1; i < o1 - p1; ++i) {
1007  for (size_t k = 0; k < o3; ++k) {
1008  m(i, j, k) = pool_block_border(sub, i, j, k, c1, c2, c3, s1, s2, s3, p1, p2, p3);
1009  }
1010  }
1011  }
1012 
1013  for (size_t j = o2 - p2; j < o2; ++j) {
1014  for (size_t i = p1; i < o1 - p1; ++i) {
1015  for (size_t k = 0; k < o3; ++k) {
1016  m(i, j, k) = pool_block_border(sub, i, j, k, c1, c2, c3, s1, s2, s3, p1, p2, p3);
1017  }
1018  }
1019  }
1020 
1021  for (size_t k = 0; k < p3; ++k) {
1022  for (size_t i = p1; i < o1 - p1; ++i) {
1023  for (size_t j = p2; j < o2 - p2; ++j) {
1024  m(i, j, k) = pool_block_border(sub, i, j, k, c1, c2, c3, s1, s2, s3, p1, p2, p3);
1025  }
1026  }
1027  }
1028 
1029  for (size_t k = o3 - p3; k < o3; ++k) {
1030  for (size_t i = p1; i < o1 - p1; ++i) {
1031  for (size_t j = p2; j < o2 - p2; ++j) {
1032  m(i, j, k) = pool_block_border(sub, i, j, k, c1, c2, c3, s1, s2, s3, p1, p2, p3);
1033  }
1034  }
1035  }
1036  }
1037 
1038  for (size_t i = p1; i < o1 - p1; ++i) {
1039  for (size_t j = p2; j < o2 - p2; ++j) {
1040  for (size_t k = p3; k < o3 - p3; ++k) {
1041  m(i, j, k) = pool_block_3d(sub, i, j, k, c1, c2, c3, s1, s2, s3, p1, p2, p3);
1042  }
1043  }
1044  }
1045  }
1046 
1047  /*
1048  * 4D handling
1049  *
1050  * This is especially optimized because this is the most common
1051  * case in machine learning. Moreover, this is also easy to
1052  * parallelize and optimize
1053  */
1054 
1063  template <size_t C1,
1064  size_t C2,
1065  size_t C3,
1066  size_t S1,
1067  size_t S2,
1068  size_t S3,
1069  size_t P1,
1070  size_t P2,
1071  size_t P3,
1072  etl_4d A,
1073  typename M
1074  >
1075  static void apply(const A& sub, M&& m) {
1076  auto batch_fun_n = [&](const size_t first, const size_t last) {
1077  if (last - first) {
1078  if (cpp_likely(!P1 && !P2 && !P3)) {
1079  for (size_t n = first; n < last; ++n) {
1080  for (size_t i = 0; i < etl::dim<1>(m); ++i) {
1081  for (size_t j = 0; j < etl::dim<2>(m); ++j) {
1082  for (size_t k = 0; k < etl::dim<3>(m); ++k) {
1083  m(n, i, j, k) = pool_block_4d<C1, C2, C3, S1, S2, S3>(sub, n, i, j, k);
1084  }
1085  }
1086  }
1087  }
1088  } else {
1089  // In the general case, we use the regular algorithm
1090  for (size_t n = first; n < last; ++n) {
1091  apply<C1, C2, C3, S1, S2, S3, P1, P2, P3>(sub(n), m(n));
1092  }
1093  }
1094  }
1095  };
1096 
1097  const size_t N = etl::dim<0>(m);
1098 
1099  engine_dispatch_1d_serial(batch_fun_n, 0, N, 2UL);
1100  }
1101 
1110  template <etl_4d A, typename M>
1111  static void apply(const A& sub, M&& m, size_t c1, size_t c2, size_t c3, size_t s1, size_t s2, size_t s3, size_t p1, size_t p2, size_t p3) {
1112  auto batch_fun_n = [&](const size_t first, const size_t last) {
1113  if (last - first) {
1114  if (cpp_likely(!p1 && !p2 && !p3)) {
1115  for (size_t n = first; n < last; ++n) {
1116  for (size_t i = 0; i < etl::dim<1>(m); ++i) {
1117  for (size_t j = 0; j < etl::dim<2>(m); ++j) {
1118  for (size_t k = 0; k < etl::dim<3>(m); ++k) {
1119  m(n, i, j, k) = pool_block_4d(sub, n, i, j, k, c1, c2, c3, s1, s2, s3);
1120  }
1121  }
1122  }
1123  }
1124  } else {
1125  for (size_t n = first; n < last; ++n) {
1126  apply(sub(n), m(n), c1, c2, c3, s1, s2, s3, p1, p2, p3);
1127  }
1128  }
1129  }
1130  };
1131 
1132  const size_t N = etl::dim<0>(m);
1133 
1134  engine_dispatch_1d_serial(batch_fun_n, 0, N, 2UL);
1135  }
1136 
1137  // Deep handling
1138 
1147  template <size_t C1,
1148  size_t C2,
1149  size_t C3,
1150  size_t S1,
1151  size_t S2,
1152  size_t S3,
1153  size_t P1,
1154  size_t P2,
1155  size_t P3,
1156  etl_5d_and_plus A,
1157  typename M>
1158  static void apply(const A& sub, M&& m) {
1159  for (size_t i = 0; i < etl::dim<0>(sub); ++i) {
1160  apply<C1, C2, C3, S1, S2, S3, P1, P2, P3>(sub(i), m(i));
1161  }
1162  }
1163 
1172  template <etl_5d_and_plus A, typename M>
1173  static void apply(const A& sub, M&& m, size_t c1, size_t c2, size_t c3, size_t s1, size_t s2, size_t s3, size_t p1, size_t p2, size_t p3) {
1174  for (size_t i = 0; i < etl::dim<0>(sub); ++i) {
1175  apply(sub(i), m(i), c1, c2, c3, s1, s2, s3, p1, p2, p3);
1176  }
1177  }
1178 };
1179 
1180 } //end of namespace etl::impl::standard
auto max(L &&lhs, R &&rhs)
Create an expression with the max value of lhs or rhs.
Definition: expression_builder.hpp:65
static auto pool_block_3d(const A &sub, size_t i, size_t j, size_t k)
Pool a block of the sub expression.
Definition: max_pooling.hpp:785
void engine_dispatch_1d_serial(Functor &&functor, size_t first, size_t last, size_t threshold, [[maybe_unused]] size_t n_threads=etl::threads)
Dispatch the elements of a range to a functor in a parallel manner, using the global thread engine...
Definition: parallel_support.hpp:734
static auto pool_block_2d_2x2(const A &sub, size_t j, size_t k)
Pool a block of the sub expression.
Definition: max_pooling.hpp:244
static auto pool_block_border(const A &sub, size_t i, size_t j, size_t k, size_t c1, size_t c2, size_t c3, size_t s1, size_t s2, size_t s3, size_t p1, size_t p2, size_t p3)
Pool a block of the sub expression.
Definition: max_pooling.hpp:752
static auto pool_block_4d(const A &sub, size_t n, size_t i, size_t j, size_t k, size_t c1, size_t c2, size_t c3, size_t s1, size_t s2, size_t s3)
Pool a block of the sub expression.
Definition: max_pooling.hpp:960
static void apply(const A &sub, M &&m, size_t c1, size_t c2, size_t s1, size_t s2, size_t p1, size_t p2)
Apply the functor on sub and store the result in m.
Definition: max_pooling.hpp:387
static auto pool_block_2d(const A &sub, size_t j, size_t k, size_t c1, size_t c2)
Pool a block of the sub expression.
Definition: max_pooling.hpp:220
static auto pool_block_2d(const A &sub, size_t j, size_t k)
Pool a block of the sub expression.
Definition: max_pooling.hpp:57
static auto pool_block_4d_2x2(const A &sub, size_t m, size_t n, size_t j, size_t k)
Pool a block of the sub expression.
Definition: max_pooling.hpp:372
static void apply(const A &sub, M &&m)
Apply the functor on sub and store the result in m.
Definition: max_pooling.hpp:855
Definition: prob_pooling.hpp:10
static auto pool_block_4d(const A &sub, size_t m, size_t n, size_t j, size_t k, size_t c1, size_t c2)
Pool a block of the sub expression.
Definition: max_pooling.hpp:348
Functor for 2D Max Pooling.
Definition: max_pooling.hpp:15
static auto pool_block_3d(const A &sub, size_t n, size_t j, size_t k, size_t c1, size_t c2, size_t s1, size_t s2)
Pool a block of the sub expression.
Definition: max_pooling.hpp:260
static auto pool_block_4d(const A &sub, size_t m, size_t n, size_t j, size_t k, size_t c1, size_t c2, size_t s1, size_t s2)
Pool a block of the sub expression.
Definition: max_pooling.hpp:324
static auto pool_block_3d(const A &sub, size_t n, size_t j, size_t k, size_t c1, size_t c2)
Pool a block of the sub expression.
Definition: max_pooling.hpp:284
static auto pool_block_3d(const A &sub, size_t i, size_t j, size_t k, size_t c1, size_t c2, size_t c3, size_t s1, size_t s2, size_t s3, size_t p1, size_t p2, size_t p3)
Pool a block of the sub expression.
Definition: max_pooling.hpp:930
static auto pool_block_3d(const A &sub, size_t n, size_t j, size_t k)
Pool a block of the sub expression.
Definition: max_pooling.hpp:83
static void apply(const A &sub, M &&m)
Apply the functor on sub and store the result in m.
Definition: max_pooling.hpp:134
static auto pool_block_4d(const A &sub, size_t m, size_t n, size_t j, size_t k)
Pool a block of the sub expression.
Definition: max_pooling.hpp:109
static auto pool_block_2d(const A &sub, size_t j, size_t k, size_t c1, size_t c2, size_t s1, size_t s2, size_t p1, size_t p2)
Pool a block of the sub expression.
Definition: max_pooling.hpp:196
static void apply(const A &sub, M &&m, size_t c1, size_t c2, size_t c3, size_t s1, size_t s2, size_t s3, size_t p1, size_t p2, size_t p3)
Apply the functor on sub and store the result in m.
Definition: max_pooling.hpp:983
static auto pool_block_border(const A &sub, size_t j, size_t k, size_t c1, size_t c2, size_t s1, size_t s2, size_t p1, size_t p2)
Pool a block of the sub expression around the border (with padding)
Definition: max_pooling.hpp:29
typename decay_traits< E >::value_type value_t
Traits to extract the value type out of an ETL type.
Definition: tmp.hpp:81
static auto pool_block_4d(const A &sub, size_t n, size_t i, size_t j, size_t k)
Pool a block of the sub expression.
Definition: max_pooling.hpp:817
static auto pool_block_3d_2x2(const A &sub, size_t n, size_t j, size_t k)
Pool a block of the sub expression.
Definition: max_pooling.hpp:308
Functor for 3D Max Pooling.
Definition: max_pooling.hpp:734