wichtounet/etl/bias__batch__mean__4d__expr_8hpp_source.html

 //=======================================================================
 // Copyright (c) 2014-2023 Baptiste Wicht
 // Distributed under the terms of the MIT License.
 // (See accompanying file LICENSE or copy at
 //  http://opensource.org/licenses/MIT)
 //=======================================================================

 #pragma once

 #include "etl/expr/base_temporary_expr.hpp"

 #include "etl/impl/cudnn/bias_batch_mean.hpp"
 #include "etl/impl/egblas/bias_batch_sum.hpp"

 namespace etl {

 /*!
  * \brief Temporary expression reducing a 4D expression to one value per
  * index of its second dimension.
  *
  * For every index k of dimension 1, the elements of a(b)(k) are summed over
  * all indices b of dimension 0. When Mean is true, the total is additionally
  * divided by size(a) / size(lhs); when Mean is false, the plain sum is used.
  *
  * \tparam A The type of the 4D sub expression
  * \tparam Mean true to compute the mean, false to compute the sum
  */
 template <etl_4d A, bool Mean>
 struct bias_batch_mean_4d_expr : base_temporary_expr_un<bias_batch_mean_4d_expr<A, Mean>, A> {
     using value_type = value_t<A>;                            ///< The type of value of the expression
     using this_type  = bias_batch_mean_4d_expr<A, Mean>;      ///< The type of this expression
     using base_type  = base_temporary_expr_un<this_type, A>;  ///< The base type
     using sub_traits = decay_traits<A>;                       ///< The traits of the sub expression

     static constexpr auto storage_order = sub_traits::storage_order; ///< The sub storage order

     /*!
      * \brief Indicates if the temporary expression can be directly evaluated
      * using only GPU.
      */
     static constexpr bool gpu_computable = (!Mean && cudnn_enabled && is_floating<A>)
             || (impl::egblas::has_sbias_batch_sum4 && all_row_major<A> && all_single_precision<A>)
             || (impl::egblas::has_dbias_batch_sum4 && all_row_major<A> && all_double_precision<A>);

     /*!
      * \brief Construct a new expression
      * \param a The sub expression
      */
     explicit bias_batch_mean_4d_expr(A a) : base_type(a) {
         //Nothing else to init
     }

     /*!
      * \brief Validate that the output has as many elements as the second
      * dimension of the input.
      *
      * The validation is a static_assert when both types have compile-time
      * dimensions and a cpp_assert otherwise.
      *
      * \param a The input 4D expression
      * \param c The output 1D expression
      */
     template <etl_1d C>
     static void check([[maybe_unused]] const A& a, [[maybe_unused]] const C& c) {
         if constexpr (all_fast<A, C>) {
             static_assert(etl::dim<1, A>() == etl::dim<0, C>(), "Invalid dimensions for bias_batch_mean_4d");
         } else {
             cpp_assert(etl::dim<1>(a) == etl::dim<0>(c), "Invalid dimensions for bias_batch_mean_4d");
         }
     }

     // Assignment functions

     /*!
      * \brief Assign the result of the reduction to the given left-hand-side expression
      *
      * The implementation is selected at compile-time: the egblas mean kernel,
      * the egblas sum kernel, the cuDNN implementation (sum only) or, when none
      * of them applies, a CPU loop.
      *
      * \param lhs The expression to which assign
      */
     template <etl_1d L>
     void assign_to(L&& lhs) const {
         inc_counter("temp:assign");

         auto& a = this->a();

         check(a, lhs);

         if constexpr (Mean && impl::egblas::has_sbias_batch_mean4 && all_row_major<A> && all_floating<A, L>) {
             const auto N = etl::dim<0>(a);
             const auto K = etl::dim<1>(a);
             const auto W = etl::dim<2>(a);
             const auto H = etl::dim<3>(a);

             decltype(auto) t1 = smart_forward_gpu(a);
             t1.ensure_gpu_up_to_date();

             lhs.ensure_gpu_allocated();

             impl::egblas::bias_batch_mean4(N, K, W, H, t1.gpu_memory(), lhs.gpu_memory());

             // The result was written to the GPU memory of lhs
             lhs.validate_gpu();
             lhs.invalidate_cpu();
         } else if constexpr (!Mean && impl::egblas::has_sbias_batch_sum4 && all_row_major<A> && all_floating<A, L>) {
             const auto N = etl::dim<0>(a);
             const auto K = etl::dim<1>(a);
             const auto W = etl::dim<2>(a);
             const auto H = etl::dim<3>(a);

             decltype(auto) t1 = smart_forward_gpu(a);
             t1.ensure_gpu_up_to_date();

             lhs.ensure_gpu_allocated();

             impl::egblas::bias_batch_sum4(N, K, W, H, t1.gpu_memory(), lhs.gpu_memory());

             // The result was written to the GPU memory of lhs
             lhs.validate_gpu();
             lhs.invalidate_cpu();
         } else if constexpr (!Mean && cudnn_enabled && all_floating<A, L>) {
             impl::cudnn::bias_batch_mean_4d(smart_forward_gpu(a), lhs);
         } else {
             standard_evaluator::pre_assign_rhs(a);

             a.ensure_cpu_up_to_date();

             reduce_on_cpu(lhs, [](auto&& dst, const auto& value) { dst = value; });
         }
     }

     /*!
      * \brief Add to the given left-hand-side expression
      *
      * When a GPU implementation of the reduction is available, the compound
      * operation is delegated to std_add_evaluate; otherwise it is performed
      * element by element on the CPU.
      *
      * \param lhs The expression to which assign
      */
     template <etl_1d L>
     void assign_add_to(L&& lhs) const {
         if constexpr (all_floating<A, L> && ((!Mean && cudnn_enabled) || (all_row_major<A, L> && impl::egblas::has_sbias_batch_sum4))) {
             std_add_evaluate(*this, lhs);
         } else {
             compound_on_cpu(lhs, [](auto&& dst, const auto& value) { dst += value; });
         }
     }

     /*!
      * \brief Sub from the given left-hand-side expression
      *
      * When a GPU implementation of the reduction is available, the compound
      * operation is delegated to std_sub_evaluate; otherwise it is performed
      * element by element on the CPU.
      *
      * \param lhs The expression to which assign
      */
     template <etl_1d L>
     void assign_sub_to(L&& lhs) const {
         if constexpr (all_floating<A, L> && ((!Mean && cudnn_enabled) || (all_row_major<A, L> && impl::egblas::has_sbias_batch_sum4))) {
             std_sub_evaluate(*this, lhs);
         } else {
             compound_on_cpu(lhs, [](auto&& dst, const auto& value) { dst -= value; });
         }
     }

     /*!
      * \brief Multiply the given left-hand-side expression
      *
      * When a GPU implementation of the reduction is available, the compound
      * operation is delegated to std_mul_evaluate; otherwise it is performed
      * element by element on the CPU.
      *
      * \param lhs The expression to which assign
      */
     template <etl_1d L>
     void assign_mul_to(L&& lhs) const {
         if constexpr (all_floating<A, L> && ((!Mean && cudnn_enabled) || (all_row_major<A, L> && impl::egblas::has_sbias_batch_sum4))) {
             std_mul_evaluate(*this, lhs);
         } else {
             compound_on_cpu(lhs, [](auto&& dst, const auto& value) { dst *= value; });
         }
     }

     /*!
      * \brief Divide the given left-hand-side expression
      *
      * When a GPU implementation of the reduction is available, the compound
      * operation is delegated to std_div_evaluate; otherwise it is performed
      * element by element on the CPU.
      *
      * \param lhs The expression to which assign
      */
     template <etl_1d L>
     void assign_div_to(L&& lhs) const {
         if constexpr (all_floating<A, L> && ((!Mean && cudnn_enabled) || (all_row_major<A, L> && impl::egblas::has_sbias_batch_sum4))) {
             std_div_evaluate(*this, lhs);
         } else {
             compound_on_cpu(lhs, [](auto&& dst, const auto& value) { dst /= value; });
         }
     }

     /*!
      * \brief Modulo the given left-hand-side expression
      *
      * When a GPU implementation of the reduction is available, the compound
      * operation is delegated to std_mod_evaluate; otherwise it is performed
      * element by element on the CPU.
      *
      * \param lhs The expression to which assign
      */
     template <etl_1d L>
     void assign_mod_to(L&& lhs) const {
         if constexpr (all_floating<A, L> && ((!Mean && cudnn_enabled) || (all_row_major<A, L> && impl::egblas::has_sbias_batch_sum4))) {
             std_mod_evaluate(*this, lhs);
         } else {
             compound_on_cpu(lhs, [](auto&& dst, const auto& value) { dst %= value; });
         }
     }

     /*!
      * \brief Print a representation of the expression on the given stream
      * \param os The output stream
      * \param expr The expression to print
      * \return the output stream
      */
     friend std::ostream& operator<<(std::ostream& os, const bias_batch_mean_4d_expr& expr) {
         // Mean is a template parameter, the name can be selected at compile-time
         if constexpr (Mean) {
             return os << "bias_batch_mean_4d(" << expr._a << ")";
         } else {
             return os << "bias_batch_sum_4d(" << expr._a << ")";
         }
     }

 private:
     /*!
      * \brief CPU implementation of the reduction, shared by all the
      * assignment functions.
      *
      * For each index k of lhs, the sum of a(b)(k) over all the indices b of
      * the first dimension is computed (and divided by size(a) / size(lhs) when
      * Mean is true), then handed to the update functor together with lhs(k).
      * Once done, the CPU memory of lhs is marked as valid and its GPU memory
      * as invalid.
      *
      * The caller is responsible for making the sub expression (and lhs, for
      * compound operations) up to date on the CPU beforehand.
      *
      * \param lhs The output expression
      * \param update Functor called as update(lhs(k), value) for each k
      */
     template <typename LHS, typename Update>
     void reduce_on_cpu(LHS& lhs, Update update) const {
         auto& a = this->a();

         using T = value_t<A>;

         // N is only read when Mean is true
         [[maybe_unused]] const auto N = etl::size(a) / etl::size(lhs);
         const auto                  K = etl::size(lhs);

         auto batch_fun_k = [&](const size_t first, const size_t last) {
             CPU_SECTION {
                 for (size_t k = first; k < last; ++k) {
                     T acc(0);

                     for (size_t b = 0; b < etl::dim<0>(a); ++b) {
                         acc += sum(a(b)(k));
                     }

                     if constexpr (Mean) {
                         update(lhs(k), acc / static_cast<T>(N));
                     } else {
                         update(lhs(k), acc);
                     }
                 }
             }
         };

         engine_dispatch_1d_serial(batch_fun_k, 0, K, 2UL);

         lhs.validate_cpu();
         lhs.invalidate_gpu();
     }

     /*!
      * \brief CPU fallback shared by the compound assignment functions.
      *
      * Prepares the sub expression and lhs, validates the dimensions, then
      * runs the CPU reduction with the given update functor.
      *
      * \param lhs The expression to update
      * \param update Functor called as update(lhs(k), value) for each k
      */
     template <typename LHS, typename Update>
     void compound_on_cpu(LHS& lhs, Update update) const {
         auto& a = this->a();

         standard_evaluator::pre_assign_rhs(a);

         a.ensure_cpu_up_to_date();
         a.ensure_gpu_up_to_date();

         check(a, lhs);

         // The current values of lhs are read by the compound operation
         lhs.ensure_cpu_up_to_date();

         reduce_on_cpu(lhs, update);
     }
 };

 /*!
  * \brief Traits specialization for bias_batch_mean_4d_expr.
  *
  * The expression is exposed as a one-dimensional temporary whose single
  * dimension is the second dimension of the sub expression.
  *
  * \tparam A The sub expression type
  * \tparam Mean Whether the expression computes the mean or the sum
  */
 template <typename A, bool Mean>
 struct etl_traits<etl::bias_batch_mean_4d_expr<A, Mean>> {
     using expr_t     = etl::bias_batch_mean_4d_expr<A, Mean>; ///< The expression type
     using sub_expr_t = std::decay_t<A>;                       ///< The sub expression type
     using sub_traits = etl_traits<sub_expr_t>;                ///< The sub traits
     using value_type = value_t<A>;                            ///< The value type of the expression

     // Nature of the expression
     static constexpr bool is_etl         = true;
     static constexpr bool is_transformer = false;
     static constexpr bool is_view        = false;
     static constexpr bool is_magic_view  = false;
     // Sizes are known at compile-time exactly when those of the sub expression are
     static constexpr bool is_fast        = sub_traits::is_fast;
     static constexpr bool is_linear      = false;
     static constexpr bool is_thread_safe = true;
     static constexpr bool is_value       = false;
     static constexpr bool is_direct      = true;
     static constexpr bool is_generator   = false;
     static constexpr bool is_padded      = false;
     static constexpr bool is_aligned     = true;
     static constexpr bool is_temporary   = true;
     static constexpr bool gpu_computable = is_gpu_t<value_type> && cuda_enabled;
     static constexpr order storage_order = sub_traits::storage_order;

     /*!
      * \brief Indicates if the expression is vectorizable using the given vector mode
      * \tparam V The vector mode
      */
     template <vector_mode_t V>
     static constexpr bool vectorizable = true;

     /*!
      * \brief Returns the DDth dimension of the expression
      *
      * Only DD == 0 is valid; the value is the second dimension of the sub
      * expression.
      *
      * \return the DDth dimension of the expression
      */
     template <size_t DD>
     static constexpr size_t dim() {
         static_assert(DD == 0, "Invalid dimensions access");
         return decay_traits<A>::template dim<1>();
     }

     /*!
      * \brief Returns the dth dimension of the expression
      * \param e The expression
      * \param d The dimension to get, must be 0
      * \return the dth dimension of the expression
      */
     static size_t dim(const expr_t& e, [[maybe_unused]] size_t d) {
         cpp_assert(d == 0, "Invalid dimensions access");

         return etl::dim<1>(e._a);
     }

     /*!
      * \brief Returns the size of the expression
      * \param e The expression
      * \return the size of the expression
      */
     static size_t size(const expr_t& e) {
         // One-dimensional: the size is the only dimension
         return dim(e, 0);
     }

     /*!
      * \brief Returns the size of the expression
      * \return the size of the expression
      */
     static constexpr size_t size() {
         // One-dimensional: the size is the only dimension
         return dim<0>();
     }

     /*!
      * \brief Returns the number of dimensions of the expression
      * \return the number of dimensions of the expression
      */
     static constexpr size_t dimensions() {
         return 1;
     }

     /*!
      * \brief Estimate the complexity of computation
      * \return the constant -1
      */
     static constexpr int complexity() noexcept {
         return -1;
     }
 };

 /*!
  * \brief Build the expression computing the bias batch mean of the given 4D
  * expression (bias_batch_mean_4d_expr with Mean = true).
  * \param value The 4D expression to reduce
  * \return The (unevaluated) bias batch mean expression
  */
 template <etl_4d E>
 bias_batch_mean_4d_expr<detail::build_type<E>, true> bias_batch_mean_4d(const E& value) {
     using result_t = bias_batch_mean_4d_expr<detail::build_type<E>, true>;

     return result_t{value};
 }

 /*!
  * \brief Build the expression computing the bias batch sum of the given 4D
  * expression (bias_batch_mean_4d_expr with Mean = false).
  * \param value The 4D expression to reduce
  * \return The (unevaluated) bias batch sum expression
  */
 template <etl_4d E>
 bias_batch_mean_4d_expr<detail::build_type<E>, false> bias_batch_sum_4d(const E& value) {
     using result_t = bias_batch_mean_4d_expr<detail::build_type<E>, false>;

     return result_t{value};
 }

 } //end of namespace etl
etl::mean
value_t< E > mean(E &&values)
Returns the mean of all the values contained in the given expression.
Definition: expression_builder.hpp:650

etl::bias_batch_mean_4d_expr::assign_mod_to
void assign_mod_to(L &&lhs) const
Modulo the given left-hand-side expression.
Definition: bias_batch_mean_4d_expr.hpp:346

etl::bias_batch_sum_4d
bias_batch_mean_4d_expr< detail::build_type< E >, false > bias_batch_sum_4d(const E &value)
Returns the bias batch sum of the given 4D expression (one sum per index of its second dimension).
Definition: bias_batch_mean_4d_expr.hpp:513

etl::etl_traits< etl::bias_batch_mean_4d_expr< A, Mean > >::value_type
value_t< A > value_type
The value type of the expression.
Definition: bias_batch_mean_4d_expr.hpp:415

etl::bias_batch_mean_4d_expr::assign_to
void assign_to(L &&lhs) const
Assign to a matrix of the same storage order.
Definition: bias_batch_mean_4d_expr.hpp:67

etl::base_temporary_expr_un< bias_batch_mean_4d_expr< A, Mean >, A >::a
std::add_lvalue_reference_t< A > a()
Returns the sub expression.
Definition: base_temporary_expr.hpp:489

etl::bias_batch_mean_4d_expr::storage_order
static constexpr auto storage_order
The sub storage order.
Definition: bias_batch_mean_4d_expr.hpp:28

etl::bias_batch_mean_4d_expr::value_type
value_t< A > value_type
The type of value of the expression.
Definition: bias_batch_mean_4d_expr.hpp:23

etl::engine_dispatch_1d_serial
void engine_dispatch_1d_serial(Functor &&functor, size_t first, size_t last, size_t threshold, [[maybe_unused]] size_t n_threads=etl::threads)
Dispatch the elements of a range to a functor in a parallel manner, using the global thread engine...
Definition: parallel_support.hpp:734

etl::etl_traits< etl::bias_batch_mean_4d_expr< A, Mean > >::size
static constexpr size_t size()
Returns the size of the expression.
Definition: bias_batch_mean_4d_expr.hpp:476

etl::is_magic_view
constexpr bool is_magic_view
Traits indicating if the given ETL type is a magic view expression.
Definition: traits.hpp:311

etl::etl_traits< etl::bias_batch_mean_4d_expr< A, Mean > >::dimensions
static constexpr size_t dimensions()
Returns the number of dimensions of the expression.
Definition: bias_batch_mean_4d_expr.hpp:484

etl::order
order
Storage order of a matrix.
Definition: order.hpp:15

etl::bias_batch_mean_4d_expr::check
static void check([[maybe_unused]] const A &a, [[maybe_unused]] const C &c)
Validate the dimensions of the bias batch mean operation.
Definition: bias_batch_mean_4d_expr.hpp:52

etl::cuda_enabled
constexpr bool cuda_enabled
Indicates if CUDA is available.
Definition: config.hpp:94

bias_batch_sum.hpp
EGBLAS wrappers for the bias_batch_sum operation.

etl::etl_traits< etl::bias_batch_mean_4d_expr< A, Mean > >::sub_expr_t
std::decay_t< A > sub_expr_t
The sub expression type.
Definition: bias_batch_mean_4d_expr.hpp:413

etl::bias_batch_mean_4d_expr::gpu_computable
static constexpr bool gpu_computable
Indicates if the temporary expression can be directly evaluated using only GPU.
Definition: bias_batch_mean_4d_expr.hpp:34

etl::bias_batch_mean_4d_expr
A bias batch mean (or sum) expression over a 4D expression.
Definition: bias_batch_mean_4d_expr.hpp:22

etl::base_temporary_expr_un::_a
A _a
The sub expression reference.
Definition: base_temporary_expr.hpp:447

etl::etl_traits< etl::bias_batch_mean_4d_expr< A, Mean > >::size
static size_t size(const expr_t &e)
Returns the size of the expression.
Definition: bias_batch_mean_4d_expr.hpp:468

etl::is_fast
constexpr bool is_fast
Traits to test if the given ETL expression type is fast (sizes known at compile-time) ...
Definition: traits.hpp:588

etl::bias_batch_mean_4d_expr::assign_add_to
void assign_add_to(L &&lhs) const
Add to the given left-hand-side expression.
Definition: bias_batch_mean_4d_expr.hpp:146

etl::etl_traits< etl::bias_batch_mean_4d_expr< A, Mean > >::dim
static size_t dim(const expr_t &e, [[maybe_unused]] size_t d)
Returns the dth dimension of the expression.
Definition: bias_batch_mean_4d_expr.hpp:457

etl::etl_traits
Traits to get information about ETL types.
Definition: tmp.hpp:68

etl
Root namespace for the ETL library.
Definition: adapter.hpp:15

etl::bias_batch_mean_4d_expr::operator<<
friend std::ostream & operator<<(std::ostream &os, const bias_batch_mean_4d_expr &expr)
Print a representation of the expression on the given stream.
Definition: bias_batch_mean_4d_expr.hpp:397

bias_batch_mean.hpp
bias_batch_mean implementations with NVidia cuDNN library

etl::cudnn_enabled
constexpr bool cudnn_enabled
Indicates if the NVIDIA CUDNN library is available for ETL.
Definition: config.hpp:114

etl::bias_batch_mean_4d_expr::assign_mul_to
void assign_mul_to(L &&lhs) const
Multiply the given left-hand-side expression.
Definition: bias_batch_mean_4d_expr.hpp:246

etl::etl_traits< etl::bias_batch_mean_4d_expr< A, Mean > >::dim
static constexpr size_t dim()
Returns the DDth dimension of the expression.
Definition: bias_batch_mean_4d_expr.hpp:446

etl::std_mod_evaluate
void std_mod_evaluate(Expr &&expr, Result &&result)
Compound modulo evaluation of the expr into result.
Definition: evaluator.hpp:1271

etl::std_mul_evaluate
void std_mul_evaluate(Expr &&expr, Result &&result)
Compound multiply evaluation of the expr into result.
Definition: evaluator.hpp:1233

etl::bias_batch_mean_4d_expr::bias_batch_mean_4d_expr
bias_batch_mean_4d_expr(A a)
Construct a new expression.
Definition: bias_batch_mean_4d_expr.hpp:42

etl::is_transformer
constexpr bool is_transformer
Traits indicating if the given ETL type is a transformer expression.
Definition: traits.hpp:297

etl::sum
value_t< E > sum(E &&values)
Returns the sum of all the values contained in the given expression.
Definition: expression_builder.hpp:624

etl::smart_forward_gpu
decltype(auto) smart_forward_gpu(E &expr)
Smart forwarding for a temporary expression that will be computed in GPU.
Definition: helpers.hpp:343

etl::size
constexpr size_t size(const E &expr) noexcept
Returns the size of the given ETL expression.
Definition: helpers.hpp:108

etl::bias_batch_mean_4d_expr::assign_div_to
void assign_div_to(L &&lhs) const
Divide the given left-hand-side expression.
Definition: bias_batch_mean_4d_expr.hpp:296

etl::is_view
constexpr bool is_view
Traits indicating if the given ETL type is a view expression.
Definition: traits.hpp:304

etl::etl_traits::is_fast
static constexpr bool is_fast
Indicates if T is a fast structure.
Definition: traits_base.hpp:25

etl::std_sub_evaluate
void std_sub_evaluate(Expr &&expr, Result &&result)
Compound subtract evaluation of the expr into result.
Definition: evaluator.hpp:1214

etl::base_temporary_expr_un
Abstract base class for temporary unary expression.
Definition: base_temporary_expr.hpp:443

etl::bias_batch_mean_4d_expr::assign_sub_to
void assign_sub_to(L &&lhs) const
Sub from the given left-hand-side expression.
Definition: bias_batch_mean_4d_expr.hpp:196

etl::is_thread_safe
constexpr bool is_thread_safe
Traits to test if the given ETL expression type is thread safe.
Definition: traits.hpp:687

etl::value_t
typename decay_traits< E >::value_type value_t
Traits to extract the value type out of an ETL type.
Definition: tmp.hpp:81

etl::std_div_evaluate
void std_div_evaluate(Expr &&expr, Result &&result)
Compound divide evaluation of the expr into result.
Definition: evaluator.hpp:1252

etl::bias_batch_mean_4d
bias_batch_mean_4d_expr< detail::build_type< E >, true > bias_batch_mean_4d(const E &value)
Returns the bias batch mean of the given 4D expression (one mean per index of its second dimension).
Definition: bias_batch_mean_4d_expr.hpp:503

etl::inc_counter
void inc_counter([[maybe_unused]] const char *name)
Increase the given counter.
Definition: counters.hpp:25

etl::std_add_evaluate
void std_add_evaluate(Expr &&expr, Result &&result)
Compound add evaluation of the expr into result.
Definition: evaluator.hpp:1195

etl::etl_traits< etl::bias_batch_mean_4d_expr< A, Mean > >::complexity
static constexpr int complexity() noexcept
Estimate the complexity of computation.
Definition: bias_batch_mean_4d_expr.hpp:492