10 #include "etl/expr/base_temporary_expr.hpp" 21 template <etl_2d A,
bool Mean>
35 || (impl::egblas::has_sbias_batch_sum && all_row_major<A> && all_single_precision<A>)
36 || (impl::egblas::has_dbias_batch_sum && all_row_major<A> && all_double_precision<A>);
52 static void check([[maybe_unused]]
const A&
a, [[maybe_unused]]
const C& c) {
53 if constexpr (all_fast<A, C>) {
54 static_assert(etl::dim<1, A>() == etl::dim<0, C>(),
"Invalid dimensions for bias_batch_mean_2d");
56 cpp_assert(etl::dim<1>(a) == etl::dim<0>(c),
"Invalid dimensions for bias_batch_mean_2d");
76 if constexpr (!Mean && impl::egblas::has_sbias_batch_sum && all_row_major<A> && all_floating<A, L>) {
77 const auto N = etl::dim<0>(
a);
78 const auto K = etl::dim<1>(
a);
81 t1.ensure_gpu_up_to_date();
83 lhs.ensure_gpu_allocated();
85 impl::egblas::bias_batch_sum(N, K, t1.gpu_memory(), 1, lhs.gpu_memory(), 1);
89 }
else if constexpr (Mean && impl::egblas::has_sbias_batch_mean && all_row_major<A> && all_floating<A, L>) {
90 const auto N = etl::dim<0>(
a);
91 const auto K = etl::dim<1>(
a);
94 t1.ensure_gpu_up_to_date();
96 lhs.ensure_gpu_allocated();
98 impl::egblas::bias_batch_mean(N, K, t1.gpu_memory(), 1, lhs.gpu_memory(), 1);
101 lhs.invalidate_cpu();
102 }
else if constexpr (!Mean &&
cudnn_enabled && all_floating<A, L>) {
105 const auto N = etl::dim<0>(
a);
106 const auto K = etl::dim<1>(
a);
108 standard_evaluator::pre_assign_rhs(
a);
110 auto batch_fun_k = [&](
const size_t first,
const size_t last) {
111 for (
size_t k = first; k < last; ++k) {
114 for (
size_t b = 0; b < N; ++b) {
118 if constexpr (Mean) {
119 lhs(k) = mean /
static_cast<T
>(N);
134 template <etl_expr L>
136 if constexpr (all_floating<A, L> && ((!Mean &&
cudnn_enabled) || (all_row_major<A, L> && impl::egblas::has_sbias_batch_sum))) {
141 standard_evaluator::pre_assign_rhs(
a);
150 auto batch_fun_k = [&](
const size_t first,
const size_t last) {
151 for (
size_t k = first; k < last; ++k) {
154 for (
size_t b = 0; b < N; ++b) {
158 if constexpr (Mean) {
159 lhs(k) += mean /
static_cast<T
>(N);
174 template <etl_expr L>
176 if constexpr (all_floating<A, L> && ((!Mean &&
cudnn_enabled) || (all_row_major<A, L> && impl::egblas::has_sbias_batch_sum))) {
181 standard_evaluator::pre_assign_rhs(
a);
190 auto batch_fun_k = [&](
const size_t first,
const size_t last) {
191 for (
size_t k = first; k < last; ++k) {
194 for (
size_t b = 0; b < N; ++b) {
198 if constexpr (Mean) {
199 lhs(k) -= mean /
static_cast<T
>(N);
214 template <etl_expr L>
216 if constexpr (all_floating<A, L> && ((!Mean &&
cudnn_enabled) || (all_row_major<A, L> && impl::egblas::has_sbias_batch_sum))) {
221 standard_evaluator::pre_assign_rhs(
a);
230 auto batch_fun_k = [&](
const size_t first,
const size_t last) {
231 for (
size_t k = first; k < last; ++k) {
234 for (
size_t b = 0; b < N; ++b) {
238 if constexpr (Mean) {
239 lhs(k) *= mean /
static_cast<T
>(N);
254 template <etl_expr L>
256 if constexpr (all_floating<A, L> && ((!Mean &&
cudnn_enabled) || (all_row_major<A, L> && impl::egblas::has_sbias_batch_sum))) {
261 standard_evaluator::pre_assign_rhs(
a);
270 auto batch_fun_k = [&](
const size_t first,
const size_t last) {
271 for (
size_t k = first; k < last; ++k) {
274 for (
size_t b = 0; b < N; ++b) {
278 if constexpr (Mean) {
279 lhs(k) /= mean /
static_cast<T
>(N);
294 template <etl_expr L>
296 if constexpr (all_floating<A, L> && ((!Mean &&
cudnn_enabled) || (all_row_major<A, L> && impl::egblas::has_sbias_batch_sum))) {
301 standard_evaluator::pre_assign_rhs(
a);
310 auto batch_fun_k = [&](
const size_t first,
const size_t last) {
311 for (
size_t k = first; k < last; ++k) {
314 for (
size_t b = 0; b < N; ++b) {
318 if constexpr (Mean) {
319 lhs(k) %= mean /
static_cast<T
>(N);
338 return os <<
"bias_batch_mean_2d(" << expr.
_a <<
")";
340 return os <<
"bias_batch_sum_2d(" << expr.
_a <<
")";
349 template <
typename A,
bool Mean>
356 static constexpr
bool is_etl =
true;
361 static constexpr
bool is_linear =
false;
363 static constexpr
bool is_value =
false;
364 static constexpr
bool is_direct =
true;
365 static constexpr
bool is_generator =
false;
366 static constexpr
bool is_padded =
false;
367 static constexpr
bool is_aligned =
true;
368 static constexpr
bool is_temporary =
true;
369 static constexpr
order storage_order = sub_traits::storage_order;
370 static constexpr
bool gpu_computable = is_gpu_t<value_type> &&
cuda_enabled;
377 template <vector_mode_t V>
378 static constexpr
bool vectorizable =
true;
395 static size_t dim(
const expr_t& e, [[maybe_unused]]
size_t d) {
396 cpp_assert(d == 0,
"Invalid dimensions access");
398 return etl::dim<1>(e.
_a);
406 static size_t size(
const expr_t& e) {
407 return etl::dim<1>(e.
_a);
414 static constexpr
size_t size() {
422 static constexpr
size_t dimensions() {
value_t< E > mean(E &&values)
Returns the mean of all the values contained in the given expression.
Definition: expression_builder.hpp:650
void assign_mod_to(L &&lhs) const
Modulo the given left-hand-side expression.
Definition: bias_batch_mean_2d_expr.hpp:295
void assign_sub_to(L &&lhs) const
Subtract from the given left-hand-side expression.
Definition: bias_batch_mean_2d_expr.hpp:175
constexpr int complexity([[maybe_unused]] const E &expr) noexcept
Return the complexity of the expression.
Definition: helpers.hpp:38
std::add_lvalue_reference_t< A > a()
Returns the sub expression.
Definition: base_temporary_expr.hpp:489
void engine_dispatch_1d_serial(Functor &&functor, size_t first, size_t last, size_t threshold, [[maybe_unused]] size_t n_threads=etl::threads)
Dispatch the elements of a range to a functor in a parallel manner, using the global thread engine...
Definition: parallel_support.hpp:734
constexpr bool is_magic_view
Traits indicating if the given ETL type is a magic view expression.
Definition: traits.hpp:311
order
Storage order of a matrix.
Definition: order.hpp:15
constexpr bool cuda_enabled
Indicates if CUDA is available.
Definition: config.hpp:94
EGBLAS wrappers for the bias_batch_sum operation.
A _a
The sub expression reference.
Definition: base_temporary_expr.hpp:447
constexpr bool is_fast
Traits to test if the given ETL expression type is fast (sizes known at compile-time) ...
Definition: traits.hpp:588
A 2D bias batch mean expression (or bias batch sum expression when Mean is false).
Definition: bias_batch_mean_2d_expr.hpp:22
Traits to get information about ETL types.
Definition: tmp.hpp:68
Root namespace for the ETL library.
Definition: adapter.hpp:15
bias_batch_mean implementations with NVidia cuDNN library
void assign_to(L &&lhs) const
Assign to a matrix of the same storage order.
Definition: bias_batch_mean_2d_expr.hpp:67
auto dim(E &&value, size_t i) -> detail::identity_helper< E, dim_view< detail::build_identity_type< E >, D >>
Return a view representing the ith Dth dimension.
Definition: view_expression_builder.hpp:25
void assign_add_to(L &&lhs) const
Add to the given left-hand-side expression.
Definition: bias_batch_mean_2d_expr.hpp:135
constexpr bool cudnn_enabled
Indicates if the NVIDIA CUDNN library is available for ETL.
Definition: config.hpp:114
void assign_mul_to(L &&lhs) const
Multiply the given left-hand-side expression.
Definition: bias_batch_mean_2d_expr.hpp:215
static constexpr bool gpu_computable
Indicates if the temporary expression can be directly evaluated using only GPU.
Definition: bias_batch_mean_2d_expr.hpp:34
void std_mod_evaluate(Expr &&expr, Result &&result)
Compound modulo evaluation of the expr into result.
Definition: evaluator.hpp:1271
bias_batch_mean_2d_expr< detail::build_type< E >, false > bias_batch_sum_2d(const E &value)
Returns the bias batch sum of the given 2D expression.
Definition: bias_batch_mean_2d_expr.hpp:451
static constexpr auto storage_order
The sub storage order.
Definition: bias_batch_mean_2d_expr.hpp:28
void std_mul_evaluate(Expr &&expr, Result &&result)
Compound multiply evaluation of the expr into result.
Definition: evaluator.hpp:1233
constexpr bool is_transformer
Traits indicating if the given ETL type is a transformer expression.
Definition: traits.hpp:297
decltype(auto) smart_forward_gpu(E &expr)
Smart forwarding for a temporary expression that will be computed in GPU.
Definition: helpers.hpp:343
constexpr size_t size(const E &expr) noexcept
Returns the size of the given ETL expression.
Definition: helpers.hpp:108
requires(D > 0) struct dyn_base
Matrix with run-time fixed dimensions.
Definition: dyn_base.hpp:113
constexpr bool is_view
Traits indicating if the given ETL type is a view expression.
Definition: traits.hpp:304
static constexpr bool is_fast
Indicates if T is a fast structure.
Definition: traits_base.hpp:25
static void check([[maybe_unused]] const A &a, [[maybe_unused]] const C &c)
Validate the bias_batch_mean_2d dimensions.
Definition: bias_batch_mean_2d_expr.hpp:52
void std_sub_evaluate(Expr &&expr, Result &&result)
Compound subtract evaluation of the expr into result.
Definition: evaluator.hpp:1214
Abstract base class for temporary unary expression.
Definition: base_temporary_expr.hpp:443
bias_batch_mean_2d_expr< detail::build_type< E >, true > bias_batch_mean_2d(const E &value)
Returns the bias batch mean of the given 2D expression.
Definition: bias_batch_mean_2d_expr.hpp:441
constexpr bool is_thread_safe
Traits to test if the given ETL expression type is thread safe.
Definition: traits.hpp:687
typename decay_traits< E >::value_type value_t
Traits to extract the value type out of an ETL type.
Definition: tmp.hpp:81
void std_div_evaluate(Expr &&expr, Result &&result)
Compound divide evaluation of the expr into result.
Definition: evaluator.hpp:1252
bias_batch_mean_2d_expr(A a)
Construct a new expression.
Definition: bias_batch_mean_2d_expr.hpp:42
value_t< A > value_type
The value type of the expression.
Definition: bias_batch_mean_2d_expr.hpp:354
void inc_counter([[maybe_unused]] const char *name)
Increase the given counter.
Definition: counters.hpp:25
std::decay_t< A > sub_expr_t
The sub expression type.
Definition: bias_batch_mean_2d_expr.hpp:352
friend std::ostream & operator<<(std::ostream &os, const bias_batch_mean_2d_expr &expr)
Print a representation of the expression on the given stream.
Definition: bias_batch_mean_2d_expr.hpp:336
void assign_div_to(L &&lhs) const
Divide the given left-hand-side expression.
Definition: bias_batch_mean_2d_expr.hpp:255
void std_add_evaluate(Expr &&expr, Result &&result)
Compound add evaluation of the expr into result.
Definition: evaluator.hpp:1195
value_t< A > value_type
The type of value of the expression.
Definition: bias_batch_mean_2d_expr.hpp:23