// Listing fragment of bias_add_4d_impl (the cross-reference notes at the end
// of this listing place its definition at bias_add.hpp:24, signature
// `void bias_add_4d_impl(const L &x, const R &b, C &&y)`).
//
// The leading numbers are the line numbers of the original header. Lines that
// are absent from the sequence (the signature itself, the declarations of
// vec_type / T / m, the vector loads and stores, the closing braces and the
// code that invokes batch_fun) are not part of this listing.
//
// Visible behaviour: for every slice x(i)(j), the scalar b[j] is added to each
// of the MN = dim<2>(x) * dim<3>(x) contiguous elements; the scalar tail writes
// the sum into the matching slice of y.
23 template <
typename V,
typename L,
typename R,
typename C>
// Number of elements held by one vector of vec_type for element type T
// (vec_type and T are declared on lines not shown here).
28 static constexpr
size_t vec_size = vec_type::template traits<T>::size;
// B is the extent of dimension 0 of x. Its use is not visible in this listing
// (presumably the range handed to batch_fun -- TODO confirm).
30 const auto B = etl::dim<0>(x);
31 const auto M = etl::dim<2>(x);
32 const auto N = etl::dim<3>(x);
// Element count of one (i, j) slice; the inner loops below walk 0..MN.
33 const auto MN = M * N;
// Both inputs are read through raw memory / operator[] below; going by the
// method name, this brings their CPU-side copy up to date first.
35 x.ensure_cpu_up_to_date();
36 b.ensure_cpu_up_to_date();
// Processes the half-open range [first, last) of first-dimension indices.
40 auto batch_fun = [&](
size_t first,
size_t last) {
41 for (
size_t i = first; i < last; ++i) {
// j walks dimension 1 of x and also indexes the bias b.
42 for (
size_t j = 0; j < etl::dim<1>(x); ++j) {
// Raw pointers to the start of the (i, j) slice of the input and the output.
43 auto x_s = x(i)(j).memory_start();
44 auto y_s = y(i)(j).memory_start();
// Vector built from the single bias value b[j]; it is the second operand of
// every vector add below.
46 auto b1 = vec_type::set(b[j]);
// Main loop: runs while a full group of 8 vectors still fits before MN.
// The loads producing x1..x8 and whatever consumes r1..r8 are on lines not
// shown (presumably loads from x_s and stores to y_s -- TODO confirm).
50 for (; m + vec_size * 8 - 1 < MN; m += vec_size * 8) {
60 auto r1 = vec_type::add(x1, b1);
61 auto r2 = vec_type::add(x2, b1);
62 auto r3 = vec_type::add(x3, b1);
63 auto r4 = vec_type::add(x4, b1);
64 auto r5 = vec_type::add(x5, b1);
65 auto r6 = vec_type::add(x6, b1);
66 auto r7 = vec_type::add(x7, b1);
67 auto r8 = vec_type::add(x8, b1);
// Same pattern for what is left, 4 vectors at a time (m carries over).
79 for (; m + vec_size * 4 - 1 < MN; m += vec_size * 4) {
85 auto r1 = vec_type::add(x1, b1);
86 auto r2 = vec_type::add(x2, b1);
87 auto r3 = vec_type::add(x3, b1);
88 auto r4 = vec_type::add(x4, b1);
// Then 2 vectors at a time.
96 for (; m + vec_size * 2 - 1 < MN; m += vec_size * 2) {
100 auto r1 = vec_type::add(x1, b1);
101 auto r2 = vec_type::add(x2, b1);
// Then one vector at a time.
107 for (; m + vec_size - 1 < MN; m += vec_size) {
110 auto r1 = vec_type::add(x1, b1);
// Scalar tail: the remaining (fewer than vec_size) elements, one by one.
115 for (; m < MN; ++m) {
116 y_s[m] = x_s[m] + b[j];
// Listing fragment of bias_add_2d_impl (the cross-reference notes at the end
// of this listing place its definition at bias_add.hpp:135, signature
// `void bias_add_2d_impl(const L &x, const R &b, C &&y)`).
//
// The leading numbers are the line numbers of the original header. Lines that
// are absent from the sequence (the signature, the declarations of vec_type /
// T / j, the vector loads and stores, the header of the scalar tail loop, the
// closing braces and the code that invokes batch_fun) are not part of this
// listing.
//
// Visible behaviour: x is treated as a [B, K] matrix and b is indexed by the
// second index only; the scalar tail computes y(i, j) = x(i, j) + b(j).
134 template <
typename V,
typename L,
typename R,
typename C>
// Number of elements held by one vector of vec_type for element type T
// (vec_type and T are declared on lines not shown here).
139 static constexpr
size_t vec_size = vec_type::template traits<T>::size;
// B is the extent of dimension 0 of x. Its use is not visible in this listing
// (presumably the range handed to batch_fun -- TODO confirm).
141 const auto B = etl::dim<0>(x);
// K is the extent of dimension 1 of x and bounds the inner loop over j.
142 const auto K = etl::dim<1>(x);
// Both inputs are read through raw memory below; going by the method name,
// this brings their CPU-side copy up to date first.
144 x.ensure_cpu_up_to_date();
145 b.ensure_cpu_up_to_date();
// Processes the half-open range [first, last) of row indices i.
149 auto batch_fun = [&](
size_t first,
size_t last) {
// Raw pointers to the start of the bias, the input and the output. Their
// uses are on lines not shown (presumably the vector loads/stores -- TODO
// confirm).
150 auto b_s = b.memory_start();
151 auto x_s = x.memory_start();
152 auto y_s = y.memory_start();
154 for (
size_t i = first; i < last; ++i) {
// Vector loop: runs while one full vector still fits before K.
157 for (; j + vec_size - 1 < K; j += vec_size) {
// r1 and x1 are produced on lines not shown (presumably the bias vector
// and the input vector for columns j.. -- TODO confirm).
160 auto t1 = vec_type::add(r1, x1);
// Scalar form of the same addition; by its line number it follows the vector
// loop (presumably the body of a tail loop over the remaining j -- TODO
// confirm).
165 y(i, j) = x(i, j) + b(j);
// Listing fragment of a three-parameter function template whose own name and
// signature (original line 182) are not part of this listing. The only
// visible statement forwards x, b and y to bias_add_4d_impl, passing
// default_vec as its first template argument (the V parameter of the impl).
181 template <
typename A,
typename B,
typename C>
183 bias_add_4d_impl<default_vec>(x, b, y);
// Listing fragment of a three-parameter function template whose own name and
// signature (original line 193) are not part of this listing. The only
// visible statement forwards x, b and y to bias_add_2d_impl, passing
// default_vec as its first template argument (the V parameter of the impl).
192 template <
typename A,
typename B,
typename C>
194 bias_add_2d_impl<default_vec>(x, b, y);
Definition: bias_add.hpp:15
typename V::template vec_type< value_type > vec_type
The vectorization type for V.
Definition: dyn_matrix_view.hpp:43
void bias_add_2d_impl(const L &x, const R &b, C &&y)
Compute the bias addition of b into x and store the result in y.
Definition: bias_add.hpp:135
void storeu(vec_type< V > in, size_t i) noexcept
Store several elements in the matrix at once.
Definition: dyn_matrix_view.hpp:187
bias_add_2d_expr< detail::build_type< E >, detail::build_type< B > > bias_add_2d(const E &x, const B &biases)
Returns the result of adding the bias [K] to the 2D matrix [B, K].
Definition: bias_add_2d_expr.hpp:378
auto loadu(size_t x) const noexcept
Load several elements of the expression at once.
Definition: dyn_matrix_view.hpp:154
bias_add_4d_expr< detail::build_type< E >, detail::build_type< B > > bias_add_4d(const E &x, const B &biases)
Returns the result of adding the bias [K] to the 4D matrix [N1, K, N2, N3].
Definition: bias_add_4d_expr.hpp:388
typename decay_traits< E >::value_type value_t
Traits to extract the value type out of an ETL type.
Definition: tmp.hpp:81
void bias_add_4d_impl(const L &x, const R &b, C &&y)
Compute the bias addition of b into x and store the result in y.
Definition: bias_add.hpp:24