10 #include "etl/expr/base_temporary_expr.hpp" 20 template <etl_4d A, etl_1d B>
34 (impl::egblas::has_sbias_batch_var4 && all_row_major<A> && all_single_precision<A>)
35 || (impl::egblas::has_dbias_batch_var4 && all_row_major<A> && all_double_precision<A>);
51 static void check([[maybe_unused]]
const A&
a, [[maybe_unused]]
const B&
b, [[maybe_unused]]
const C& c) {
52 if constexpr (all_fast<A, B, C>) {
53 static_assert(etl::dim<1, A>() == etl::dim<0, C>(),
"Invalid dimensions for bias_batch_var_4d");
54 static_assert(etl::dim<0, B>() == etl::dim<0, C>(),
"Invalid dimensions for bias_batch_var_4d");
56 cpp_assert(etl::dim<1>(a) == etl::dim<0>(c),
"Invalid dimensions for bias_batch_var_4d");
57 cpp_assert(etl::dim<0>(b) == etl::dim<0>(c),
"Invalid dimensions for bias_batch_var_4d");
78 const auto N = etl::dim<0>(
a);
79 const auto K = etl::dim<1>(
a);
82 if constexpr (impl::egblas::has_sbias_batch_var4 && all_row_major<A> && all_floating<A, L>) {
83 const auto W = etl::dim<2>(
a);
84 const auto H = etl::dim<3>(
a);
89 t1.ensure_gpu_up_to_date();
90 t2.ensure_gpu_up_to_date();
92 lhs.ensure_gpu_allocated();
94 impl::egblas::bias_batch_var4(N, K, W, H, t1.gpu_memory(), t2.gpu_memory(), lhs.gpu_memory());
99 standard_evaluator::pre_assign_rhs(
a);
100 standard_evaluator::pre_assign_rhs(
b);
102 a.ensure_cpu_up_to_date();
103 b.ensure_cpu_up_to_date();
110 auto batch_fun_k = [&](
const size_t first,
const size_t last) {
112 for (
size_t k = first; k < last; ++k) {
116 for (
size_t bb = 0; bb < N; ++bb) {
117 for (
size_t k = first; k < last; ++k) {
118 lhs(k) +=
sum((
a(bb)(k) -
b(k)) >> (
a(bb)(k) -
b(k)));
122 for (
size_t k = first; k < last; ++k) {
131 lhs.invalidate_gpu();
139 template <etl_expr L>
144 standard_evaluator::pre_assign_rhs(
a);
145 standard_evaluator::pre_assign_rhs(
b);
149 const auto N = etl::dim<0>(
a);
150 const auto K = etl::dim<1>(
a);
155 a.ensure_cpu_up_to_date();
156 b.ensure_cpu_up_to_date();
157 lhs.ensure_cpu_up_to_date();
159 auto batch_fun_k = [&](
const size_t first,
const size_t last) {
161 for (
size_t k = first; k < last; ++k) {
164 for (
size_t bb = 0; bb < N; ++bb) {
165 var +=
sum((
a(bb)(k) -
b(k)) >> (
a(bb)(k) -
b(k)));
176 lhs.invalidate_gpu();
183 template <etl_expr L>
188 standard_evaluator::pre_assign_rhs(
a);
189 standard_evaluator::pre_assign_rhs(
b);
193 const auto N = etl::dim<0>(
a);
194 const auto K = etl::dim<1>(
a);
199 a.ensure_cpu_up_to_date();
200 b.ensure_cpu_up_to_date();
201 lhs.ensure_cpu_up_to_date();
203 auto batch_fun_k = [&](
const size_t first,
const size_t last) {
205 for (
size_t k = first; k < last; ++k) {
208 for (
size_t bb = 0; bb < N; ++bb) {
209 var +=
sum((
a(bb)(k) -
b(k)) >> (
a(bb)(k) -
b(k)));
220 lhs.invalidate_gpu();
227 template <etl_expr L>
232 standard_evaluator::pre_assign_rhs(
a);
233 standard_evaluator::pre_assign_rhs(
b);
237 const auto N = etl::dim<0>(
a);
238 const auto K = etl::dim<1>(
a);
243 a.ensure_cpu_up_to_date();
244 b.ensure_cpu_up_to_date();
245 lhs.ensure_cpu_up_to_date();
247 auto batch_fun_k = [&](
const size_t first,
const size_t last) {
249 for (
size_t k = first; k < last; ++k) {
252 for (
size_t bb = 0; bb < N; ++bb) {
253 var +=
sum((
a(bb)(k) -
b(k)) >> (
a(bb)(k) -
b(k)));
264 lhs.invalidate_gpu();
271 template <etl_expr L>
276 standard_evaluator::pre_assign_rhs(
a);
277 standard_evaluator::pre_assign_rhs(
b);
281 const auto N = etl::dim<0>(
a);
282 const auto K = etl::dim<1>(
a);
287 a.ensure_cpu_up_to_date();
288 b.ensure_cpu_up_to_date();
289 lhs.ensure_cpu_up_to_date();
291 auto batch_fun_k = [&](
const size_t first,
const size_t last) {
293 for (
size_t k = first; k < last; ++k) {
296 for (
size_t bb = 0; bb < N; ++bb) {
297 var +=
sum((
a(bb)(k) -
b(k)) >> (
a(bb)(k) -
b(k)));
308 lhs.invalidate_gpu();
315 template <etl_expr L>
320 standard_evaluator::pre_assign_rhs(
a);
321 standard_evaluator::pre_assign_rhs(
b);
325 const auto N = etl::dim<0>(
a);
326 const auto K = etl::dim<1>(
a);
331 a.ensure_cpu_up_to_date();
332 b.ensure_cpu_up_to_date();
333 lhs.ensure_cpu_up_to_date();
335 auto batch_fun_k = [&](
const size_t first,
const size_t last) {
337 for (
size_t k = first; k < last; ++k) {
340 for (
size_t bb = 0; bb < N; ++bb) {
341 var +=
sum((
a(bb)(k) -
b(k)) >> (
a(bb)(k) -
b(k)));
352 lhs.invalidate_gpu();
362 return os <<
"bias_batch_var_4d(" << expr.
_a <<
")";
370 template <
typename A,
typename B>
377 static constexpr
bool is_etl =
true;
382 static constexpr
bool is_linear =
false;
384 static constexpr
bool is_value =
false;
385 static constexpr
bool is_direct =
true;
386 static constexpr
bool is_generator =
false;
387 static constexpr
bool is_padded =
false;
388 static constexpr
bool is_aligned =
true;
389 static constexpr
bool is_temporary =
true;
390 static constexpr
order storage_order = sub_traits::storage_order;
391 static constexpr
bool gpu_computable = is_gpu_t<value_type> &&
cuda_enabled;
398 template <vector_mode_t V>
399 static constexpr
bool vectorizable =
true;
416 static size_t dim(
const expr_t& e, [[maybe_unused]]
size_t d) {
417 cpp_assert(d == 0,
"Invalid dimensions access");
418 return etl::dim<1>(e.
_a);
426 static size_t size(
const expr_t& e) {
427 return etl::dim<1>(e.
_a);
434 static constexpr
size_t size() {
442 static constexpr
size_t dimensions() {
460 template <etl_4d A, etl_1d B>
friend std::ostream & operator<<(std::ostream &os, const bias_batch_var_4d_expr &expr)
Print a representation of the expression on the given stream.
Definition: bias_batch_var_4d_expr.hpp:361
constexpr int complexity([[maybe_unused]] const E &expr) noexcept
Return the complexity of the expression.
Definition: helpers.hpp:38
static void check([[maybe_unused]] const A &a, [[maybe_unused]] const B &b, [[maybe_unused]] const C &c)
Validate the dimensions of the bias_batch_var_4d operands (the second dimension of a and the size of b must both match the size of c).
Definition: bias_batch_var_4d_expr.hpp:51
value_t< A > value_type
The type of value of the expression.
Definition: bias_batch_var_4d_expr.hpp:22
void engine_dispatch_1d_serial(Functor &&functor, size_t first, size_t last, size_t threshold, [[maybe_unused]] size_t n_threads=etl::threads)
Dispatch the elements of a range to a functor in a parallel manner, using the global thread engine...
Definition: parallel_support.hpp:734
constexpr bool is_magic_view
Traits indicating if the given ETL type is a magic view expression.
Definition: traits.hpp:311
A _a
The sub expression reference.
Definition: base_temporary_expr.hpp:533
order
Storage order of a matrix.
Definition: order.hpp:15
constexpr bool cuda_enabled
Indicates if CUDA is available.
Definition: config.hpp:94
void assign_sub_to(L &&lhs) const
Subtract from the given left-hand-side expression.
Definition: bias_batch_var_4d_expr.hpp:184
void assign_div_to(L &&lhs) const
Divide the given left-hand-side expression.
Definition: bias_batch_var_4d_expr.hpp:272
EGBLAS wrappers for the bias_batch_sum operation.
Abstract base class for temporary binary expression.
Definition: base_temporary_expr.hpp:529
std::add_lvalue_reference_t< B > b()
Returns the sub expression.
Definition: base_temporary_expr.hpp:593
std::decay_t< A > sub_expr_t
The sub expression type.
Definition: bias_batch_var_4d_expr.hpp:373
constexpr bool is_fast
Traits to test if the given ETL expression type is fast (sizes known at compile-time) ...
Definition: traits.hpp:588
void assign_to(L &&lhs) const
Assign to a matrix of the same storage order.
Definition: bias_batch_var_4d_expr.hpp:68
bias_batch_var_4d_expr< detail::build_type< A >, detail::build_type< B > > bias_batch_var_4d(const A &a, const B &b)
Returns the bias_batch_var_4d expression of the given 4D expression a and 1D expression b.
Definition: bias_batch_var_4d_expr.hpp:461
Traits to get information about ETL types.
Definition: tmp.hpp:68
Root namespace for the ETL library.
Definition: adapter.hpp:15
bias_batch_var_4d_expr(A a, B b)
Construct a new expression.
Definition: bias_batch_var_4d_expr.hpp:41
static constexpr auto storage_order
The sub storage order.
Definition: bias_batch_var_4d_expr.hpp:27
auto dim(E &&value, size_t i) -> detail::identity_helper< E, dim_view< detail::build_identity_type< E >, D >>
Return a view representing the ith slice along dimension D.
Definition: view_expression_builder.hpp:25
std::conditional_t< is_etl_value< T >, const std::decay_t< T > &, std::decay_t< T > > build_type
Helper to build the type for a sub expression.
Definition: expression_helpers.hpp:24
void assign_add_to(L &&lhs) const
Add to the given left-hand-side expression.
Definition: bias_batch_var_4d_expr.hpp:140
constexpr bool is_transformer
Traits indicating if the given ETL type is a transformer expression.
Definition: traits.hpp:297
value_t< E > sum(E &&values)
Returns the sum of all the values contained in the given expression.
Definition: expression_builder.hpp:624
static constexpr bool gpu_computable
Indicates if the temporary expression can be directly evaluated using only GPU.
Definition: bias_batch_var_4d_expr.hpp:33
decltype(auto) smart_forward_gpu(E &expr)
Smart forwarding for a temporary expression that will be computed in GPU.
Definition: helpers.hpp:343
constexpr size_t size(const E &expr) noexcept
Returns the size of the given ETL expression.
Definition: helpers.hpp:108
requires(D > 0) struct dyn_base
Matrix with run-time fixed dimensions.
Definition: dyn_base.hpp:113
constexpr bool is_view
Traits indicating if the given ETL type is a view expression.
Definition: traits.hpp:304
static constexpr bool is_fast
Indicates if T is a fast structure.
Definition: traits_base.hpp:25
A bias_batch_var_4d expression (batch variance of a 4D expression with respect to a 1D expression).
Definition: bias_batch_var_4d_expr.hpp:21
void assign_mod_to(L &&lhs) const
Modulo the given left-hand-side expression.
Definition: bias_batch_var_4d_expr.hpp:316
constexpr bool is_thread_safe
Traits to test if the given ETL expression type is thread safe.
Definition: traits.hpp:687
typename decay_traits< E >::value_type value_t
Traits to extract the value type out of an ETL type.
Definition: tmp.hpp:81
value_t< A > value_type
The value type of the expression.
Definition: bias_batch_var_4d_expr.hpp:375
void inc_counter([[maybe_unused]] const char *name)
Increase the given counter.
Definition: counters.hpp:25
std::add_lvalue_reference_t< A > a()
Returns the sub expression.
Definition: base_temporary_expr.hpp:577
void assign_mul_to(L &&lhs) const
Multiply the given left-hand-side expression.
Definition: bias_batch_var_4d_expr.hpp:228