33 #include "etl/vec_eval_functors.hpp" 42 namespace standard_evaluator {
50 expr.visit(eval_visitor);
58 template <
typename Fun,
typename E,
typename R>
61 auto slice_functor = [&](
auto&& lhs,
auto&& rhs) { Fun::apply(lhs, rhs); };
63 engine_dispatch_1d_slice_binary(result, expr, slice_functor, 0);
65 Fun::apply(result, expr);
79 template <
typename E,
typename R>
83 for (
size_t i = 0; i <
etl::size(result); ++i) {
84 result[i] = expr.read_flat(i);
97 template <
typename E,
typename R>
99 if constexpr (!is_gpu_dyn_matrix<R>) {
101 cpp_assert(expr.is_cpu_up_to_date() || expr.is_gpu_up_to_date(),
"expr must be in valid state");
103 if (expr.is_cpu_up_to_date()) {
106 direct_copy(expr.memory_start(), expr.memory_end(), result.memory_start());
108 result.validate_cpu();
111 if (expr.is_gpu_up_to_date()) {
114 bool cpu_status = expr.is_cpu_up_to_date();
116 result.ensure_gpu_allocated();
117 result.gpu_copy_from(expr.gpu_memory());
121 result.validate_cpu();
128 if (!expr.is_cpu_up_to_date()) {
129 result.invalidate_cpu();
132 if (!expr.is_gpu_up_to_date()) {
133 result.invalidate_gpu();
136 cpp_assert(expr.is_cpu_up_to_date() == result.is_cpu_up_to_date(),
"fast_assign must preserve CPU status");
137 cpp_assert(expr.is_gpu_up_to_date() == result.is_gpu_up_to_date(),
"fast_assign must preserve GPU status");
139 direct_copy(expr.memory_start(), expr.memory_end(), result.memory_start());
143 cpp_assert(expr.is_gpu_up_to_date(),
"expr must be in valid state");
147 result.ensure_gpu_allocated();
148 result.gpu_copy_from(expr.gpu_memory());
153 result.validate_gpu();
154 result.invalidate_cpu();
156 cpp_assert(result.is_gpu_up_to_date(),
"fast_assign must preserve GPU status");
158 cpp_unreachable(
"gpu_dyn_matrix should never be used without GPU support");
172 template <
typename E,
typename R>
174 static_assert(!is_gpu_dyn_matrix<R>,
"gpu_dyn_matrix should not be used here");
178 expr.ensure_cpu_up_to_date();
180 direct_copy(expr.memory_start(), expr.memory_end(), result.memory_start());
182 result.validate_cpu();
183 result.invalidate_gpu();
194 template <
typename E,
typename R>
198 result.ensure_gpu_allocated();
200 if constexpr (is_binary_expr<E>) {
201 if (expr.alias(result)) {
206 result.gpu_copy_from(t2.gpu_memory());
217 result.validate_gpu();
218 result.invalidate_cpu();
230 template <
typename E,
typename R>
235 if constexpr (is_thread_safe<E>) {
239 par_exec<detail::Assign>(expr, result);
249 result.validate_cpu();
250 result.invalidate_gpu();
262 template <
typename E,
typename R>
267 constexpr
auto V = detail::select_vector_mode<E, R>();
269 if constexpr (is_thread_safe<E>) {
273 par_exec<detail::VectorizedAssign<V>>(expr, result);
283 result.validate_cpu();
284 result.invalidate_gpu();
289 template <
typename E,
typename R>
290 void assign_evaluate_impl_no_gpu(E&& expr, R&& result) {
291 if constexpr (detail::standard_assign_no_gpu<E, R>) {
293 }
else if constexpr (std::is_same_v<
value_t<E>,
value_t<R>> && detail::fast_assign_no_gpu<E, R>) {
295 }
else if constexpr (!std::is_same_v<
value_t<E>,
value_t<R>> && detail::fast_assign_no_gpu<E, R>) {
297 }
else if constexpr (detail::direct_assign_no_gpu<E, R>) {
299 }
else if constexpr (detail::vectorized_assign_no_gpu<E, R>) {
304 template <
typename E,
typename R>
305 void assign_evaluate_impl(E&& expr, R&& result) {
306 if constexpr (detail::standard_assign<E, R>) {
312 }
else if constexpr (detail::gpu_assign<E, R>) {
314 assign_evaluate_impl_no_gpu(expr, result);
318 }
else if constexpr (detail::direct_assign<E, R>) {
320 }
else if constexpr (detail::vectorized_assign<E, R>) {
335 template <
typename E,
typename R>
341 for (
size_t i = 0; i <
etl::size(result); ++i) {
342 result[i] += expr[i];
345 result.validate_cpu();
346 result.invalidate_gpu();
358 template <
typename E,
typename R>
365 if constexpr (is_thread_safe<E>) {
369 par_exec<detail::AssignAdd>(expr, result);
379 result.validate_cpu();
380 result.invalidate_gpu();
392 template <
typename E,
typename R>
399 constexpr
auto V = detail::select_vector_mode<E, R>();
401 if constexpr (is_thread_safe<E>) {
405 par_exec<detail::VectorizedAssignAdd<V>>(expr, result);
415 result.validate_cpu();
416 result.invalidate_gpu();
419 #ifdef ETL_CUBLAS_MODE 430 template <
typename E,
typename R>
431 void gpu_compound_add_impl(E& expr, R& result) {
434 result.ensure_gpu_up_to_date();
440 impl::egblas::axpy(
etl::size(result), alpha, t1.gpu_memory(), 1, result.gpu_memory(), 1);
443 result.validate_gpu();
444 result.invalidate_cpu();
449 #ifdef ETL_EGBLAS_MODE 460 template <
typename E,
typename R>
461 void gpu_compound_add_scalar_impl(E& expr, R& result) {
464 result.ensure_gpu_up_to_date();
467 impl::egblas::scalar_add(result.gpu_memory(),
etl::size(result), 1, expr.value);
470 result.validate_gpu();
471 result.invalidate_cpu();
486 template <
typename E,
typename R>
488 if constexpr (detail::standard_compound_no_gpu<E, R>) {
490 }
else if constexpr (detail::direct_compound_no_gpu<E, R>) {
492 }
else if constexpr (detail::vectorized_compound_no_gpu<E, R>) {
502 template <
typename E,
typename R>
504 if constexpr (detail::standard_compound<E, R>) {
506 }
else if constexpr (detail::direct_compound<E, R>) {
508 }
else if constexpr (detail::vectorized_compound<E, R>) {
510 }
else if constexpr (
cublas_enabled && detail::gpu_compound<E, R> && !is_scalar<E>) {
514 gpu_compound_add_impl(expr, result);
516 }
else if constexpr (
egblas_enabled && detail::gpu_compound<E, R> && is_scalar<E>) {
520 gpu_compound_add_scalar_impl(expr, result);
535 template <
typename E,
typename R>
544 for (
size_t i = 0; i <
etl::size(result); ++i) {
545 result[i] -= expr[i];
548 result.validate_cpu();
549 result.invalidate_gpu();
561 template <
typename E,
typename R>
568 if constexpr (is_thread_safe<E>) {
571 par_exec<detail::AssignSub>(expr, result);
582 result.validate_cpu();
583 result.invalidate_gpu();
595 template <
typename E,
typename R>
602 constexpr
auto V = detail::select_vector_mode<E, R>();
604 if constexpr (is_thread_safe<E>) {
608 par_exec<detail::VectorizedAssignSub<V>>(expr, result);
618 result.validate_cpu();
619 result.invalidate_gpu();
630 template <
typename E,
typename R>
634 result.ensure_gpu_up_to_date();
640 impl::egblas::axpy(
etl::size(result), alpha, t1.gpu_memory(), 1, result.gpu_memory(), 1);
643 result.validate_gpu();
644 result.invalidate_cpu();
647 #ifdef ETL_EGBLAS_MODE 657 template <
typename E,
typename R>
658 void gpu_compound_sub_scalar_impl(E& expr, R& result) {
661 result.ensure_gpu_up_to_date();
664 auto value = -expr.value;
665 impl::egblas::scalar_add(result.gpu_memory(),
etl::size(result), 1, value);
668 result.validate_gpu();
669 result.invalidate_cpu();
684 template <
typename E,
typename R>
686 if constexpr (detail::standard_compound_no_gpu<E, R>) {
688 }
else if constexpr (detail::direct_compound_no_gpu<E, R>) {
690 }
else if constexpr (detail::vectorized_compound_no_gpu<E, R>) {
700 template <
typename E,
typename R>
702 if constexpr (detail::standard_compound<E, R>) {
704 }
else if constexpr (detail::direct_compound<E, R>) {
706 }
else if constexpr (detail::vectorized_compound<E, R>) {
708 }
else if constexpr (
cublas_enabled && detail::gpu_compound<E, R> && !is_scalar<E>) {
714 }
else if constexpr (
egblas_enabled && detail::gpu_compound<E, R> && is_scalar<E>) {
718 gpu_compound_sub_scalar_impl(expr, result);
733 template <
typename E,
typename R>
742 for (
size_t i = 0; i <
etl::size(result); ++i) {
743 result[i] *= expr[i];
746 result.validate_cpu();
747 result.invalidate_gpu();
759 template <
typename E,
typename R>
766 if constexpr (is_thread_safe<E>) {
770 par_exec<detail::AssignMul>(expr, result);
780 result.validate_cpu();
781 result.invalidate_gpu();
793 template <
typename E,
typename R>
800 constexpr
auto V = detail::select_vector_mode<E, R>();
802 if constexpr (is_thread_safe<E>) {
806 par_exec<detail::VectorizedAssignMul<V>>(expr, result);
816 result.validate_cpu();
817 result.invalidate_gpu();
820 #ifdef ETL_EGBLAS_MODE 830 template <
typename E,
typename R>
831 void gpu_compound_mul_impl(E& expr, R& result) {
834 result.ensure_gpu_up_to_date();
840 impl::egblas::axmy(
etl::size(result), alpha, t1.gpu_memory(), 1, result.gpu_memory(), 1);
843 result.validate_gpu();
844 result.invalidate_cpu();
857 template <
typename E,
typename R>
861 result.ensure_gpu_up_to_date();
864 impl::egblas::scalar_mul(result.gpu_memory(),
etl::size(result), 1, expr.value);
867 result.validate_gpu();
868 result.invalidate_cpu();
881 template <
typename E,
typename R>
883 if constexpr (detail::standard_compound_no_gpu<E, R>) {
885 }
else if constexpr (detail::direct_compound_no_gpu<E, R>) {
887 }
else if constexpr (detail::vectorized_compound_no_gpu<E, R>) {
897 template <
typename E,
typename R>
899 if constexpr (detail::standard_compound<E, R>) {
901 }
else if constexpr (detail::direct_compound<E, R>) {
903 }
else if constexpr (detail::vectorized_compound<E, R>) {
905 }
else if constexpr (
egblas_enabled && detail::gpu_compound<E, R> && !is_scalar<E>) {
909 gpu_compound_mul_impl(expr, result);
911 }
else if constexpr (
cublas_enabled && detail::gpu_compound<E, R> && is_scalar<E>) {
930 template <
typename E,
typename R>
939 for (
size_t i = 0; i <
etl::size(result); ++i) {
940 result[i] /= expr[i];
943 result.validate_cpu();
944 result.invalidate_gpu();
956 template <
typename E,
typename R>
963 if constexpr (is_thread_safe<E>) {
967 par_exec<detail::AssignDiv>(expr, result);
977 result.validate_cpu();
978 result.invalidate_gpu();
990 template <
typename E,
typename R>
997 constexpr
auto V = detail::select_vector_mode<E, R>();
999 if constexpr (is_thread_safe<E>) {
1003 par_exec<detail::VectorizedAssignDiv<V>>(expr, result);
1013 result.validate_cpu();
1014 result.invalidate_gpu();
1025 template <
typename E,
typename R>
1029 result.ensure_gpu_up_to_date();
1035 impl::egblas::axdy(
etl::size(result), alpha, t1.gpu_memory(), 1, result.gpu_memory(), 1);
1038 result.validate_gpu();
1039 result.invalidate_cpu();
1050 template <
typename E,
typename R>
1054 result.ensure_gpu_up_to_date();
1058 impl::egblas::scalar_mul(result.gpu_memory(),
etl::size(result), 1, value);
1061 result.validate_gpu();
1062 result.invalidate_cpu();
1075 template <
typename E,
typename R>
1077 if constexpr (detail::standard_compound_div_no_gpu<E, R>) {
1079 }
else if constexpr (detail::direct_compound_div_no_gpu<E, R>) {
1081 }
else if constexpr (detail::vectorized_compound_div_no_gpu<E, R>) {
1091 template <
typename E,
typename R>
1093 if constexpr (detail::standard_compound_div<E, R>) {
1095 }
else if constexpr (detail::direct_compound_div<E, R>) {
1097 }
else if constexpr (detail::vectorized_compound_div<E, R>) {
1099 }
else if constexpr (
egblas_enabled && detail::gpu_compound_div<E, R> && !is_scalar<E>) {
1105 }
else if constexpr (
cublas_enabled && detail::gpu_compound_div<E, R> && is_scalar<E>) {
1121 template <
typename E,
typename R>
1130 for (
size_t i = 0; i <
etl::size(result); ++i) {
1131 result[i] %= expr[i];
1134 result.validate_cpu();
1135 result.invalidate_gpu();
1143 template <
typename E,
typename R>
1145 if constexpr (!detail::gpu_assign<E, R>) {
1151 assign_evaluate_impl(expr, result);
1164 template <
typename Expr,
typename Result>
1167 || all_1d<Expr, Result>
1175 template <
typename Expr,
typename Result>
1177 #ifdef DEBUG_EVALUATOR 1178 std::cout << result <<
"=" << expr << std::endl;
1181 if constexpr (direct_assign_compatible<Expr, Result>) {
1182 standard_evaluator::assign_evaluate(expr, result);
1185 standard_evaluator::assign_evaluate(
transpose(expr), result);
1194 template <
typename Expr,
typename Result>
1196 #ifdef DEBUG_EVALUATOR 1197 std::cout << result <<
"+=" << expr << std::endl;
1200 if constexpr (direct_assign_compatible<Expr, Result>) {
1201 standard_evaluator::add_evaluate(expr, result);
1204 standard_evaluator::add_evaluate(
transpose(expr), result);
1213 template <
typename Expr,
typename Result>
1215 #ifdef DEBUG_EVALUATOR 1216 std::cout << result <<
"-=" << expr << std::endl;
1219 if constexpr (direct_assign_compatible<Expr, Result>) {
1220 standard_evaluator::sub_evaluate(expr, result);
1223 standard_evaluator::sub_evaluate(
transpose(expr), result);
1232 template <
typename Expr,
typename Result>
1234 #ifdef DEBUG_EVALUATOR 1235 std::cout << result <<
"*=" << expr << std::endl;
1238 if constexpr (direct_assign_compatible<Expr, Result>) {
1239 standard_evaluator::mul_evaluate(expr, result);
1242 standard_evaluator::mul_evaluate(
transpose(expr), result);
1251 template <
typename Expr,
typename Result>
1253 #ifdef DEBUG_EVALUATOR 1254 std::cout << result <<
"/=" << expr << std::endl;
1257 if constexpr (direct_assign_compatible<Expr, Result>) {
1258 standard_evaluator::div_evaluate(expr, result);
1261 standard_evaluator::div_evaluate(
transpose(expr), result);
1270 template <
typename Expr,
typename Result>
1272 #ifdef DEBUG_EVALUATOR 1273 std::cout << result <<
"%=" << expr << std::endl;
1276 if constexpr (direct_assign_compatible<Expr, Result>) {
1277 standard_evaluator::mod_evaluate(expr, result);
1280 standard_evaluator::mod_evaluate(
transpose(expr), result);
1291 template <
typename Expr>
1293 standard_evaluator::pre_assign_rhs(expr);
void gpu_compound_mul_scalar_impl(E &expr, R &result)
Multiply the result by the result of the expression.
Definition: evaluator.hpp:858
constexpr int complexity([[maybe_unused]] const E &expr) noexcept
Return the complexity of the expression.
Definition: helpers.hpp:38
void sub_evaluate_no_gpu(E &&expr, R &&result)
Subtract the result of the expression from the result.
Definition: evaluator.hpp:685
static void apply(L_Expr &&lhs, R_Expr &&rhs)
Assign rhs to lhs.
Definition: linear_eval_functors.hpp:125
auto max(L &&lhs, R &&rhs)
Create an expression with the max value of lhs or rhs.
Definition: expression_builder.hpp:65
void standard_compound_div_impl(E &expr, R &result)
Divide the result by the result of the expression.
Definition: evaluator.hpp:931
void gpu_compound_div_scalar_impl(E &expr, R &result)
Divide the result by the result of the expression.
Definition: evaluator.hpp:1051
void std_assign_evaluate(Expr &&expr, Result &&result)
Evaluation of the expr into result.
Definition: evaluator.hpp:1176
static void apply(L_Expr &&lhs, R_Expr &&rhs)
Assign rhs to lhs.
Definition: linear_eval_functors.hpp:93
void fast_assign_impl(E &expr, R &result)
Assign the result of the expression to the result.
Definition: evaluator.hpp:173
void gpu_compound_sub_impl(E &expr, R &result)
Subtract the result of the expression from the result.
Definition: evaluator.hpp:631
static void apply(L_Expr &&lhs, R_Expr &&rhs)
Compute the vectorized iterations of the loop using aligned store operations.
Definition: vec_eval_functors.hpp:113
void vectorized_compound_div_impl(E &expr, R &result)
Divide the result by the result of the expression.
Definition: evaluator.hpp:991
void vectorized_compound_add_impl(E &expr, R &result)
Add the result of the expression to the result.
Definition: evaluator.hpp:393
void standard_compound_mul_impl(E &expr, R &result)
Multiply the result by the result of the expression.
Definition: evaluator.hpp:734
constexpr bool cuda_enabled
Indicates if CUDA is available.
Definition: config.hpp:94
void direct_copy(const S *first, const S *last, T *target)
Performs a direct memory copy.
Definition: memory.hpp:24
void force(Expr &&expr)
Force the internal evaluation of an expression.
Definition: evaluator.hpp:1292
void mul_evaluate(E &&expr, R &&result)
Multiply the result by the result of the expression.
Definition: evaluator.hpp:898
void standard_assign_impl(E &expr, R &result)
Assign the result of the expression to the result.
Definition: evaluator.hpp:80
void direct_compound_sub_impl(E &expr, R &result)
Subtract the result of the expression from the result.
Definition: evaluator.hpp:562
void vectorized_compound_mul_impl(E &expr, R &result)
Multiply the result by the result of the expression.
Definition: evaluator.hpp:794
void direct_assign_impl(E &expr, R &result)
Assign the result of the expression to the result.
Definition: evaluator.hpp:231
Contains TMP selectors to select evaluation methods based on configuration.
static void apply(L_Expr &&lhs, R_Expr &&rhs)
Assign rhs to lhs.
Definition: linear_eval_functors.hpp:29
auto transpose(const E &value)
Returns the transpose of the given expression.
Definition: expression_builder.hpp:528
void standard_compound_sub_impl(E &expr, R &result)
Subtract the result of the expression from the result.
Definition: evaluator.hpp:536
void div_evaluate(E &&expr, R &&result)
Divide the result by the result of the expression.
Definition: evaluator.hpp:1092
Traits to get information about ETL types.
Definition: tmp.hpp:68
Root namespace for the ETL library.
Definition: adapter.hpp:15
context & local_context()
Return the configuration context of the current thread.
Definition: context.hpp:50
constexpr bool egblas_enabled
Indicates if the EGBLAS library is available for ETL.
Definition: config.hpp:119
constexpr bool cublas_enabled
Indicates if the NVIDIA CUBLAS library is available for ETL.
Definition: config.hpp:99
void par_exec(E &&expr, R &&result)
Assign the result of the expression to the result with the given Functor, using parallel implementation...
Definition: evaluator.hpp:59
Visitor to perform local evaluation when necessary.
Definition: eval_visitors.hpp:23
constexpr size_t parallel_threshold
The minimum number of elements before considering parallel implementation.
Definition: threshold.hpp:66
void std_mod_evaluate(Expr &&expr, Result &&result)
Compound modulo evaluation of the expr into result.
Definition: evaluator.hpp:1271
void add_evaluate(E &&expr, R &&result)
Add the result of the expression to the result.
Definition: evaluator.hpp:503
static void apply(L_Expr &&lhs, R_Expr &&rhs)
Compute the vectorized iterations of the loop using aligned store operations.
Definition: vec_eval_functors.hpp:156
void pre_assign_rhs(E &&expr)
Allocate temporaries and evaluate sub expressions in RHS.
Definition: evaluator.hpp:48
void add_evaluate_no_gpu(E &&expr, R &&result)
Add the result of the expression to the result.
Definition: evaluator.hpp:487
void std_mul_evaluate(Expr &&expr, Result &&result)
Compound multiply evaluation of the expr into result.
Definition: evaluator.hpp:1233
void mul_evaluate_no_gpu(E &&expr, R &&result)
Multiply the result by the result of the expression.
Definition: evaluator.hpp:882
void sub_evaluate(E &&expr, R &&result)
Subtract the result of the expression from the result.
Definition: evaluator.hpp:701
void gpu_compound_div_impl(E &expr, R &result)
Divide the result by the result of the expression.
Definition: evaluator.hpp:1026
void gpu_assign_impl(E &expr, R &result)
Assign the result of the expression to the result.
Definition: evaluator.hpp:195
void safe_ensure_cpu_up_to_date(E &&expr)
Ensure that the CPU is up to date.
Definition: helpers.hpp:278
void assign_evaluate(E &&expr, R &&result)
Assign the result of the expression to the result.
Definition: evaluator.hpp:1144
constexpr size_t size(const E &expr) noexcept
Returns the size of the given ETL expression.
Definition: helpers.hpp:108
void div_evaluate_no_gpu(E &&expr, R &&result)
Divide the result by the result of the expression.
Definition: evaluator.hpp:1076
static void apply(L_Expr &&lhs, R_Expr &&rhs)
Compute the vectorized iterations of the loop using aligned store operations.
Definition: vec_eval_functors.hpp:52
static void apply(L_Expr &&lhs, R_Expr &&rhs)
Compute the vectorized iterations of the loop using aligned store operations.
Definition: vec_eval_functors.hpp:242
void standard_compound_add_impl(E &expr, R &result)
Add the result of the expression to the result.
Definition: evaluator.hpp:336
void direct_compound_div_impl(E &expr, R &result)
Divide the result by the result of the expression.
Definition: evaluator.hpp:957
void direct_compound_add_impl(E &expr, R &result)
Add the result of the expression to the result.
Definition: evaluator.hpp:359
void direct_compound_mul_impl(E &expr, R &result)
Multiply the result by the result of the expression.
Definition: evaluator.hpp:760
void vectorized_assign_impl(E &expr, R &result)
Assign the result of the expression to the result.
Definition: evaluator.hpp:263
static void apply(L_Expr &&lhs, R_Expr &&rhs)
Assign rhs to lhs.
Definition: linear_eval_functors.hpp:157
bool is_something_forced()
Indicates if some implementation is forced in the context.
Definition: context.hpp:60
void std_sub_evaluate(Expr &&expr, Result &&result)
Compound subtract evaluation of the expr into result.
Definition: evaluator.hpp:1214
void vectorized_compound_sub_impl(E &expr, R &result)
Subtract the result of the expression from the result.
Definition: evaluator.hpp:596
constexpr bool direct_assign_compatible
Traits indicating if a direct assign is possible.
Definition: evaluator.hpp:1165
constexpr bool parallel_support
Indicates if support for parallelization is integrated into the framework.
Definition: config.hpp:51
static void apply(L_Expr &&lhs, R_Expr &&rhs)
Assign rhs to lhs.
Definition: linear_eval_functors.hpp:61
static void apply(L_Expr &&lhs, R_Expr &&rhs)
Compute the vectorized iterations of the loop using aligned store operations.
Definition: vec_eval_functors.hpp:199
bool engine_select_parallel([[maybe_unused]] size_t n, [[maybe_unused]] size_t threshold=parallel_threshold)
Indicates if a 1D evaluation should run in parallel.
Definition: parallel_support.hpp:679
decltype(auto) smart_gpu_compute_hint(E &expr, Y &y)
Compute the expression into a representation that is GPU up to date.
Definition: helpers.hpp:368
Contains the linear functors used by the evaluator to perform its actions.
typename decay_traits< E >::value_type value_t
Traits to extract the value type out of an ETL type.
Definition: tmp.hpp:81
void std_div_evaluate(Expr &&expr, Result &&result)
Compound divide evaluation of the expr into result.
Definition: evaluator.hpp:1252
void mod_evaluate(E &&expr, R &&result)
Modulo the result by the result of the expression.
Definition: evaluator.hpp:1122
void inc_counter([[maybe_unused]] const char *name)
Increase the given counter.
Definition: counters.hpp:25
decltype(auto) smart_gpu_compute(X &x, Y &y)
Compute the expression into a representation that is GPU up to date and store this representation in ...
Definition: helpers.hpp:397
void std_add_evaluate(Expr &&expr, Result &&result)
Compound add evaluation of the expr into result.
Definition: evaluator.hpp:1195
void fast_assign_impl_full(E &expr, R &result)
Assign the result of the expression to the result.
Definition: evaluator.hpp:98