Expression Templates Library (ETL)
base_temporary_expr.hpp
1 //=======================================================================
2 // Copyright (c) 2014-2023 Baptiste Wicht
3 // Distributed under the terms of the MIT License.
4 // (See accompanying file LICENSE or copy at
5 // http://opensource.org/licenses/MIT)
6 //=======================================================================
7 
8 #pragma once
9 
10 #include <memory> //For shared_ptr
11 
12 #include "etl/concepts.hpp"
13 #include "etl/iterator.hpp"
14 
15 namespace etl {
16 
// Implementation details used to select the result type of a temporary expression.
17 namespace temporary_detail {
18 
// Traits to build the result type of a temporary expression.
// E is the expression type; Fast selects which specialization is used.
// Only declared here: the two specializations below cover both values of Fast.
24 template <typename E, bool Fast>
25 struct expr_result;
26 
// Specialization selected when Fast is false.
// NOTE(review): the alias below reads a nested `::type`, but no member is
// visible in this body -- presumably the declaration was lost; confirm
// against the original header.
30 template <typename E>
31 struct expr_result<E, false> {
36 };
37 
// Specialization selected when Fast is true.
// NOTE(review): same remark as above, the nested `type` member is not visible
// here -- confirm against the original header.
41 template <typename E>
42 struct expr_result<E, true> {
47 };
48 
// Helper alias resolving to expr_result<E, Fast && is_fast<E>>::type.
// The `true` specialization is therefore only picked when the caller asks for
// Fast AND is_fast<E> holds; every other combination uses the `false` one.
// Note the parameter order (Fast first, then E) differs from expr_result.
53 template <bool Fast, typename E>
54 using expr_result_t = typename expr_result<E, Fast && is_fast<E>>::type;
55 
56 } // namespace temporary_detail
57 
65 template <typename D, bool Fast>
66 struct base_temporary_expr : value_testable<D>, dim_testable<D>, iterable<const D, true> {
67  using derived_t = D;
69  using result_type = temporary_detail::expr_result_t<Fast, D>;
71  using const_memory_type = const value_type*;
72 
73 protected:
74  mutable std::shared_ptr<bool> evaluated;
75  mutable std::shared_ptr<result_type> _c;
76 
77 public:
81  base_temporary_expr() : evaluated(std::make_shared<bool>(false)) {
82  // Nothing else to init
83  }
84 
88  base_temporary_expr(const base_temporary_expr& expr) = default;
89 
95  base_temporary_expr(base_temporary_expr&& rhs) : evaluated(std::move(rhs.evaluated)), _c(std::move(rhs._c)) {
96  //Nothing else to change
97  }
98 
99  //Expressions are invariant
100  base_temporary_expr& operator=(const base_temporary_expr& /*e*/) = delete;
101  base_temporary_expr& operator=(base_temporary_expr&& /*e*/) = delete;
102 
106  template <typename VV = default_vec>
107  using vec_type = typename VV::template vec_type<value_type>;
108 
109 protected:
114  derived_t& as_derived() noexcept {
115  return *static_cast<derived_t*>(this);
116  }
117 
122  const derived_t& as_derived() const noexcept {
123  return *static_cast<const derived_t*>(this);
124  }
125 
131  bool is_allocated() const noexcept {
132  return _c.get();
133  }
134 
140  bool is_evaluated() const noexcept {
141  return *evaluated;
142  }
143 
144 protected:
150  void evaluate() const {
151  if (!*evaluated) {
152  cpp_assert(is_allocated(), "The result has not been allocated");
153  as_derived().assign_to(*_c);
154  *evaluated = true;
155  }
156  }
157 
161  void allocate_temporary() const {
162  if (!_c) {
163  _c.reset(allocate());
164  }
165  }
166 
170  template <size_t... I>
171  result_type* dyn_allocate(std::index_sequence<I...> /*seq*/) const {
172  return new result_type(decay_traits<derived_t>::dim(as_derived(), I)...);
173  }
174 
179  if constexpr (is_fast<derived_t>) {
180  return new result_type;
181  } else {
182  return dyn_allocate(std::make_index_sequence<decay_traits<derived_t>::dimensions()>());
183  }
184  }
185 
186 public:
187  //Apply the expression
188 
194  value_type operator[](size_t i) const {
195  return result()[i];
196  }
197 
204  value_type read_flat(size_t i) const {
205  return result().read_flat(i);
206  }
207 
213  template <size_c... S>
214  value_type operator()(S... args) const requires(sizeof...(S) == safe_dimensions<derived_t>) {
215  return result()(args...);
216  }
217 
223  auto operator()(size_t i) const requires sub_capable<derived_t> {
224  return sub(as_derived(), i);
225  }
226 
233  auto slice(size_t first, size_t last) noexcept {
234  return slice(*this, first, last);
235  }
236 
243  auto slice(size_t first, size_t last) const noexcept {
244  return slice(*this, first, last);
245  }
246 
253  template <typename VV = default_vec>
254  vec_type<VV> load(size_t i) const noexcept {
255  return VV::loadu(memory_start() + i);
256  }
257 
264  template <typename VV = default_vec>
265  vec_type<VV> loadu(size_t i) const noexcept {
266  return VV::loadu(memory_start() + i);
267  }
268 
273  template <typename Y>
274  auto& gpu_compute_hint(Y& y) {
275  cpu_unused(y);
276  this->ensure_gpu_up_to_date();
277  return as_derived();
278  }
279 
284  template <typename Y>
285  const auto& gpu_compute_hint(Y& y) const {
286  cpu_unused(y);
287  this->ensure_gpu_up_to_date();
288  return as_derived();
289  }
290 
291  // Direct memory access
292 
297  memory_type memory_start() noexcept {
298  return result().memory_start();
299  }
300 
305  const_memory_type memory_start() const noexcept {
306  return result().memory_start();
307  }
308 
313  memory_type memory_end() noexcept {
314  return result().memory_end();
315  }
316 
321  const_memory_type memory_end() const noexcept {
322  return result().memory_end();
323  }
324 
329  value_type* gpu_memory() const noexcept {
330  return result().gpu_memory();
331  }
332 
336  void gpu_evict() const noexcept {
337  result().gpu_evict();
338  }
339 
343  void invalidate_cpu() const noexcept {
344  result().invalidate_cpu();
345  }
346 
350  void invalidate_gpu() const noexcept {
351  result().invalidate_gpu();
352  }
353 
357  void validate_cpu() const noexcept {
358  result().validate_cpu();
359  }
360 
364  void validate_gpu() const noexcept {
365  result().validate_gpu();
366  }
367 
372  void ensure_gpu_allocated() const {
373  result().ensure_gpu_allocated();
374  }
375 
379  void ensure_gpu_up_to_date() const {
380  result().ensure_gpu_up_to_date();
381  }
382 
387  void ensure_cpu_up_to_date() const {
388  result().ensure_cpu_up_to_date();
389  }
390 
395  void gpu_copy_from(const value_type* gpu_memory) const {
396  result().gpu_copy_from(gpu_memory);
397  }
398 
403  bool is_cpu_up_to_date() const noexcept {
404  return result().is_cpu_up_to_date();
405  }
406 
411  bool is_gpu_up_to_date() const noexcept {
412  return result().is_gpu_up_to_date();
413  }
414 
415 protected:
420  result_type& result() {
421  cpp_assert(is_allocated(), "The result has not been allocated");
422  cpp_assert(*evaluated, "The result has not been evaluated");
423  return *_c;
424  }
425 
430  const result_type& result() const {
431  cpp_assert(is_allocated(), "The result has not been allocated");
432  cpp_assert(*evaluated, "The result has not been evaluated");
433  return *_c;
434  }
435 };
436 
442 template <typename D, etl_expr A, bool Fast = true>
446 
447  A _a;
448 
449  using base_type::evaluated;
450 
455  explicit base_temporary_expr_un(A a) : _a(a) {
456  //Nothing else to init
457  }
458 
464  //Nothing else to init
465  }
466 
471  base_temporary_expr_un(base_temporary_expr_un&& e) noexcept : base_type(std::move(e)), _a(e._a) {
472  //Nothing else to init
473  }
474 
480  template <typename E>
481  bool alias(const E& rhs) const {
482  return _a.alias(rhs);
483  }
484 
489  std::add_lvalue_reference_t<A> a() {
490  return _a;
491  }
492 
497  cpp::add_const_lvalue_t<A> a() const {
498  return _a;
499  }
500 
501  // Internals
502 
507  void visit(detail::evaluator_visitor& visitor) const {
508  // If the expression is already evaluated, no need to
509  // recurse through the tree
510  if (*evaluated) {
511  return;
512  }
513 
514  this->allocate_temporary();
515 
516  _a.visit(visitor);
517 
518  this->evaluate();
519  }
520 };
521 
528 template <typename D, etl_expr A, etl_expr B, bool Fast = true>
532 
533  A _a;
534  B _b;
535 
536  using base_type::evaluated;
537 
543  explicit base_temporary_expr_bin(A a, B b) : _a(a), _b(b) {
544  //Nothing else to init
545  }
546 
551  base_temporary_expr_bin(const base_temporary_expr_bin& e) : base_type(e), _a(e._a), _b(e._b) {
552  //Nothing else to init
553  }
554 
559  base_temporary_expr_bin(base_temporary_expr_bin&& e) noexcept : base_type(std::move(e)), _a(e._a), _b(e._b) {
560  //Nothing else to init
561  }
562 
568  template <typename E>
569  bool alias(const E& rhs) const {
570  return _a.alias(rhs) || _b.alias(rhs);
571  }
572 
577  std::add_lvalue_reference_t<A> a() {
578  return _a;
579  }
580 
585  cpp::add_const_lvalue_t<A> a() const {
586  return _a;
587  }
588 
593  std::add_lvalue_reference_t<B> b() {
594  return _b;
595  }
596 
601  cpp::add_const_lvalue_t<B> b() const {
602  return _b;
603  }
604 
605  // Internals
606 
611  void visit(detail::evaluator_visitor& visitor) const {
612  // If the expression is already evaluated, no need to
613  // recurse through the tree
614  if (*evaluated) {
615  return;
616  }
617 
618  this->allocate_temporary();
619 
620  _a.visit(visitor);
621  _b.visit(visitor);
622 
623  this->evaluate();
624  }
625 };
626 
633 template <typename D, etl_expr A, etl_expr B, etl_expr C, bool Fast = true>
637 
638  A _a;
639  B _b;
640  C _c;
641 
642  using base_type::evaluated;
643 
644 public:
651  base_temporary_expr_tern(A a, B b, C c) : _a(a), _b(b), _c(c) {
652  //Nothing else to init
653  }
654 
659  base_temporary_expr_tern(const base_temporary_expr_tern& e) : base_type(e), _a(e._a), _b(e._b), _c(e._c) {
660  //Nothing else to init
661  }
662 
667  base_temporary_expr_tern(base_temporary_expr_tern&& e) noexcept : base_type(std::move(e)), _a(e._a), _b(e._b), _c(e._c) {
668  //Nothing else to init
669  }
670 
676  template <typename E>
677  bool alias(const E& rhs) const {
678  return _a.alias(rhs) || _b.alias(rhs) || _c.alias(rhs);
679  }
680 
681 protected:
686  std::add_lvalue_reference_t<A> a() {
687  return _a;
688  }
689 
694  cpp::add_const_lvalue_t<A> a() const {
695  return _a;
696  }
697 
702  std::add_lvalue_reference_t<B> b() {
703  return _b;
704  }
705 
710  cpp::add_const_lvalue_t<B> b() const {
711  return _b;
712  }
713 
718  std::add_lvalue_reference_t<C> c() {
719  return _c;
720  }
721 
726  cpp::add_const_lvalue_t<C> c() const {
727  return _c;
728  }
729 
730 public:
731  // Internals
732 
737  void visit(detail::evaluator_visitor& visitor) const {
738  // If the expression is already evaluated, no need to
739  // recurse through the tree
740  if (*evaluated) {
741  return;
742  }
743 
744  this->allocate_temporary();
745 
746  _a.visit(visitor);
747  _b.visit(visitor);
748  _c.visit(visitor);
749 
750  this->evaluate();
751  }
752 };
753 
754 } //end of namespace etl
void allocate_temporary() const
Allocate the necessary temporaries, if necessary.
Definition: base_temporary_expr.hpp:161
cpp::add_const_lvalue_t< A > a() const
Returns the sub expression.
Definition: base_temporary_expr.hpp:497
CRTP class to inject iterators functions.
Definition: iterable.hpp:23
std::add_lvalue_reference_t< B > b()
Returns the sub expression.
Definition: base_temporary_expr.hpp:702
result_type * allocate() const
Allocate the temporary.
Definition: base_temporary_expr.hpp:178
std::add_lvalue_reference_t< A > a()
Returns the sub expression.
Definition: base_temporary_expr.hpp:489
void ensure_gpu_allocated() const
Ensures that the GPU memory is allocated and that the GPU memory is up to date (to undefined value)...
Definition: sub_view.hpp:717
value_type * memory_type
The memory type.
Definition: base_temporary_expr.hpp:70
B _b
The sub expression reference.
Definition: base_temporary_expr.hpp:534
auto slice(E &&value, size_t first, size_t last) -> slice_view< detail::build_identity_type< E >>
Returns view representing a slice view of the given expression.
Definition: view_expression_builder.hpp:112
A temporary expression base.
Definition: base_temporary_expr.hpp:66
bool alias(const E &rhs) const
Test if this expression aliases with the given expression.
Definition: base_temporary_expr.hpp:569
A _a
The sub expression reference.
Definition: base_temporary_expr.hpp:533
base_temporary_expr_bin(const base_temporary_expr_bin &e)
Construct a new expression by copy.
Definition: base_temporary_expr.hpp:551
void visit(detail::evaluator_visitor &visitor) const
Apply the given visitor to this expression and its descendants.
Definition: base_temporary_expr.hpp:611
D D
The number of dimensions.
Definition: dyn_matrix_view.hpp:24
cpp::add_const_lvalue_t< A > a() const
Returns the sub expression.
Definition: base_temporary_expr.hpp:694
void visit(detail::evaluator_visitor &visitor) const
Apply the given visitor to this expression and its descendants.
Definition: base_temporary_expr.hpp:507
bool is_allocated() const noexcept
Indicates if the temporary has been allocated.
Definition: base_temporary_expr.hpp:131
bool alias(const E &rhs) const
Test if this expression aliases with the given expression.
Definition: base_temporary_expr.hpp:677
const derived_t & as_derived() const noexcept
Returns a reference to the derived object, i.e. the object using the CRTP injector.
Definition: base_temporary_expr.hpp:122
const value_type * const_memory_type
The const memory type.
Definition: base_temporary_expr.hpp:71
A _a
The first sub expression reference.
Definition: base_temporary_expr.hpp:638
Abstract base class for temporary binary expression.
Definition: base_temporary_expr.hpp:529
bool is_cpu_up_to_date() const noexcept
Indicates if the CPU memory is up to date.
Definition: sub_view.hpp:777
std::add_lvalue_reference_t< B > b()
Returns the sub expression.
Definition: base_temporary_expr.hpp:593
void gpu_copy_from([[maybe_unused]] const value_type *new_gpu_memory) const
Copy from GPU to GPU.
Definition: sub_view.hpp:761
A _a
The sub expression reference.
Definition: base_temporary_expr.hpp:447
dyn_base< this_type, T, D > base_type
The base type.
Definition: sparse.hpp:227
std::add_lvalue_reference_t< A > a()
Returns the sub expression.
Definition: base_temporary_expr.hpp:686
void gpu_evict() const noexcept
Evict the expression from GPU.
Definition: sub_view.hpp:681
CRTP class to inject functions testing values of the expressions.
Definition: value_testable.hpp:26
typename VV::template vec_type< value_type > vec_type
The vectorization type for VV.
Definition: base_temporary_expr.hpp:107
base_temporary_expr_un(A a)
Construct a new expression.
Definition: base_temporary_expr.hpp:455
bool is_evaluated() const noexcept
Indicates if the temporary has been evaluated.
Definition: base_temporary_expr.hpp:140
Build a fast_dyn_matrix type from some expression.
Definition: temporary.hpp:26
derived_t & as_derived() noexcept
Returns a reference to the derived object, i.e. the object using the CRTP injector.
Definition: base_temporary_expr.hpp:114
auto load(size_t x) const noexcept
Load several elements of the expression at once.
Definition: dyn_matrix_view.hpp:143
Traits to get information about ETL types.
Definition: tmp.hpp:68
Root namespace for the ETL library.
Definition: adapter.hpp:15
base_temporary_expr_bin(A a, B b)
Construct a new expression.
Definition: base_temporary_expr.hpp:543
static constexpr size_t dimensions()
Return the number of dimensions of the expression.
Definition: traits_base.hpp:31
cpp::add_const_lvalue_t< A > a() const
Returns the sub expression.
Definition: base_temporary_expr.hpp:585
result_type * dyn_allocate(std::index_sequence< I... >) const
Allocate the dynamic temporary.
Definition: base_temporary_expr.hpp:171
void invalidate_gpu() const noexcept
Invalidates the GPU memory.
Definition: sub_view.hpp:695
void evaluate() const
Evaluate the expression, if not evaluated.
Definition: base_temporary_expr.hpp:150
void ensure_cpu_up_to_date() const
Ensures that the CPU memory is up to date, copying back from the GPU if necessary.
Definition: dyn_matrix_view.hpp:271
bool alias(const E &rhs) const
Test if this expression aliases with the given expression.
Definition: base_temporary_expr.hpp:481
Visitor to perform local evaluation when necessary.
Definition: eval_visitors.hpp:23
void invalidate_cpu() const noexcept
Invalidates the CPU memory.
Definition: sub_view.hpp:688
temporary_detail::expr_result_t< Fast, conv_4d_valid_filter_expr< A, B, S1, S2, P1, P2, Flipped > > result_type
The result type.
Definition: base_temporary_expr.hpp:69
Abstract base class for temporary ternary expression.
Definition: base_temporary_expr.hpp:634
const_return_type operator()(size_t j) const
Access to the element at the given position.
Definition: dyn_matrix_view.hpp:89
typename decay_traits< conv_4d_valid_filter_expr< A, B, S1, S2, P1, P2, Flipped > >::value_type value_type
The value type.
Definition: base_temporary_expr.hpp:68
Matrix with run-time fixed dimensions.
Definition: dyn.hpp:26
auto loadu(size_t x) const noexcept
Load several elements of the expression at once.
Definition: dyn_matrix_view.hpp:154
value_type operator[](size_t i) const
Returns the element at the given index.
Definition: base_temporary_expr.hpp:194
cpp::add_const_lvalue_t< B > b() const
Returns the sub expression.
Definition: base_temporary_expr.hpp:601
base_temporary_expr(base_temporary_expr &&rhs)
Move construct a base_temporary_expr. The right hand side cannot be used anymore after the move...
Definition: base_temporary_expr.hpp:95
cpp::add_const_lvalue_t< C > c() const
Returns the sub expression.
Definition: base_temporary_expr.hpp:726
base_temporary_expr_tern(A a, B b, C c)
Construct a new expression.
Definition: base_temporary_expr.hpp:651
base_temporary_expr()
Construct a new base_temporary_expr.
Definition: base_temporary_expr.hpp:81
requires(D > 0) struct dyn_base
Matrix with run-time fixed dimensions.
Definition: dyn_base.hpp:113
D derived_t
The derived type.
Definition: dim_testable.hpp:46
bool is_gpu_up_to_date() const noexcept
Indicates if the GPU memory is up to date.
Definition: sub_view.hpp:785
void ensure_gpu_up_to_date() const
Ensures that the GPU memory is up to date, copying from the CPU to the GPU if necessary.
Definition: dyn_matrix_view.hpp:280
void visit(detail::evaluator_visitor &visitor) const
Apply the given visitor to this expression and its descendants.
Definition: base_temporary_expr.hpp:737
void validate_gpu() const noexcept
Validates the GPU memory.
Definition: sub_view.hpp:709
base_temporary_expr_tern(base_temporary_expr_tern &&e) noexcept
Construct a new expression by move.
Definition: base_temporary_expr.hpp:667
base_temporary_expr_un(base_temporary_expr_un &&e) noexcept
Construct a new expression by move.
Definition: base_temporary_expr.hpp:471
Abstract base class for temporary unary expression.
Definition: base_temporary_expr.hpp:443
auto allocate(size_t size, mangling_faker< S >=mangling_faker< S >())
Allocate an array of the given size for the given type.
Definition: allocator.hpp:80
Iterator implementation for ETL expressions.
B _b
The second sub expression reference.
Definition: base_temporary_expr.hpp:639
std::shared_ptr< bool > evaluated
Indicates if the expression has been evaluated.
Definition: base_temporary_expr.hpp:74
C _c
The third sub expression reference.
Definition: base_temporary_expr.hpp:640
cpp::add_const_lvalue_t< B > b() const
Returns the sub expression.
Definition: base_temporary_expr.hpp:710
typename detail::build_fast_dyn_matrix_type< E, std::make_index_sequence< decay_traits< E >::dimensions()> >::type type
The built type for the given Subs.
Definition: base_temporary_expr.hpp:46
const auto & gpu_compute_hint([[maybe_unused]] Y &y) const
Return a GPU computed version of this expression.
Definition: sub_view.hpp:653
std::shared_ptr< result_type > _c
The result reference.
Definition: base_temporary_expr.hpp:75
std::add_lvalue_reference_t< A > a()
Returns the sub expression.
Definition: base_temporary_expr.hpp:577
void validate_cpu() const noexcept
Validates the CPU memory.
Definition: sub_view.hpp:702
base_temporary_expr_tern(const base_temporary_expr_tern &e)
Construct a new expression by copy.
Definition: base_temporary_expr.hpp:659
base_temporary_expr_un(const base_temporary_expr_un &e)
Construct a new expression by copy.
Definition: base_temporary_expr.hpp:463
std::add_lvalue_reference_t< C > c()
Returns the sub expression.
Definition: base_temporary_expr.hpp:718
value_type * gpu_memory() const noexcept
Return GPU memory of this expression, if any.
Definition: sub_view.hpp:674
Traits to build the result type of a temporary expression.
Definition: base_temporary_expr.hpp:25
CRTP class to inject functions testing the dimensions.
Definition: dim_testable.hpp:45
base_temporary_expr_bin(base_temporary_expr_bin &&e) noexcept
Construct a new expression by move.
Definition: base_temporary_expr.hpp:559
value_type read_flat(size_t i) const
Returns the value at the given index This function never alters the state of the container.
Definition: base_temporary_expr.hpp:204