Expression Templates Library (ETL)
slice_view.hpp
Go to the documentation of this file.
1 //=======================================================================
2 // Copyright (c) 2014-2023 Baptiste Wicht
3 // Distributed under the terms of the MIT License.
4 // (See accompanying file LICENSE or copy at
5 // http://opensource.org/licenses/MIT)
6 //=======================================================================
7 
13 #pragma once
14 
15 namespace etl {
16 
// Specialization of slice_view used when T does not satisfy
// fast_slice_view_able. The view stores the sub expression together with the
// bounds `first` and `last` along its first dimension and forwards every
// access to the sub expression after applying the matching offset.
// NOTE(review): this listing carries embedded line numbers and skips several
// source lines (type aliases, and the branch condition of the flat accessors);
// confirm against the real header before relying on the details below.
20 template <typename T>
21 requires (!fast_slice_view_able<T>)
22 struct slice_view<T>
23  : assignable<slice_view<T>, value_t<T>>, value_testable<slice_view<T>>, iterable<slice_view<T>, fast_slice_view_able<T>> {
24  using this_type = slice_view<T>;
27  using sub_type = T;
35 
36  using assignable_base_type::operator=;
39 
40 private:
    // The sliced expression, the first index of the slice and the bound `last`
    // (the traits compute the first dimension of the view as last - first).
41  T sub;
42  const size_t first;
43  const size_t last;
44 
45  friend struct etl_traits<slice_view>;
46 
47 public:
    // Builds a view over `sub` restricted to the given bounds of its first dimension.
54  slice_view(sub_type sub, size_t first, size_t last) : sub(sub), first(first), last(last) {}
55 
    // Flat read access to element j of the view.
    // NOTE(review): the line opening the first branch is missing from this
    // listing; presumably it tests the storage order of sub_type -- confirm.
61  const_return_type operator[](size_t j) const {
    // First branch: the slice starts `first` blocks of size(sub) / dim<0>(sub) elements into sub.
63  return sub[first * (etl::size(sub) / dim<0>(sub)) + j];
64  } else {
    // Other branch: j is split into (j / ss, j % ss) and remapped with the
    // first-dimension extent `sa` as stride, shifted by `first`.
65  const auto sa = dim<0>(sub);
66  const auto ss = (etl::size(sub) / sa);
67  return sub[(j % ss) * sa + j / ss + first];
68  }
69  }
70 
    // Flat access to element j of the view (non-const overload); same index
    // mapping as the const overload above.
76  return_type operator[](size_t j) {
78  return sub[first * (etl::size(sub) / dim<0>(sub)) + j];
79  } else {
80  const auto sa = dim<0>(sub);
81  const auto ss = (etl::size(sub) / sa);
82  return sub[(j % ss) * sa + j / ss + first];
83  }
84  }
85 
    // Returns the value at flat index j through sub.read_flat, using the same
    // index mapping as operator[].
92  value_type read_flat(size_t j) const noexcept {
94  return sub.read_flat(first * (etl::size(sub) / dim<0>(sub)) + j);
95  } else {
96  const auto sa = dim<0>(sub);
97  const auto ss = (etl::size(sub) / sa);
98  return sub.read_flat((j % ss) * sa + j / ss + first);
99  }
100  }
101 
    // Multi-dimensional read access; only available when the number of indices
    // equals the number of dimensions of the sub expression. The first index is
    // shifted by `first`, the others are forwarded as size_t.
107  template <typename... S>
108  const_return_type operator()(size_t i, S... args) const requires(sizeof...(S) + 1 == decay_traits<sub_type>::dimensions()) {
109  return sub(i + first, static_cast<size_t>(args)...);
110  }
111 
    // Multi-dimensional access (non-const overload), same index shift as above.
117  template <typename... S>
118  return_type operator()(size_t i, S... args) requires(sizeof...(S) + 1 == decay_traits<sub_type>::dimensions()){
119  return sub(i + first, static_cast<size_t>(args)...);
120  }
121 
    // Single-index call, available when sub_type satisfies sub_capable:
    // returns etl::sub(*this, x).
127  auto operator()(size_t x) const requires sub_capable<sub_type> {
128  return etl::sub(*this, x);
129  }
130 
    // Vector load starting at flat position x of the view. Delegates to the
    // sub expression's loadu (same call as loadu() below).
137  template <typename V = default_vec>
138  auto load(size_t x) const noexcept {
139  return sub.template loadu<V>(x + first * (etl::size(sub) / etl::dim<0>(sub)));
140  }
141 
    // Vector load starting at flat position x of the view, through sub.loadu.
148  template <typename V = default_vec>
149  auto loadu(size_t x) const noexcept {
150  return sub.template loadu<V>(x + first * (etl::size(sub) / etl::dim<0>(sub)));
151  }
152 
    // Alias test against rhs; the answer is entirely delegated to the sub expression.
158  template <typename E>
159  bool alias(const E& rhs) const noexcept {
160  return sub.alias(rhs);
161  }
162 
163  // Assignment functions
164 
    // Each assign_*_to forwards this view and lhs to the std_*_evaluate
    // function of the same name.
169  template <typename L>
170  void assign_to(L&& lhs) const {
171  std_assign_evaluate(*this, std::forward<L>(lhs));
172  }
173 
178  template <typename L>
179  void assign_add_to(L&& lhs) const {
180  std_add_evaluate(*this, std::forward<L>(lhs));
181  }
182 
187  template <typename L>
188  void assign_sub_to(L&& lhs) const {
189  std_sub_evaluate(*this, std::forward<L>(lhs));
190  }
191 
196  template <typename L>
197  void assign_mul_to(L&& lhs) const {
198  std_mul_evaluate(*this, std::forward<L>(lhs));
199  }
200 
205  template <typename L>
206  void assign_div_to(L&& lhs) const {
207  std_div_evaluate(*this, std::forward<L>(lhs));
208  }
209 
214  template <typename L>
215  void assign_mod_to(L&& lhs) const {
216  std_mod_evaluate(*this, std::forward<L>(lhs));
217  }
218 
219  // Internals
220 
    // Forwards the evaluator visitor to the sub expression.
225  void visit(detail::evaluator_visitor& visitor) const {
226  sub.visit(visitor);
227  }
228 
    // Forwards the CPU up-to-date request to the sub expression.
233  void ensure_cpu_up_to_date() const {
234  // Need to ensure sub value
235  sub.ensure_cpu_up_to_date();
236  }
237 
    // Forwards the GPU up-to-date request to the sub expression.
242  void ensure_gpu_up_to_date() const {
243  // Only the sub expression needs to be brought up to date
244  sub.ensure_gpu_up_to_date();
245  }
246 };
247 
// Specialization of slice_view used when T satisfies fast_slice_view_able.
// The view caches a pointer (`memory`) into the storage of the sub expression,
// offset by first * (size(sub) / dim<0>(sub)) elements, and serves element and
// vector accesses directly from that pointer. It also keeps its own CPU/GPU
// up-to-date flags, initialised from the sub expression in the constructor and
// propagated back to it in the destructor.
// NOTE(review): this listing carries embedded line numbers and skips several
// source lines (some type aliases and the first statement of a few accessors);
// confirm against the real header before relying on the details below.
251 template <typename T>
252 requires (fast_slice_view_able<T>)
253 struct slice_view<T>
254  : assignable<slice_view<T>, value_t<T>>, value_testable<slice_view<T>>, iterable<slice_view<T>, true> {
255  using this_type = slice_view<T>;
258  using sub_type = T;
259  using value_type = value_t<sub_type>;
264  using iterator = memory_type;
266 
    // Vector type used by V for value_type.
270  template <typename V = default_vec>
271  using vec_type = typename V::template vec_type<value_type>;
272 
273  using assignable_base_type::operator=;
276 
277 private:
    // The sliced expression and the bounds of the slice along its first dimension.
278  T sub;
279  const size_t first;
280  const size_t last;
    // Number of elements covered by the view: (size(sub) / dim<0>(sub)) * (last - first).
281  const size_t sub_size;
282 
    // Pointer to the first element of the slice inside the sub expression's memory.
283  mutable memory_type memory;
284 
    // Local CPU/GPU status flags of the view.
285  mutable bool cpu_up_to_date;
286  mutable bool gpu_up_to_date;
287 
288  friend struct etl_traits<slice_view>;
289 
290 public:
    // Builds the view over `sub` for the given bounds, computes the cached
    // memory pointer and copies the CPU/GPU status of the sub expression.
297  slice_view(sub_type sub, size_t first, size_t last) : sub(sub), first(first), last(last), sub_size((etl::size(sub) / etl::dim<0>(sub)) * (last - first)) {
298  // Accessing the memory through fast sub views means evaluation
299  if constexpr (decay_traits<sub_type>::is_temporary) {
300  standard_evaluator::pre_assign_rhs(*this);
301  }
302 
303  this->memory = this->sub.memory_start() + first * (etl::size(this->sub) / etl::dim<0>(this->sub));
304 
305  // A sub view inherits the CPU/GPU from parent
306  this->cpu_up_to_date = this->sub.is_cpu_up_to_date();
307  this->gpu_up_to_date = this->sub.is_gpu_up_to_date();
308 
309  cpp_assert(this->memory, "Invalid memory");
310  }
311 
    // On destruction, reports a stale CPU or GPU side back to the sub
    // expression, but only when the sub expression's other side is up to date.
315  ~slice_view() {
316  if (this->memory) {
317  // Propagate the status on the parent
318  if (!this->cpu_up_to_date) {
319  if (sub.is_gpu_up_to_date()) {
320  sub.invalidate_cpu();
321  } else {
322  // If the GPU is not up to date, cannot invalidate the CPU too
324  }
325  }
326 
327  if (!this->gpu_up_to_date) {
328  if (sub.is_cpu_up_to_date()) {
329  sub.invalidate_gpu();
330  } else {
331  // If the CPU is not up to date, cannot invalidate the GPU too
333  }
334  }
335  }
336  }
337 
    // Flat read access to element j of the view, straight from the cached pointer.
343  const_return_type operator[](size_t j) const {
345  return memory[j];
346  }
347 
    // Flat access to element j (non-const overload). The GPU flag of the view
    // is cleared before the element is handed out.
353  return_type operator[](size_t j) {
355  invalidate_gpu();
356  return memory[j];
357  }
358 
    // Returns the value at flat index j from the cached pointer.
365  value_type read_flat(size_t j) const noexcept {
367  return memory[j];
368  }
369 
    // Multi-dimensional read access; only available when the number of indices
    // equals the number of dimensions of the sub expression. The flat position
    // is computed by dyn_index on this view.
375  template <typename... S>
376  const_return_type operator()(size_t i, S... args) const requires(sizeof...(S) + 1 == decay_traits<sub_type>::dimensions()) {
378  return memory[dyn_index(*this, i, args...)];
379  }
380 
    // Multi-dimensional access (non-const overload); clears the GPU flag of
    // the view before returning the element.
386  template <typename... S>
387  return_type operator()(size_t i, S... args) requires(sizeof...(S) + 1 == decay_traits<sub_type>::dimensions()) {
389  invalidate_gpu();
390  return memory[dyn_index(*this, i, args...)];
391  }
392 
    // Single-index call, available when sub_type satisfies sub_capable:
    // returns etl::sub(*this, x).
398  auto operator()(size_t x) const requires sub_capable<sub_type> {
399  return etl::sub(*this, x);
400  }
401 
    // Vector load at flat position x; implemented with V::loadu.
408  template <typename V = default_vec>
409  auto load(size_t x) const noexcept {
410  return V::loadu(memory + x);
411  }
412 
    // Vector store of `in` at flat position x; implemented with V::storeu.
419  template <typename V = default_vec>
420  void store(vec_type<V> in, size_t x) noexcept {
421  return V::storeu(memory + x, in);
422  }
423 
    // Vector store of `in` at flat position x; implemented with V::storeu.
430  template <typename V = default_vec>
431  void storeu(vec_type<V> in, size_t x) noexcept {
432  return V::storeu(memory + x, in);
433  }
434 
    // Vector store of `in` at flat position x; currently the same V::storeu
    // call as store()/storeu() (see the TODO below).
441  template <typename V = default_vec>
442  void stream(vec_type<V> in, size_t x) noexcept {
443  //TODO If the slice view is aligned (at compile-time), use stream store here
444  return V::storeu(memory + x, in);
445  }
446 
    // Vector load at flat position x; implemented with V::loadu.
453  template <typename V = default_vec>
454  auto loadu(size_t x) const noexcept {
455  return V::loadu(memory + x);
456  }
457 
    // Alias test against rhs: when E satisfies is_dma the memory ranges of the
    // two expressions are compared with memory_alias, otherwise the question is
    // delegated to the sub expression.
463  template <typename E>
464  bool alias(const E& rhs) const noexcept {
465  if constexpr (is_dma<E>) {
466  return memory_alias(memory_start(), memory_end(), rhs.memory_start(), rhs.memory_end());
467  } else {
468  return sub.alias(rhs);
469  }
470  }
471 
    // Brings the GPU side up to date and returns this view; y is unused.
476  template <typename Y>
477  auto& gpu_compute_hint([[maybe_unused]] Y& y) {
478  this->ensure_gpu_up_to_date();
479  return *this;
480  }
481 
    // Const overload of gpu_compute_hint; same behaviour.
486  template <typename Y>
487  const auto& gpu_compute_hint([[maybe_unused]] Y& y) const {
488  this->ensure_gpu_up_to_date();
489  return *this;
490  }
491 
    // Pointer to the first element of the view.
496  memory_type memory_start() noexcept {
497  return memory;
498  }
499 
    // Pointer to the first element of the view (const overload).
504  const_memory_type memory_start() const noexcept {
505  return memory;
506  }
507 
    // Pointer one past the last element of the view (memory + sub_size).
512  memory_type memory_end() noexcept {
513  return memory + sub_size;
514  }
515 
    // Pointer one past the last element of the view (const overload).
520  const_memory_type memory_end() const noexcept {
521  return memory + sub_size;
522  }
523 
524  // Assignment functions
525 
    // Each assign_*_to forwards this view and lhs to the std_*_evaluate
    // function of the same name.
530  template <typename L>
531  void assign_to(L&& lhs) const {
532  std_assign_evaluate(*this, std::forward<L>(lhs));
533  }
534 
539  template <typename L>
540  void assign_add_to(L&& lhs) const {
541  std_add_evaluate(*this, std::forward<L>(lhs));
542  }
543 
548  template <typename L>
549  void assign_sub_to(L&& lhs) const {
550  std_sub_evaluate(*this, std::forward<L>(lhs));
551  }
552 
557  template <typename L>
558  void assign_mul_to(L&& lhs) const {
559  std_mul_evaluate(*this, std::forward<L>(lhs));
560  }
561 
566  template <typename L>
567  void assign_div_to(L&& lhs) const {
568  std_div_evaluate(*this, std::forward<L>(lhs));
569  }
570 
575  template <typename L>
576  void assign_mod_to(L&& lhs) const {
577  std_mod_evaluate(*this, std::forward<L>(lhs));
578  }
579 
580  // Internals
581 
    // Forwards the evaluator visitor to the sub expression.
586  void visit(detail::evaluator_visitor& visitor) const {
587  sub.visit(visitor);
588  }
589 
590  // GPU functions
591 
    // GPU pointer of the view: the sub expression's GPU pointer shifted by the
    // same element offset as the CPU pointer.
596  value_type* gpu_memory() const noexcept {
597  return sub.gpu_memory() + first * (etl::size(sub) / etl::dim<0>(sub));
598  }
599 
    // Forwards the eviction request to the sub expression.
603  void gpu_evict() const noexcept {
604  sub.gpu_evict();
605  }
606 
    // The four functions below only flip the local status flags of the view.
610  void invalidate_cpu() const noexcept {
611  this->cpu_up_to_date = false;
612  }
613 
617  void invalidate_gpu() const noexcept {
618  this->gpu_up_to_date = false;
619  }
620 
624  void validate_cpu() const noexcept {
625  this->cpu_up_to_date = true;
626  }
627 
631  void validate_gpu() const noexcept {
632  this->gpu_up_to_date = true;
633  }
634 
    // Asks the sub expression to allocate its GPU memory.
639  void ensure_gpu_allocated() const {
640  // Allocate is done by the sub
641  sub.ensure_gpu_allocated();
642  }
643 
    // Makes sure GPU memory is allocated and, when ETL_CUDA is defined and the
    // local GPU flag is false, copies sub_size elements from the CPU pointer to
    // the GPU pointer of the view and sets the flag.
647  void ensure_gpu_up_to_date() const {
648  sub.ensure_gpu_allocated();
649 
650 #ifdef ETL_CUDA
651  if (!this->gpu_up_to_date) {
652  cuda_check_assert(cudaMemcpy(const_cast<std::remove_const_t<value_type>*>(gpu_memory()),
653  const_cast<std::remove_const_t<value_type>*>(memory_start()), sub_size * sizeof(value_type), cudaMemcpyHostToDevice));
654 
655  this->gpu_up_to_date = true;
656 
657  inc_counter("gpu:slice:cpu_to_gpu");
658  }
659 #endif
660  }
661 
    // When ETL_CUDA is defined and the local CPU flag is false, copies sub_size
    // elements from the GPU pointer back to the CPU pointer of the view. The
    // CPU flag is set to true in every configuration.
666  void ensure_cpu_up_to_date() const {
667 #ifdef ETL_CUDA
668  if (!this->cpu_up_to_date) {
669  cuda_check_assert(cudaMemcpy(const_cast<std::remove_const_t<value_type>*>(memory_start()),
670  const_cast<std::remove_const_t<value_type>*>(gpu_memory()), sub_size * sizeof(value_type), cudaMemcpyDeviceToHost));
671 
672  inc_counter("gpu:slice:gpu_to_cpu");
673  }
674 #endif
675 
676  this->cpu_up_to_date = true;
677  }
678 
    // Copies sub_size elements from new_gpu_memory into the GPU memory of the
    // view (device-to-device, only when ETL_CUDA is defined), then marks the
    // GPU side as up to date and the CPU side as stale. The sub expression
    // must already have GPU memory (asserted).
683  void gpu_copy_from([[maybe_unused]] const value_type* new_gpu_memory) const {
684  cpp_assert(sub.gpu_memory(), "GPU must be allocated before copy");
685 
686 #ifdef ETL_CUDA
687  cuda_check_assert(cudaMemcpy(const_cast<std::remove_const_t<value_type>*>(gpu_memory()), const_cast<std::remove_const_t<value_type>*>(new_gpu_memory),
688  sub_size * sizeof(value_type), cudaMemcpyDeviceToDevice));
689 #endif
690 
691  gpu_up_to_date = true;
692  cpu_up_to_date = false;
693  }
694 
    // Returns the local CPU status flag.
699  bool is_cpu_up_to_date() const noexcept {
700  return cpu_up_to_date;
701  }
702 
    // Returns the local GPU status flag.
707  bool is_gpu_up_to_date() const noexcept {
708  return gpu_up_to_date;
709  }
710 };
711 
// Traits specialization describing slice_view<T>. Most properties are either
// fixed here or taken from the traits of the sub expression.
// NOTE(review): `expr_t`, `sub_traits` and `value_type` are used below but
// their declarations fall on lines skipped by this listing; presumably they
// alias slice_view<T>, etl_traits<sub_expr_t> and the sub value type -- confirm.
715 template <typename T>
716 struct etl_traits<etl::slice_view<T>> {
718  using sub_expr_t = std::decay_t<T>;
721 
722  static constexpr bool is_etl = true;
723  static constexpr bool is_transformer = false;
724  static constexpr bool is_view = true;
725  static constexpr bool is_magic_view = false;
726  static constexpr bool is_fast = false;
727  static constexpr bool is_linear = sub_traits::is_linear;
728  static constexpr bool is_thread_safe = sub_traits::is_thread_safe;
729  static constexpr bool is_value = false;
    // Direct memory access is only offered when T satisfies fast_slice_view_able.
730  static constexpr bool is_direct = fast_slice_view_able<T>;
731  static constexpr bool is_generator = false;
732  static constexpr bool is_padded = false;
733  static constexpr bool is_aligned = false;
734  static constexpr bool is_temporary = sub_traits::is_temporary;
    // GPU computation is tied to direct memory access.
735  static constexpr bool gpu_computable = is_direct;
736  static constexpr order storage_order = sub_traits::storage_order;
737 
    // The view is vectorizable for mode V only when the sub expression is and
    // the storage order is row-major.
743  template <vector_mode_t V>
744  static constexpr bool vectorizable = sub_traits::template vectorizable<V>&& storage_order == order::RowMajor;
745 
    // Number of elements of the view: the size of one first-dimension block of
    // the sub expression times the number of selected indices (last - first).
751  static size_t size(const expr_t& v) {
752  return (sub_traits::size(v.sub) / sub_traits::dim(v.sub, 0)) * (v.last - v.first);
753  }
754 
    // Extent of dimension d: last - first for the first dimension, otherwise
    // the extent of the sub expression.
761  static size_t dim(const expr_t& v, size_t d) {
762  if (d == 0) {
763  return v.last - v.first;
764  } else {
765  return sub_traits::dim(v.sub, d);
766  }
767  }
768 
    // Number of dimensions, identical to that of the sub expression.
773  static constexpr size_t dimensions() {
774  return sub_traits::dimensions();
775  }
776 
    // Complexity estimate; always -1 for this view.
781  static constexpr int complexity() noexcept {
782  return -1;
783  }
784 };
785 
786 } //end of namespace etl
CRTP class to inject iterators functions.
Definition: iterable.hpp:23
void assign_add_to(L &&lhs) const
Add to the given left-hand-side expression.
Definition: dyn_matrix_view.hpp:217
void ensure_gpu_allocated() const
Ensures that the GPU memory is allocated and that the GPU memory is up to date (to undefined value)...
Definition: sub_view.hpp:717
Definition: expr_fwd.hpp:71
void std_assign_evaluate(Expr &&expr, Result &&result)
Evaluation of the expr into result.
Definition: evaluator.hpp:1176
bool alias(const E &rhs) const noexcept
Test if this expression aliases with the given expression.
Definition: dyn_matrix_view.hpp:197
void assign_div_to(L &&lhs) const
Divide the given left-hand-side expression.
Definition: dyn_matrix_view.hpp:244
value_t< sub_type > value_type
The value contained in the expression.
Definition: dyn_matrix_view.hpp:31
void assign_mul_to(L &&lhs) const
Multiply the given left-hand-side expression.
Definition: dyn_matrix_view.hpp:235
constexpr bool is_magic_view
Traits indicating if the given ETL type is a magic view expression.
Definition: traits.hpp:311
const_memory_t< sub_type > const_memory_type
The const memory access type.
Definition: dyn_matrix_view.hpp:33
void assign_to(L &&lhs) const
Assign to the given left-hand-side expression.
Definition: dyn_matrix_view.hpp:208
order
Storage order of a matrix.
Definition: order.hpp:15
typename V::template vec_type< value_type > vec_type
The vectorization type for V.
Definition: dyn_matrix_view.hpp:43
static constexpr int complexity() noexcept
Estimate the complexity of computation.
Definition: slice_view.hpp:781
bool is_cpu_up_to_date() const noexcept
Indicates if the CPU memory is up to date.
Definition: sub_view.hpp:777
std::conditional_t< std::is_lvalue_reference_v< S >, const value_t< T > &, value_t< T > > const_return_helper
Definition: traits.hpp:872
void gpu_copy_from([[maybe_unused]] const value_type *new_gpu_memory) const
Copy from GPU to GPU.
Definition: sub_view.hpp:761
T sub_type
The sub type.
Definition: dyn_matrix_view.hpp:30
memory_t< sub_type > memory_type
The memory access type.
Definition: dyn_matrix_view.hpp:32
void gpu_evict() const noexcept
Evict the expression from GPU.
Definition: sub_view.hpp:681
CRTP class to inject functions testing values of the expressions.
Definition: value_testable.hpp:26
static size_t size(const expr_t &v)
Returns the size of the given expression.
Definition: slice_view.hpp:751
constexpr bool is_fast
Traits to test if the given ETL expression type is fast (sizes known at compile-time) ...
Definition: traits.hpp:588
auto end() noexcept
Return an iterator to the past-the-end element of the matrix.
Definition: iterable.hpp:59
auto load(size_t x) const noexcept
Load several elements of the expression at once.
Definition: dyn_matrix_view.hpp:143
Traits to get information about ETL types.
Definition: tmp.hpp:68
Root namespace for the ETL library.
Definition: adapter.hpp:15
std::conditional_t< std::is_const_v< std::remove_reference_t< S > >, const value_t< T > &, std::conditional_t< std::is_lvalue_reference_v< S > &&!std::is_const_v< T >, value_t< T > &, value_t< T > >> return_helper
Definition: traits.hpp:866
bool memory_alias(const P1 *a_begin, const P1 *a_end, const P2 *b_begin, const P2 *b_end)
Test if two memory ranges overlap.
Definition: helpers.hpp:264
void invalidate_gpu() const noexcept
Invalidates the GPU memory.
Definition: sub_view.hpp:695
auto dim(E &&value, size_t i) -> detail::identity_helper< E, dim_view< detail::build_identity_type< E >, D >>
Return a view representing the ith Dth dimension.
Definition: view_expression_builder.hpp:25
std::conditional_t< std::is_const_v< std::remove_reference_t< S > >, typename std::decay_t< S >::const_memory_type, typename std::decay_t< S >::memory_type > memory_t
Traits to extract the direct memory type out of an ETL type.
Definition: tmp.hpp:88
void store(vec_type< V > in, size_t i) noexcept
Store several elements in the matrix at once.
Definition: dyn_matrix_view.hpp:176
void stream(vec_type< V > in, size_t i) noexcept
Store several elements in the matrix at once, using non-temporal store.
Definition: dyn_matrix_view.hpp:165
typename std::decay_t< S >::const_memory_type const_memory_t
Traits to extract the direct const memory type out of an ETL type.
Definition: tmp.hpp:94
void ensure_cpu_up_to_date() const
Copy back from the GPU to the expression memory if necessary.
Definition: dyn_matrix_view.hpp:271
Visitor to perform local evaluation when necessary.
Definition: eval_visitors.hpp:23
static size_t dim(const expr_t &v, size_t d)
Returns the dth dimension of the given expression.
Definition: slice_view.hpp:761
std::decay_t< T > sub_expr_t
The sub expression type.
Definition: slice_view.hpp:718
void invalidate_cpu() const noexcept
Invalidates the CPU memory.
Definition: sub_view.hpp:688
Configurable iterator for ETL expressions.
Definition: iterator.hpp:24
void storeu(vec_type< V > in, size_t i) noexcept
Store several elements in the matrix at once.
Definition: dyn_matrix_view.hpp:187
void std_mod_evaluate(Expr &&expr, Result &&result)
Compound modulo evaluation of the expr into result.
Definition: evaluator.hpp:1271
auto begin() noexcept
Return an iterator to the first element of the matrix.
Definition: iterable.hpp:46
const_return_type operator()(size_t j) const
Access to the element at the given position.
Definition: dyn_matrix_view.hpp:89
value_type read_flat(size_t j) const noexcept
Returns the value at the given index. This function never has side effects.
Definition: dyn_matrix_view.hpp:111
void std_mul_evaluate(Expr &&expr, Result &&result)
Compound multiply evaluation of the expr into result.
Definition: evaluator.hpp:1233
auto loadu(size_t x) const noexcept
Load several elements of the expression at once.
Definition: dyn_matrix_view.hpp:154
constexpr bool is_transformer
Traits indicating if the given ETL type is a transformer expression.
Definition: traits.hpp:297
void visit(detail::evaluator_visitor &visitor) const
Apply the given visitor to this expression and its descendants.
Definition: dyn_matrix_view.hpp:263
constexpr size_t size(const E &expr) noexcept
Returns the size of the given ETL expression.
Definition: helpers.hpp:108
requires(D > 0) struct dyn_base
Matrix with run-time fixed dimensions.
Definition: dyn_base.hpp:113
constexpr bool is_view
Traits indicating if the given ETL type is a view expression.
Definition: traits.hpp:304
bool is_gpu_up_to_date() const noexcept
Indicates if the GPU memory is up to date.
Definition: sub_view.hpp:785
void ensure_gpu_up_to_date() const
Ensures that the GPU memory is allocated and that the GPU memory is up to date.
Definition: dyn_matrix_view.hpp:280
Definition: expr_fwd.hpp:59
auto sub(E &&value, size_t i, size_t j, size_t k, size_t l, size_t m, size_t n, size_t o, size_t p) -> sub_matrix_4d< detail::build_identity_type< E >, false >
Returns view representing a sub matrix view of the given expression.
Definition: view_expression_builder.hpp:100
void validate_gpu() const noexcept
Validates the GPU memory.
Definition: sub_view.hpp:709
CRTP class to inject assign operations to matrix and vector structures.
Definition: assignable.hpp:25
typename etl_traits< sub_expr_t >::value_type value_type
The value type.
Definition: slice_view.hpp:720
void std_sub_evaluate(Expr &&expr, Result &&result)
Compound subtract evaluation of the expr into result.
Definition: evaluator.hpp:1214
void assign_mod_to(L &&lhs) const
Modulo the given left-hand-side expression.
Definition: dyn_matrix_view.hpp:253
const_return_type operator[](size_t j) const
Returns the element at the given index.
Definition: dyn_matrix_view.hpp:71
constexpr bool is_thread_safe
Traits to test if the given ETL expression type is thread safe.
Definition: traits.hpp:687
static constexpr size_t dimensions()
Returns the number of dimensions for this type.
Definition: slice_view.hpp:773
const auto & gpu_compute_hint([[maybe_unused]] Y &y) const
Return a GPU computed version of this expression.
Definition: sub_view.hpp:653
typename decay_traits< E >::value_type value_t
Traits to extract the value type out of an ETL type.
Definition: tmp.hpp:81
void std_div_evaluate(Expr &&expr, Result &&result)
Compound divide evaluation of the expr into result.
Definition: evaluator.hpp:1252
Row-Major storage.
void inc_counter([[maybe_unused]] const char *name)
Increase the given counter.
Definition: counters.hpp:25
void validate_cpu() const noexcept
Validates the CPU memory.
Definition: sub_view.hpp:702
value_type * gpu_memory() const noexcept
Return GPU memory of this expression, if any.
Definition: sub_view.hpp:674
void std_add_evaluate(Expr &&expr, Result &&result)
Compound add evaluation of the expr into result.
Definition: evaluator.hpp:1195
const_return_helper< sub_type, decltype(std::declval< sub_type >()[0])> const_return_type
The const type return by the view.
Definition: dyn_matrix_view.hpp:35
return_helper< sub_type, decltype(std::declval< sub_type >()[0])> return_type
The type returned by the view.
Definition: dyn_matrix_view.hpp:34
size_t dyn_index([[maybe_unused]] const T &expression, size_t i) noexcept(assert_nothrow)
Compute the index for a 1D dynamic matrix.
Definition: index.hpp:187
void assign_sub_to(L &&lhs) const
Sub from the given left-hand-side expression.
Definition: dyn_matrix_view.hpp:226