16 #include "etl/impl/cublas/cuda.hpp" 25 template <matrix T,
bool Aligned>
27 struct sub_view<T, Aligned> final : iterable<sub_view<T, Aligned>, false>,
28 assignable<sub_view<T, Aligned>, value_t<T>>,
29 value_testable<sub_view<T, Aligned>>,
46 template <
typename V = default_vec>
49 using assignable_base_type::operator=;
56 const size_t sub_offset;
76 return storage_order ==
order::RowMajor ? sub_expr[sub_offset + j] : sub_expr[i + dim<0>(sub_expr) * j];
85 return storage_order ==
order::RowMajor ? sub_expr[sub_offset + j] : sub_expr[i + dim<0>(sub_expr) * j];
95 return storage_order ==
order::RowMajor ? sub_expr.read_flat(sub_offset + j) : sub_expr.read_flat(i + dim<0>(sub_expr) * j);
103 template <
typename... S>
106 return sub_expr(i, static_cast<size_t>(args)...);
114 template <
typename... S>
117 return sub_expr(i, static_cast<size_t>(args)...);
126 return sub(*
this, x);
135 template <
typename V = default_vec>
137 return sub_expr.template storeu<V>(in, x + sub_offset);
146 template <
typename V = default_vec>
148 return sub_expr.template storeu<V>(in, x + sub_offset);
157 template <
typename V = default_vec>
159 return sub_expr.template storeu<V>(in, x + sub_offset);
168 template <
typename V = default_vec>
170 load(
size_t x)
const noexcept {
171 return sub_expr.template loadu<V>(x + sub_offset);
180 template <
typename V = default_vec>
182 loadu(
size_t x)
const noexcept {
183 return sub_expr.template loadu<V>(x + sub_offset);
191 template <
typename E>
192 bool alias(
const E& rhs)
const noexcept {
193 return sub_expr.alias(rhs);
203 size_t& unsafe_dimension_access(
size_t x) {
204 return sub_expr.unsafe_dimension_access(x + 1);
213 template <
typename L>
222 template <
typename L>
231 template <
typename L>
240 template <
typename L>
249 template <
typename L>
258 template <
typename L>
270 sub_expr.visit(visitor);
279 sub_expr.ensure_cpu_up_to_date();
288 sub_expr.ensure_gpu_up_to_date();
297 friend std::ostream&
operator<<(std::ostream& os,
const sub_view& v) {
298 return os <<
"sub(" << v.sub_expr <<
", " << v.i <<
")";
306 template <matrix T,
bool Aligned>
329 template <
typename V = default_vec>
334 using assignable_base_type::operator=;
339 const size_t sub_size;
346 mutable bool cpu_up_to_date;
347 mutable bool gpu_up_to_date;
360 standard_evaluator::pre_assign_rhs(*
this);
363 this->memory = this->sub_expr.memory_start() + i * sub_size;
366 this->cpu_up_to_date = this->sub_expr.is_cpu_up_to_date();
367 this->gpu_up_to_date = this->sub_expr.is_gpu_up_to_date();
369 cpp_assert(this->memory,
"Invalid memory");
375 if (!this->cpu_up_to_date) {
376 if (sub_expr.is_gpu_up_to_date()) {
377 sub_expr.invalidate_cpu();
384 if (!this->gpu_up_to_date) {
385 if (sub_expr.is_cpu_up_to_date()) {
386 sub_expr.invalidate_gpu();
432 template <
typename... S>
436 return memory[
dyn_index(*
this, args...)];
444 template <
typename... S>
448 return memory[
dyn_index(*
this, args...)];
457 return sub(*
this, x);
466 template <
typename V = default_vec>
477 template <
typename V = default_vec>
488 template <
typename V = default_vec>
500 template <
typename V = default_vec>
511 template <
typename V = default_vec>
521 template <
typename E>
522 bool alias(
const E& rhs)
const noexcept {
523 if constexpr (is_dma<E>) {
524 return memory_alias(memory_start(), memory_end(), rhs.memory_start(), rhs.memory_end());
526 return sub_expr.alias(rhs);
534 ETL_STRONG_INLINE(
memory_type) memory_start() noexcept {
550 ETL_STRONG_INLINE(
memory_type) memory_end() noexcept {
551 return memory + sub_size;
559 return memory + sub_size;
569 size_t& unsafe_dimension_access(
size_t x) {
570 return sub_expr.unsafe_dimension_access(x + 1);
577 template <
size_t... I>
578 std::array<size_t, decay_traits<this_type>::dimensions()> dim_array(std::index_sequence<I...>)
const {
588 template <
typename L>
597 template <
typename L>
606 template <
typename L>
615 template <
typename L>
624 template <
typename L>
633 template <
typename L>
642 template <
typename Y>
652 template <
typename Y>
665 sub_expr.visit(visitor);
675 return sub_expr.gpu_memory() + i * sub_size;
682 sub_expr.gpu_evict();
689 this->cpu_up_to_date =
false;
696 this->gpu_up_to_date =
false;
703 this->cpu_up_to_date =
true;
710 this->gpu_up_to_date =
true;
719 sub_expr.ensure_gpu_allocated();
726 sub_expr.ensure_gpu_allocated();
729 if (!this->gpu_up_to_date) {
730 cuda_check_assert(cudaMemcpy(
const_cast<std::remove_const_t<value_type>*
>(
gpu_memory()),
731 const_cast<std::remove_const_t<value_type>*
>(memory_start()), sub_size *
sizeof(
value_type), cudaMemcpyHostToDevice));
733 this->gpu_up_to_date =
true;
746 if (!this->cpu_up_to_date) {
747 cuda_check_assert(cudaMemcpy(
const_cast<std::remove_const_t<value_type>*
>(memory_start()),
748 const_cast<std::remove_const_t<value_type>*
>(
gpu_memory()), sub_size *
sizeof(
value_type), cudaMemcpyDeviceToHost));
754 this->cpu_up_to_date =
true;
762 cpp_assert(sub_expr.gpu_memory(),
"GPU must be allocated before copy");
765 cuda_check_assert(cudaMemcpy(
const_cast<std::remove_const_t<value_type>*
>(
gpu_memory()),
const_cast<std::remove_const_t<value_type>*
>(new_gpu_memory),
766 sub_size *
sizeof(
value_type), cudaMemcpyDeviceToDevice));
769 gpu_up_to_date =
true;
770 cpu_up_to_date =
false;
778 return cpu_up_to_date;
786 return gpu_up_to_date;
796 return os <<
"sub(" << v.sub_expr <<
", " << v.i <<
")";
803 template <
typename T,
bool Aligned>
810 static constexpr
bool is_etl =
true;
814 static constexpr
bool is_fast = sub_traits::is_fast;
815 static constexpr
bool is_linear = sub_traits::is_linear;
817 static constexpr
bool is_value =
false;
818 static constexpr
bool is_direct =
820 static constexpr
bool is_generator =
false;
821 static constexpr
bool is_padded =
false;
822 static constexpr
bool is_aligned =
false;
823 static constexpr
bool is_temporary = sub_traits::is_temporary;
824 static constexpr
bool gpu_computable = fast_sub_view_able<T>;
825 static constexpr
order storage_order = sub_traits::storage_order;
832 template <vector_mode_t V>
833 static constexpr
bool vectorizable = sub_traits::template vectorizable<V>&& storage_order ==
order::RowMajor;
858 static constexpr
size_t size() noexcept {
859 return sub_traits::size() / sub_traits::template dim<0>();
868 static constexpr
size_t dim() noexcept {
869 return sub_traits::template dim<D + 1>();
877 return sub_traits::dimensions() - 1;
CRTP class to inject iterators functions.
Definition: iterable.hpp:23
void assign_add_to(L &&lhs) const
Add to the given left-hand-side expression.
Definition: dyn_matrix_view.hpp:217
void ensure_gpu_allocated() const
Ensures that the GPU memory is allocated and that the GPU memory is up to date (to undefined value)...
Definition: sub_view.hpp:717
typename sub_traits::value_type value_type
The value type of the expression.
Definition: sub_view.hpp:808
void std_assign_evaluate(Expr &&expr, Result &&result)
Evaluation of the expr into result.
Definition: evaluator.hpp:1176
bool alias(const E &rhs) const noexcept
Test if this expression aliases with the given expression.
Definition: dyn_matrix_view.hpp:197
void assign_div_to(L &&lhs) const
Divide the given left-hand-side expression.
Definition: dyn_matrix_view.hpp:244
value_t< sub_type > value_type
The value contained in the expression.
Definition: dyn_matrix_view.hpp:31
void assign_mul_to(L &&lhs) const
Multiply the given left-hand-side expression.
Definition: dyn_matrix_view.hpp:235
constexpr bool is_magic_view
Traits indicating if the given ETL type is a magic view expression.
Definition: traits.hpp:311
const_memory_t< sub_type > const_memory_type
The const memory access type.
Definition: dyn_matrix_view.hpp:33
void assign_to(L &&lhs) const
Assign to the given left-hand-side expression.
Definition: dyn_matrix_view.hpp:208
static size_t dim(const expr_t &v, size_t d) noexcept
Returns the dth dimension of the given expression.
Definition: sub_view.hpp:850
std::decay_t< T > sub_expr_t
The sub expression type.
Definition: sub_view.hpp:806
order
Storage order of a matrix.
Definition: order.hpp:15
typename V::template vec_type< value_type > vec_type
The vectorization type for V.
Definition: dyn_matrix_view.hpp:43
bool is_cpu_up_to_date() const noexcept
Indicates if the CPU memory is up to date.
Definition: sub_view.hpp:777
std::conditional_t< std::is_lvalue_reference_v< S >, const value_t< T > &, value_t< T > > const_return_helper
Definition: traits.hpp:872
void gpu_copy_from([[maybe_unused]] const value_type *new_gpu_memory) const
Copy from GPU to GPU.
Definition: sub_view.hpp:761
T sub_type
The sub type.
Definition: dyn_matrix_view.hpp:30
memory_t< sub_type > memory_type
The memory access type.
Definition: dyn_matrix_view.hpp:32
void gpu_evict() const noexcept
Evict the expression from GPU.
Definition: sub_view.hpp:681
CRTP class to inject functions testing values of the expressions.
Definition: value_testable.hpp:26
constexpr bool is_fast
Traits to test if the given ETL expression type is fast (sizes known at compile-time) ...
Definition: traits.hpp:588
auto end() noexcept
Return an iterator to the past-the-end element of the matrix.
Definition: iterable.hpp:59
static constexpr size_t dimensions() noexcept
Returns the number of dimensions for this type.
Definition: sub_view.hpp:876
auto load(size_t x) const noexcept
Load several elements of the expression at once.
Definition: dyn_matrix_view.hpp:143
Traits to get information about ETL types.
Definition: tmp.hpp:68
Root namespace for the ETL library.
Definition: adapter.hpp:15
std::conditional_t< std::is_const_v< std::remove_reference_t< S > >, const value_t< T > &, std::conditional_t< std::is_lvalue_reference_v< S > &&!std::is_const_v< T >, value_t< T > &, value_t< T > >> return_helper
Definition: traits.hpp:866
static constexpr size_t dimensions()
Return the number of dimensions of the expression.
Definition: traits_base.hpp:31
bool memory_alias(const P1 *a_begin, const P1 *a_end, const P2 *b_begin, const P2 *b_end)
Test if two memory ranges overlap.
Definition: helpers.hpp:264
void invalidate_gpu() const noexcept
Invalidates the GPU memory.
Definition: sub_view.hpp:695
auto dim(E &&value, size_t i) -> detail::identity_helper< E, dim_view< detail::build_identity_type< E >, D >>
Return a view representing the ith Dth dimension.
Definition: view_expression_builder.hpp:25
std::conditional_t< std::is_const_v< std::remove_reference_t< S > >, typename std::decay_t< S >::const_memory_type, typename std::decay_t< S >::memory_type > memory_t
Traits to extract the direct memory type out of an ETL type.
Definition: tmp.hpp:88
void store(vec_type< V > in, size_t i) noexcept
Store several elements in the matrix at once.
Definition: dyn_matrix_view.hpp:176
void stream(vec_type< V > in, size_t i) noexcept
Store several elements in the matrix at once, using non-temporal store.
Definition: dyn_matrix_view.hpp:165
std::ostream & operator<<(std::ostream &os, const etl::complex< T > &c)
Outputs a textual representation of the complex number in the given stream.
Definition: complex.hpp:576
typename std::decay_t< S >::const_memory_type const_memory_t
Traits to extract the direct const memory type out of an ETL type.
Definition: tmp.hpp:94
void ensure_cpu_up_to_date() const
Copy back from the GPU to the expression memory if necessary.
Definition: dyn_matrix_view.hpp:271
Visitor to perform local evaluation when necessary.
Definition: eval_visitors.hpp:23
void invalidate_cpu() const noexcept
Invalidates the CPU memory.
Definition: sub_view.hpp:688
Configurable iterator for ETL expressions.
Definition: iterator.hpp:24
void storeu(vec_type< V > in, size_t i) noexcept
Store several elements in the matrix at once.
Definition: dyn_matrix_view.hpp:187
CRTP class to inject inplace operations to matrix and vector structures.
Definition: inplace_assignable.hpp:26
void std_mod_evaluate(Expr &&expr, Result &&result)
Compound modulo evaluation of the expr into result.
Definition: evaluator.hpp:1271
auto begin() noexcept
Return an iterator to the first element of the matrix.
Definition: iterable.hpp:46
const_return_type operator()(size_t j) const
Access to the element at the given position.
Definition: dyn_matrix_view.hpp:89
value_type read_flat(size_t j) const noexcept
Returns the value at the given index. This function never has side effects.
Definition: dyn_matrix_view.hpp:111
void std_mul_evaluate(Expr &&expr, Result &&result)
Compound multiply evaluation of the expr into result.
Definition: evaluator.hpp:1233
auto loadu(size_t x) const noexcept
Load several elements of the expression at once.
Definition: dyn_matrix_view.hpp:154
constexpr bool is_transformer
Traits indicating if the given ETL type is a transformer expression.
Definition: traits.hpp:297
void visit(detail::evaluator_visitor &visitor) const
Apply the given visitor to this expression and its descendants.
Definition: dyn_matrix_view.hpp:263
sub_view(sub_type sub_expr, size_t i)
Construct a new sub_view over the given sub expression.
Definition: sub_view.hpp:68
requires(D > 0) struct dyn_base
Matrix with run-time fixed dimensions.
Definition: dyn_base.hpp:113
constexpr bool is_view
Traits indicating if the given ETL type is a view expression.
Definition: traits.hpp:304
bool is_gpu_up_to_date() const noexcept
Indicates if the GPU memory is up to date.
Definition: sub_view.hpp:785
void ensure_gpu_up_to_date() const
Copy from the expression memory to the GPU if necessary.
Definition: dyn_matrix_view.hpp:280
Definition: expr_fwd.hpp:59
void validate_gpu() const noexcept
Validates the GPU memory.
Definition: sub_view.hpp:709
CRTP class to inject assign operations to matrix and vector structures.
Definition: assignable.hpp:25
void std_sub_evaluate(Expr &&expr, Result &&result)
Compound subtract evaluation of the expr into result.
Definition: evaluator.hpp:1214
static size_t size(const expr_t &v) noexcept
Returns the size of the given expression.
Definition: sub_view.hpp:840
void assign_mod_to(L &&lhs) const
Modulo the given left-hand-side expression.
Definition: dyn_matrix_view.hpp:253
static constexpr int complexity() noexcept
Estimate the complexity of computation.
Definition: sub_view.hpp:884
const_return_type operator[](size_t j) const
Returns the element at the given index.
Definition: dyn_matrix_view.hpp:71
constexpr bool is_thread_safe
Traits to test if the given ETL expression type is thread safe.
Definition: traits.hpp:687
static constexpr size_t dim() noexcept
Returns the Dth dimension of an expression of this type.
Definition: sub_view.hpp:868
const auto & gpu_compute_hint([[maybe_unused]] Y &y) const
Return a GPU computed version of this expression.
Definition: sub_view.hpp:653
typename decay_traits< E >::value_type value_t
Traits to extract the value type out of an ETL type.
Definition: tmp.hpp:81
void std_div_evaluate(Expr &&expr, Result &&result)
Compound divide evaluation of the expr into result.
Definition: evaluator.hpp:1252
void inc_counter([[maybe_unused]] const char *name)
Increase the given counter.
Definition: counters.hpp:25
void validate_cpu() const noexcept
Validates the CPU memory.
Definition: sub_view.hpp:702
value_type * gpu_memory() const noexcept
Return GPU memory of this expression, if any.
Definition: sub_view.hpp:674
size_t subsize(const E &expr)
Returns the sub-size of the given ETL expression, i.e. the size not considering the first dimension...
Definition: helpers.hpp:118
void std_add_evaluate(Expr &&expr, Result &&result)
Compound add evaluation of the expr into result.
Definition: evaluator.hpp:1195
static constexpr size_t size() noexcept
Returns the size of an expression of this fast type.
Definition: sub_view.hpp:858
const_return_helper< sub_type, decltype(std::declval< sub_type >()[0])> const_return_type
The const type return by the view.
Definition: dyn_matrix_view.hpp:35
return_helper< sub_type, decltype(std::declval< sub_type >()[0])> return_type
The type returned by the view.
Definition: dyn_matrix_view.hpp:34
size_t dyn_index([[maybe_unused]] const T &expression, size_t i) noexcept(assert_nothrow)
Compute the index for a 1D dynamic matrix.
Definition: index.hpp:187
void assign_sub_to(L &&lhs) const
Sub from the given left-hand-side expression.
Definition: dyn_matrix_view.hpp:226