Expression Templates Library (ETL)
fast.hpp
Go to the documentation of this file.
1 //=======================================================================
2 // Copyright (c) 2014-2023 Baptiste Wicht
3 // Distributed under the terms of the MIT License.
4 // (See accompanying file LICENSE or copy at
5 // http://opensource.org/licenses/MIT)
6 //=======================================================================
7 
13 #pragma once
14 
15 #include "etl/fast_base.hpp"
16 #include "etl/direct_fill.hpp" //direct_fill with GPU support
17 
18 namespace etl {
19 
25 template <typename T, typename ST, order SO, size_t... Dims>
26 struct fast_matrix_impl final : fast_matrix_base<fast_matrix_impl<T, ST, SO, Dims...>, T, ST, SO, Dims...>,
27  inplace_assignable<fast_matrix_impl<T, ST, SO, Dims...>>,
28  expression_able<fast_matrix_impl<T, ST, SO, Dims...>>,
29  value_testable<fast_matrix_impl<T, ST, SO, Dims...>>,
30  iterable<fast_matrix_impl<T, ST, SO, Dims...>, SO == order::RowMajor>,
31  dim_testable<fast_matrix_impl<T, ST, SO, Dims...>> {
32  static_assert(sizeof...(Dims) > 0, "At least one dimension must be specified");
33 
34 public:
35  static constexpr size_t n_dimensions = sizeof...(Dims);
36  static constexpr size_t etl_size = (Dims * ...);
37  static constexpr order storage_order = SO;
38  static constexpr bool array_impl = !matrix_detail::is_vector<ST>;
40 
41  using this_type = fast_matrix_impl<T, ST, SO, Dims...>;
42  using base_type = fast_matrix_base<this_type, T, ST, SO, Dims...>;
44  using value_type = T;
45  using storage_impl = ST;
47  using const_memory_type = const value_type*;
48 
49  using base_type::dim;
52  using base_type::size;
55 
59  template <typename V = default_vec>
60  using vec_type = typename V::template vec_type<T>;
61 
62 private:
63  using base_type::_data;
64 
65 public:
67 
71  fast_matrix_impl() noexcept : base_type() {
72  // Nothing else to init
73  }
74 
79  template <typename VT>
80  explicit fast_matrix_impl(const VT& value) noexcept requires(std::convertible_to<VT, value_type> || std::assignable_from<T&, VT>) : base_type() {
81  // Fill the matrix
82  std::fill(begin(), end(), value);
83  }
84 
89  fast_matrix_impl(std::initializer_list<value_type> l) : base_type() {
90  cpp_assert(l.size() == size(), "Cannot copy from an initializer of different size");
91 
92  std::copy(l.begin(), l.end(), begin());
93  }
94 
99  fast_matrix_impl(const storage_impl& data) : base_type(data) {
100  //Nothing else to init
101  }
102 
107  fast_matrix_impl(const fast_matrix_impl& rhs) noexcept : base_type(rhs) {
108  // Nothing else to init
109  }
110 
115  fast_matrix_impl(fast_matrix_impl&& rhs) noexcept : base_type(std::move(rhs)) {
116  // Nothing else to init
117  }
118 
123  template <std_container Container>
124  explicit fast_matrix_impl(const Container& container) requires(
125  !is_complex_t<Container> && std::convertible_to<typename Container::value_type, value_type>) :
126  base_type() {
127  validate_assign(*this, container);
128  std::copy(container.begin(), container.end(), begin());
129  }
130 
131  // Assignment
132 
138  fast_matrix_impl& operator=(const fast_matrix_impl& rhs) noexcept(assert_nothrow) {
139  // Avoid copy to self
140  if (this != &rhs) {
141  // This will handle the possible copy to GPU
142  this->_gpu = rhs._gpu;
143 
144  // If necessary, perform the actual copy to CPU
145  if (this->is_cpu_up_to_date()) {
146  _data = rhs._data;
147  }
148 
149  cpp_assert(rhs.is_cpu_up_to_date() == this->is_cpu_up_to_date(), "fast::operator= must preserve CPU status");
150  cpp_assert(rhs.is_gpu_up_to_date() == this->is_gpu_up_to_date(), "fast::operator= must preserve GPU status");
151  }
152 
153  return *this;
154  }
155 
162  // Avoid move to self
163  if (this != &rhs) {
164  // This will handle the possible copy to GPU
165  this->_gpu = std::move(rhs._gpu);
166 
167  // If necessary, perform the actual copy to CPU
168  if (this->is_cpu_up_to_date()) {
169  _data = std::move(rhs._data);
170  }
171  }
172 
173  return *this;
174  }
175 
181  template <size_t... SDims>
183  // Make sure the assign is valid
184  validate_assign(*this, rhs);
185 
186  // Since the type is different, it is handled by the
187  // evaluator which will handle all the possible cases
188  rhs.assign_to(*this);
189 
190  return *this;
191  }
192 
198  template <std_container Container>
199  fast_matrix_impl& operator=(const Container& container) noexcept requires(std::convertible_to<typename Container::value_type, value_type>) {
200  validate_assign(*this, container);
201  std::copy(container.begin(), container.end(), begin());
202 
203  this->validate_cpu();
204  this->invalidate_gpu();
205 
206  return *this;
207  }
208 
214  template <etl_expr E>
215  fast_matrix_impl& operator=(E&& e) requires(std::convertible_to<value_t<E>, value_type> && !std::same_as<std::decay_t<E>, this_type>) {
216  validate_assign(*this, e);
217 
218  // Avoid aliasing issues
219  if constexpr (!decay_traits<E>::is_linear) {
220  if (e.alias(*this)) {
221  // Create a temporary to hold the result
222  auto tmp = force_temporary_dim_only(*this);
223 
224  // Assign the expression to the temporary
225  e.assign_to(tmp);
226 
227  // Assign the temporary to this matrix
228  *this = tmp;
229  } else {
230  e.assign_to(*this);
231  }
232  } else {
233  // Direct assignment of the expression into this matrix
234  e.assign_to(*this);
235  }
236 
237  return *this;
238  }
239 
245  template <typename VT>
246  fast_matrix_impl& operator=(const VT& value) noexcept requires requires (T& lhs, VT rhs) {lhs = rhs; } {
247  direct_fill(*this, value);
248 
249  return *this;
250  }
251 
256  template <typename Y>
257  auto& gpu_compute_hint([[maybe_unused]] Y& y) {
258  this->ensure_gpu_up_to_date();
259  return *this;
260  }
261 
266  template <typename Y>
267  const auto& gpu_compute_hint([[maybe_unused]] Y& y) const {
268  this->ensure_gpu_up_to_date();
269  return *this;
270  }
271 
272  // Swap operations
273 
278  void swap(fast_matrix_impl& other) {
279  using std::swap;
280  swap(_data, other._data);
281  }
282 
289  template <typename V = default_vec>
290  void store(vec_type<V> in, size_t i) noexcept {
291  V::store(memory_start() + i, in);
292  }
293 
300  template <typename V = default_vec>
301  void storeu(vec_type<V> in, size_t i) noexcept {
302  V::storeu(memory_start() + i, in);
303  }
304 
311  template <typename V = default_vec>
312  void stream(vec_type<V> in, size_t i) noexcept {
313  V::stream(memory_start() + i, in);
314  }
315 
322  template <typename V = default_vec>
323  vec_type<V> load(size_t i) const noexcept {
324  return V::load(memory_start() + i);
325  }
326 
333  template <typename V = default_vec>
334  vec_type<V> loadu(size_t i) const noexcept {
335  return V::loadu(memory_start() + i);
336  }
337 
338  // Assignment functions
339 
344  template <typename L>
345  void assign_to(L&& lhs) const {
346  std_assign_evaluate(*this, std::forward<L>(lhs));
347  }
348 
353  template <typename L>
354  void assign_add_to(L&& lhs) const {
355  std_add_evaluate(*this, std::forward<L>(lhs));
356  }
357 
362  template <typename L>
363  void assign_sub_to(L&& lhs) const {
364  std_sub_evaluate(*this, std::forward<L>(lhs));
365  }
366 
371  template <typename L>
372  void assign_mul_to(L&& lhs) const {
373  std_mul_evaluate(*this, std::forward<L>(lhs));
374  }
375 
380  template <typename L>
381  void assign_div_to(L&& lhs) const {
382  std_div_evaluate(*this, std::forward<L>(lhs));
383  }
384 
389  template <typename L>
390  void assign_mod_to(L&& lhs) const {
391  std_mod_evaluate(*this, std::forward<L>(lhs));
392  }
393 
394  // Internals
395 
400  void visit([[maybe_unused]] const detail::evaluator_visitor& visitor) const {}
401 
408  friend std::ostream& operator<<(std::ostream& os, [[maybe_unused]] const fast_matrix_impl& matrix) {
409  if constexpr (sizeof...(Dims) == 1) {
410  return os << "V[" << concat_sizes(Dims...) << "]";
411  }
412 
413  return os << "M[" << concat_sizes(Dims...) << "]";
414  }
415 };
416 
417 #ifndef CPP_UTILS_ASSERT_EXCEPTION
418 static_assert(std::is_nothrow_default_constructible_v<fast_vector<double, 2>>, "fast_vector should be nothrow default constructible");
419 static_assert(std::is_nothrow_copy_constructible_v<fast_vector<double, 2>>, "fast_vector should be nothrow copy constructible");
420 static_assert(std::is_nothrow_move_constructible_v<fast_vector<double, 2>>, "fast_vector should be nothrow move constructible");
421 static_assert(std::is_nothrow_copy_assignable_v<fast_vector<double, 2>>, "fast_vector should be nothrow copy assignable");
422 static_assert(std::is_nothrow_move_assignable_v<fast_vector<double, 2>>, "fast_vector should be nothrow move assignable");
423 static_assert(std::is_nothrow_destructible_v<fast_vector<double, 2>>, "fast_vector should be nothrow destructible");
424 #endif
425 
433 template <size_t... Dims, typename T>
435  return fast_matrix_impl<T, std::span<T>, order::RowMajor, Dims...>(std::span<T>(memory, (Dims * ...)));
436 }
437 
443 template <typename T, typename ST, order SO, size_t... Dims>
445  lhs.swap(rhs);
446 }
447 
453 template <typename Stream, typename T, typename ST, order SO, size_t... Dims>
455  for (const auto& value : matrix) {
456  os << value;
457  }
458 }
459 
465 template <typename Stream, typename T, typename ST, order SO, size_t... Dims>
467  for (auto& value : matrix) {
468  os >> value;
469  }
470 }
471 
472 } //end of namespace etl
void swap(fast_matrix_impl< T, ST, SO, Dims... > &lhs, fast_matrix_impl< T, ST, SO, Dims... > &rhs)
Swaps the given two matrices.
Definition: fast.hpp:444
A deserializer for ETL expressions.
Definition: deserializer.hpp:16
Contains static matrix implementation.
CRTP class to inject iterators functions.
Definition: iterable.hpp:23
fast_matrix_impl() noexcept
Construction.
Definition: fast.hpp:71
fast_matrix_impl< T, std::span< T >, order::RowMajor, Dims... > fast_matrix_over(T *memory)
Create a fast_matrix of the given dimensions over the given memory.
Definition: fast.hpp:434
void visit([[maybe_unused]] const detail::evaluator_visitor &visitor) const
Apply the given visitor to this expression and its descendants.
Definition: fast.hpp:400
memory_type memory_start() noexcept
Returns a pointer to the first element in memory.
Definition: fast_base.hpp:201
friend std::ostream & operator<<(std::ostream &os, [[maybe_unused]] const fast_matrix_impl &matrix)
Prints a fast matrix type (not the contents) to the given stream.
Definition: fast.hpp:408
vec_type< V > load(size_t i) const noexcept
Load several elements of the matrix at once.
Definition: fast.hpp:323
static constexpr size_t alignment
The memory alignment.
Definition: fast.hpp:39
fast_matrix_impl< T, ST, SO, Dims... > this_type
this type
Definition: fast.hpp:41
void assign_div_to(L &&lhs) const
Divide to the given left-hand-side expression.
Definition: fast.hpp:381
fast_matrix_impl(std::initializer_list< value_type > l)
Construct a fast matrix filled with the given values.
Definition: fast.hpp:89
void std_assign_evaluate(Expr &&expr, Result &&result)
Evaluation of the expr into result.
Definition: evaluator.hpp:1176
const auto & gpu_compute_hint([[maybe_unused]] Y &y) const
Return a GPU computed version of this expression.
Definition: fast.hpp:267
void stream(vec_type< V > in, size_t i) noexcept
Store several elements in the matrix at once, using non-temporal store.
Definition: fast.hpp:312
fast_matrix_impl & operator=(const fast_matrix_impl< T, ST, SO, SDims... > &rhs) noexcept
Copy assign a fast matrix from another fast matrix type.
Definition: fast.hpp:182
fast_matrix_impl(fast_matrix_impl &&rhs) noexcept
Move construct a fast matrix.
Definition: fast.hpp:115
Standard memory utilities.
static constexpr bool array_impl
true if the storage is a std::array, false otherwise
Definition: fast.hpp:38
static constexpr size_t dim() noexcept
Returns the Dth dimension of the matrix.
Definition: fast_base.hpp:266
Define traits to get vectorization information for types when no vector mode is available.
Definition: no_vectorization.hpp:16
fast_matrix_impl & operator=(fast_matrix_impl &&rhs) noexcept
Move assign a fast matrix.
Definition: fast.hpp:161
typename V::template vec_type< T > vec_type
The vectorization type for V.
Definition: fast.hpp:60
order
Storage order of a matrix.
Definition: order.hpp:15
void swap(fast_matrix_impl &other)
Swap the contents of the matrix with another matrix.
Definition: fast.hpp:278
void assign_add_to(L &&lhs) const
Add to the given left-hand-side expression.
Definition: fast.hpp:354
static constexpr size_t n_dimensions
The number of dimensions.
Definition: fast.hpp:35
void serialize(serializer< Stream > &os, const dyn_matrix_impl< T, SO, D > &matrix)
Serialize the given matrix using the given serializer.
Definition: dyn.hpp:688
bool is_cpu_up_to_date() const noexcept
Indicates if the CPU memory is up to date.
Definition: sub_view.hpp:777
std::string concat_sizes(Dims... sizes)
Returns a string representation of the given dimensions.
Definition: tmp.hpp:192
CRTP class to inject functions testing values of the expressions.
Definition: value_testable.hpp:26
auto end() noexcept
Return an iterator to the past-the-end element of the matrix.
Definition: iterable.hpp:59
A serializer for ETL expressions.
Definition: serializer.hpp:16
auto load(size_t x) const noexcept
Load several elements of the expression at once.
Definition: dyn_matrix_view.hpp:143
fast_matrix_impl & operator=(const Container &container) noexcept requires(std::convertible_to< typename Container::value_type, value_type >)
Assign the values of the STL container to the fast matrix.
Definition: fast.hpp:199
Traits to get information about ETL types.
Definition: tmp.hpp:68
Root namespace for the ETL library.
Definition: adapter.hpp:15
void assign_to(L &&lhs) const
Assign to the given left-hand-side expression.
Definition: fast.hpp:345
void assign_mul_to(L &&lhs) const
Multiply the given left-hand-side expression.
Definition: fast.hpp:372
fast_matrix_impl & operator=(const fast_matrix_impl &rhs) noexcept(assert_nothrow)
Copy assign a fast matrix.
Definition: fast.hpp:138
void invalidate_gpu() const noexcept
Invalidates the GPU memory.
Definition: sub_view.hpp:695
CRTP class to inject functions creating new expressions.
Definition: expression_able.hpp:23
void store(vec_type< V > in, size_t i) noexcept
Store several elements in the matrix at once.
Definition: dyn_matrix_view.hpp:176
void stream(vec_type< V > in, size_t i) noexcept
Store several elements in the matrix at once, using non-temporal store.
Definition: dyn_matrix_view.hpp:165
T value_type
The value type.
Definition: fast.hpp:44
void deserialize(deserializer< Stream > &is, dyn_matrix_impl< T, SO, D > &matrix)
Deserialize the given matrix using the given deserializer.
Definition: dyn.hpp:704
fast_matrix_impl(const fast_matrix_impl &rhs) noexcept
Copy construct a fast matrix.
Definition: fast.hpp:107
Visitor to perform local evaluation when necessary.
Definition: eval_visitors.hpp:23
void storeu(vec_type< V > in, size_t i) noexcept
Store several elements in the matrix at once.
Definition: dyn_matrix_view.hpp:187
CRTP class to inject inplace operations to matrix and vector structures.
Definition: inplace_assignable.hpp:26
void std_mod_evaluate(Expr &&expr, Result &&result)
Compound modulo evaluation of the expr into result.
Definition: evaluator.hpp:1271
auto begin() noexcept
Return an iterator to the first element of the matrix.
Definition: iterable.hpp:46
fast_matrix_base< this_type, T, ST, SO, Dims... > base_type
The base type.
Definition: fast.hpp:42
void assign_sub_to(L &&lhs) const
Subtract from the given left-hand-side expression.
Definition: fast.hpp:363
void direct_fill(E &&mat, V value)
Fill the given ETL value class with the given value.
Definition: direct_fill.hpp:25
Definition: fast_base.hpp:87
void std_mul_evaluate(Expr &&expr, Result &&result)
Compound multiply evaluation of the expr into result.
Definition: evaluator.hpp:1233
auto loadu(size_t x) const noexcept
Load several elements of the expression at once.
Definition: dyn_matrix_view.hpp:154
Matrix with compile-time fixed dimensions.
Definition: fast.hpp:26
void storeu(vec_type< V > in, size_t i) noexcept
Store several elements in the matrix at once.
Definition: fast.hpp:301
static constexpr order storage_order
The storage order.
Definition: fast.hpp:37
ST storage_impl
The storage implementation.
Definition: fast.hpp:45
fast_matrix_impl(const Container &container) requires(!is_complex_t< Container > && std::convertible_to< typename Container::value_type, value_type >)
Construct a fast matrix from the given STL container.
Definition: fast.hpp:124
requires(D > 0) struct dyn_base
Matrix with run-time fixed dimensions.
Definition: dyn_base.hpp:113
fast_matrix_impl(const VT &value) noexcept requires(std::convertible_to< VT, value_type > || std::assignable_from< T &, VT >)
Construct a fast matrix filled with the same value.
Definition: fast.hpp:80
bool is_gpu_up_to_date() const noexcept
Indicates if the GPU memory is up to date.
Definition: sub_view.hpp:785
void ensure_gpu_up_to_date() const
Copy back from the GPU to the expression memory if necessary.
Definition: dyn_matrix_view.hpp:280
void assign_mod_to(L &&lhs) const
Modulo the given left-hand-side expression.
Definition: fast.hpp:390
fast_matrix_impl & operator=(E &&e) requires(std::convertible_to< value_t< E >, value_type > && !std::same_as< std::decay_t< E >, this_type >)
Assign the values of the ETL expression to the fast matrix.
Definition: fast.hpp:215
static constexpr size_t etl_size
The size of the matrix.
Definition: fast.hpp:36
void std_sub_evaluate(Expr &&expr, Result &&result)
Compound subtract evaluation of the expr into result.
Definition: evaluator.hpp:1214
memory_type memory_end() noexcept
Returns a pointer to the past-the-end element in memory.
Definition: fast_base.hpp:217
gpu_memory_handler< T > _gpu
The GPU memory handler.
Definition: fast_base.hpp:102
value_type * memory_type
The memory type.
Definition: fast.hpp:46
auto & gpu_compute_hint([[maybe_unused]] Y &y)
Return a GPU computed version of this expression.
Definition: fast.hpp:257
const value_type * const_memory_type
The const memory type.
Definition: fast.hpp:47
storage_impl _data
The storage container.
Definition: fast_base.hpp:101
typename decay_traits< E >::value_type value_t
Traits to extract the value type out of an ETL type.
Definition: tmp.hpp:81
void std_div_evaluate(Expr &&expr, Result &&result)
Compound divide evaluation of the expr into result.
Definition: evaluator.hpp:1252
Row-Major storage.
static constexpr size_t size() noexcept
Returns the size of the matrix, in O(1)
Definition: fast_base.hpp:233
void store(vec_type< V > in, size_t i) noexcept
Store several elements in the matrix at once.
Definition: fast.hpp:290
void validate_cpu() const noexcept
Validates the CPU memory.
Definition: sub_view.hpp:702
fast_matrix_impl(const storage_impl &data)
Construct a fast matrix directly from storage.
Definition: fast.hpp:99
void std_add_evaluate(Expr &&expr, Result &&result)
Compound add evaluation of the expr into result.
Definition: evaluator.hpp:1195
CRTP class to inject functions testing the dimensions.
Definition: dim_testable.hpp:45
decltype(auto) force_temporary_dim_only(E &&expr)
Force a temporary out of the expression with the same dimensions, but the content is not defined...
Definition: temporary.hpp:156
vec_type< V > loadu(size_t i) const noexcept
Load several elements of the matrix at once.
Definition: fast.hpp:334