Expression Templates Library (ETL)
transpose.hpp
Go to the documentation of this file.
1 //=======================================================================
2 // Copyright (c) 2014-2023 Baptiste Wicht
3 // Distributed under the terms of the MIT License.
4 // (See accompanying file LICENSE or copy at
5 // http://opensource.org/licenses/MIT)
6 //=======================================================================
7 
13 #pragma once
14 
15 #ifdef ETL_MKL_MODE
16 #include "mkl.h"
17 #endif
18 
19 namespace etl::impl::blas {
20 
21 #ifdef ETL_MKL_MODE
22 
28 template <typename A, typename C>
29 void mkl_otrans(A&& a, C&& c) {
30  a.ensure_cpu_up_to_date();
31 
32  auto mem_c = c.memory_start();
33  auto mem_a = a.memory_start();
34 
35  if constexpr (all_single_precision<A, C>) {
36  if (is_row_major<A>) {
37  mkl_somatcopy('R', 'T', etl::dim<0>(a), etl::dim<1>(a), 1.0f, mem_a, etl::dim<1>(a), mem_c, etl::dim<0>(a));
38  } else {
39  mkl_somatcopy('C', 'T', etl::dim<0>(a), etl::dim<1>(a), 1.0f, mem_a, etl::dim<0>(a), mem_c, etl::dim<1>(a));
40  }
41  } else if constexpr (all_double_precision<A, C>) {
42  if (is_row_major<A>) {
43  mkl_domatcopy('R', 'T', etl::dim<0>(a), etl::dim<1>(a), 1.0, mem_a, etl::dim<1>(a), mem_c, etl::dim<0>(a));
44  } else {
45  mkl_domatcopy('C', 'T', etl::dim<0>(a), etl::dim<1>(a), 1.0, mem_a, etl::dim<0>(a), mem_c, etl::dim<1>(a));
46  }
47  }
48 
49  c.invalidate_gpu();
50 }
51 
56 template <typename C>
57 void mkl_itrans(C&& c) {
58  c.ensure_cpu_up_to_date();
59 
60  if constexpr (is_single_precision<C>) {
61  if (is_row_major<C>) {
62  mkl_simatcopy('R', 'T', etl::dim<0>(c), etl::dim<1>(c), 1.0f, c.memory_start(), etl::dim<1>(c), etl::dim<0>(c));
63  } else {
64  mkl_simatcopy('C', 'T', etl::dim<0>(c), etl::dim<1>(c), 1.0f, c.memory_start(), etl::dim<0>(c), etl::dim<1>(c));
65  }
66  } else if constexpr (is_double_precision<C>) {
67  if (is_row_major<C>) {
68  mkl_dimatcopy('R', 'T', etl::dim<0>(c), etl::dim<1>(c), 1.0, c.memory_start(), etl::dim<1>(c), etl::dim<0>(c));
69  } else {
70  mkl_dimatcopy('C', 'T', etl::dim<0>(c), etl::dim<1>(c), 1.0, c.memory_start(), etl::dim<0>(c), etl::dim<1>(c));
71  }
72  }
73 
74  c.invalidate_gpu();
75 }
76 
81 template <typename C>
82 void inplace_square_transpose(C&& c) {
83  if constexpr (is_dma<C> && is_floating<C>) {
84  mkl_itrans(c);
85  } else {
86  cpp_unreachable("Invalid call to blas:inplace_square_transpose");
87  }
88 }
89 
94 template <typename C>
96  if constexpr (is_dma<C> && is_floating<C>) {
97  mkl_otrans(force_temporary(c), c);
98  } else {
99  cpp_unreachable("Invalid call to blas:inplace_rectangular_transpose");
100  }
101 }
102 
108 template <typename A, typename C>
109 void transpose(A&& a, C&& c) {
110  if constexpr (all_dma<A, C> && all_floating<A, C>) {
111  auto mem_c = c.memory_start();
112  auto mem_a = a.memory_start();
113 
114  // Delegate aliasing transpose to inplace algorithm
115  if (mem_c == mem_a) {
116  if (etl::dim<0>(a) == etl::dim<1>(a)) {
117  mkl_itrans(c);
118  } else {
119  mkl_otrans(force_temporary(c), c);
120  }
121  } else {
122  mkl_otrans(a, c);
123  }
124  } else {
125  cpp_unreachable("Invalid call to blas:tranpose");
126  }
127 }
128 
129 #else
130 
131 //COVERAGE_EXCLUDE_BEGIN
132 
137 template <typename C>
138 void inplace_square_transpose([[maybe_unused]] C&& c) {
139  cpp_unreachable("MKL not enabled/available");
140 }
141 
146 template <typename C>
147 void inplace_rectangular_transpose([[maybe_unused]] C&& c) {
148  cpp_unreachable("MKL not enabled/available");
149 }
150 
156 template <typename A, typename C>
157 void transpose([[maybe_unused]] A&& a, [[maybe_unused]] C&& c) {
158  cpp_unreachable("MKL not enabled/available");
159 }
160 
161  //COVERAGE_EXCLUDE_END
162 
163 #endif
164 
165 } //end of namespace etl::impl::blas
void inplace_square_transpose([[maybe_unused]] C &&c)
Inplace transposition of the square matrix c.
Definition: transpose.hpp:138
void inplace_rectangular_transpose([[maybe_unused]] C &&c)
Inplace transposition of the rectangular matrix c.
Definition: transpose.hpp:147
auto transpose(const E &value)
Returns the transpose of the given expression.
Definition: expression_builder.hpp:528
Definition: dot.hpp:19
decltype(auto) force_temporary(E &&expr)
Force a temporary out of the expression.
Definition: temporary.hpp:91