// Include guard for this header, followed by the two Packet2cf constructors.
// NOTE(review): the enclosing struct declaration and the member `v` are not
// visible in this listing.
10 #ifndef EIGEN_COMPLEX_SSE_H 11 #define EIGEN_COMPLEX_SSE_H 20 EIGEN_STRONG_INLINE Packet2cf() {}
// Explicit wrapping constructor: stores the given __m128 register in member v.
21 EIGEN_STRONG_INLINE
explicit Packet2cf(
const __m128& a) : v(a) {}
// packet_traits specialization for std::complex<float>: derives from
// default_packet_traits and names Packet2cf as the packet type.
// NOTE(review): the struct braces and any further members are not visible in
// this listing.
25 template<>
struct packet_traits<
std::complex<float> > : default_packet_traits
27 typedef Packet2cf type;
46 template<>
struct unpacket_traits<Packet2cf> {
typedef std::complex<float> type;
enum {
size=2}; };
48 template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(
const Packet2cf& a,
const Packet2cf& b) {
return Packet2cf(_mm_add_ps(a.v,b.v)); }
49 template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(
const Packet2cf& a,
const Packet2cf& b) {
return Packet2cf(_mm_sub_ps(a.v,b.v)); }
// Negates the packet: XOR with 0x80000000 in every 32-bit lane flips the sign
// bit of all four floats.
// NOTE(review): the function's braces are not visible in this listing.
50 template<> EIGEN_STRONG_INLINE Packet2cf pnegate(
const Packet2cf& a)
52 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
53 return Packet2cf(_mm_xor_ps(a.v,mask));
// Complex conjugate: the mask (given lowest lane first by _mm_setr_epi32) has
// the sign bit set only in lanes 1 and 3, so the XOR flips the sign of those
// two floats and leaves lanes 0 and 2 untouched.
// Lanes 1 and 3 presumably hold the imaginary parts (interleaved re/im layout).
// NOTE(review): the function's braces are not visible in this listing.
55 template<> EIGEN_STRONG_INLINE Packet2cf pconj(
const Packet2cf& a)
57 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
58 return Packet2cf(_mm_xor_ps(a.v,mask));
// Multiplies two packets of complex floats.
// Two code paths are visible, selected by EIGEN_VECTORIZE_SSE3.
// NOTE(review): the braces and the #else/#endif separating the two paths are
// not visible in this listing.
//
// SSE3 path: _mm_moveldup_ps(a.v) duplicates the even lanes of a and
// _mm_movehdup_ps(a.v) duplicates the odd lanes. The first product is
// (even lanes of a) * b; the second is (odd lanes of a) * swizzled b.
// _mm_addsub_ps subtracts in even lanes and adds in odd lanes.
// vec4f_swizzle1(b.v, 1, 0, 3, 2) presumably swaps the two floats of each
// complex value; the macro is defined elsewhere.
61 template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(
const Packet2cf& a,
const Packet2cf& b)
64 #ifdef EIGEN_VECTORIZE_SSE3 65 return Packet2cf(_mm_addsub_ps(_mm_mul_ps(_mm_moveldup_ps(a.v), b.v),
66 _mm_mul_ps(_mm_movehdup_ps(a.v),
67 vec4f_swizzle1(b.v, 1, 0, 3, 2))));
// Second path: the same two products, but the subtract-in-even-lanes step is
// done by XOR-ing the second product with a sign mask on lanes 0 and 2 and
// then using a plain add.
72 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x00000000,0x80000000,0x00000000));
73 return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
74 _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
75 vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
79 template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(
const Packet2cf& a,
const Packet2cf& b) {
return Packet2cf(_mm_and_ps(a.v,b.v)); }
80 template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(
const Packet2cf& a,
const Packet2cf& b) {
return Packet2cf(_mm_or_ps(a.v,b.v)); }
81 template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(
const Packet2cf& a,
const Packet2cf& b) {
return Packet2cf(_mm_xor_ps(a.v,b.v)); }
82 template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(
const Packet2cf& a,
const Packet2cf& b) {
return Packet2cf(_mm_andnot_ps(a.v,b.v)); }
84 template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(
const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD
return Packet2cf(pload<Packet4f>(&numext::real_ref(*from))); }
85 template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(
const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD
return Packet2cf(ploadu<Packet4f>(&numext::real_ref(*from))); }
// Broadcasts one std::complex<float> into both slots of a Packet2cf.
// Every branch loads the 64 bits of `from` into the low half of res.v with
// _mm_loadl_pi; the branches differ only in how the compiler is handled:
//  - GCC <= 4.2: the load is seeded with a zero vector instead of res.v.
//  - GCC >= 4.6: -Wuninitialized is suppressed around the load (push/pop).
//  - last load: same as the GCC >= 4.6 one, without the pragmas.
// NOTE(review): the declaration of `res`, the braces and the #else/#endif
// lines are not visible in this listing.
87 template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(
const std::complex<float>& from)
90 #if EIGEN_GNUC_AT_MOST(4,2) 92 res.v = _mm_loadl_pi(_mm_set1_ps(0.0f), reinterpret_cast<const __m64*>(&from));
93 #elif EIGEN_GNUC_AT_LEAST(4,6) 95 #pragma GCC diagnostic push 96 #pragma GCC diagnostic ignored "-Wuninitialized" 97 res.v = _mm_loadl_pi(res.v, (
const __m64*)&from);
98 #pragma GCC diagnostic pop 100 res.v = _mm_loadl_pi(res.v, (
const __m64*)&from);
// _mm_movelh_ps(x, x) copies the low two floats into the high two, so the
// loaded complex value ends up in both halves of the result.
102 return Packet2cf(_mm_movelh_ps(res.v,res.v));
105 template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(
const std::complex<float>* from) {
return pset1<Packet2cf>(*from); }
107 template<> EIGEN_STRONG_INLINE
void pstore <std::complex<float> >(std::complex<float> * to,
const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), from.v); }
108 template<> EIGEN_STRONG_INLINE
void pstoreu<std::complex<float> >(std::complex<float> * to,
const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), from.v); }
110 template<> EIGEN_STRONG_INLINE
void prefetch<std::complex<float> >(
const std::complex<float> * addr) { _mm_prefetch((
const char*)(addr), _MM_HINT_T0); }
// Extracts a std::complex<float> from the packet.
//  - GCC <= 4.3 branch: stores the whole register into a 16-byte aligned
//    two-element array with _mm_store_ps.
//  - other branch: stores only the low 64 bits into one std::complex<float>
//    with _mm_storel_pi.
// NOTE(review): the return statements, the braces and the #else/#endif lines
// are not visible in this listing.
112 template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(
const Packet2cf& a)
114 #if EIGEN_GNUC_AT_MOST(4,3) 117 EIGEN_ALIGN16 std::complex<float> res[2];
118 _mm_store_ps((
float*)res, a.v);
121 std::complex<float> res;
122 _mm_storel_pi((__m64*)&res, a.v);
127 template<> EIGEN_STRONG_INLINE Packet2cf preverse(
const Packet2cf& a) {
return Packet2cf(_mm_castpd_ps(preverse(_mm_castps_pd(a.v)))); }
// Horizontal sum of the two complex values in the packet.
// _mm_movehl_ps(a.v, a.v) puts the high two floats of a into the low half, so
// the add leaves (low + high) in the low two floats; pfirst then extracts it.
// NOTE(review): the function's braces are not visible in this listing.
129 template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(
const Packet2cf& a)
131 return pfirst(Packet2cf(_mm_add_ps(a.v, _mm_movehl_ps(a.v,a.v))));
// Reduces two packets at once; reads vecs[0] and vecs[1].
// _mm_movelh_ps gathers the low halves (vecs[0] low, vecs[1] low) and
// _mm_movehl_ps gathers the high halves (vecs[0] high, vecs[1] high). Their
// sum holds the horizontal sum of vecs[0] in the low half and of vecs[1] in
// the high half.
// NOTE(review): the function's braces are not visible in this listing.
134 template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(
const Packet2cf* vecs)
136 return Packet2cf(_mm_add_ps(_mm_movelh_ps(vecs[0].v,vecs[1].v), _mm_movehl_ps(vecs[1].v,vecs[0].v)));
// Product of the two complex values in the packet.
// _mm_movehl_ps(a.v, a.v) moves the high two floats into the low half; pmul
// then multiplies a by that packet and pfirst extracts the low result.
// NOTE(review): the function's braces are not visible in this listing.
139 template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(
const Packet2cf& a)
141 return pfirst(pmul(a, Packet2cf(_mm_movehl_ps(a.v,a.v))));
// palign_impl specialization for Packet2cf.
// The two visible statements rewrite `first` in place: the first moves its
// high two floats into its low half, the second replaces its high half with
// the low two floats of `second`. Result: (first high, second low).
// NOTE(review): the template header, the braces and any condition on Offset
// guarding these statements are not visible in this listing.
145 struct palign_impl<Offset,Packet2cf>
147 static EIGEN_STRONG_INLINE
void run(Packet2cf& first,
const Packet2cf& second)
151 first.v = _mm_movehl_ps(first.v, first.v);
152 first.v = _mm_movelh_ps(first.v, second.v);
// conj_helper<Packet2cf, Packet2cf, false, true>: products in which the second
// operand is conjugated, i.e. a * conj(b).
// NOTE(review): the struct braces and the #else/#endif inside pmul are not
// visible in this listing.
157 template<>
struct conj_helper<Packet2cf, Packet2cf, false,true>
// Multiply-add: this helper's pmul(x, y) plus c.
159 EIGEN_STRONG_INLINE Packet2cf pmadd(
const Packet2cf& x,
const Packet2cf& y,
const Packet2cf& c)
const 160 {
return padd(pmul(x,y),c); }
// SSE3 path: conjugate b with pconj, then use the regular packet pmul.
162 EIGEN_STRONG_INLINE Packet2cf pmul(
const Packet2cf& a,
const Packet2cf& b)
const 164 #ifdef EIGEN_VECTORIZE_SSE3 165 return internal::pmul(a, pconj(b));
// Second path: the sign mask on lanes 1 and 3 is applied to the first product
// (swizzle(a, 0,0,2,2) * b) before it is added to the second product.
167 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
168 return Packet2cf(_mm_add_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
169 _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
170 vec4f_swizzle1(b.v, 1, 0, 3, 2))));
// conj_helper<Packet2cf, Packet2cf, true, false>: products in which the first
// operand is conjugated, i.e. conj(a) * b.
// NOTE(review): the struct braces and the #else/#endif inside pmul are not
// visible in this listing.
175 template<>
struct conj_helper<Packet2cf, Packet2cf, true,false>
// Multiply-add: this helper's pmul(x, y) plus c.
177 EIGEN_STRONG_INLINE Packet2cf pmadd(
const Packet2cf& x,
const Packet2cf& y,
const Packet2cf& c)
const 178 {
return padd(pmul(x,y),c); }
// SSE3 path: conjugate a with pconj, then use the regular packet pmul.
180 EIGEN_STRONG_INLINE Packet2cf pmul(
const Packet2cf& a,
const Packet2cf& b)
const 182 #ifdef EIGEN_VECTORIZE_SSE3 183 return internal::pmul(pconj(a), b);
// Second path: the sign mask on lanes 1 and 3 is applied to the second product
// (swizzle(a, 1,1,3,3) * swizzle(b, 1,0,3,2)) before the add.
185 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
186 return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
187 _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
188 vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
// conj_helper<Packet2cf, Packet2cf, true, true>: products in which both
// operands are conjugated, computed as conj(a * b).
// NOTE(review): the struct braces and the #else/#endif inside pmul are not
// visible in this listing.
193 template<>
struct conj_helper<Packet2cf, Packet2cf, true,true>
// Multiply-add: this helper's pmul(x, y) plus c.
195 EIGEN_STRONG_INLINE Packet2cf pmadd(
const Packet2cf& x,
const Packet2cf& y,
const Packet2cf& c)
const 196 {
return padd(pmul(x,y),c); }
// SSE3 path: regular packet pmul followed by pconj on the result.
198 EIGEN_STRONG_INLINE Packet2cf pmul(
const Packet2cf& a,
const Packet2cf& b)
const 200 #ifdef EIGEN_VECTORIZE_SSE3 201 return pconj(internal::pmul(a, b));
// Second path: the sign mask on lanes 1 and 3 is applied to the first product,
// and the second product is then subtracted (note _mm_sub_ps, not _mm_add_ps).
203 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
204 return Packet2cf(_mm_sub_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
205 _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
206 vec4f_swizzle1(b.v, 1, 0, 3, 2))));
// Bodies of two pmadd/pmul pairs whose signatures and enclosing structs are
// not visible in this listing.
// NOTE(review): presumably these are the mixed real/complex conj_helper
// specializations - confirm against the full file.
// First pair: pmadd adds c to pmul(x, y); pmul multiplies x by the raw
// register y.v and wraps the result in a Packet2cf.
214 {
return padd(c, pmul(x,y)); }
217 {
return Packet2cf(Eigen::internal::pmul(x, y.v)); }
// Second pair: same shape, but here the raw register is taken from x (x.v).
223 {
return padd(c, pmul(x,y)); }
226 {
return Packet2cf(Eigen::internal::pmul(x.v, y)); }
// Tail of a function whose signature and the definition of `res` are not
// visible in this listing (presumably complex division - confirm).
// s holds the lane-wise squares of b. _mm_shuffle_epi32 with 0xb1 swaps each
// adjacent pair of 32-bit lanes, so s + shuffled(s) puts the sum of the two
// squares of each pair in both of its lanes; res is divided by that.
233 __m128 s = _mm_mul_ps(b.v,b.v);
234 return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(s,_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(s), 0xb1)))));
// Return statement of a function whose signature is not visible in this
// listing. It wraps x.v permuted by vec4f_swizzle1 with indices (1, 0, 3, 2),
// which presumably swaps the two floats of each complex value.
239 return Packet2cf(vec4f_swizzle1(x.v, 1, 0, 3, 2));
// Explicit wrapping constructor of Packet1cd: stores the given __m128d
// register in member v.
// NOTE(review): the enclosing struct declaration is not visible in this listing.
247 EIGEN_STRONG_INLINE
explicit Packet1cd(
const __m128d& a) : v(a) {}
// Sign mask for the high double only: _mm_set_epi32 takes its arguments from
// the highest 32-bit lane down, so 0x80000000 lands in the top 32 bits of the
// upper 64-bit element.
// NOTE(review): the function using this mask is not visible in this listing
// (presumably the Packet1cd conjugate - confirm).
279 const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
// Body of a Packet1cd multiply whose signature, braces and #else/#endif are
// not visible in this listing.
// SSE3 path: (a lane 0 broadcast) * b combined with (a lane 1 broadcast) *
// (b with its lanes swapped) by _mm_addsub_pd, which subtracts in the low
// lane and adds in the high lane.
// vec2d_swizzle1 is defined elsewhere; presumably it permutes the two doubles
// by the given indices.
286 #ifdef EIGEN_VECTORIZE_SSE3 287 return Packet1cd(_mm_addsub_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
288 _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
289 vec2d_swizzle1(b.v, 1, 0))));
// Second path: the same two products, but the sign of the low double of the
// second product is flipped with a mask and a plain add is used.
291 const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
292 return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
293 _mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
294 vec2d_swizzle1(b.v, 1, 0)), mask)));
304 template<> EIGEN_STRONG_INLINE
Packet1cd pload <Packet1cd>(
const std::complex<double>* from)
305 { EIGEN_DEBUG_ALIGNED_LOAD
return Packet1cd(pload<Packet2d>((
const double*)from)); }
306 template<> EIGEN_STRONG_INLINE
Packet1cd ploadu<Packet1cd>(
const std::complex<double>* from)
307 { EIGEN_DEBUG_UNALIGNED_LOAD
return Packet1cd(ploadu<Packet2d>((
const double*)from)); }
308 template<> EIGEN_STRONG_INLINE
Packet1cd pset1<Packet1cd>(
const std::complex<double>& from)
309 {
return ploadu<Packet1cd>(&from); }
311 template<> EIGEN_STRONG_INLINE
Packet1cd ploaddup<Packet1cd>(
const std::complex<double>* from) {
return pset1<Packet1cd>(*from); }
314 template<> EIGEN_STRONG_INLINE
void pstore <std::complex<double> >(std::complex<double> * to,
const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((
double*)to, from.v); }
315 template<> EIGEN_STRONG_INLINE
void pstoreu<std::complex<double> >(std::complex<double> * to,
const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((
double*)to, from.v); }
317 template<> EIGEN_STRONG_INLINE
void prefetch<std::complex<double> >(
const std::complex<double> * addr) { _mm_prefetch((
const char*)(addr), _MM_HINT_T0); }
// Extracts the complex value held by a Packet1cd: the register is stored into
// a 16-byte aligned two-double buffer, and a std::complex<double> is built
// from element 0 (real part) and element 1 (imaginary part).
// NOTE(review): the function's braces are not visible in this listing.
319 template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(
const Packet1cd& a)
321 EIGEN_ALIGN16
double res[2];
322 _mm_store_pd(res, a.v);
323 return std::complex<double>(res[0],res[1]);
// Signature of predux for Packet1cd: takes a packet, returns a
// std::complex<double>.
// NOTE(review): the body is not visible in this listing.
328 template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(
const Packet1cd& a)
// Signature of predux_mul for Packet1cd: takes a packet, returns a
// std::complex<double>.
// NOTE(review): the body is not visible in this listing.
338 template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(
const Packet1cd& a)
// Bodies of a pmadd/pmul pair for Packet1cd that compute a * conj(b).
// NOTE(review): the signatures, the enclosing struct, the braces and the
// #else/#endif are not visible in this listing.
// pmadd body: pmul(x, y) plus c.
356 {
return padd(pmul(x,y),c); }
// pmul, SSE3 path: conjugate b with pconj, then use the regular packet pmul.
360 #ifdef EIGEN_VECTORIZE_SSE3 361 return internal::pmul(a, pconj(b));
// pmul, second path: the sign of the high double of the first product
// (swizzle(a, 0,0) * b) is flipped before the second product is added.
363 const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
364 return Packet1cd(_mm_add_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask),
365 _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
366 vec2d_swizzle1(b.v, 1, 0))));
// Bodies of a pmadd/pmul pair for Packet1cd that compute conj(a) * b.
// NOTE(review): the signatures, the enclosing struct, the braces and the
// #else/#endif are not visible in this listing.
// pmadd body: pmul(x, y) plus c.
374 {
return padd(pmul(x,y),c); }
// pmul, SSE3 path: conjugate a with pconj, then use the regular packet pmul.
378 #ifdef EIGEN_VECTORIZE_SSE3 379 return internal::pmul(pconj(a), b);
// pmul, second path: the sign of the high double of the second product
// (swizzle(a, 1,1) * swizzle(b, 1,0)) is flipped before the add.
381 const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
382 return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
383 _mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
384 vec2d_swizzle1(b.v, 1, 0)), mask)));
// Bodies of a pmadd/pmul pair for Packet1cd that compute conj(a * b).
// NOTE(review): the signatures, the enclosing struct, the braces and the
// #else/#endif are not visible in this listing.
// pmadd body: pmul(x, y) plus c.
392 {
return padd(pmul(x,y),c); }
// pmul, SSE3 path: regular packet pmul followed by pconj on the result.
396 #ifdef EIGEN_VECTORIZE_SSE3 397 return pconj(internal::pmul(a, b));
// pmul, second path: the sign of the high double of the first product is
// flipped, and the second product is then subtracted (note _mm_sub_pd).
399 const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
400 return Packet1cd(_mm_sub_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask),
401 _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
402 vec2d_swizzle1(b.v, 1, 0))));
// Bodies of two pmadd/pmul pairs whose signatures and enclosing structs are
// not visible in this listing.
// NOTE(review): presumably these are the mixed real/complex conj_helper
// specializations for Packet1cd - confirm against the full file.
// First pair: pmadd adds c to pmul(x, y); pmul multiplies x by the raw
// register y.v and wraps the result in a Packet1cd.
410 {
return padd(c, pmul(x,y)); }
413 {
return Packet1cd(Eigen::internal::pmul(x, y.v)); }
// Second pair: same shape, but here the raw register is taken from x (x.v).
419 {
return padd(c, pmul(x,y)); }
422 {
return Packet1cd(Eigen::internal::pmul(x.v, y)); }
// Tail of a function whose signature and the definition of `res` are not
// visible in this listing (presumably complex division - confirm).
// s holds the squares of b's two doubles. _mm_shuffle_pd(s, s, 0x1) swaps the
// two doubles, so s + shuffled(s) holds the sum of both squares in each lane;
// res is divided by that.
429 __m128d s = _mm_mul_pd(b.v,b.v);
430 return Packet1cd(_mm_div_pd(res.v, _mm_add_pd(s,_mm_shuffle_pd(s, s, 0x1))));
442 #endif // EIGEN_COMPLEX_SSE_H Definition: ForwardDeclarations.h:151
Definition: XprHelper.h:101
iterative scaling algorithm to equilibrate rows and column norms in matrices
Definition: TestIMU_Common.h:87
Definition: TypeSafeIdHash.h:44
Definition: GenericPacketMath.h:71
Definition: Complex.h:244
detail::size< coerce_list< Ts... >> size
Get the size of a list (number of elements.)
Definition: Size.h:56
Definition: GenericPacketMath.h:42
Definition: Complex.h:353
Definition: Complex.h:167
Definition: BandTriangularSolver.h:13
Definition: GenericPacketMath.h:308