36 #ifndef EIGEN_HALF_CUDA_H 37 #define EIGEN_HALF_CUDA_H 39 #if __cplusplus > 199711L 40 #define EIGEN_EXPLICIT_CAST(tgt_type) explicit operator tgt_type() 42 #define EIGEN_EXPLICIT_CAST(tgt_type) operator tgt_type() 52 #if !defined(EIGEN_HAS_CUDA_FP16) 56 EIGEN_DEVICE_FUNC
__half() {}
57 explicit EIGEN_DEVICE_FUNC
__half(
unsigned short raw) : x(raw) {}
63 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
__half raw_uint16_to_half(
unsigned short x);
64 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
__half float_to_half_rtne(
float ff);
65 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
float half_to_float(
__half h);
70 EIGEN_DEVICE_FUNC half_base(
const __half& h) :
__half(h) {}
77 #if !defined(EIGEN_HAS_CUDA_FP16) 81 EIGEN_DEVICE_FUNC
half() {}
86 explicit EIGEN_DEVICE_FUNC half(
bool b)
89 explicit EIGEN_DEVICE_FUNC half(
const T& val)
91 explicit EIGEN_DEVICE_FUNC half(
float f)
94 EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(
bool)
const {
96 return (x & 0x7fff) != 0;
98 EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(
signed char)
const {
99 return static_cast<signed char>(half_impl::half_to_float(*
this));
101 EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(
unsigned char)
const {
102 return static_cast<unsigned char>(half_impl::half_to_float(*
this));
104 EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(
short)
const {
105 return static_cast<short>(half_impl::half_to_float(*
this));
107 EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(
unsigned short)
const {
108 return static_cast<unsigned short>(half_impl::half_to_float(*
this));
110 EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(
int)
const {
111 return static_cast<int>(half_impl::half_to_float(*
this));
113 EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(
unsigned int)
const {
114 return static_cast<unsigned int>(half_impl::half_to_float(*
this));
116 EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(
long)
const {
117 return static_cast<long>(half_impl::half_to_float(*
this));
119 EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(
unsigned long)
const {
120 return static_cast<unsigned long>(half_impl::half_to_float(*
this));
122 EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(
long long)
const {
123 return static_cast<long long>(half_impl::half_to_float(*
this));
125 EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(
unsigned long long)
const {
126 return static_cast<unsigned long long>(half_to_float(*
this));
128 EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(
float)
const {
129 return half_impl::half_to_float(*
this);
131 EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(
double)
const {
132 return static_cast<double>(half_impl::half_to_float(*
this));
135 EIGEN_DEVICE_FUNC half& operator=(
const half& other) {
141 namespace half_impl {
143 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 150 __device__
half operator + (
const half& a,
const half& b) {
156 __device__
half operator - (
const half& a,
const half& b) {
159 __device__
half operator / (
const half& a,
const half& b) {
160 float num = __half2float(a);
161 float denom = __half2float(b);
162 return __float2half(num / denom);
164 __device__
half operator - (
const half& a) {
183 __device__
bool operator == (
const half& a,
const half& b) {
186 __device__
bool operator != (
const half& a,
const half& b) {
189 __device__
bool operator < (
const half& a,
const half& b) {
192 __device__
bool operator <= (
const half& a,
const half& b) {
195 __device__
bool operator > (
const half& a,
const half& b) {
198 __device__
bool operator >= (
const half& a,
const half& b) {
202 #else // Emulate support for half floats 207 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
half operator + (
const half& a,
const half& b) {
208 return half(
float(a) +
float(b));
211 return half(
float(a) *
float(b));
213 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
half operator - (
const half& a,
const half& b) {
214 return half(
float(a) -
float(b));
216 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
half operator / (
const half& a,
const half& b) {
217 return half(
float(a) /
float(b));
219 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
half operator - (
const half& a) {
221 result.x = a.x ^ 0x8000;
224 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
half& operator += (
half& a,
const half& b) {
225 a =
half(
float(a) +
float(b));
228 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
half& operator *= (
half& a,
const half& b) {
229 a =
half(
float(a) *
float(b));
232 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
half& operator -= (
half& a,
const half& b) {
233 a =
half(
float(a) -
float(b));
236 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
half& operator /= (
half& a,
const half& b) {
237 a =
half(
float(a) /
float(b));
240 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
bool operator == (
const half& a,
const half& b) {
241 return float(a) == float(b);
243 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
bool operator != (
const half& a,
const half& b) {
244 return float(a) != float(b);
246 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
bool operator < (
const half& a,
const half& b) {
247 return float(a) < float(b);
249 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
bool operator <= (
const half& a,
const half& b) {
250 return float(a) <= float(b);
252 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
bool operator > (
const half& a,
const half& b) {
253 return float(a) > float(b);
255 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
bool operator >= (
const half& a,
const half& b) {
256 return float(a) >= float(b);
259 #endif // Emulate support for half floats 263 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
half operator / (
const half& a,
Index b) {
264 return half(static_cast<float>(a) / static_cast<float>(b));
272 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
__half raw_uint16_to_half(
unsigned short x) {
283 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
__half float_to_half_rtne(
float ff) {
284 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 285 return __float2half(ff);
287 #elif defined(EIGEN_HAS_FP16_C) 289 h.x = _cvtss_sh(ff, 0);
295 const FP32 f32infty = { 255 << 23 };
296 const FP32 f16max = { (127 + 16) << 23 };
297 const FP32 denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 };
298 unsigned int sign_mask = 0x80000000u;
300 o.x =
static_cast<unsigned short>(0x0u);
302 unsigned int sign = f.u & sign_mask;
310 if (f.u >= f16max.u) {
311 o.x = (f.u > f32infty.u) ? 0x7e00 : 0x7c00;
313 if (f.u < (113 << 23)) {
317 f.f += denorm_magic.f;
320 o.x =
static_cast<unsigned short>(f.u - denorm_magic.u);
322 unsigned int mant_odd = (f.u >> 13) & 1;
325 f.u += ((
unsigned int)(15 - 127) << 23) + 0xfff;
329 o.x =
static_cast<unsigned short>(f.u >> 13);
333 o.x |=
static_cast<unsigned short>(sign >> 16);
338 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
float half_to_float(
__half h) {
339 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 340 return __half2float(h);
342 #elif defined(EIGEN_HAS_FP16_C) 343 return _cvtsh_ss(h.x);
346 const FP32 magic = { 113 << 23 };
347 const unsigned int shifted_exp = 0x7c00 << 13;
350 o.u = (h.x & 0x7fff) << 13;
351 unsigned int exp = shifted_exp & o.u;
352 o.u += (127 - 15) << 23;
355 if (exp == shifted_exp) {
356 o.u += (128 - 16) << 23;
357 }
else if (exp == 0) {
362 o.u |= (h.x & 0x8000) << 16;
369 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isinf)(
const half& a) {
370 return (a.x & 0x7fff) == 0x7c00;
372 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isnan)(
const half& a) {
373 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 376 return (a.x & 0x7fff) > 0x7c00;
379 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isfinite)(
const half& a) {
380 return !(isinf EIGEN_NOT_A_MACRO (a)) && !(isnan EIGEN_NOT_A_MACRO (a));
383 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
half abs(
const half& a) {
385 result.x = a.x & 0x7FFF;
388 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
half exp(
const half& a) {
389 return half(::expf(
float(a)));
391 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
half log(
const half& a) {
392 #if defined(EIGEN_HAS_CUDA_FP16) && defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 395 return half(::logf(
float(a)));
398 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
half log1p(
const half& a) {
399 return half(numext::log1p(
float(a)));
401 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
half log10(
const half& a) {
402 return half(::log10f(
float(a)));
404 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
half sqrt(
const half& a) {
405 return half(::sqrtf(
float(a)));
407 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
half pow(
const half& a,
const half& b) {
408 return half(::powf(
float(a),
float(b)));
410 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
half sin(
const half& a) {
411 return half(::sinf(
float(a)));
413 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
half cos(
const half& a) {
414 return half(::cosf(
float(a)));
416 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
half tan(
const half& a) {
417 return half(::tanf(
float(a)));
419 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
half tanh(
const half& a) {
420 return half(::tanhf(
float(a)));
422 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
half floor(
const half& a) {
423 return half(::floorf(
float(a)));
425 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
half ceil(
const half& a) {
426 return half(::ceilf(
float(a)));
429 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
half (min)(
const half& a,
const half& b) {
430 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 431 return __hlt(b, a) ? b : a;
433 const float f1 =
static_cast<float>(a);
434 const float f2 =
static_cast<float>(b);
435 return f2 < f1 ? b : a;
438 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
half (max)(
const half& a,
const half& b) {
439 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 440 return __hlt(a, b) ? b : a;
442 const float f1 =
static_cast<float>(a);
443 const float f2 =
static_cast<float>(b);
444 return f1 < f2 ? b : a;
448 EIGEN_ALWAYS_INLINE std::ostream& operator << (std::ostream& os,
const half& v) {
449 os << static_cast<float>(v);
461 struct random_default_impl<
half, false, false>
465 return x + (y-x) *
half(
float(std::rand()) / float(RAND_MAX));
467 static inline half run()
480 EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE
Eigen::half epsilon() {
481 return half_impl::raw_uint16_to_half(0x0800);
484 EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE
Eigen::half highest() {
485 return half_impl::raw_uint16_to_half(0x7bff);
487 EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE
Eigen::half lowest() {
488 return half_impl::raw_uint16_to_half(0xfbff);
490 EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE
Eigen::half infinity() {
491 return half_impl::raw_uint16_to_half(0x7c00);
493 EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE
Eigen::half quiet_NaN() {
494 return half_impl::raw_uint16_to_half(0x7c01);
503 result.x = a.x & 0x7FFF;
510 #if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 531 #if __cplusplus > 199711L 533 struct hash<Eigen::
half> {
534 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t operator()(
const Eigen::half& a)
const {
535 return static_cast<std::size_t
>(a.x);
544 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 545 __device__ EIGEN_STRONG_INLINE
Eigen::half __shfl_xor(
Eigen::half var,
int laneMask,
int width=warpSize) {
546 return static_cast<Eigen::half>(__shfl_xor(static_cast<float>(var), laneMask, width));
551 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 553 return Eigen::half_impl::raw_uint16_to_half(
554 __ldg(reinterpret_cast<const unsigned short*>(ptr)));
559 #if defined(__CUDA_ARCH__) 564 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
566 return (half_impl::isnan)(h);
570 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
572 return (half_impl::isinf)(h);
576 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
578 return (half_impl::isfinite)(h);
585 #endif // EIGEN_HALF_CUDA_H Definition: NumTraits.h:88
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:85
Holds information about the various numeric (i.e.
Definition: NumTraits.h:150
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:33
EIGEN_DEVICE_FUNC const Product< MatrixDerived, PermutationDerived, AliasFreeProduct > operator*(const MatrixBase< MatrixDerived > &matrix, const PermutationBase< PermutationDerived > &permutation)
Definition: PermutationMatrix.h:543
Definition: BandTriangularSolver.h:13