@@ -25,49 +25,39 @@ static uint16_t float2Half(const float &Val) {
25
25
const uint32_t Frac32 = Bits & 0x7fffff ;
26
26
// Extract the exponent from the float value
27
27
const uint8_t Exp32 = (Bits & 0x7f800000 ) >> 23 ;
28
- const int8_t Exp32Diff = Exp32 - 127 ;
28
+ const int16_t Exp32Diff = Exp32 - 127 ;
29
29
30
- uint16_t Exp16 = 0 ;
30
+ // initialize to 0, covers the case for 0 and small numbers
31
+ uint16_t Exp16 = 0 , Frac16 = 0 ;
31
32
32
- // convert 23-bit mantissa to 10-bit mantissa.
33
- uint16_t Frac16 = Frac32 >> 13 ;
34
- // Round the mantissa as given in OpenCL spec section : 6.1.1.1 The half data
35
- // type.
36
- if (Frac32 >> 12 & 0x01 )
37
- Frac16 += 1 ;
38
-
39
- if (__builtin_expect (Exp32 == 0xff || Exp32Diff > 15 , 0 )) {
33
+ if (__builtin_expect (Exp32Diff > 15 , 0 )) {
34
+ // Infinity and big numbers convert to infinity
40
35
Exp16 = 0x1f ;
41
- } else if (__builtin_expect (Exp32 == 0 || Exp32Diff < -14 , 0 )) {
42
- Exp16 = 0 ;
43
- } else {
36
+ } else if (__builtin_expect (Exp32Diff > -14 , 0 )) {
37
+ // normal range for half type
44
38
Exp16 = Exp32Diff + 15 ;
39
+ // convert 23-bit mantissa to 10-bit mantissa.
40
+ Frac16 = Frac32 >> 13 ;
41
+ // Round the mantissa as given in OpenCL spec section : 6.1.1.1 The half
42
+ // data type.
43
+ if (Frac32 >> 12 & 0x01 )
44
+ Frac16 += 1 ;
45
+ } else if (__builtin_expect (Exp32Diff > -24 , 0 )) {
46
+ // subnormals
47
+ Frac16 = (Frac32 | (uint32_t (1 ) << 23 )) >> (-Exp32Diff - 1 );
45
48
}
46
49
47
- if (__builtin_expect (Exp32 == 0xff && Frac32 != 0 && Frac16 == 0 , 0 )) {
48
- // corner case 1: NaN
49
- // This case happens when FP32 value is NaN whose the fraction part
50
- // transformed to FP16 counterpart is truncated to 0. We need to flip the
51
- // high bit to 1 to make it distinguished from inf.
50
+ if (__builtin_expect (Exp32 == 0xff && Frac32 != 0 , 0 )) {
51
+ // corner case: FP32 is NaN
52
+ Exp16 = 0x1F ;
52
53
Frac16 = 0x200 ;
53
- } else if (__builtin_expect (Exp32 == 0 || (Exp16 == 0x1f && Exp32 != 0xff ),
54
- 0 )) {
55
- // corner case 2: subnormal
56
- // All FP32 subnormal values are under the range of FP16 so the fraction
57
- // part is set to 0.
58
- // corner case 3: overflow
59
- Frac16 = 0 ;
60
- } else if (__builtin_expect (Exp16 == 0 && Exp32 != 0 , 0 )) {
61
- // corner case 4: underflow
62
- // We use `truncate` mode here.
63
- Frac16 = 0x100 | (Frac16 >> 2 );
64
54
}
65
55
66
56
// Compose the final FP16 binary
67
57
uint16_t Ret = 0 ;
68
58
Ret |= Sign;
69
59
Ret |= Exp16 << 10 ;
70
- Ret += Frac16;// Add the carry bit from operation Frac16 += 1;
60
+ Ret += Frac16; // Add the carry bit from operation Frac16 += 1;
71
61
72
62
return Ret;
73
63
}
@@ -181,7 +171,7 @@ bool operator==(const half &LHS, const half &RHS) {
181
171
}
182
172
183
173
// Inequality comparison for two half values: the logical negation of the
// result of operator== applied to the same operands.
bool operator!=(const half &LHS, const half &RHS) {
  const bool AreEqual = (LHS == RHS);
  return !AreEqual;
}
184
- } // namespace half_impl
174
+ } // namespace host_half_impl
185
175
186
176
} // namespace detail
187
177
} // namespace sycl
0 commit comments