Skip to content

Commit b4ef43f

Browse files
committed
[libclc] Format clc_fma.cl. NFC
1 parent f96ea89 commit b4ef43f

File tree

1 file changed

+118
-114
lines changed

1 file changed

+118
-114
lines changed

libclc/generic/lib/math/clc_fma.cl

Lines changed: 118 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -22,137 +22,141 @@
2222

2323
#include <clc/clc.h>
2424

25+
#include "../clcmacro.h"
2526
#include "config.h"
2627
#include "math.h"
27-
#include "../clcmacro.h"
2828

2929
struct fp {
30-
ulong mantissa;
31-
int exponent;
32-
uint sign;
30+
ulong mantissa;
31+
int exponent;
32+
uint sign;
3333
};
3434

35-
_CLC_DEF _CLC_OVERLOAD float __clc_sw_fma(float a, float b, float c)
36-
{
37-
/* special cases */
38-
if (isnan(a) || isnan(b) || isnan(c) || isinf(a) || isinf(b))
39-
return mad(a, b, c);
35+
_CLC_DEF _CLC_OVERLOAD float __clc_sw_fma(float a, float b, float c) {
36+
/* special cases */
37+
if (isnan(a) || isnan(b) || isnan(c) || isinf(a) || isinf(b))
38+
return mad(a, b, c);
4039

41-
/* If only c is inf, and both a,b are regular numbers, the result is c*/
42-
if (isinf(c))
43-
return c;
40+
/* If only c is inf, and both a,b are regular numbers, the result is c*/
41+
if (isinf(c))
42+
return c;
4443

45-
a = __clc_flush_denormal_if_not_supported(a);
46-
b = __clc_flush_denormal_if_not_supported(b);
47-
c = __clc_flush_denormal_if_not_supported(c);
44+
a = __clc_flush_denormal_if_not_supported(a);
45+
b = __clc_flush_denormal_if_not_supported(b);
46+
c = __clc_flush_denormal_if_not_supported(c);
4847

49-
if (c == 0)
50-
return a * b;
48+
if (c == 0)
49+
return a * b;
5150

52-
struct fp st_a, st_b, st_c;
51+
struct fp st_a, st_b, st_c;
5352

54-
st_a.exponent = a == .0f ? 0 : ((as_uint(a) & 0x7f800000) >> 23) - 127;
55-
st_b.exponent = b == .0f ? 0 : ((as_uint(b) & 0x7f800000) >> 23) - 127;
56-
st_c.exponent = c == .0f ? 0 : ((as_uint(c) & 0x7f800000) >> 23) - 127;
53+
st_a.exponent = a == .0f ? 0 : ((as_uint(a) & 0x7f800000) >> 23) - 127;
54+
st_b.exponent = b == .0f ? 0 : ((as_uint(b) & 0x7f800000) >> 23) - 127;
55+
st_c.exponent = c == .0f ? 0 : ((as_uint(c) & 0x7f800000) >> 23) - 127;
5756

58-
st_a.mantissa = a == .0f ? 0 : (as_uint(a) & 0x7fffff) | 0x800000;
59-
st_b.mantissa = b == .0f ? 0 : (as_uint(b) & 0x7fffff) | 0x800000;
60-
st_c.mantissa = c == .0f ? 0 : (as_uint(c) & 0x7fffff) | 0x800000;
57+
st_a.mantissa = a == .0f ? 0 : (as_uint(a) & 0x7fffff) | 0x800000;
58+
st_b.mantissa = b == .0f ? 0 : (as_uint(b) & 0x7fffff) | 0x800000;
59+
st_c.mantissa = c == .0f ? 0 : (as_uint(c) & 0x7fffff) | 0x800000;
6160

62-
st_a.sign = as_uint(a) & 0x80000000;
63-
st_b.sign = as_uint(b) & 0x80000000;
64-
st_c.sign = as_uint(c) & 0x80000000;
61+
st_a.sign = as_uint(a) & 0x80000000;
62+
st_b.sign = as_uint(b) & 0x80000000;
63+
st_c.sign = as_uint(c) & 0x80000000;
6564

66-
// Multiplication.
67-
// Move the product to the highest bits to maximize precision
68-
// mantissa is 24 bits => product is 48 bits, 2bits non-fraction.
69-
// Add one bit for future addition overflow,
70-
// add another bit to detect subtraction underflow
71-
struct fp st_mul;
72-
st_mul.sign = st_a.sign ^ st_b.sign;
73-
st_mul.mantissa = (st_a.mantissa * st_b.mantissa) << 14ul;
74-
st_mul.exponent = st_mul.mantissa ? st_a.exponent + st_b.exponent : 0;
65+
// Multiplication.
66+
// Move the product to the highest bits to maximize precision
67+
// mantissa is 24 bits => product is 48 bits, 2bits non-fraction.
68+
// Add one bit for future addition overflow,
69+
// add another bit to detect subtraction underflow
70+
struct fp st_mul;
71+
st_mul.sign = st_a.sign ^ st_b.sign;
72+
st_mul.mantissa = (st_a.mantissa * st_b.mantissa) << 14ul;
73+
st_mul.exponent = st_mul.mantissa ? st_a.exponent + st_b.exponent : 0;
7574

76-
// FIXME: Detecting a == 0 || b == 0 above crashed GCN isel
77-
if (st_mul.exponent == 0 && st_mul.mantissa == 0)
78-
return c;
75+
// FIXME: Detecting a == 0 || b == 0 above crashed GCN isel
76+
if (st_mul.exponent == 0 && st_mul.mantissa == 0)
77+
return c;
7978

8079
// Mantissa is 23 fractional bits, shift it the same way as product mantissa
8180
#define C_ADJUST 37ul
8281

83-
// both exponents are bias adjusted
84-
int exp_diff = st_mul.exponent - st_c.exponent;
85-
86-
st_c.mantissa <<= C_ADJUST;
87-
ulong cutoff_bits = 0;
88-
ulong cutoff_mask = (1ul << abs(exp_diff)) - 1ul;
89-
if (exp_diff > 0) {
90-
cutoff_bits = exp_diff >= 64 ? st_c.mantissa : (st_c.mantissa & cutoff_mask);
91-
st_c.mantissa = exp_diff >= 64 ? 0 : (st_c.mantissa >> exp_diff);
92-
} else {
93-
cutoff_bits = -exp_diff >= 64 ? st_mul.mantissa : (st_mul.mantissa & cutoff_mask);
94-
st_mul.mantissa = -exp_diff >= 64 ? 0 : (st_mul.mantissa >> -exp_diff);
95-
}
96-
97-
struct fp st_fma;
98-
st_fma.sign = st_mul.sign;
99-
st_fma.exponent = max(st_mul.exponent, st_c.exponent);
100-
if (st_c.sign == st_mul.sign) {
101-
st_fma.mantissa = st_mul.mantissa + st_c.mantissa;
102-
} else {
103-
// cutoff bits borrow one
104-
st_fma.mantissa = st_mul.mantissa - st_c.mantissa - (cutoff_bits && (st_mul.exponent > st_c.exponent) ? 1 : 0);
105-
}
106-
107-
// underflow: st_c.sign != st_mul.sign, and magnitude switches the sign
108-
if (st_fma.mantissa > LONG_MAX) {
109-
st_fma.mantissa = 0 - st_fma.mantissa;
110-
st_fma.sign = st_mul.sign ^ 0x80000000;
111-
}
112-
113-
// detect overflow/underflow
114-
int overflow_bits = 3 - clz(st_fma.mantissa);
115-
116-
// adjust exponent
117-
st_fma.exponent += overflow_bits;
118-
119-
// handle underflow
120-
if (overflow_bits < 0) {
121-
st_fma.mantissa <<= -overflow_bits;
122-
overflow_bits = 0;
123-
}
124-
125-
// rounding
126-
ulong trunc_mask = (1ul << (C_ADJUST + overflow_bits)) - 1;
127-
ulong trunc_bits = (st_fma.mantissa & trunc_mask) | (cutoff_bits != 0);
128-
ulong last_bit = st_fma.mantissa & (1ul << (C_ADJUST + overflow_bits));
129-
ulong grs_bits = (0x4ul << (C_ADJUST - 3 + overflow_bits));
130-
131-
// round to nearest even
132-
if ((trunc_bits > grs_bits) ||
133-
(trunc_bits == grs_bits && last_bit != 0))
134-
st_fma.mantissa += (1ul << (C_ADJUST + overflow_bits));
135-
136-
// Shift mantissa back to bit 23
137-
st_fma.mantissa = (st_fma.mantissa >> (C_ADJUST + overflow_bits));
138-
139-
// Detect rounding overflow
140-
if (st_fma.mantissa > 0xffffff) {
141-
++st_fma.exponent;
142-
st_fma.mantissa >>= 1;
143-
}
144-
145-
if (st_fma.mantissa == 0)
146-
return .0f;
147-
148-
// Flating point range limit
149-
if (st_fma.exponent > 127)
150-
return as_float(as_uint(INFINITY) | st_fma.sign);
151-
152-
// Flush denormals
153-
if (st_fma.exponent <= -127)
154-
return as_float(st_fma.sign);
155-
156-
return as_float(st_fma.sign | ((st_fma.exponent + 127) << 23) | ((uint)st_fma.mantissa & 0x7fffff));
82+
// both exponents are bias adjusted
83+
int exp_diff = st_mul.exponent - st_c.exponent;
84+
85+
st_c.mantissa <<= C_ADJUST;
86+
ulong cutoff_bits = 0;
87+
ulong cutoff_mask = (1ul << abs(exp_diff)) - 1ul;
88+
if (exp_diff > 0) {
89+
cutoff_bits =
90+
exp_diff >= 64 ? st_c.mantissa : (st_c.mantissa & cutoff_mask);
91+
st_c.mantissa = exp_diff >= 64 ? 0 : (st_c.mantissa >> exp_diff);
92+
} else {
93+
cutoff_bits =
94+
-exp_diff >= 64 ? st_mul.mantissa : (st_mul.mantissa & cutoff_mask);
95+
st_mul.mantissa = -exp_diff >= 64 ? 0 : (st_mul.mantissa >> -exp_diff);
96+
}
97+
98+
struct fp st_fma;
99+
st_fma.sign = st_mul.sign;
100+
st_fma.exponent = max(st_mul.exponent, st_c.exponent);
101+
if (st_c.sign == st_mul.sign) {
102+
st_fma.mantissa = st_mul.mantissa + st_c.mantissa;
103+
} else {
104+
// cutoff bits borrow one
105+
st_fma.mantissa =
106+
st_mul.mantissa - st_c.mantissa -
107+
(cutoff_bits && (st_mul.exponent > st_c.exponent) ? 1 : 0);
108+
}
109+
110+
// underflow: st_c.sign != st_mul.sign, and magnitude switches the sign
111+
if (st_fma.mantissa > LONG_MAX) {
112+
st_fma.mantissa = 0 - st_fma.mantissa;
113+
st_fma.sign = st_mul.sign ^ 0x80000000;
114+
}
115+
116+
// detect overflow/underflow
117+
int overflow_bits = 3 - clz(st_fma.mantissa);
118+
119+
// adjust exponent
120+
st_fma.exponent += overflow_bits;
121+
122+
// handle underflow
123+
if (overflow_bits < 0) {
124+
st_fma.mantissa <<= -overflow_bits;
125+
overflow_bits = 0;
126+
}
127+
128+
// rounding
129+
ulong trunc_mask = (1ul << (C_ADJUST + overflow_bits)) - 1;
130+
ulong trunc_bits = (st_fma.mantissa & trunc_mask) | (cutoff_bits != 0);
131+
ulong last_bit = st_fma.mantissa & (1ul << (C_ADJUST + overflow_bits));
132+
ulong grs_bits = (0x4ul << (C_ADJUST - 3 + overflow_bits));
133+
134+
// round to nearest even
135+
if ((trunc_bits > grs_bits) || (trunc_bits == grs_bits && last_bit != 0))
136+
st_fma.mantissa += (1ul << (C_ADJUST + overflow_bits));
137+
138+
// Shift mantissa back to bit 23
139+
st_fma.mantissa = (st_fma.mantissa >> (C_ADJUST + overflow_bits));
140+
141+
// Detect rounding overflow
142+
if (st_fma.mantissa > 0xffffff) {
143+
++st_fma.exponent;
144+
st_fma.mantissa >>= 1;
145+
}
146+
147+
if (st_fma.mantissa == 0)
148+
return .0f;
149+
150+
// Flating point range limit
151+
if (st_fma.exponent > 127)
152+
return as_float(as_uint(INFINITY) | st_fma.sign);
153+
154+
// Flush denormals
155+
if (st_fma.exponent <= -127)
156+
return as_float(st_fma.sign);
157+
158+
return as_float(st_fma.sign | ((st_fma.exponent + 127) << 23) |
159+
((uint)st_fma.mantissa & 0x7fffff));
157160
}
158-
_CLC_TERNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_sw_fma, float, float, float)
161+
_CLC_TERNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_sw_fma, float,
162+
float, float)

0 commit comments

Comments
 (0)