@@ -6,21 +6,13 @@ SPDX-License-Identifier: MIT
 
 ============================= end_copyright_notice ===========================*/
 
+#include "f64consts.h"
 #include <cm-cl/math.h>
 #include <cm-cl/vector.h>
 
 using namespace cm;
 
 namespace {
-// We have to use 32-bit integers when it's possible
-constexpr unsigned exp_shift = 52 - 32;
-constexpr unsigned exp_mask = 0x7ff;
-constexpr unsigned exp_bias = 0x3ff;
-constexpr unsigned exp_invmask = ~(exp_mask << exp_shift);
-
-constexpr unsigned nan_hi = 0x7ff80000;
-constexpr unsigned inf_hi = 0x7ff00000;
-
 template <bool NNaN, bool NInf, bool NSZ, int N>
 CM_NODEBUG CM_INLINE vector<double, N>
 __impl_fdiv_special(vector<double, N> a, vector<double, N> b) {
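For reference: the definitions removed above, together with the twoPow64/twoPowm64 names used later in this diff, suggest what the new f64consts.h centralizes. The header itself is not part of this diff, so the following is only an inferred sketch:

  // Sketch of f64consts.h, inferred from the constants removed in this file;
  // the real header is not shown in this diff.
  // We have to use 32-bit integers when it's possible
  constexpr unsigned exp_shift = 52 - 32; // exponent position within the high dword
  constexpr unsigned exp_mask = 0x7ff;    // 11-bit exponent field, all ones
  constexpr unsigned exp_bias = 0x3ff;    // IEEE-754 binary64 exponent bias
  constexpr unsigned exp_invmask = ~(exp_mask << exp_shift);

  constexpr unsigned nan_hi = 0x7ff80000; // high dword of a default quiet NaN
  constexpr unsigned inf_hi = 0x7ff00000; // high dword of +infinity

  constexpr double twoPow64 = 0x1p+64;    // 2^64, pre-scales subnormal operands
  constexpr double twoPowm64 = 0x1p-64;   // 2^-64, compensating result scale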
@@ -42,8 +34,8 @@ __impl_fdiv_special(vector<double, N> a, vector<double, N> b) {
   auto result_lo = result.template select<N, 2>(0);
   auto result_hi = result.template select<N, 2>(1);
 
-  auto ex_x_max = x_exp == 0x7ff;
-  auto ex_y_max = y_exp == 0x7ff;
+  auto ex_x_max = x_exp == exp_mask;
+  auto ex_y_max = y_exp == exp_mask;
 
   if constexpr (!NInf) // Inf / y == Inf
     result_hi.merge(x_sgn ^ y_sgn | inf_hi, ex_x_max);
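Switching from the literal 0x7ff to exp_mask does not change behavior: an all-ones exponent field in binary64 marks infinity or NaN, which is what the special-case path keys on. A standalone scalar illustration of the same check on the high 32 bits (my sketch, not code from this file):

  #include <cstdint>
  #include <cstring>

  // True when x is +/-Inf or a NaN, i.e. its 11-bit exponent field is all ones.
  bool exp_is_max(double x) {
    uint64_t bits;
    std::memcpy(&bits, &x, sizeof(bits));            // reinterpret the double's bits
    uint32_t hi = static_cast<uint32_t>(bits >> 32); // high dword, as the kernel uses
    return ((hi >> (52 - 32)) & 0x7ffu) == 0x7ffu;   // exp_shift / exp_mask
  }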
@@ -112,28 +104,25 @@ CM_NODEBUG CM_INLINE vector<double, N> __impl_fdiv_fast(vector<double, N> a,
 
   // Long path, scale is needed
   if (long_path.any()) {
-    constexpr double two64 = 0x1p+64;
-    constexpr double twom64 = 0x1p-64;
-
     // Handle subnormal a
     mask<N> x_unorm = x_exp == 0;
     if (x_unorm.any()) {
-      a.merge(a * two64, x_unorm);
+      a.merge(a * twoPow64, x_unorm);
       x_exp = (vector<uint32_t, N>(x_hi) >> exp_shift) & exp_mask;
       // if exp is still 0, we have zero or FTZ enabled
-      scale0.merge(scale0 * twom64, x_unorm & (x_exp != 0));
+      scale0.merge(scale0 * twoPowm64, x_unorm & (x_exp != 0));
     }
 
     // Handle subnormal b
     mask<N> y_unorm = y_exp == 0;
     if (y_unorm.any()) {
-      b.merge(b * two64, y_unorm);
+      b.merge(b * twoPow64, y_unorm);
       y_exp = (vector<uint32_t, N>(y_hi) >> exp_shift) & exp_mask;
       // if exp is still 0, we have zero or FTZ enabled
-      scale0.merge(scale0 * two64, y_unorm & (y_exp != 0));
+      scale0.merge(scale0 * twoPow64, y_unorm & (y_exp != 0));
     }
 
-    auto exp_diff = x_exp - y_exp + 0x7ff;
+    auto exp_diff = x_exp - y_exp + exp_mask;
 
     auto scale1_hi =
         scale1.template format<uint32_t>().template select<N, 2>(1);
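The renaming of two64/twom64 to the shared twoPow64/twoPowm64 keeps the usual subnormal handling: a subnormal operand (exponent field 0) is multiplied by 2^64 so it becomes normal and its exponent can be read, and the quotient is compensated through scale0 unless the operand was actually zero (or FTZ flushed it). A scalar sketch of that idea in plain C++, not the vector kernel:

  #include <cstdint>
  #include <cstdio>
  #include <cstring>

  int main() {
    double a = 0x1p-1060;                // subnormal: exponent field is 0
    double scale0 = 1.0;

    uint64_t bits;
    std::memcpy(&bits, &a, sizeof(bits));
    unsigned exp = (bits >> 52) & 0x7ff; // 0 for subnormals

    if (exp == 0) {
      a *= 0x1p+64;                      // now normal, exponent readable
      std::memcpy(&bits, &a, sizeof(bits));
      exp = (bits >> 52) & 0x7ff;
      if (exp != 0)                      // still 0 would mean zero (or FTZ)
        scale0 *= 0x1p-64;               // undo the pre-scaling in the result
    }
    std::printf("exp=%u scale0=%a\n", exp, scale0);
  }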
@@ -286,28 +275,25 @@ CM_NODEBUG CM_INLINE vector<double, N> __impl_fdiv_ieee(vector<double, N> a,
 
   // Long path, scale is needed
   if (long_path.any()) {
-    constexpr double two64 = 0x1p+64;
-    constexpr double twom64 = 0x1p-64;
-
     // Handle subnormal a
     mask<N> x_unorm = x_exp == 0;
     if (x_unorm.any()) {
-      a.merge(a * two64, x_unorm);
+      a.merge(a * twoPow64, x_unorm);
       x_exp = (vector<uint32_t, N>(x_hi) >> exp_shift) & exp_mask;
       // if exp is still 0, we have zero or FTZ enabled
-      scale0.merge(scale0 * twom64, x_unorm & (x_exp != 0));
+      scale0.merge(scale0 * twoPowm64, x_unorm & (x_exp != 0));
     }
 
     // Handle subnormal b
     mask<N> y_unorm = y_exp == 0;
     if (y_unorm.any()) {
-      b.merge(b * two64, y_unorm);
+      b.merge(b * twoPow64, y_unorm);
       y_exp = (vector<uint32_t, N>(y_hi) >> exp_shift) & exp_mask;
       // if exp is still 0, we have zero or FTZ enabled
-      scale0.merge(scale0 * two64, y_unorm & (y_exp != 0));
+      scale0.merge(scale0 * twoPow64, y_unorm & (y_exp != 0));
     }
 
-    auto exp_diff = x_exp - y_exp + 0x7ff;
+    auto exp_diff = x_exp - y_exp + exp_mask;
 
     auto scale1_hi =
         scale1.template format<uint32_t>().template select<N, 2>(1);
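As in the fast variant, exp_diff = x_exp - y_exp + exp_mask is a biased exponent difference that is written into the high dword of scale1 just past this context, presumably to form a power-of-two rescaling factor for the quotient. A scalar sketch of building such a factor by writing exponent bits directly; the helper name and its exact use are my assumption, since the shift itself falls outside this hunk:

  #include <cstdint>
  #include <cstring>

  // Hypothetical helper: builds 2^(biased - 1023) by placing an 11-bit biased
  // exponent into an otherwise-zero binary64 (valid for biased in 1..2046).
  double pow2_from_biased(uint32_t biased) {
    uint64_t bits = static_cast<uint64_t>(biased & 0x7ffu) << 52;
    double d;
    std::memcpy(&d, &bits, sizeof(d));
    return d; // e.g. biased == 0x3ff yields 1.0
  }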
@@ -339,8 +325,8 @@ CM_NODEBUG CM_INLINE vector<double, N> __impl_fdiv_ieee(vector<double, N> a,
   b.merge(mb.template format<double>(), long_path);
 
   // g_ediff value is needed to detect gradual underflow
-  vector<double, N> abs_a = detail::__cm_cl_abs_float(a.cl_vector());
-  vector<double, N> abs_b = detail::__cm_cl_abs_float(b.cl_vector());
+  vector<double, N> abs_a = math::absolute(a.cl_vector());
+  vector<double, N> abs_b = math::absolute(b.cl_vector());
 
   vector<int64_t, N> i_abs_a = abs_a.template format<int64_t>();
   vector<int64_t, N> i_abs_b = abs_b.template format<int64_t>();
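Replacing detail::__cm_cl_abs_float with math::absolute presumably keeps the semantics: the absolute value of a binary64 only clears the sign bit, so abs_a and abs_b can be reinterpreted as int64_t (the i_abs_a/i_abs_b lines above) and their difference used to detect gradual underflow. A scalar equivalent of that absolute-value step for reference; this is a sketch, not the library call:

  #include <cstdint>
  #include <cstring>

  // |x| for binary64: clear bit 63, leave exponent and mantissa untouched.
  double abs_f64(double x) {
    uint64_t bits;
    std::memcpy(&bits, &x, sizeof(bits));
    bits &= ~(UINT64_C(1) << 63); // drop the sign bit
    std::memcpy(&x, &bits, sizeof(x));
    return x;
  }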
@@ -472,4 +458,4 @@ CM_NODEBUG CM_NOINLINE extern "C" double __vc_builtin_fdiv_fast_f64(double a,
 FDIV(1)
 FDIV(2)
 FDIV(4)
-FDIV(8)
+FDIV(8)