Skip to content

Commit 8a7a096

Browse files
authored
Merge pull request #5784 from apple/eng/PR-102641225
[compiler-rt][X86] Add half <-> x86_fp80 conversion builtins
2 parents d3160d0 + 77e4409 commit 8a7a096

File tree

11 files changed

+406
-11
lines changed

11 files changed

+406
-11
lines changed

compiler-rt/lib/builtins/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,7 @@ endif ()
283283
# long double is not 80 bits on Android or MSVC.
284284
set(x86_80_BIT_SOURCES
285285
divxc3.c
286+
extendhfxf2.c
286287
fixxfdi.c
287288
fixxfti.c
288289
fixunsxfdi.c
@@ -294,6 +295,7 @@ set(x86_80_BIT_SOURCES
294295
floatuntixf.c
295296
mulxc3.c
296297
powixf2.c
298+
truncxfhf2.c
297299
)
298300

299301
if (NOT MSVC)
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
//===-- lib/extendhfxf2.c - half -> x86 FP80 conversion -----------*- C -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#define FP80_PRECISION
10+
#include "fp_lib.h"
11+
12+
#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_80BIT)
13+
14+
#define SRC_HALF
15+
#define DST_FP80
16+
#include "fp_extend_impl.inc"
17+
18+
// Use a forwarding definition and noinline to implement a poor man's alias,
19+
// as there isn't a good cross-platform way of defining one.
20+
// Public entry point for the half -> long double conversion. Takes the source
// value `a` (src_t, selected by SRC_HALF above) and returns the result of the
// generic __extendXfYf2__ routine pulled in from fp_extend_impl.inc with
// DST_FP80 selected.
COMPILER_RT_ABI NOINLINE long double __extendhfxf2(src_t a) {
21+
return __extendXfYf2__(a);
22+
}
23+
24+
#endif

compiler-rt/lib/builtins/fp_extend.h

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,22 +58,37 @@ static const int srcSigBits = 10;
5858
typedef float dst_t;
5959
typedef uint32_t dst_rep_t;
6060
#define DST_REP_C UINT32_C
61+
static const int dstBits = 32;
6162
static const int dstSigBits = 23;
63+
static const int dstIntBits = 0;
6264

6365
#elif defined DST_DOUBLE
6466
typedef double dst_t;
6567
typedef uint64_t dst_rep_t;
6668
#define DST_REP_C UINT64_C
69+
static const int dstBits = 64;
6770
static const int dstSigBits = 52;
71+
static const int dstIntBits = 0;
72+
73+
#elif defined DST_FP80
74+
typedef long double dst_t;
75+
typedef __uint128_t dst_rep_t;
76+
#define DST_REP_C (__uint128_t)
77+
static const int dstBits = 80;
78+
static const int dstSigBits = 64;
79+
static const int dstIntBits = 1;
80+
6881

6982
#elif defined DST_QUAD
7083
typedef long double dst_t;
7184
typedef __uint128_t dst_rep_t;
7285
#define DST_REP_C (__uint128_t)
86+
static const int dstBits = 128;
7387
static const int dstSigBits = 112;
88+
static const int dstIntBits = 0;
7489

7590
#else
76-
#error Destination should be single, double, or quad precision!
91+
#error Destination should be single, double, fp80, or quad precision!
7792
#endif // end destination precision
7893

7994
// End of specialization parameters. Two helper routines for conversion to and

compiler-rt/lib/builtins/fp_extend_impl.inc

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,12 +52,12 @@ static __inline dst_t __extendXfYf2__(src_t a) {
5252
const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1);
5353
const src_rep_t srcNaNCode = srcQNaN - 1;
5454

55-
const int dstBits = sizeof(dst_t) * CHAR_BIT;
5655
const int dstExpBits = dstBits - dstSigBits - 1;
5756
const int dstInfExp = (1 << dstExpBits) - 1;
5857
const int dstExpBias = dstInfExp >> 1;
5958

6059
const dst_rep_t dstMinNormal = DST_REP_C(1) << dstSigBits;
60+
const dst_rep_t dstSignificandMask = dstMinNormal - 1;
6161

6262
// Break a into a sign and representation of the absolute value.
6363
const src_rep_t aRep = srcToRep(a);
@@ -72,6 +72,19 @@ static __inline dst_t __extendXfYf2__(src_t a) {
7272
// Extend to the destination type by shifting the significand and
7373
// exponent into the proper position and rebiasing the exponent.
7474
absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits);
75+
76+
if (dstIntBits) {
77+
// x86_fp80 has an explicit instead of implicit integer bit at the top of
78+
// the significand. Canonical values (except denormals & zero) set it to
79+
// 1.
80+
dst_rep_t absSignificand = absResult & dstSignificandMask;
81+
absSignificand >>= 1;
82+
absSignificand |= (dst_rep_t)1 << (dstSigBits - 1);
83+
84+
absResult &= ~dstSignificandMask;
85+
absResult |= absSignificand;
86+
}
87+
7588
absResult += (dst_rep_t)(dstExpBias - srcExpBias) << dstSigBits;
7689
}
7790

@@ -81,16 +94,18 @@ static __inline dst_t __extendXfYf2__(src_t a) {
8194
// bit (if needed) and right-aligning the rest of the trailing NaN
8295
// payload field.
8396
absResult = (dst_rep_t)dstInfExp << dstSigBits;
84-
absResult |= (dst_rep_t)(aAbs & srcQNaN) << (dstSigBits - srcSigBits);
85-
absResult |= (dst_rep_t)(aAbs & srcNaNCode) << (dstSigBits - srcSigBits);
97+
if (dstIntBits)
98+
absResult |= (dst_rep_t)1 << (dstSigBits - 1);
99+
absResult |= (dst_rep_t)(aAbs & srcQNaN) << (dstSigBits - dstIntBits - srcSigBits);
100+
absResult |= (dst_rep_t)(aAbs & srcNaNCode) << (dstSigBits - dstIntBits - srcSigBits);
86101
}
87102

88103
else if (aAbs) {
89104
// a is denormal.
90105
// renormalize the significand and clear the leading bit, then insert
91106
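// the correct adjusted exponent in the destination type.
// NOTE(review): when dstIntBits == 1 the shift below subtracts dstIntBits, so
// the renormalized leading bit ends up at bit dstSigBits - 1 (the explicit
// integer bit) rather than at bit dstSigBits. `absResult ^= dstMinNormal` then
// sets bit dstSigBits instead of clearing the leading bit, and that bit is
// OR-ed into the exponent field. This looks wrong whenever resultExponent is
// even (e.g. half 0x0002) -- confirm, and skip the XOR for destinations with
// an explicit integer bit.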
92107
const int scale = src_rep_t_clz(aAbs) - src_rep_t_clz(srcMinNormal);
93-
absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits + scale);
108+
absResult = (dst_rep_t)aAbs << (dstSigBits - dstIntBits - srcSigBits + scale);
94109
absResult ^= dstMinNormal;
95110
const int resultExponent = dstExpBias - srcExpBias - scale + 1;
96111
absResult |= (dst_rep_t)resultExponent << dstSigBits;

compiler-rt/lib/builtins/fp_lib.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,12 @@ static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
104104

105105
COMPILER_RT_ABI fp_t __adddf3(fp_t a, fp_t b);
106106

107+
#elif defined FP80_PRECISION
108+
#if __LDBL_MANT_DIG__ == 64
109+
#define CRT_LDBL_80BIT
110+
// Only x86 does 80-bit floats, only support extend/trunc.
111+
#endif
112+
107113
#elif defined QUAD_PRECISION
108114
#if __LDBL_MANT_DIG__ == 113 && defined(__SIZEOF_INT128__)
109115
#define CRT_LDBL_128BIT
@@ -202,7 +208,7 @@ static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
202208
#undef Word_FullMask
203209
#endif // __LDBL_MANT_DIG__ == 113 && __SIZEOF_INT128__
204210
#else
205-
#error SINGLE_PRECISION, DOUBLE_PRECISION or QUAD_PRECISION must be defined.
211+
#error SINGLE_PRECISION, DOUBLE_PRECISION, FP80_PRECISION, or QUAD_PRECISION must be defined.
206212
#endif
207213

208214
#if defined(SINGLE_PRECISION) || defined(DOUBLE_PRECISION) || \

compiler-rt/lib/builtins/fp_trunc.h

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,22 +19,36 @@
1919
typedef float src_t;
2020
typedef uint32_t src_rep_t;
2121
#define SRC_REP_C UINT32_C
22+
static const int srcBits = 32;
2223
static const int srcSigBits = 23;
24+
static const int srcIntBits = 0;
2325

2426
#elif defined SRC_DOUBLE
2527
typedef double src_t;
2628
typedef uint64_t src_rep_t;
2729
#define SRC_REP_C UINT64_C
30+
static const int srcBits = 64;
2831
static const int srcSigBits = 52;
32+
static const int srcIntBits = 0;
33+
34+
#elif defined SRC_FLT80
35+
typedef long double src_t;
36+
typedef __uint128_t src_rep_t;
37+
#define SRC_REP_C (__uint128_t)
38+
static const int srcBits = 80;
39+
static const int srcSigBits = 64;
40+
static const int srcIntBits = 1;
2941

3042
#elif defined SRC_QUAD
3143
typedef long double src_t;
3244
typedef __uint128_t src_rep_t;
3345
#define SRC_REP_C (__uint128_t)
46+
static const int srcBits = 128;
3447
static const int srcSigBits = 112;
48+
static const int srcIntBits = 0;
3549

3650
#else
37-
#error Source should be double precision or quad precision!
51+
#error Source should be double precision, fp80 precision, or quad precision!
3852
#endif // end source precision
3953

4054
#if defined DST_DOUBLE
@@ -77,7 +91,13 @@ static __inline src_rep_t srcToRep(src_t x) {
7791
src_t f;
7892
src_rep_t i;
7993
} rep = {.f = x};
80-
return rep.i;
94+
src_rep_t res = rep.i;
95+
96+
// Zero out the padding bits from the union if needed.
97+
if (sizeof(src_rep_t) > sizeof(src_t))
98+
res &= (((src_rep_t)1 << sizeof(src_t)*CHAR_BIT) - 1);
99+
100+
return res;
81101
}
82102

83103
static __inline dst_t dstFromRep(dst_rep_t x) {

compiler-rt/lib/builtins/fp_trunc_impl.inc

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
//= lib/fp_trunc_impl.inc - high precision -> low precision conversion *-*-===//
23
//
34
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -38,10 +39,28 @@
3839

3940
#include "fp_trunc.h"
4041

42+
// x86_fp80 has an explicit integer bit at the top of the significand.
43+
// This allowed more weird denormals, infinities and NaNs in 8087 & 80287;
44+
// but from 387 onwards those are treated as invalid and we can just
45+
// ignore the issue by converting to a canonical "usual-format" IEEE-754.
46+
47+
// Rewrites the significand field of a source representation so that it no
// longer carries an explicit integer bit.
//
// in: raw source bits (src_rep_t).
// Returns `in` unchanged when srcIntBits is 0. Otherwise the low srcSigBits
// bits are shifted left by one within that field (the field's top bit is
// dropped, the bottom bit becomes 0) and all bits above the field are kept.
static src_rep_t removeExplicitIntBit(src_rep_t in) {
48+
// Formats without an explicit integer bit need no adjustment.
if (!srcIntBits)
49+
return in;
50+
51+
// Mask covering the low srcSigBits bits (the whole significand field).
const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits;
52+
const src_rep_t srcSignificandMask = srcMinNormal - 1;
53+
54+
// Pull the significand field out, clear it in place, then write it back
// shifted up by one; the mask discards the bit shifted out of the field.
src_rep_t inSig = in & srcSignificandMask;
55+
in &= ~srcSignificandMask;
56+
in |= (inSig << 1) & srcSignificandMask;
57+
58+
return in;
59+
}
60+
4161
static __inline dst_t __truncXfYf2__(src_t a) {
4262
// Various constants whose values follow from the type parameters.
4363
// Any reasonable optimizer will fold and propagate all of these.
44-
const int srcBits = sizeof(src_t) * CHAR_BIT;
4564
const int srcExpBits = srcBits - srcSigBits - 1;
4665
const int srcInfExp = (1 << srcExpBits) - 1;
4766
const int srcExpBias = srcInfExp >> 1;
@@ -71,14 +90,15 @@ static __inline dst_t __truncXfYf2__(src_t a) {
7190

7291
// Break a into a sign and representation of the absolute value.
7392
const src_rep_t aRep = srcToRep(a);
74-
const src_rep_t aAbs = aRep & srcAbsMask;
93+
src_rep_t aAbs = aRep & srcAbsMask;
7594
const src_rep_t sign = aRep & srcSignMask;
7695
dst_rep_t absResult;
7796

7897
if (aAbs - underflow < aAbs - overflow) {
7998
// The exponent of a is within the range of normal numbers in the
8099
// destination format. We can convert by simply right-shifting with
81100
// rounding and adjusting the exponent.
101+
aAbs = removeExplicitIntBit(aAbs);
82102
absResult = aAbs >> (srcSigBits - dstSigBits);
83103
absResult -= (dst_rep_t)(srcExpBias - dstExpBias) << dstSigBits;
84104

@@ -104,10 +124,11 @@ static __inline dst_t __truncXfYf2__(src_t a) {
104124
// a underflows on conversion to the destination type or is an exact
105125
// zero. The result may be a denormal or zero. Extract the exponent
106126
// to get the shift amount for the denormalization.
127+
aAbs = removeExplicitIntBit(aAbs);
107128
const int aExp = aAbs >> srcSigBits;
108129
const int shift = srcExpBias - dstExpBias - aExp + 1;
109130

110-
const src_rep_t significand = (aRep & srcSignificandMask) | srcMinNormal;
131+
const src_rep_t significand = (aAbs & srcSignificandMask) | srcMinNormal;
111132

112133
// Right shift by the denormalization amount with sticky.
113134
if (shift > srcSigBits) {

compiler-rt/lib/builtins/truncxfhf2.c

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
//===-- lib/truncxfhf2.c - x86 FP80 -> half conversion ------------*- C -*-===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is dual licensed under the MIT and the University of Illinois Open
6+
// Source Licenses. See LICENSE.TXT for details.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
10+
#define FP80_PRECISION
11+
#include "fp_lib.h"
12+
13+
#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_80BIT)
14+
15+
#define SRC_FLT80
16+
#define DST_HALF
17+
#include "fp_trunc_impl.inc"
18+
19+
// Public entry point for the long double -> _Float16 conversion. Forwards `a`
// to the generic __truncXfYf2__ routine pulled in from fp_trunc_impl.inc with
// SRC_FLT80 and DST_HALF selected above, and returns its result.
COMPILER_RT_ABI _Float16 __truncxfhf2(long double a) {
20+
return __truncXfYf2__(a);
21+
}
22+
23+
#endif
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
// RUN: %clang_builtins %s %librt -o %t && %run %t
2+
// REQUIRES: librt_has_extendhfxf2
3+
4+
#include <stdio.h>
5+
6+
#include "fp_test.h"
7+
8+
long double __extendhfxf2(TYPE_FP16 a);
9+
10+
// Runs __extendhfxf2 on `a` and checks the result with compareResultLD against
// the expected bit pattern given as (expectedHi, expectedLo).
// Presumably expectedHi is the sign/exponent word and expectedLo the 64-bit
// significand of the 80-bit value -- see fp_test.h to confirm.
// Returns compareResultLD's result; when it is nonzero a diagnostic with the
// input bits, the actual value and the expected value is printed.
int test__extendhfxf2(TYPE_FP16 a, uint64_t expectedHi, uint64_t expectedLo)
11+
{
12+
long double x = __extendhfxf2(a);
13+
int ret = compareResultLD(x, expectedHi, expectedLo);
14+
15+
// Nonzero means the comparison failed: report input and both values.
if (ret){
16+
printf("error in test__extendhfxf2(%#.4x) = %Lf, "
17+
"expected %Lf\n", toRep16(a), x, fromRep80(expectedHi, expectedLo));
18+
19+
}
20+
return ret;
21+
}
22+
23+
char assumption_1[sizeof(TYPE_FP16) * CHAR_BIT == 16] = {0};
24+
25+
// Test driver: feeds a fixed list of half-precision bit patterns through
// test__extendhfxf2 and returns 1 at the first failing case, 0 if all pass.
int main()
26+
{
27+
// qNaN
28+
if (test__extendhfxf2(fromRep16(0x7e00),
29+
UINT64_C(0x7fff),
30+
UINT64_C(0xc000000000000000)))
31+
return 1;
32+
// NaN
33+
if (test__extendhfxf2(fromRep16(0x7f80),
34+
UINT64_C(0x7fff),
35+
UINT64_C(0xf000000000000000)))
36+
return 1;
37+
// inf
38+
if (test__extendhfxf2(fromRep16(0x7c00),
39+
UINT64_C(0x7fff),
40+
UINT64_C(0x8000000000000000)))
41+
return 1;
42+
// -inf
43+
if (test__extendhfxf2(fromRep16(0xfc00),
44+
UINT64_C(0xffff),
45+
UINT64_C(0x8000000000000000)))
46+
return 1;
47+
// zero
48+
if (test__extendhfxf2(fromRep16(0x0),
49+
UINT64_C(0x0000),
50+
UINT64_C(0x0000000000000000)))
51+
return 1;
52+
// -zero
53+
if (test__extendhfxf2(fromRep16(0x8000),
54+
UINT64_C(0x8000),
55+
UINT64_C(0x0000000000000000)))
56+
return 1;
57+
// 3.140625
if (test__extendhfxf2(fromRep16(0x4248),
58+
UINT64_C(0x4000),
59+
UINT64_C(0xc900000000000000)))
60+
return 1;
61+
// -3.140625
if (test__extendhfxf2(fromRep16(0xc248),
62+
UINT64_C(0xc000),
63+
UINT64_C(0xc900000000000000)))
64+
return 1;
65+
// 6536.0
if (test__extendhfxf2(fromRep16(0x6e62),
66+
UINT64_C(0x400b),
67+
UINT64_C(0xcc40000000000000)))
68+
return 1;
69+
// 1.0
if (test__extendhfxf2(fromRep16(0x3c00),
70+
UINT64_C(0x3fff),
71+
UINT64_C(0x8000000000000000)))
72+
return 1;
73+
// smallest normal half, 2^-14
if (test__extendhfxf2(fromRep16(0x0400),
74+
UINT64_C(0x3ff1),
75+
UINT64_C(0x8000000000000000)))
76+
return 1;
77+
// denormal
// NOTE(review): every denormal input below has its highest set bit at an even
// position (bit 4 or bit 0); consider adding a case with an odd position,
// e.g. 0x0002, to cover the other exponent parity.
78+
// 2^-20
if (test__extendhfxf2(fromRep16(0x0010),
79+
UINT64_C(0x3feb),
80+
UINT64_C(0x8000000000000000)))
81+
return 1;
82+
// smallest positive denormal, 2^-24
if (test__extendhfxf2(fromRep16(0x0001),
83+
UINT64_C(0x3fe7),
84+
UINT64_C(0x8000000000000000)))
85+
return 1;
86+
// -2^-24
if (test__extendhfxf2(fromRep16(0x8001),
87+
UINT64_C(0xbfe7),
88+
UINT64_C(0x8000000000000000)))
89+
return 1;
90+
// max (precise)
91+
if (test__extendhfxf2(fromRep16(0x7bff),
92+
UINT64_C(0x400e),
93+
UINT64_C(0xffe0000000000000)))
94+
return 1;
95+
return 0;
96+
}

0 commit comments

Comments
 (0)