Skip to content

Commit 6ae8b41

Browse files
SC llvm teamSC llvm team
authored and committed
Merged main:28a686a704fa into amd-gfx:ae5318a2a560
Local branch amd-gfx ae5318a Merged main:233c3e6c53a5 into amd-gfx:8892e09cb17c Remote branch main 28a686a [flang][NFC] Speed up large DATA statement initializations (llvm#67585)
2 parents ae5318a + 28a686a commit 6ae8b41

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+1374
-224
lines changed

compiler-rt/lib/builtins/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,7 @@ endif ()
280280
# long double is not 80 bits on Android or MSVC.
281281
set(x86_80_BIT_SOURCES
282282
divxc3.c
283+
extendxftf2.c
283284
fixxfdi.c
284285
fixxfti.c
285286
fixunsxfdi.c
@@ -291,6 +292,7 @@ set(x86_80_BIT_SOURCES
291292
floatuntixf.c
292293
mulxc3.c
293294
powixf2.c
295+
trunctfxf2.c
294296
)
295297

296298
if (NOT MSVC)
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
//===-- lib/extendxftf2.c - long double -> quad conversion --------*- C -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// Assumption: long double is an IEEE 80-bit floating-point type padded to 128
10+
// bits.
11+
12+
// TODO: use fp_lib.h once QUAD_PRECISION is available on x86_64.
13+
#if __LDBL_MANT_DIG__ == 64 && defined(__x86_64__) && \
14+
(defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__))
15+
#define SRC_80
16+
#define DST_QUAD
17+
#include "fp_extend_impl.inc"
18+
19+
// Widens an x87 80-bit long double to a 128-bit __float128 by delegating to
// the generic extension routine instantiated with SRC_80 / DST_QUAD.
COMPILER_RT_ABI __float128 __extendxftf2(long double a) {
  const __float128 widened = __extendXfYf2__(a);
  return widened;
}
22+
23+
#endif

compiler-rt/lib/builtins/fp_extend.h

Lines changed: 88 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,24 @@
2020
typedef float src_t;
2121
typedef uint32_t src_rep_t;
2222
#define SRC_REP_C UINT32_C
23-
static const int srcSigBits = 23;
23+
static const int srcBits = sizeof(src_t) * CHAR_BIT;
24+
static const int srcSigFracBits = 23;
25+
// -1 accounts for the sign bit.
26+
// srcBits - srcSigFracBits - 1
27+
static const int srcExpBits = 8;
2428
#define src_rep_t_clz clzsi
2529

2630
#elif defined SRC_DOUBLE
2731
typedef double src_t;
2832
typedef uint64_t src_rep_t;
2933
#define SRC_REP_C UINT64_C
30-
static const int srcSigBits = 52;
31-
static __inline int src_rep_t_clz(src_rep_t a) {
34+
static const int srcBits = sizeof(src_t) * CHAR_BIT;
35+
static const int srcSigFracBits = 52;
36+
// -1 accounts for the sign bit.
37+
// srcBits - srcSigFracBits - 1
38+
static const int srcExpBits = 11;
39+
40+
static inline int src_rep_t_clz_impl(src_rep_t a) {
3241
#if defined __LP64__
3342
return __builtin_clzl(a);
3443
#else
@@ -38,6 +47,19 @@ static __inline int src_rep_t_clz(src_rep_t a) {
3847
return 32 + clzsi(a & REP_C(0xffffffff));
3948
#endif
4049
}
50+
#define src_rep_t_clz src_rep_t_clz_impl
51+
52+
#elif defined SRC_80
53+
typedef long double src_t;
54+
typedef __uint128_t src_rep_t;
55+
#define SRC_REP_C (__uint128_t)
56+
// sign bit, exponent and significand occupy the lower 80 bits.
57+
static const int srcBits = 80;
58+
static const int srcSigFracBits = 63;
59+
// -1 accounts for the sign bit.
60+
// -1 accounts for the explicitly stored integer bit.
61+
// srcBits - srcSigFracBits - 1 - 1
62+
static const int srcExpBits = 15;
4163

4264
#elif defined SRC_HALF
4365
#ifdef COMPILER_RT_HAS_FLOAT16
@@ -47,7 +69,12 @@ typedef uint16_t src_t;
4769
#endif
4870
typedef uint16_t src_rep_t;
4971
#define SRC_REP_C UINT16_C
50-
static const int srcSigBits = 10;
72+
static const int srcBits = sizeof(src_t) * CHAR_BIT;
73+
static const int srcSigFracBits = 10;
74+
// -1 accounts for the sign bit.
75+
// srcBits - srcSigFracBits - 1
76+
static const int srcExpBits = 5;
77+
5178
#define src_rep_t_clz __builtin_clz
5279

5380
#else
@@ -58,36 +85,86 @@ static const int srcSigBits = 10;
5885
typedef float dst_t;
5986
typedef uint32_t dst_rep_t;
6087
#define DST_REP_C UINT32_C
61-
static const int dstSigBits = 23;
88+
static const int dstBits = sizeof(dst_t) * CHAR_BIT;
89+
static const int dstSigFracBits = 23;
90+
// -1 accounts for the sign bit.
91+
// dstBits - dstSigFracBits - 1
92+
static const int dstExpBits = 8;
6293

6394
#elif defined DST_DOUBLE
6495
typedef double dst_t;
6596
typedef uint64_t dst_rep_t;
6697
#define DST_REP_C UINT64_C
67-
static const int dstSigBits = 52;
98+
static const int dstBits = sizeof(dst_t) * CHAR_BIT;
99+
static const int dstSigFracBits = 52;
100+
// -1 accounts for the sign bit.
101+
// dstBits - dstSigFracBits - 1
102+
static const int dstExpBits = 11;
68103

69104
#elif defined DST_QUAD
105+
// TODO: use fp_lib.h once QUAD_PRECISION is available on x86_64.
106+
#if __LDBL_MANT_DIG__ == 113
70107
typedef long double dst_t;
108+
#elif defined(__x86_64__) && \
109+
(defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__))
110+
typedef __float128 dst_t;
111+
#endif
71112
typedef __uint128_t dst_rep_t;
72113
#define DST_REP_C (__uint128_t)
73-
static const int dstSigBits = 112;
114+
static const int dstBits = sizeof(dst_t) * CHAR_BIT;
115+
static const int dstSigFracBits = 112;
116+
// -1 accounts for the sign bit.
117+
// dstBits - dstSigFracBits - 1
118+
static const int dstExpBits = 15;
74119

75120
#else
76121
#error Destination should be single, double, or quad precision!
77122
#endif // end destination precision
78123

79-
// End of specialization parameters. Two helper routines for conversion to and
80-
// from the representation of floating-point data as integer values follow.
124+
// End of specialization parameters.
125+
126+
// TODO: These helper routines should be placed into fp_lib.h
127+
// Currently they depend on macros/constants defined above.
128+
129+
// Returns the sign bit of a source representation as 0 or 1. The sign is the
// bit at position srcBits - 1; anything stored above it is ignored.
static inline src_rep_t extract_sign_from_src(src_rep_t x) {
  const int signShift = srcBits - 1;
  return (x >> signShift) & SRC_REP_C(1);
}
133+
134+
// Returns the biased exponent field of a source representation, shifted down
// so that its least significant bit is bit 0.
static inline src_rep_t extract_exp_from_src(src_rep_t x) {
  // Everything below the exponent field: total width minus sign and exponent.
  const int expShift = srcBits - 1 - srcExpBits;
  const src_rep_t fieldMask = (SRC_REP_C(1) << srcExpBits) - 1;
  return (x >> expShift) & fieldMask;
}
139+
140+
// Returns the low srcSigFracBits bits of a source representation, i.e. the
// fractional part of the significand.
static inline src_rep_t extract_sig_frac_from_src(src_rep_t x) {
  const src_rep_t aboveFrac = SRC_REP_C(1) << srcSigFracBits;
  const src_rep_t fracMask = aboveFrac - 1;
  return x & fracMask;
}
144+
145+
#ifdef src_rep_t_clz
146+
// Returns the number of leading zero bits of sigFrac counted from the top of
// the significand-fraction field (bit srcSigFracBits - 1), rather than from
// the top of whatever integer width src_rep_t_clz operates on.
//
// sigFrac must be non-zero: the clz primitives selected above are built on
// __builtin_clz-style intrinsics, whose result is undefined for 0.
//
// The amount to discard is obtained by applying src_rep_t_clz to the most
// significant fraction bit, so it depends only on the source format and on the
// width src_rep_t_clz counts in. The previous formula used sizeof(dst_t), which
// made the result vary with the destination type (e.g. float -> double
// yielded a skip of 64 - 32 + 1 + 8 = 41, more than any 32-bit clz can
// return).
static inline int clz_in_sig_frac(src_rep_t sigFrac) {
  const src_rep_t topFracBit = SRC_REP_C(1) << (srcSigFracBits - 1);
  const int skip = src_rep_t_clz(topFracBit);
  return src_rep_t_clz(sigFrac) - skip;
}
150+
#endif
151+
152+
// Assembles a destination representation from its three fields. sign is
// placed in the top bit, exp directly below it, and sigFrac is OR-ed in
// unshifted at the bottom.
static inline dst_rep_t construct_dst_rep(dst_rep_t sign, dst_rep_t exp,
                                          dst_rep_t sigFrac) {
  const int signShift = dstBits - 1;
  const int expShift = signShift - dstExpBits;
  const dst_rep_t signField = sign << signShift;
  const dst_rep_t expField = exp << expShift;
  return signField | expField | sigFrac;
}
155+
156+
// Two helper routines for conversion to and from the representation of
157+
// floating-point data as integer values follow.
81158

82-
static __inline src_rep_t srcToRep(src_t x) {
159+
static inline src_rep_t srcToRep(src_t x) {
83160
const union {
84161
src_t f;
85162
src_rep_t i;
86163
} rep = {.f = x};
87164
return rep.i;
88165
}
89166

90-
static __inline dst_t dstFromRep(dst_rep_t x) {
167+
static inline dst_t dstFromRep(dst_rep_t x) {
91168
const union {
92169
dst_t f;
93170
dst_rep_t i;

compiler-rt/lib/builtins/fp_extend_impl.inc

Lines changed: 42 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -37,71 +37,72 @@
3737

3838
#include "fp_extend.h"
3939

40+
// The source type may use a usual IEEE-754 interchange format or Intel 80-bit
41+
// format. In particular, for the source type srcSigFracBits may not be equal to
42+
// srcSigBits. The destination type is assumed to be one of IEEE-754 standard
43+
// types.
4044
static __inline dst_t __extendXfYf2__(src_t a) {
4145
// Various constants whose values follow from the type parameters.
4246
// Any reasonable optimizer will fold and propagate all of these.
43-
const int srcBits = sizeof(src_t) * CHAR_BIT;
44-
const int srcExpBits = srcBits - srcSigBits - 1;
4547
const int srcInfExp = (1 << srcExpBits) - 1;
4648
const int srcExpBias = srcInfExp >> 1;
4749

48-
const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits;
49-
const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits;
50-
const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits);
51-
const src_rep_t srcAbsMask = srcSignMask - 1;
52-
const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1);
53-
const src_rep_t srcNaNCode = srcQNaN - 1;
54-
55-
const int dstBits = sizeof(dst_t) * CHAR_BIT;
56-
const int dstExpBits = dstBits - dstSigBits - 1;
5750
const int dstInfExp = (1 << dstExpBits) - 1;
5851
const int dstExpBias = dstInfExp >> 1;
5952

60-
const dst_rep_t dstMinNormal = DST_REP_C(1) << dstSigBits;
61-
6253
// Break a into a sign and representation of the absolute value.
6354
const src_rep_t aRep = srcToRep(a);
64-
const src_rep_t aAbs = aRep & srcAbsMask;
65-
const src_rep_t sign = aRep & srcSignMask;
66-
dst_rep_t absResult;
55+
const src_rep_t srcSign = extract_sign_from_src(aRep);
56+
const src_rep_t srcExp = extract_exp_from_src(aRep);
57+
const src_rep_t srcSigFrac = extract_sig_frac_from_src(aRep);
58+
59+
dst_rep_t dstSign = srcSign;
60+
dst_rep_t dstExp;
61+
dst_rep_t dstSigFrac;
6762

68-
// If sizeof(src_rep_t) < sizeof(int), the subtraction result is promoted
69-
// to (signed) int. To avoid that, explicitly cast to src_rep_t.
70-
if ((src_rep_t)(aAbs - srcMinNormal) < srcInfinity - srcMinNormal) {
63+
if (srcExp >= 1 && srcExp < srcInfExp) {
7164
// a is a normal number.
72-
// Extend to the destination type by shifting the significand and
73-
// exponent into the proper position and rebiasing the exponent.
74-
absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits);
75-
absResult += (dst_rep_t)(dstExpBias - srcExpBias) << dstSigBits;
65+
dstExp = (dst_rep_t)srcExp + (dst_rep_t)(dstExpBias - srcExpBias);
66+
dstSigFrac = (dst_rep_t)srcSigFrac << (dstSigFracBits - srcSigFracBits);
7667
}
7768

78-
else if (aAbs >= srcInfinity) {
69+
else if (srcExp == srcInfExp) {
7970
// a is NaN or infinity.
80-
// Conjure the result by beginning with infinity, then setting the qNaN
81-
// bit (if needed) and right-aligning the rest of the trailing NaN
82-
// payload field.
83-
absResult = (dst_rep_t)dstInfExp << dstSigBits;
84-
absResult |= (dst_rep_t)(aAbs & srcQNaN) << (dstSigBits - srcSigBits);
85-
absResult |= (dst_rep_t)(aAbs & srcNaNCode) << (dstSigBits - srcSigBits);
71+
dstExp = dstInfExp;
72+
dstSigFrac = (dst_rep_t)srcSigFrac << (dstSigFracBits - srcSigFracBits);
8673
}
8774

88-
else if (aAbs) {
75+
else if (srcSigFrac) {
8976
// a is denormal.
90-
// renormalize the significand and clear the leading bit, then insert
91-
// the correct adjusted exponent in the destination type.
92-
const int scale = src_rep_t_clz(aAbs) - src_rep_t_clz(srcMinNormal);
93-
absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits + scale);
94-
absResult ^= dstMinNormal;
95-
const int resultExponent = dstExpBias - srcExpBias - scale + 1;
96-
absResult |= (dst_rep_t)resultExponent << dstSigBits;
77+
if (srcExpBits == dstExpBits) {
78+
// The exponent fields are identical and this is a denormal number, so all
79+
// the non-significand bits are zero. In particular, this branch is always
80+
// taken when we extend a denormal F80 to F128.
81+
dstExp = 0;
82+
dstSigFrac = ((dst_rep_t)srcSigFrac) << (dstSigFracBits - srcSigFracBits);
83+
} else {
84+
#ifndef src_rep_t_clz
85+
// If src_rep_t_clz is not defined this branch must be unreachable.
86+
__builtin_unreachable();
87+
#else
88+
// Renormalize the significand and clear the leading bit.
89+
// For F80 -> F128 this codepath is unused.
90+
const int scale = clz_in_sig_frac(srcSigFrac) + 1;
91+
dstExp = dstExpBias - srcExpBias - scale + 1;
92+
dstSigFrac = (dst_rep_t)srcSigFrac
93+
<< (dstSigFracBits - srcSigFracBits + scale);
94+
const dst_rep_t dstMinNormal = DST_REP_C(1) << (dstBits - 1 - dstExpBits);
95+
dstSigFrac ^= dstMinNormal;
96+
#endif
97+
}
9798
}
9899

99100
else {
100101
// a is zero.
101-
absResult = 0;
102+
dstExp = 0;
103+
dstSigFrac = 0;
102104
}
103105

104-
// Apply the signbit to the absolute value.
105-
const dst_rep_t result = absResult | (dst_rep_t)sign << (dstBits - srcBits);
106+
const dst_rep_t result = construct_dst_rep(dstSign, dstExp, dstSigFrac);
106107
return dstFromRep(result);
107108
}

0 commit comments

Comments
 (0)