GPUOpen-Drivers
diff --git a/‎compiler-rt/lib/builtins/CMakeLists.txt
Lines changed: 2 additions & 0 deletions b/‎compiler-rt/lib/builtins/CMakeLists.txt
Lines changed: 2 additions & 0 deletions
diff --git a/‎compiler-rt/lib/builtins/extendxftf2.c
Lines changed: 23 additions & 0 deletions b/‎compiler-rt/lib/builtins/extendxftf2.c
Lines changed: 23 additions & 0 deletions
diff --git a/‎compiler-rt/lib/builtins/fp_extend.h
Lines changed: 88 additions & 11 deletions b/‎compiler-rt/lib/builtins/fp_extend.h
Lines changed: 88 additions & 11 deletions
diff --git a/‎compiler-rt/lib/builtins/fp_extend_impl.inc
Lines changed: 42 additions & 41 deletions b/‎compiler-rt/lib/builtins/fp_extend_impl.inc
Lines changed: 42 additions & 41 deletions
@@ -280,6 +280,7 @@ endif ()
 # long double is not 80 bits on Android or MSVC.
 set(x86_80_BIT_SOURCES
   divxc3.c
+  extendxftf2.c
   fixxfdi.c
   fixxfti.c
   fixunsxfdi.c
@@ -291,6 +292,7 @@ set(x86_80_BIT_SOURCES
   floatuntixf.c
   mulxc3.c
   powixf2.c
+  trunctfxf2.c
 )
 
 if (NOT MSVC)
 
@@ -0,0 +1,23 @@
+//===-- lib/extendxftf2.c - long double -> quad conversion --------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Assumption: long double is a IEEE 80 bit floating point type padded to 128
+// bits.
+
+// TODO: use fp_lib.h once QUAD_PRECISION is available on x86_64.
+#if __LDBL_MANT_DIG__ == 64 && defined(__x86_64__) &&                          \
+    (defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__))
+#define SRC_80
+#define DST_QUAD
+#include "fp_extend_impl.inc"
+
+COMPILER_RT_ABI __float128 __extendxftf2(long double a) {
+  return __extendXfYf2__(a);
+}
+
+#endif
@@ -20,15 +20,24 @@
 typedef float src_t;
 typedef uint32_t src_rep_t;
 #define SRC_REP_C UINT32_C
-static const int srcSigBits = 23;
+static const int srcBits = sizeof(src_t) * CHAR_BIT;
+static const int srcSigFracBits = 23;
+// -1 accounts for the sign bit.
+// srcBits - srcSigFracBits - 1
+static const int srcExpBits = 8;
 #define src_rep_t_clz clzsi
 
 #elif defined SRC_DOUBLE
 typedef double src_t;
 typedef uint64_t src_rep_t;
 #define SRC_REP_C UINT64_C
-static const int srcSigBits = 52;
-static __inline int src_rep_t_clz(src_rep_t a) {
+static const int srcBits = sizeof(src_t) * CHAR_BIT;
+static const int srcSigFracBits = 52;
+// -1 accounts for the sign bit.
+// srcBits - srcSigFracBits - 1
+static const int srcExpBits = 11;
+
+static inline int src_rep_t_clz_impl(src_rep_t a) {
 #if defined __LP64__
   return __builtin_clzl(a);
 #else
@@ -38,6 +47,19 @@ static __inline int src_rep_t_clz(src_rep_t a) {
     return 32 + clzsi(a & REP_C(0xffffffff));
 #endif
 }
+#define src_rep_t_clz src_rep_t_clz_impl
+
+#elif defined SRC_80
+typedef long double src_t;
+typedef __uint128_t src_rep_t;
+#define SRC_REP_C (__uint128_t)
+// sign bit, exponent and significand occupy the lower 80 bits.
+static const int srcBits = 80;
+static const int srcSigFracBits = 63;
+// -1 accounts for the sign bit.
+// -1 accounts for the explicitly stored integer bit.
+// srcBits - srcSigFracBits - 1 - 1
+static const int srcExpBits = 15;
 
 #elif defined SRC_HALF
 #ifdef COMPILER_RT_HAS_FLOAT16
@@ -47,7 +69,12 @@ typedef uint16_t src_t;
 #endif
 typedef uint16_t src_rep_t;
 #define SRC_REP_C UINT16_C
-static const int srcSigBits = 10;
+static const int srcBits = sizeof(src_t) * CHAR_BIT;
+static const int srcSigFracBits = 10;
+// -1 accounts for the sign bit.
+// srcBits - srcSigFracBits - 1
+static const int srcExpBits = 5;
+
 #define src_rep_t_clz __builtin_clz
 
 #else
@@ -58,36 +85,86 @@ static const int srcSigBits = 10;
 typedef float dst_t;
 typedef uint32_t dst_rep_t;
 #define DST_REP_C UINT32_C
-static const int dstSigBits = 23;
+static const int dstBits = sizeof(dst_t) * CHAR_BIT;
+static const int dstSigFracBits = 23;
+// -1 accounts for the sign bit.
+// dstBits - dstSigFracBits - 1
+static const int dstExpBits = 8;
 
 #elif defined DST_DOUBLE
 typedef double dst_t;
 typedef uint64_t dst_rep_t;
 #define DST_REP_C UINT64_C
-static const int dstSigBits = 52;
+static const int dstBits = sizeof(dst_t) * CHAR_BIT;
+static const int dstSigFracBits = 52;
+// -1 accounts for the sign bit.
+// dstBits - dstSigFracBits - 1
+static const int dstExpBits = 11;
 
 #elif defined DST_QUAD
+// TODO: use fp_lib.h once QUAD_PRECISION is available on x86_64.
+#if __LDBL_MANT_DIG__ == 113
 typedef long double dst_t;
+#elif defined(__x86_64__) &&                                                   \
+    (defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__))
+typedef __float128 dst_t;
+#endif
 typedef __uint128_t dst_rep_t;
 #define DST_REP_C (__uint128_t)
-static const int dstSigBits = 112;
+static const int dstBits = sizeof(dst_t) * CHAR_BIT;
+static const int dstSigFracBits = 112;
+// -1 accounts for the sign bit.
+// dstBits - dstSigFracBits - 1
+static const int dstExpBits = 15;
 
 #else
 #error Destination should be single, double, or quad precision!
 #endif // end destination precision
 
-// End of specialization parameters.  Two helper routines for conversion to and
-// from the representation of floating-point data as integer values follow.
+// End of specialization parameters.
+
+// TODO: These helper routines should be placed into fp_lib.h
+// Currently they depend on macros/constants defined above.
+
+static inline src_rep_t extract_sign_from_src(src_rep_t x) {
+  const src_rep_t srcSignMask = SRC_REP_C(1) << (srcBits - 1);
+  return (x & srcSignMask) >> (srcBits - 1);
+}
+
+static inline src_rep_t extract_exp_from_src(src_rep_t x) {
+  const int srcSigBits = srcBits - 1 - srcExpBits;
+  const src_rep_t srcExpMask = ((SRC_REP_C(1) << srcExpBits) - 1) << srcSigBits;
+  return (x & srcExpMask) >> srcSigBits;
+}
+
+static inline src_rep_t extract_sig_frac_from_src(src_rep_t x) {
+  const src_rep_t srcSigFracMask = (SRC_REP_C(1) << srcSigFracBits) - 1;
+  return x & srcSigFracMask;
+}
+
+#ifdef src_rep_t_clz
+static inline int clz_in_sig_frac(src_rep_t sigFrac) {
+      const int skip = (sizeof(dst_t) * CHAR_BIT - srcBits) + 1 + srcExpBits;
+      return src_rep_t_clz(sigFrac) - skip;
+}
+#endif
+
+static inline dst_rep_t construct_dst_rep(dst_rep_t sign, dst_rep_t exp, dst_rep_t sigFrac) {
+  return (sign << (dstBits - 1)) | (exp << (dstBits - 1 - dstExpBits)) | sigFrac;
+}
+
+// Two helper routines for conversion to and from the representation of
+// floating-point data as integer values follow.
 
-static __inline src_rep_t srcToRep(src_t x) {
+static inline src_rep_t srcToRep(src_t x) {
   const union {
     src_t f;
     src_rep_t i;
   } rep = {.f = x};
   return rep.i;
 }
 
-static __inline dst_t dstFromRep(dst_rep_t x) {
+static inline dst_t dstFromRep(dst_rep_t x) {
   const union {
     dst_t f;
     dst_rep_t i;
 
@@ -37,71 +37,72 @@
 
 #include "fp_extend.h"
 
+// The source type may use a usual IEEE-754 interchange format or Intel 80-bit
+// format. In particular, for the source type srcSigFracBits may be not equal to
+// srcSigBits. The destination type is assumed to be one of IEEE-754 standard
+// types.
 static __inline dst_t __extendXfYf2__(src_t a) {
   // Various constants whose values follow from the type parameters.
   // Any reasonable optimizer will fold and propagate all of these.
-  const int srcBits = sizeof(src_t) * CHAR_BIT;
-  const int srcExpBits = srcBits - srcSigBits - 1;
   const int srcInfExp = (1 << srcExpBits) - 1;
   const int srcExpBias = srcInfExp >> 1;
 
-  const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits;
-  const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits;
-  const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits);
-  const src_rep_t srcAbsMask = srcSignMask - 1;
-  const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1);
-  const src_rep_t srcNaNCode = srcQNaN - 1;
-
-  const int dstBits = sizeof(dst_t) * CHAR_BIT;
-  const int dstExpBits = dstBits - dstSigBits - 1;
   const int dstInfExp = (1 << dstExpBits) - 1;
   const int dstExpBias = dstInfExp >> 1;
 
-  const dst_rep_t dstMinNormal = DST_REP_C(1) << dstSigBits;
-
   // Break a into a sign and representation of the absolute value.
   const src_rep_t aRep = srcToRep(a);
-  const src_rep_t aAbs = aRep & srcAbsMask;
-  const src_rep_t sign = aRep & srcSignMask;
-  dst_rep_t absResult;
+  const src_rep_t srcSign = extract_sign_from_src(aRep);
+  const src_rep_t srcExp = extract_exp_from_src(aRep);
+  const src_rep_t srcSigFrac = extract_sig_frac_from_src(aRep);
+
+  dst_rep_t dstSign = srcSign;
+  dst_rep_t dstExp;
+  dst_rep_t dstSigFrac;
 
-  // If sizeof(src_rep_t) < sizeof(int), the subtraction result is promoted
-  // to (signed) int.  To avoid that, explicitly cast to src_rep_t.
-  if ((src_rep_t)(aAbs - srcMinNormal) < srcInfinity - srcMinNormal) {
+  if (srcExp >= 1 && srcExp < srcInfExp) {
     // a is a normal number.
-    // Extend to the destination type by shifting the significand and
-    // exponent into the proper position and rebiasing the exponent.
-    absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits);
-    absResult += (dst_rep_t)(dstExpBias - srcExpBias) << dstSigBits;
+    dstExp = (dst_rep_t)srcExp + (dst_rep_t)(dstExpBias - srcExpBias);
+    dstSigFrac = (dst_rep_t)srcSigFrac << (dstSigFracBits - srcSigFracBits);
   }
 
-  else if (aAbs >= srcInfinity) {
+  else if (srcExp == srcInfExp) {
     // a is NaN or infinity.
-    // Conjure the result by beginning with infinity, then setting the qNaN
-    // bit (if needed) and right-aligning the rest of the trailing NaN
-    // payload field.
-    absResult = (dst_rep_t)dstInfExp << dstSigBits;
-    absResult |= (dst_rep_t)(aAbs & srcQNaN) << (dstSigBits - srcSigBits);
-    absResult |= (dst_rep_t)(aAbs & srcNaNCode) << (dstSigBits - srcSigBits);
+    dstExp = dstInfExp;
+    dstSigFrac = (dst_rep_t)srcSigFrac << (dstSigFracBits - srcSigFracBits);
   }
 
-  else if (aAbs) {
+  else if (srcSigFrac) {
     // a is denormal.
-    // renormalize the significand and clear the leading bit, then insert
-    // the correct adjusted exponent in the destination type.
-    const int scale = src_rep_t_clz(aAbs) - src_rep_t_clz(srcMinNormal);
-    absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits + scale);
-    absResult ^= dstMinNormal;
-    const int resultExponent = dstExpBias - srcExpBias - scale + 1;
-    absResult |= (dst_rep_t)resultExponent << dstSigBits;
+    if (srcExpBits == dstExpBits) {
+      // The exponent fields are identical and this is a denormal number, so all
+      // the non-significand bits are zero. In particular, this branch is always
+      // taken when we extend a denormal F80 to F128.
+      dstExp = 0;
+      dstSigFrac = ((dst_rep_t)srcSigFrac) << (dstSigFracBits - srcSigFracBits);
+    } else {
+#ifndef src_rep_t_clz
+      // If src_rep_t_clz is not defined this branch must be unreachable.
+      __builtin_unreachable();
+#else
+      // Renormalize the significand and clear the leading bit.
+      // For F80 -> F128 this codepath is unused.
+      const int scale = clz_in_sig_frac(srcSigFrac) + 1;
+      dstExp = dstExpBias - srcExpBias - scale + 1;
+      dstSigFrac = (dst_rep_t)srcSigFrac
+                   << (dstSigFracBits - srcSigFracBits + scale);
+      const dst_rep_t dstMinNormal = DST_REP_C(1) << (dstBits - 1 - dstExpBits);
+      dstSigFrac ^= dstMinNormal;
+#endif
+    }
   }
 
   else {
     // a is zero.
-    absResult = 0;
+    dstExp = 0;
+    dstSigFrac = 0;
   }
 
-  // Apply the signbit to the absolute value.
-  const dst_rep_t result = absResult | (dst_rep_t)sign << (dstBits - srcBits);
+  const dst_rep_t result = construct_dst_rep(dstSign, dstExp, dstSigFrac);
   return dstFromRep(result);
 }