Replace incorrect implementation of mad_sat builtin with libclc version

aratajew · igcbot · commit 052eb47492f7 · 2021-07-13T12:15:25.000+02:00
The IGC mad_sat implementation returned wrong results for some inputs.
Its assumptions about the high part of the multiplication result were incorrect,
so it returned LONG_MIN even when the exact result was within the representable
range and therefore should not have been clamped.

To avoid reinventing the wheel, it's better to reuse mad_sat implementation from libclc
which should already be deeply tested.
diff --git a/IGC/BiFModule/Implementation/ExternalLibraries/libclc/hadd.cl b/IGC/BiFModule/Implementation/ExternalLibraries/libclc/hadd.cl
@@ -0,0 +1,5 @@
+#include "../../../Headers/spirv.h"
+
+INLINE OVERLOADABLE long libclc_hadd(long x, long y) { // halved sum of x and y; x + y itself is never formed
+    return (x & y & 1L) + (x >> 1) + (y >> 1); // 1 when both dropped low bits were set, plus the two halves
+}
diff --git a/IGC/BiFModule/Implementation/ExternalLibraries/libclc/mad_sat.cl b/IGC/BiFModule/Implementation/ExternalLibraries/libclc/mad_sat.cl
@@ -0,0 +1,24 @@
+#include "../../include/BiF_Definitions.cl"
+#include "../../../Headers/spirv.h"
+#include "mul_hi.cl"
+
+/* Returns x * y + z saturated to [LONG_MIN, LONG_MAX]. The exact result is
+ * formed as a 128-bit two's-complement value (hi:lo) and clamped only when it
+ * does not fit in 64 bits, so no case depends on the signs of x, y or z. */
+INLINE OVERLOADABLE long libclc_mad_sat(long x, long y, long z) {
+    /* 128-bit product: signed high word, low word via wrapping unsigned multiply. */
+    long hi = libclc_mul_hi(x, y);
+    ulong lo = (ulong)x * (ulong)y;
+    /* Add sign-extended z: the low words wrap, the high word gets the carry
+     * out of the low add plus z's extension word (0 or -1). |hi| <= 2^62 for a
+     * product of two longs, so this adjustment cannot overflow. */
+    ulong sum = lo + (ulong)z;
+    hi += (long)(sum < lo) - (long)(z < 0);
+    /* The value fits in a long exactly when the high word equals the sign
+     * extension of the low word. */
+    long ext = ((long)sum < 0) ? -1L : 0L;
+    if (hi == ext)
+        return (long)sum;
+    /* Out of range: the sign of the 128-bit value picks the clamp. */
+    return (hi < 0) ? LONG_MIN : LONG_MAX;
+}
diff --git a/IGC/BiFModule/Implementation/ExternalLibraries/libclc/mul_hi.cl b/IGC/BiFModule/Implementation/ExternalLibraries/libclc/mul_hi.cl
@@ -0,0 +1,38 @@
+#include "../../include/BiF_Definitions.cl"
+#include "../../../Headers/spirv.h"
+#include "hadd.cl"
+
+ //FOIL-based long mul_hi
+ //
+ // Summary: Treat mul_hi(long x, long y) as the high 64 bits of
+ // (a*2^32 + b) * (c*2^32 + d), where a and c are the high-order 32-bit halves
+ // of x and y respectively and b and d are the low-order 32-bit halves.
+ // The product is expanded term by term with FOIL (First, Outer, Inner, Last).
+
+INLINE OVERLOADABLE long libclc_mul_hi(long x, long y) {
+    //Returns the upper 64 bits of the exact 128-bit signed product x * y.
+    //
+    //Split each operand as hi * 2^32 + lo, where hi is the sign-carrying upper
+    //half (arithmetic shift) and lo is the upper-zeroed lower half, so that
+    //every signed partial product below fits in 64 bits.
+    long x_hi = x >> 32;
+    long x_lo = x & UINT_MAX;
+    long y_hi = y >> 32;
+    long y_lo = y & UINT_MAX;
+
+    //Multiply all of the components according to FOIL method
+    long f = x_hi * y_hi;
+    long o = x_hi * y_lo;
+    long i = x_lo * y_hi;
+    //L can reach (2^32 - 1)^2, which exceeds LONG_MAX: multiply as unsigned so
+    //the product is computed without signed overflow.
+    ulong l = (ulong)x_lo * (ulong)y_lo;
+
+    //x * y = F * 2^64 + (O + I) * 2^32 + L, so the upper 64 bits are
+    //F + floor((O + I + (L >> 32)) / 2^32).
+    //
+    //O + I + (L >> 32) itself may not fit in 64 bits, so hadd computes
+    //half of it, floor((O + I + (L >> 32)) / 2), without forming the full
+    //sum; shifting that by 31 instead of 32 completes the divide by 2^32.
+    return (long)(f + (libclc_hadd(o, (i + (long)(l >> 32))) >> 31));
+}
diff --git a/IGC/BiFModule/Implementation/Integer/mad_sat.cl b/IGC/BiFModule/Implementation/Integer/mad_sat.cl
@@ -9,6 +9,7 @@ SPDX-License-Identifier: MIT
 #include "../include/BiF_Definitions.cl"
 #include "../../Headers/spirv.h"
 #include "../include/mul_hilo.cl"
+#include "../ExternalLibraries/libclc/mad_sat.cl"
 
 INLINE
 char2 __builtin_spirv_OpenCL_s_mad_sat_v2i8_v2i8_v2i8( char2 a,
@@ -490,23 +491,7 @@ long __builtin_spirv_OpenCL_s_mad_sat_i64_i64_i64( long a,
                                             long b,
                                             long c )
 {
-    long lo;
-    long hi;
-    hi = __builtin_spirv___intc_mul_hilo_i64_i64_p0i64(a, b, &lo);
-    long result_lo = lo + c;
-    if (c >= 0)
-    {
-        if (result_lo < lo)
-            hi++;
-    }
-    else
-    {
-        if (result_lo > lo)
-            hi--;
-    }
-    return  (hi == 0)  ?    result_lo    :
-            (hi <  0)  ?    LONG_MIN :
-                            LONG_MAX;
+    return libclc_mad_sat(a, b, c);
 }
 
 INLINE