[flang] Fix implementation of Kahan summation (#116897)

klausler · web-flow · commit 3f594741cf8e · 2024-11-21T10:47:21.000-08:00
In the runtime's implementation of floating-point SUM, the implementation of Kahan's algorithm for increased precision is incorrect. The running correction factor should be subtracted from each new data item, not added to it. This fix ensures that the sum of 100M random default real values between 0. and 1. is close to 5.E7. See https://en.wikipedia.org/wiki/Kahan_summation_algorithm.
diff --git a/flang/lib/Evaluate/fold-matmul.h b/flang/lib/Evaluate/fold-matmul.h
@@ -61,7 +61,7 @@ static Expr<T> FoldMatmul(FoldingContext &context, FunctionRef<T> &&funcRef) {
           auto product{aElt.Multiply(bElt)};
           overflow |= product.flags.test(RealFlag::Overflow);
           if constexpr (useKahanSummation) {
-            auto next{correction.Add(product.value, rounding)};
+            auto next{product.value.Subtract(correction, rounding)};
             overflow |= next.flags.test(RealFlag::Overflow);
             auto added{sum.Add(next.value, rounding)};
             overflow |= added.flags.test(RealFlag::Overflow);
diff --git a/flang/lib/Evaluate/fold-real.cpp b/flang/lib/Evaluate/fold-real.cpp
@@ -78,7 +78,7 @@ template <int KIND> class Norm2Accumulator {
       auto scaled{item.Divide(scale).value};
       auto square{scaled.Multiply(scaled).value};
       if constexpr (useKahanSummation) {
-        auto next{square.Add(correction_, rounding_)};
+        auto next{square.Subtract(correction_, rounding_)};
         overflow_ |= next.flags.test(RealFlag::Overflow);
         auto sum{element.Add(next.value, rounding_)};
         overflow_ |= sum.flags.test(RealFlag::Overflow);
diff --git a/flang/lib/Evaluate/fold-reduction.h b/flang/lib/Evaluate/fold-reduction.h
@@ -47,7 +47,7 @@ static Expr<T> FoldDotProduct(
       const auto &rounding{context.targetCharacteristics().roundingMode()};
       for (const Element &x : cProducts.values()) {
         if constexpr (useKahanSummation) {
-          auto next{correction.Add(x, rounding)};
+          auto next{x.Subtract(correction, rounding)};
           overflow |= next.flags.test(RealFlag::Overflow);
           auto added{sum.Add(next.value, rounding)};
           overflow |= added.flags.test(RealFlag::Overflow);
@@ -90,7 +90,7 @@ static Expr<T> FoldDotProduct(
       const auto &rounding{context.targetCharacteristics().roundingMode()};
       for (const Element &x : cProducts.values()) {
         if constexpr (useKahanSummation) {
-          auto next{correction.Add(x, rounding)};
+          auto next{x.Subtract(correction, rounding)};
           overflow |= next.flags.test(RealFlag::Overflow);
           auto added{sum.Add(next.value, rounding)};
           overflow |= added.flags.test(RealFlag::Overflow);
@@ -348,7 +348,7 @@ template <typename T> class SumAccumulator {
       overflow_ |= sum.overflow;
       element = sum.value;
     } else { // Real & Complex: use Kahan summation
-      auto next{array_.At(at).Add(correction_, rounding_)};
+      auto next{array_.At(at).Subtract(correction_, rounding_)};
       overflow_ |= next.flags.test(RealFlag::Overflow);
       auto sum{element.Add(next.value, rounding_)};
       overflow_ |= sum.flags.test(RealFlag::Overflow);
diff --git a/flang/runtime/sum.cpp b/flang/runtime/sum.cpp
@@ -53,7 +53,7 @@ template <typename INTERMEDIATE> class RealSumAccumulator {
   }
   template <typename A> RT_API_ATTRS bool Accumulate(A x) {
     // Kahan summation
-    auto next{x + correction_};
+    auto next{x - correction_};
     auto oldSum{sum_};
     sum_ += next;
     correction_ = (sum_ - oldSum) - next; // algebraically zero

Original file line number	Diff line number	Diff line change
`@@ -53,7 +53,7 @@ template <typename INTERMEDIATE> class RealSumAccumulator {`
`53`	`53`	`}`
`54`	`54`	`template <typename A> RT_API_ATTRS bool Accumulate(A x) {`
`55`	`55`	`// Kahan summation`
`56`		`- auto next{x + correction_};`
	`56`	`+ auto next{x - correction_};`
`57`	`57`	`auto oldSum{sum_};`
`58`	`58`	`sum_ += next;`
`59`	`59`	`correction_ = (sum_ - oldSum) - next; // algebraically zero`