@@ -52,15 +52,28 @@ template <int KIND> class Norm2Accumulator {
52
52
const Constant<T> &array, const Constant<T> &maxAbs, Rounding rounding)
53
53
: array_{array}, maxAbs_{maxAbs}, rounding_{rounding} {};
54
54
void operator ()(Scalar<T> &element, const ConstantSubscripts &at) {
55
- // Kahan summation of scaled elements
55
+ // Kahan summation of scaled elements:
56
+ // Naively,
57
+ // NORM2(A(:)) = SQRT(SUM(A(:)**2))
58
+ // For any T > 0, we have mathematically
59
+ // SQRT(SUM(A(:)**2))
60
+ // = SQRT(T**2 * (SUM(A(:)**2) / T**2))
61
+ // = SQRT(T**2 * SUM(A(:)**2 / T**2))
62
+ // = SQRT(T**2 * SUM((A(:)/T)**2))
63
+ // = SQRT(T**2) * SQRT(SUM((A(:)/T)**2))
64
+ // = T * SQRT(SUM((A(:)/T)**2))
65
+ // By letting T = MAXVAL(ABS(A)), we ensure that
66
+ // ALL(ABS(A(:)/T) <= 1), so ALL((A(:)/T)**2 <= 1), and the SUM will
67
+ // not overflow unless absolutely necessary.
56
68
auto scale{maxAbs_.At (maxAbsAt_)};
57
69
if (scale.IsZero ()) {
58
- // If maxAbs is zero, so are all elements, and result
70
+ // Maximum value is zero, and so will the result be.
71
+ // Avoid division by zero below.
59
72
element = scale;
60
73
} else {
61
74
auto item{array_.At (at)};
62
75
auto scaled{item.Divide (scale).value };
63
- auto square{item .Multiply (scaled).value };
76
+ auto square{scaled .Multiply (scaled).value };
64
77
auto next{square.Add (correction_, rounding_)};
65
78
overflow_ |= next.flags .test (RealFlag::Overflow);
66
79
auto sum{element.Add (next.value , rounding_)};
@@ -73,13 +86,16 @@ template <int KIND> class Norm2Accumulator {
73
86
}
74
87
bool overflow () const { return overflow_; }
75
88
void Done (Scalar<T> &result) {
89
+ // result+correction == SUM((data(:)/maxAbs)**2)
90
+ // result = maxAbs * SQRT(result+correction)
76
91
auto corrected{result.Add (correction_, rounding_)};
77
92
overflow_ |= corrected.flags .test (RealFlag::Overflow);
78
93
correction_ = Scalar<T>{};
79
- auto rescaled{corrected.value .Multiply (maxAbs_.At (maxAbsAt_))};
94
+ auto root{corrected.value .SQRT ().value };
95
+ auto product{root.Multiply (maxAbs_.At (maxAbsAt_))};
80
96
maxAbs_.IncrementSubscripts (maxAbsAt_);
81
- overflow_ |= rescaled .flags .test (RealFlag::Overflow);
82
- result = rescaled. value . SQRT () .value ;
97
+ overflow_ |= product .flags .test (RealFlag::Overflow);
98
+ result = product .value ;
83
99
}
84
100
85
101
private:
0 commit comments