Fix rsqrt inaccuracies. #88691

jreiffers · 2024-04-15T07:49:27Z

The current lowering has issues with large/subnormal values. This ports XLA's lowering and was verified using XLA's test suite and the MLIR-based emitters.

llvmbot · 2024-04-15T07:50:00Z

@llvm/pr-subscribers-mlir

Author: Johannes Reifferscheid (jreiffers)

Changes

The current lowering has issues with large/subnormal values. This ports XLA's lowering and was verified using XLA's test suite and the MLIR-based emitters.

Full diff: https://github.com/llvm/llvm-project/pull/88691.diff

2 Files Affected:

(modified) mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp (+76-12)
(modified) mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir (+16-1)

diff --git a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp
index 49eb575212ffc1..3ebee9baff31bd 100644
--- a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp
+++ b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp
@@ -27,9 +27,11 @@ using namespace mlir;
 
 namespace {
 
-// Returns the absolute value or its square root.
+enum class AbsFn { abs, sqrt, rsqrt };
+
+// Returns the absolute value, its square root or its reciprocal square root.
 Value computeAbs(Value real, Value imag, arith::FastMathFlags fmf,
-                 ImplicitLocOpBuilder &b, bool returnSqrt = false) {
+                 ImplicitLocOpBuilder &b, AbsFn fn = AbsFn::abs) {
   Value one = b.create<arith::ConstantOp>(real.getType(),
                                           b.getFloatAttr(real.getType(), 1.0));
 
@@ -43,7 +45,13 @@ Value computeAbs(Value real, Value imag, arith::FastMathFlags fmf,
   Value ratioSqPlusOne = b.create<arith::AddFOp>(ratioSq, one, fmf);
   Value result;
 
-  if (returnSqrt) {
+  if (fn == AbsFn::rsqrt) {
+    ratioSqPlusOne = b.create<math::RsqrtOp>(ratioSqPlusOne, fmf);
+    min = b.create<math::RsqrtOp>(min, fmf);
+    max = b.create<math::RsqrtOp>(max, fmf);
+  }
+
+  if (fn == AbsFn::sqrt) {
     Value quarter = b.create<arith::ConstantOp>(
         real.getType(), b.getFloatAttr(real.getType(), 0.25));
     // sqrt(sqrt(a*b)) would avoid the pow, but will overflow more easily.
@@ -863,7 +871,7 @@ struct SqrtOpConversion : public OpConversionPattern<complex::SqrtOp> {
 
     Value real = b.create<complex::ReOp>(elementType, adaptor.getComplex());
     Value imag = b.create<complex::ImOp>(elementType, adaptor.getComplex());
-    Value absSqrt = computeAbs(real, imag, fmf, b, /*returnSqrt=*/true);
+    Value absSqrt = computeAbs(real, imag, fmf, b, AbsFn::sqrt);
     Value argArg = b.create<math::Atan2Op>(imag, real, fmf);
     Value sqrtArg = b.create<arith::MulFOp>(argArg, half, fmf);
     Value cos = b.create<math::CosOp>(sqrtArg, fmf);
@@ -1147,18 +1155,74 @@ struct RsqrtOpConversion : public OpConversionPattern<complex::RsqrtOp> {
   LogicalResult
   matchAndRewrite(complex::RsqrtOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
-    mlir::ImplicitLocOpBuilder builder(op.getLoc(), rewriter);
+    mlir::ImplicitLocOpBuilder b(op.getLoc(), rewriter);
     auto type = cast<ComplexType>(adaptor.getComplex().getType());
     auto elementType = cast<FloatType>(type.getElementType());
 
-    Value c = builder.create<arith::ConstantOp>(
-        elementType, builder.getFloatAttr(elementType, -0.5));
-    Value d = builder.create<arith::ConstantOp>(
-        elementType, builder.getFloatAttr(elementType, 0));
+    arith::FastMathFlags fmf = op.getFastMathFlagsAttr().getValue();
+
+    auto cst = [&](APFloat v) {
+      return b.create<arith::ConstantOp>(elementType,
+                                         b.getFloatAttr(elementType, v));
+    };
+    const auto &floatSemantics = elementType.getFloatSemantics();
+    Value zero = cst(APFloat::getZero(floatSemantics));
+    Value inf = cst(APFloat::getInf(floatSemantics));
+    Value negHalf = b.create<arith::ConstantOp>(
+        elementType, b.getFloatAttr(elementType, -0.5));
+    Value nan = cst(APFloat::getNaN(floatSemantics));
+
+    Value real = b.create<complex::ReOp>(elementType, adaptor.getComplex());
+    Value imag = b.create<complex::ImOp>(elementType, adaptor.getComplex());
+    Value absRsqrt = computeAbs(real, imag, fmf, b, AbsFn::rsqrt);
+    Value argArg = b.create<math::Atan2Op>(imag, real, fmf);
+    Value rsqrtArg = b.create<arith::MulFOp>(argArg, negHalf, fmf);
+    Value cos = b.create<math::CosOp>(rsqrtArg, fmf);
+    Value sin = b.create<math::SinOp>(rsqrtArg, fmf);
+
+    Value resultReal = b.create<arith::MulFOp>(absRsqrt, cos, fmf);
+    Value resultImag = b.create<arith::MulFOp>(absRsqrt, sin, fmf);
+
+    if (!arith::bitEnumContainsAll(fmf, arith::FastMathFlags::nnan |
+                                            arith::FastMathFlags::ninf)) {
+      Value negOne = b.create<arith::ConstantOp>(
+          elementType, b.getFloatAttr(elementType, -1));
+
+      Value realSignedZero = b.create<math::CopySignOp>(zero, real, fmf);
+      Value imagSignedZero = b.create<math::CopySignOp>(zero, imag, fmf);
+      Value negImagSignedZero =
+          b.create<arith::MulFOp>(negOne, imagSignedZero, fmf);
 
-    rewriter.replaceOp(op,
-                       {powOpConversionImpl(builder, type, adaptor.getComplex(),
-                                            c, d, op.getFastmath())});
+      Value absReal = b.create<math::AbsFOp>(real, fmf);
+      Value absImag = b.create<math::AbsFOp>(imag, fmf);
+
+      Value absImagIsInf =
+          b.create<arith::CmpFOp>(arith::CmpFPredicate::OEQ, absImag, inf, fmf);
+      Value realIsNan =
+          b.create<arith::CmpFOp>(arith::CmpFPredicate::UNO, real, real, fmf);
+      Value realIsInf =
+          b.create<arith::CmpFOp>(arith::CmpFPredicate::OEQ, absReal, inf, fmf);
+      Value inIsNanInf = b.create<arith::AndIOp>(absImagIsInf, realIsNan);
+
+      Value resultIsZero = b.create<arith::OrIOp>(inIsNanInf, realIsInf);
+
+      resultReal =
+          b.create<arith::SelectOp>(resultIsZero, realSignedZero, resultReal);
+      resultImag = b.create<arith::SelectOp>(resultIsZero, negImagSignedZero,
+                                             resultImag);
+    }
+
+    Value isRealZero =
+        b.create<arith::CmpFOp>(arith::CmpFPredicate::OEQ, real, zero, fmf);
+    Value isImagZero =
+        b.create<arith::CmpFOp>(arith::CmpFPredicate::OEQ, imag, zero, fmf);
+    Value isZero = b.create<arith::AndIOp>(isRealZero, isImagZero);
+
+    resultReal = b.create<arith::SelectOp>(isZero, inf, resultReal);
+    resultImag = b.create<arith::SelectOp>(isZero, nan, resultImag);
+
+    rewriter.replaceOpWithNewOp<complex::CreateOp>(op, type, resultReal,
+                                                   resultImag);
     return success();
   }
 };
diff --git a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir
index e0e7cdadd317d2..8b4ea9777f7976 100644
--- a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir
+++ b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir
@@ -837,6 +837,21 @@ func.func @complex_rsqrt(%arg: complex<f32>) -> complex<f32> {
   return %rsqrt : complex<f32>
 }
 
+// CHECK-COUNT-5: arith.select
+// CHECK-NOT: arith.select
+
+// -----
+
+// CHECK-LABEL: func @complex_rsqrt_nnan_ninf
+// CHECK-SAME: %[[ARG:.*]]: complex<f32>
+func.func @complex_rsqrt_nnan_ninf(%arg: complex<f32>) -> complex<f32> {
+  %sqrt = complex.rsqrt %arg fastmath<nnan,ninf> : complex<f32>
+  return %sqrt : complex<f32>
+}
+
+// CHECK-COUNT-3: arith.select
+// CHECK-NOT: arith.select
+
 // -----
 
 // CHECK-LABEL:   func.func @complex_angle
@@ -2103,4 +2118,4 @@ func.func @complex_tanh_with_fmf(%arg: complex<f32>) -> complex<f32> {
 // CHECK: %[[NUM:.*]] = complex.create %[[TANH_A]], %[[TAN_B]] : complex<f32>
 // CHECK: %[[ONE:.*]] = arith.constant 1.000000e+00 : f32
 // CHECK: %[[MUL:.*]] = arith.mulf %[[TANH_A]], %[[TAN_B]] fastmath<nnan,contract> : f32
-// CHECK: %[[DENOM:.*]] = complex.create %[[ONE]], %[[MUL]] : complex<f32>
\ No newline at end of file
+// CHECK: %[[DENOM:.*]] = complex.create %[[ONE]], %[[MUL]] : complex<f32>

jreiffers · 2024-04-15T09:48:11Z

Looks like this breaks a correctness test, reverting.

This reverts commit 8ddaf75.

Reverts #88691

The current lowering has issues with large/subnormal values. This ports XLA's lowering and was verified using XLA's test suite and the MLIR-based emitters. This updates llvm#88691 to also update the correctness test for rsqrt(0). I checked C++ and Python; they both agree the result should be (inf, nan). Updated the correctness test to match this.

The current lowering has issues with large/subnormal values. This ports XLA's lowering and was verified using XLA's test suite and the MLIR-based emitters. This updates #88691 to also update the correctness test for rsqrt(0). I checked C++ and Python; they both agree the result should be (inf, nan). Updated the correctness test to match this.

The current lowering has issues with large/subnormal values. This ports XLA's lowering and was verified using XLA's test suite and the MLIR-based emitters.

Reverts llvm#88691

The current lowering has issues with large/subnormal values. This ports XLA's lowering and was verified using XLA's test suite and the MLIR-based emitters. This updates llvm#88691 to also update the correctness test for rsqrt(0). I checked C++ and Python; they both agree the result should be (inf, nan). Updated the correctness test to match this.

The current lowering has issues with large/subnormal values. This ports XLA's lowering and was verified using XLA's test suite and the MLIR-based emitters.

Reverts llvm#88691

The current lowering has issues with large/subnormal values. This ports XLA's lowering and was verified using XLA's test suite and the MLIR-based emitters. This updates llvm#88691 to also update the correctness test for rsqrt(0). I checked C++ and Python; they both agree the result should be (inf, nan). Updated the correctness test to match this.

Fix rsqrt inaccuracies.

09b5fc0

The current lowering has issues with large/subnormal values. This ports XLA's lowering and was verified using XLA's test suite and the MLIR-based emitters.

jreiffers requested a review from akuegel April 15, 2024 07:49

llvmbot added the mlir label Apr 15, 2024

jreiffers requested a review from pifon2a April 15, 2024 08:45

akuegel approved these changes Apr 15, 2024

View reviewed changes

jreiffers merged commit 8ddaf75 into llvm:main Apr 15, 2024

jreiffers added a commit that referenced this pull request Apr 15, 2024

Revert "Fix rsqrt inaccuracies. (#88691)"

de88bd7

This reverts commit 8ddaf75.

jreiffers mentioned this pull request Apr 15, 2024

Revert "Fix rsqrt inaccuracies." #88705

Merged

jreiffers added a commit that referenced this pull request Apr 15, 2024

Revert "Fix rsqrt inaccuracies." (#88705)

b4e7b56

Reverts #88691

jreiffers mentioned this pull request Apr 15, 2024

Fix rsqrt inaccuracies. #88707

Merged

bazuzi pushed a commit to bazuzi/llvm-project that referenced this pull request Apr 15, 2024

Fix rsqrt inaccuracies. (llvm#88691)

ec23e71

The current lowering has issues with large/subnormal values. This ports XLA's lowering and was verified using XLA's test suite and the MLIR-based emitters.

bazuzi pushed a commit to bazuzi/llvm-project that referenced this pull request Apr 15, 2024

Revert "Fix rsqrt inaccuracies." (llvm#88705)

428f6c2

Reverts llvm#88691

aniplcc pushed a commit to aniplcc/llvm-project that referenced this pull request Apr 15, 2024

Revert "Fix rsqrt inaccuracies." (llvm#88705)

0c1a74e

Reverts llvm#88691

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Fix rsqrt inaccuracies. #88691

Fix rsqrt inaccuracies. #88691

Uh oh!

jreiffers commented Apr 15, 2024

Uh oh!

llvmbot commented Apr 15, 2024

Uh oh!

jreiffers commented Apr 15, 2024

Uh oh!

Uh oh!

Fix rsqrt inaccuracies. #88691

Fix rsqrt inaccuracies. #88691

Uh oh!

Conversation

jreiffers commented Apr 15, 2024

Uh oh!

llvmbot commented Apr 15, 2024

Uh oh!

jreiffers commented Apr 15, 2024

Uh oh!

Uh oh!