llvm · jreiffers · Apr 12, 2024 · Apr 12, 2024
diff --git a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp
@@ -27,35 +27,52 @@ using namespace mlir;
 
 namespace {
 
+// Emits ops computing |real + i*imag|, or sqrt of that if returnSqrt is set.
+Value computeAbs(Value real, Value imag, arith::FastMathFlags fmf,
+                 ImplicitLocOpBuilder &b, bool returnSqrt = false) {
+  Value one = b.create<arith::ConstantOp>(real.getType(),
+                                          b.getFloatAttr(real.getType(), 1.0));
+
+  Value absReal = b.create<math::AbsFOp>(real, fmf);
+  Value absImag = b.create<math::AbsFOp>(imag, fmf);
+  // |z| is formed as max * sqrt(1 + (min/max)^2), not sqrt(re^2 + im^2).
+  Value max = b.create<arith::MaximumFOp>(absReal, absImag, fmf);
+  Value min = b.create<arith::MinimumFOp>(absReal, absImag, fmf);
+  Value ratio = b.create<arith::DivFOp>(min, max, fmf);
+  Value ratioSq = b.create<arith::MulFOp>(ratio, ratio, fmf);
+  Value ratioSqPlusOne = b.create<arith::AddFOp>(ratioSq, one, fmf);
+  Value result;
+  // With returnSqrt: sqrt(|z|) = sqrt(max) * (1 + (min/max)^2)^(1/4).
+  if (returnSqrt) {
+    Value quarter = b.create<arith::ConstantOp>(
+        real.getType(), b.getFloatAttr(real.getType(), 0.25));
+    // sqrt(sqrt(a*b)) would avoid the pow, but will overflow more easily.
+    Value sqrt = b.create<math::SqrtOp>(max, fmf);
+    Value p025 = b.create<math::PowFOp>(ratioSqPlusOne, quarter, fmf);
+    result = b.create<arith::MulFOp>(sqrt, p025, fmf);
+  } else {
+    Value sqrt = b.create<math::SqrtOp>(ratioSqPlusOne, fmf);
+    result = b.create<arith::MulFOp>(max, sqrt, fmf);
+  }
+  // If result is NaN (e.g. min/max was 0/0), return min instead of result.
+  Value isNaN =
+      b.create<arith::CmpFOp>(arith::CmpFPredicate::UNO, result, result, fmf);
+  return b.create<arith::SelectOp>(isNaN, min, result);
+}
+
 struct AbsOpConversion : public OpConversionPattern<complex::AbsOp> {
   using OpConversionPattern<complex::AbsOp>::OpConversionPattern;
 
   LogicalResult
   matchAndRewrite(complex::AbsOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
-    mlir::ImplicitLocOpBuilder b(op.getLoc(), rewriter);
+    ImplicitLocOpBuilder b(op.getLoc(), rewriter);
 
     arith::FastMathFlags fmf = op.getFastMathFlagsAttr().getValue();
 
-    Type elementType = op.getType();
-    Value one = b.create<arith::ConstantOp>(elementType,
-                                            b.getFloatAttr(elementType, 1.0));
-
     Value real = b.create<complex::ReOp>(adaptor.getComplex());
     Value imag = b.create<complex::ImOp>(adaptor.getComplex());
-    Value absReal = b.create<math::AbsFOp>(real, fmf);
-    Value absImag = b.create<math::AbsFOp>(imag, fmf);
-
-    Value max = b.create<arith::MaximumFOp>(absReal, absImag, fmf);
-    Value min = b.create<arith::MinimumFOp>(absReal, absImag, fmf);
-    Value ratio = b.create<arith::DivFOp>(min, max, fmf);
-    Value ratioSq = b.create<arith::MulFOp>(ratio, ratio, fmf);
-    Value ratioSqPlusOne = b.create<arith::AddFOp>(ratioSq, one, fmf);
-    Value sqrt = b.create<math::SqrtOp>(ratioSqPlusOne, fmf);
-    Value result = b.create<arith::MulFOp>(max, sqrt, fmf);
-    Value isNaN =
-        b.create<arith::CmpFOp>(arith::CmpFPredicate::UNO, result, result, fmf);
-    rewriter.replaceOpWithNewOp<arith::SelectOp>(op, isNaN, min, result);
+    rewriter.replaceOp(op, computeAbs(real, imag, fmf, b));
 
     return success();
   }
@@ -829,60 +846,71 @@ struct SqrtOpConversion : public OpConversionPattern<complex::SqrtOp> {
   LogicalResult
   matchAndRewrite(complex::SqrtOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
-    mlir::ImplicitLocOpBuilder b(op.getLoc(), rewriter);
+    ImplicitLocOpBuilder b(op.getLoc(), rewriter);
 
     auto type = cast<ComplexType>(op.getType());
-    Type elementType = type.getElementType();
-    Value arg = adaptor.getComplex();
-    arith::FastMathFlagsAttr fmf = op.getFastMathFlagsAttr();
-
-    Value zero =
-        b.create<arith::ConstantOp>(elementType, b.getZeroAttr(elementType));
-
-    Value real = b.create<complex::ReOp>(elementType, adaptor.getComplex());
-    Value imag = b.create<complex::ImOp>(elementType, adaptor.getComplex());
-
-    Value absLhs = b.create<math::AbsFOp>(real, fmf);
-    Value absArg = b.create<complex::AbsOp>(elementType, arg, fmf);
-    Value addAbs = b.create<arith::AddFOp>(absLhs, absArg, fmf);
+    auto elementType = type.getElementType().cast<FloatType>();
+    arith::FastMathFlags fmf = op.getFastMathFlagsAttr().getValue();
 
+    auto cst = [&](APFloat v) {
+      return b.create<arith::ConstantOp>(elementType,
+                                         b.getFloatAttr(elementType, v));
+    };
+    const auto &floatSemantics = elementType.getFloatSemantics();
+    Value zero = cst(APFloat::getZero(floatSemantics));
     Value half = b.create<arith::ConstantOp>(elementType,
                                              b.getFloatAttr(elementType, 0.5));
-    Value halfAddAbs = b.create<arith::MulFOp>(addAbs, half, fmf);
-    Value sqrtAddAbs = b.create<math::SqrtOp>(halfAddAbs, fmf);
-
-    Value realIsNegative =
-        b.create<arith::CmpFOp>(arith::CmpFPredicate::OLT, real, zero);
-    Value imagIsNegative =
-        b.create<arith::CmpFOp>(arith::CmpFPredicate::OLT, imag, zero);
-
-    Value resultReal = sqrtAddAbs;
-
-    Value imagDivTwoResultReal = b.create<arith::DivFOp>(
-        imag, b.create<arith::AddFOp>(resultReal, resultReal, fmf), fmf);
-
-    Value negativeResultReal = b.create<arith::NegFOp>(resultReal);
 
+    Value real = b.create<complex::ReOp>(elementType, adaptor.getComplex());
+    Value imag = b.create<complex::ImOp>(elementType, adaptor.getComplex());
+    Value absSqrt = computeAbs(real, imag, fmf, b, /*returnSqrt=*/true);
+    Value argArg = b.create<math::Atan2Op>(imag, real, fmf);
+    Value sqrtArg = b.create<arith::MulFOp>(argArg, half, fmf);
+    Value cos = b.create<math::CosOp>(sqrtArg, fmf);
+    Value sin = b.create<math::SinOp>(sqrtArg, fmf);
+    // sin(atan2(0, inf)) = 0 and sqrt(abs(inf)) = inf, but 0 * inf is NaN, so
+    // the imaginary part is forced to zero whenever sin compares equal to 0.
+    Value sinIsZero =
+        b.create<arith::CmpFOp>(arith::CmpFPredicate::OEQ, sin, zero, fmf);
+
+    Value resultReal = b.create<arith::MulFOp>(absSqrt, cos, fmf);
     Value resultImag = b.create<arith::SelectOp>(
-        realIsNegative,
-        b.create<arith::SelectOp>(imagIsNegative, negativeResultReal,
-                                  resultReal),
-        imagDivTwoResultReal);
-
-    resultReal = b.create<arith::SelectOp>(
-        realIsNegative,
-        b.create<arith::DivFOp>(
-            imag, b.create<arith::AddFOp>(resultImag, resultImag, fmf), fmf),
-        resultReal);
-
-    Value realIsZero =
-        b.create<arith::CmpFOp>(arith::CmpFPredicate::OEQ, real, zero);
-    Value imagIsZero =
-        b.create<arith::CmpFOp>(arith::CmpFPredicate::OEQ, imag, zero);
-    Value argIsZero = b.create<arith::AndIOp>(realIsZero, imagIsZero);
-
-    resultReal = b.create<arith::SelectOp>(argIsZero, zero, resultReal);
-    resultImag = b.create<arith::SelectOp>(argIsZero, zero, resultImag);
+        sinIsZero, zero, b.create<arith::MulFOp>(absSqrt, sin, fmf));
+    if (!arith::bitEnumContainsAll(fmf, arith::FastMathFlags::nnan |
+                                            arith::FastMathFlags::ninf)) {
+      Value inf = cst(APFloat::getInf(floatSemantics));
+      Value negInf = cst(APFloat::getInf(floatSemantics, true));
+      Value nan = cst(APFloat::getNaN(floatSemantics));
+      Value absImag = b.create<math::AbsFOp>(elementType, imag, fmf);
+
+      Value absImagIsInf =
+          b.create<arith::CmpFOp>(arith::CmpFPredicate::OEQ, absImag, inf, fmf);
+      Value absImagIsNotInf =
+          b.create<arith::CmpFOp>(arith::CmpFPredicate::ONE, absImag, inf, fmf);
+      Value realIsInf =
+          b.create<arith::CmpFOp>(arith::CmpFPredicate::OEQ, real, inf, fmf);
+      Value realIsNegInf =
+          b.create<arith::CmpFOp>(arith::CmpFPredicate::OEQ, real, negInf, fmf);
+
+      resultReal = b.create<arith::SelectOp>(
+          b.create<arith::AndIOp>(realIsNegInf, absImagIsNotInf), zero,
+          resultReal);
+      resultReal = b.create<arith::SelectOp>(
+          b.create<arith::OrIOp>(absImagIsInf, realIsInf), inf, resultReal);
+
+      Value imagSignInf = b.create<math::CopySignOp>(inf, imag, fmf);
+      resultImag = b.create<arith::SelectOp>(
+          b.create<arith::CmpFOp>(arith::CmpFPredicate::UNO, absSqrt, absSqrt),
+          nan, resultImag);
+      resultImag = b.create<arith::SelectOp>(
+          b.create<arith::OrIOp>(absImagIsInf, realIsNegInf), imagSignInf,
+          resultImag);
+    }
+
+    Value resultIsZero =
+        b.create<arith::CmpFOp>(arith::CmpFPredicate::OEQ, absSqrt, zero, fmf);
+    resultReal = b.create<arith::SelectOp>(resultIsZero, zero, resultReal);
+    resultImag = b.create<arith::SelectOp>(resultIsZero, zero, resultImag);
 
     rewriter.replaceOpWithNewOp<complex::CreateOp>(op, type, resultReal,
                                                    resultImag);
@@ -1065,27 +1093,27 @@ static Value powOpConversionImpl(mlir::ImplicitLocOpBuilder &builder,
   // Case 2:
   // 1^(c + d*i) = 1 + 0*i
   Value lhsEqOne = builder.create<arith::AndIOp>(
-      builder.create<arith::CmpFOp>(arith::CmpFPredicate::OEQ, a, one),
+      builder.create<arith::CmpFOp>(arith::CmpFPredicate::OEQ, a, one, fmf),
       bEqZero);
   Value cutoff2 =
       builder.create<arith::SelectOp>(lhsEqOne, complexOne, cutoff1);
 
   // Case 3:
   // inf^(c + 0*i) = inf + 0*i, c > 0
   Value lhsEqInf = builder.create<arith::AndIOp>(
-      builder.create<arith::CmpFOp>(arith::CmpFPredicate::OEQ, a, inf),
+      builder.create<arith::CmpFOp>(arith::CmpFPredicate::OEQ, a, inf, fmf),
       bEqZero);
   Value rhsGt0 = builder.create<arith::AndIOp>(
       dEqZero,
-      builder.create<arith::CmpFOp>(arith::CmpFPredicate::OGT, c, zero));
+      builder.create<arith::CmpFOp>(arith::CmpFPredicate::OGT, c, zero, fmf));
   Value cutoff3 = builder.create<arith::SelectOp>(
       builder.create<arith::AndIOp>(lhsEqInf, rhsGt0), complexInf, cutoff2);
 
   // Case 4:
   // inf^(c + 0*i) = 0 + 0*i, c < 0
   Value rhsLt0 = builder.create<arith::AndIOp>(
       dEqZero,
-      builder.create<arith::CmpFOp>(arith::CmpFPredicate::OLT, c, zero));
+      builder.create<arith::CmpFOp>(arith::CmpFPredicate::OLT, c, zero, fmf));
   Value cutoff4 = builder.create<arith::SelectOp>(
       builder.create<arith::AndIOp>(lhsEqInf, rhsLt0), complexZero, cutoff3);