Fold fma x, -1.0, y into fsub y, x (#100106)

dtcxzyw · web-flow · commit 16f22c0fe6fa · 2024-07-23T20:13:23.000+08:00
Alive2 proof (Please run alive-tv locally with larger `smt-to`): https://alive2.llvm.org/ce/z/YvUVg-
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2480,6 +2480,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     // fma fneg(x), fneg(y), z -> fma x, y, z
     Value *Src0 = II->getArgOperand(0);
     Value *Src1 = II->getArgOperand(1);
+    Value *Src2 = II->getArgOperand(2);
     Value *X, *Y;
     if (match(Src0, m_FNeg(m_Value(X))) && match(Src1, m_FNeg(m_Value(Y)))) {
       replaceOperand(*II, 0, X);
@@ -2497,22 +2498,24 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
 
     // Try to simplify the underlying FMul. We can only apply simplifications
     // that do not require rounding.
-    if (Value *V = simplifyFMAFMul(II->getArgOperand(0), II->getArgOperand(1),
-                                   II->getFastMathFlags(),
+    if (Value *V = simplifyFMAFMul(Src0, Src1, II->getFastMathFlags(),
                                    SQ.getWithInstruction(II))) {
-      auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2));
+      auto *FAdd = BinaryOperator::CreateFAdd(V, Src2);
       FAdd->copyFastMathFlags(II);
       return FAdd;
     }
 
     // fma x, y, 0 -> fmul x, y
     // This is always valid for -0.0, but requires nsz for +0.0 as
     // -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own.
-    if (match(II->getArgOperand(2), m_NegZeroFP()) ||
-        (match(II->getArgOperand(2), m_PosZeroFP()) &&
-         II->getFastMathFlags().noSignedZeros()))
+    if (match(Src2, m_NegZeroFP()) ||
+        (match(Src2, m_PosZeroFP()) && II->getFastMathFlags().noSignedZeros()))
       return BinaryOperator::CreateFMulFMF(Src0, Src1, II);
 
+    // fma x, -1.0, y -> fsub y, x
+    if (match(Src1, m_SpecificFP(-1.0)))
+      return BinaryOperator::CreateFSubFMF(Src2, Src0, II);
+
     break;
   }
   case Intrinsic::copysign: {
diff --git a/llvm/test/Transforms/InstCombine/fma.ll b/llvm/test/Transforms/InstCombine/fma.ll
@@ -856,3 +856,68 @@ define <2 x float> @fma_unary_shuffle_ops_uses(<2 x float> %x, <2 x float> %y, <
   %r = call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c)
   ret <2 x float> %r
 }
+
+define half @fma_negone(half %x, half %y) {
+; CHECK-LABEL: @fma_negone(
+; CHECK-NEXT:    [[SUB:%.*]] = fsub half [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    ret half [[SUB]]
+;
+  %sub = call half @llvm.fma.f16(half %x, half -1.0, half %y)
+  ret half %sub
+}
+
+define half @fmuladd_negone(half %x, half %y) {
+; CHECK-LABEL: @fmuladd_negone(
+; CHECK-NEXT:    [[SUB:%.*]] = fsub half [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    ret half [[SUB]]
+;
+  %sub = call half @llvm.fmuladd.f16(half %x, half -1.0, half %y)
+  ret half %sub
+}
+
+define half @fma_negone_fmf(half %x, half %y) {
+; CHECK-LABEL: @fma_negone_fmf(
+; CHECK-NEXT:    [[SUB:%.*]] = fsub nnan ninf nsz half [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    ret half [[SUB]]
+;
+  %sub = call nnan ninf nsz half @llvm.fma.f16(half %x, half -1.0, half %y)
+  ret half %sub
+}
+
+define half @fmuladd_negone_fmf(half %x, half %y) {
+; CHECK-LABEL: @fmuladd_negone_fmf(
+; CHECK-NEXT:    [[SUB:%.*]] = fsub nnan ninf nsz half [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    ret half [[SUB]]
+;
+  %sub = call nnan ninf nsz half @llvm.fmuladd.f16(half %x, half -1.0, half %y)
+  ret half %sub
+}
+
+define <2 x half> @fma_negone_vec(<2 x half> %x, <2 x half> %y) {
+; CHECK-LABEL: @fma_negone_vec(
+; CHECK-NEXT:    [[SUB:%.*]] = fsub <2 x half> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    ret <2 x half> [[SUB]]
+;
+  %sub = call <2 x half> @llvm.fma.v2f16(<2 x half> %x, <2 x half> splat(half -1.0), <2 x half> %y)
+  ret <2 x half> %sub
+}
+
+define <2 x half> @fma_negone_vec_partial_undef(<2 x half> %x, <2 x half> %y) {
+; CHECK-LABEL: @fma_negone_vec_partial_undef(
+; CHECK-NEXT:    [[SUB:%.*]] = call <2 x half> @llvm.fma.v2f16(<2 x half> [[X:%.*]], <2 x half> <half undef, half 0xHBC00>, <2 x half> [[Y:%.*]])
+; CHECK-NEXT:    ret <2 x half> [[SUB]]
+;
+  %sub = call <2 x half> @llvm.fma.v2f16(<2 x half> %x, <2 x half> <half undef, half -1.0>, <2 x half> %y)
+  ret <2 x half> %sub
+}
+
+; negative tests
+
+define half @fma_non_negone(half %x, half %y) {
+; CHECK-LABEL: @fma_non_negone(
+; CHECK-NEXT:    [[SUB:%.*]] = call half @llvm.fma.f16(half [[X:%.*]], half 0xHBE00, half [[Y:%.*]])
+; CHECK-NEXT:    ret half [[SUB]]
+;
+  %sub = call half @llvm.fma.f16(half %x, half -1.5, half %y)
+  ret half %sub
+}