[GlobalIsel] Combine logic of floating point compares (#81886)

Thorsten Schütt · web-flow · commit 63a4b4f610e7 · 2024-02-20T09:56:33.000+01:00
It is purely based on symmetry. Registers can be scalars, vectors, and
non-constants.

X &lt; 5.0 || X &gt; 5.0
  -&gt;
X != 5.0

X &lt; Y &amp;&amp; X &gt; Y
  -&gt;
  FCMP_FALSE

X &lt; Y &amp;&amp; X &lt; Y
  -&gt;
  FCMP_TRUE

see InstCombinerImpl::foldLogicOfFCmps
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -927,6 +927,9 @@ class CombinerHelper {
   /// into a single comparison using range-based reasoning.
   bool tryFoldAndOrOrICmpsUsingRanges(GLogicalBinOp *Logic,
                                       BuildFnTy &MatchInfo);
+
+  // Simplify (cmp cc0 x, y) (&& or ||) (cmp cc1 x, y) -> cmp cc2 x, y.
+  bool tryFoldLogicOfFCmps(GLogicalBinOp *Logic, BuildFnTy &MatchInfo);
 };
 } // namespace llvm
 
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -10,6 +10,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallBitVector.h"
+#include "llvm/Analysis/CmpInstAnalysis.h"
 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
@@ -6817,12 +6818,90 @@ bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(GLogicalBinOp *Logic,
   return true;
 }
 
+bool CombinerHelper::tryFoldLogicOfFCmps(GLogicalBinOp *Logic,
+                                         BuildFnTy &MatchInfo) {
+  assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpecte xor");
+  Register DestReg = Logic->getReg(0);
+  Register LHS = Logic->getLHSReg();
+  Register RHS = Logic->getRHSReg();
+  bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
+
+  // We need a compare on the LHS register.
+  GFCmp *Cmp1 = getOpcodeDef<GFCmp>(LHS, MRI);
+  if (!Cmp1)
+    return false;
+
+  // We need a compare on the RHS register.
+  GFCmp *Cmp2 = getOpcodeDef<GFCmp>(RHS, MRI);
+  if (!Cmp2)
+    return false;
+
+  LLT CmpTy = MRI.getType(Cmp1->getReg(0));
+  LLT CmpOperandTy = MRI.getType(Cmp1->getLHSReg());
+
+  // We build one fcmp, want to fold the fcmps, replace the logic op,
+  // and the fcmps must have the same shape.
+  if (!isLegalOrBeforeLegalizer(
+          {TargetOpcode::G_FCMP, {CmpTy, CmpOperandTy}}) ||
+      !MRI.hasOneNonDBGUse(Logic->getReg(0)) ||
+      !MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
+      !MRI.hasOneNonDBGUse(Cmp2->getReg(0)) ||
+      MRI.getType(Cmp1->getLHSReg()) != MRI.getType(Cmp2->getLHSReg()))
+    return false;
+
+  CmpInst::Predicate PredL = Cmp1->getCond();
+  CmpInst::Predicate PredR = Cmp2->getCond();
+  Register LHS0 = Cmp1->getLHSReg();
+  Register LHS1 = Cmp1->getRHSReg();
+  Register RHS0 = Cmp2->getLHSReg();
+  Register RHS1 = Cmp2->getRHSReg();
+
+  if (LHS0 == RHS1 && LHS1 == RHS0) {
+    // Swap RHS operands to match LHS.
+    PredR = CmpInst::getSwappedPredicate(PredR);
+    std::swap(RHS0, RHS1);
+  }
+
+  if (LHS0 == RHS0 && LHS1 == RHS1) {
+    // We determine the new predicate.
+    unsigned CmpCodeL = getFCmpCode(PredL);
+    unsigned CmpCodeR = getFCmpCode(PredR);
+    unsigned NewPred = IsAnd ? CmpCodeL & CmpCodeR : CmpCodeL | CmpCodeR;
+    unsigned Flags = Cmp1->getFlags() | Cmp2->getFlags();
+    MatchInfo = [=](MachineIRBuilder &B) {
+      // The fcmp predicates fill the lower part of the enum.
+      FCmpInst::Predicate Pred = static_cast<FCmpInst::Predicate>(NewPred);
+      if (Pred == FCmpInst::FCMP_FALSE &&
+          isConstantLegalOrBeforeLegalizer(CmpTy)) {
+        auto False = B.buildConstant(CmpTy, 0);
+        B.buildZExtOrTrunc(DestReg, False);
+      } else if (Pred == FCmpInst::FCMP_TRUE &&
+                 isConstantLegalOrBeforeLegalizer(CmpTy)) {
+        auto True =
+            B.buildConstant(CmpTy, getICmpTrueVal(getTargetLowering(),
+                                                  CmpTy.isVector() /*isVector*/,
+                                                  true /*isFP*/));
+        B.buildZExtOrTrunc(DestReg, True);
+      } else { // We take the predicate without predicate optimizations.
+        auto Cmp = B.buildFCmp(Pred, CmpTy, LHS0, LHS1, Flags);
+        B.buildZExtOrTrunc(DestReg, Cmp);
+      }
+    };
+    return true;
+  }
+
+  return false;
+}
+
 bool CombinerHelper::matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) {
   GAnd *And = cast<GAnd>(&MI);
 
   if (tryFoldAndOrOrICmpsUsingRanges(And, MatchInfo))
     return true;
 
+  if (tryFoldLogicOfFCmps(And, MatchInfo))
+    return true;
+
   return false;
 }
 
@@ -6832,5 +6911,8 @@ bool CombinerHelper::matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) {
   if (tryFoldAndOrOrICmpsUsingRanges(Or, MatchInfo))
     return true;
 
+  if (tryFoldLogicOfFCmps(Or, MatchInfo))
+    return true;
+
   return false;
 }
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-logic-of-compare.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-logic-of-compare.mir
@@ -260,3 +260,149 @@ body:             |
     %zext:_(<2 x s64>) = G_ZEXT %and(<2 x s1>)
     $q0 = COPY %zext
 ...
+---
+# fcmp (x, y) || fcmp (x, y) -> fcmp(x, y)
+name:            test_fcmp_or_fcmp_with_x_y
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: test_fcmp_or_fcmp_with_x_y
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ueq), [[COPY]](s64), [[COPY1]]
+    ; CHECK-NEXT: %zext:_(s64) = G_ZEXT [[FCMP]](s1)
+    ; CHECK-NEXT: $x0 = COPY %zext(s64)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %cmp1:_(s1) = G_FCMP floatpred(oeq), %0(s64), %1
+    %cmp2:_(s1) = G_FCMP floatpred(uno), %0(s64), %1
+    %or:_(s1) = G_OR %cmp1, %cmp2
+    %zext:_(s64) = G_ZEXT %or(s1)
+    $x0 = COPY %zext
+...
+---
+# fcmp (5, y) || fcmp (y, 5) -> fcmp(x, y)
+name:            test_fcmp_or_fcmp_with_5_y
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: test_fcmp_or_fcmp_with_5_y
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e+00
+    ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(une), [[COPY]](s64), [[C]]
+    ; CHECK-NEXT: %zext:_(s64) = G_ZEXT [[FCMP]](s1)
+    ; CHECK-NEXT: $x0 = COPY %zext(s64)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(s64) = G_FCONSTANT double 5.0
+    %cmp1:_(s1) = G_FCMP floatpred(one), %0(s64), %2
+    %cmp2:_(s1) = G_FCMP floatpred(uno), %0(s64), %2
+    %or:_(s1) = G_OR %cmp1, %cmp2
+    %zext:_(s64) = G_ZEXT %or(s1)
+    $x0 = COPY %zext
+...
+---
+# fcmp (x, y) || fcmp (y, x) -> fcmp(x, y)
+name:            test_fcmp_or_fcmp_with_anti
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: test_fcmp_or_fcmp_with_anti
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(une), [[COPY1]](s64), [[COPY]]
+    ; CHECK-NEXT: %zext:_(s64) = G_ZEXT [[FCMP]](s1)
+    ; CHECK-NEXT: $x0 = COPY %zext(s64)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %cmp1:_(s1) = G_FCMP floatpred(one), %1(s64), %0
+    %cmp2:_(s1) = G_FCMP floatpred(uno), %0(s64), %1
+    %or:_(s1) = G_OR %cmp1, %cmp2
+    %zext:_(s64) = G_ZEXT %or(s1)
+    $x0 = COPY %zext
+...
+---
+# fcmp (x, y) && fcmp (x, y) -> fcmp(x, y)
+name:            test_fcmp_and_fcmp_with_x_y
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: test_fcmp_and_fcmp_with_x_y
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(uno), [[COPY1]](s64), [[COPY]]
+    ; CHECK-NEXT: %zext:_(s64) = G_ZEXT [[FCMP]](s1)
+    ; CHECK-NEXT: $x0 = COPY %zext(s64)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %cmp1:_(s1) = G_FCMP floatpred(une), %1(s64), %0
+    %cmp2:_(s1) = G_FCMP floatpred(uno), %0(s64), %1
+    %and:_(s1) = G_AND %cmp1, %cmp2
+    %zext:_(s64) = G_ZEXT %and(s1)
+    $x0 = COPY %zext
+...
+---
+# fcmp (x, y) && fcmp (x, y) -> fcmp(x, y)
+name:            test_fcmp_and_fcmp_with_x_y_multi_use
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: test_fcmp_and_fcmp_with_x_y_multi_use
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-NEXT: %cmp1:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s64), [[COPY]]
+    ; CHECK-NEXT: %cmp2:_(s1) = G_FCMP floatpred(ugt), [[COPY]](s64), [[COPY1]]
+    ; CHECK-NEXT: %and:_(s1) = G_AND %cmp1, %cmp2
+    ; CHECK-NEXT: %zext:_(s64) = G_ZEXT %and(s1)
+    ; CHECK-NEXT: %zext2:_(s64) = G_ZEXT %and(s1)
+    ; CHECK-NEXT: $x0 = COPY %zext(s64)
+    ; CHECK-NEXT: $x2 = COPY %zext2(s64)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %cmp1:_(s1) = G_FCMP floatpred(ogt), %1(s64), %0
+    %cmp2:_(s1) = G_FCMP floatpred(ugt), %0(s64), %1
+    %and:_(s1) = G_AND %cmp1, %cmp2
+    %zext:_(s64) = G_ZEXT %and(s1)
+    %zext2:_(s64) = G_ZEXT %and(s1)
+    $x0 = COPY %zext
+    $x2 = COPY %zext2
+...
+---
+# fcmp (x, y) && fcmp (x, y) -> fcmp(x, y)
+name:            test_fcmp_and_fcmp_with_vectors
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: test_fcmp_and_fcmp_with_vectors
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s1>) = G_BUILD_VECTOR [[C]](s1), [[C]](s1)
+    ; CHECK-NEXT: %zext:_(<2 x s64>) = G_ZEXT [[BUILD_VECTOR]](<2 x s1>)
+    ; CHECK-NEXT: $q0 = COPY %zext(<2 x s64>)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(s64) = COPY $x2
+    %3:_(s64) = COPY $x3
+    %4:_(s64) = COPY $x4
+    %5:_(s64) = COPY $x5
+    %6:_(s64) = COPY $x6
+    %7:_(s64) = COPY $x7
+    %v8:_(<2 x s64>) = G_BUILD_VECTOR %0(s64), %1(s64)
+    %v9:_(<2 x s64>) = G_BUILD_VECTOR %2(s64), %3(s64)
+    %cmp1:_(<2 x s1>) = G_FCMP floatpred(oeq), %v8(<2 x s64>), %v9
+    %cmp2:_(<2 x s1>) = G_FCMP floatpred(olt), %v8(<2 x s64>), %v9
+    %and:_(<2 x s1>) = G_AND %cmp1, %cmp2
+    %zext:_(<2 x s64>) = G_ZEXT %and(<2 x s1>)
+    $q0 = COPY %zext
+...