Skip to content

Commit 63a4b4f

Browse files
author
Thorsten Schütt
authored
[GlobalIsel] Combine logic of floating point compares (#81886)
It is purely based on symmetry. Registers can be scalars, vectors, and non-constants. Examples: X < 5.0 || X > 5.0 -> X != 5.0; X < Y && X > Y -> FCMP_FALSE; X < Y && X < Y -> FCMP_TRUE. See InstCombinerImpl::foldLogicOfFCmps.
1 parent 119a728 commit 63a4b4f

File tree

3 files changed

+231
-0
lines changed

3 files changed

+231
-0
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -927,6 +927,9 @@ class CombinerHelper {
927927
/// into a single comparison using range-based reasoning.
928928
bool tryFoldAndOrOrICmpsUsingRanges(GLogicalBinOp *Logic,
929929
BuildFnTy &MatchInfo);
930+
931+
// Simplify (cmp cc0 x, y) (&& or ||) (cmp cc1 x, y) -> cmp cc2 x, y.
932+
bool tryFoldLogicOfFCmps(GLogicalBinOp *Logic, BuildFnTy &MatchInfo);
930933
};
931934
} // namespace llvm
932935

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include "llvm/ADT/STLExtras.h"
1111
#include "llvm/ADT/SetVector.h"
1212
#include "llvm/ADT/SmallBitVector.h"
13+
#include "llvm/Analysis/CmpInstAnalysis.h"
1314
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
1415
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
1516
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
@@ -6817,12 +6818,90 @@ bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(GLogicalBinOp *Logic,
68176818
return true;
68186819
}
68196820

6821+
/// Try to fold a G_AND / G_OR of two G_FCMPs that compare the same pair of
/// registers (in either operand order) into a single G_FCMP, or into a
/// constant when the combined predicate is FCMP_FALSE / FCMP_TRUE and
/// constants of the compare result type may be built.
///
/// \p Logic must not be a G_XOR. The fold is only attempted when the logic op
/// and both compares each have exactly one non-debug use.
///
/// \returns true if the pattern matched; \p MatchInfo is then set to a
/// callback that builds the replacement into the logic op's result register.
bool CombinerHelper::tryFoldLogicOfFCmps(GLogicalBinOp *Logic,
                                         BuildFnTy &MatchInfo) {
  assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
  const bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
  Register DestReg = Logic->getReg(0);

  // We need a compare on the LHS register.
  GFCmp *Cmp1 = getOpcodeDef<GFCmp>(Logic->getLHSReg(), MRI);
  if (!Cmp1)
    return false;

  // We need a compare on the RHS register.
  GFCmp *Cmp2 = getOpcodeDef<GFCmp>(Logic->getRHSReg(), MRI);
  if (!Cmp2)
    return false;

  LLT CmpTy = MRI.getType(Cmp1->getReg(0));
  LLT CmpOperandTy = MRI.getType(Cmp1->getLHSReg());

  // We build one fcmp, so that fcmp must be buildable at this point.
  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_FCMP, {CmpTy, CmpOperandTy}}))
    return false;

  // We replace the logic op and want both fcmps to fold away, so none of the
  // three results may have another user.
  if (!MRI.hasOneNonDBGUse(DestReg) ||
      !MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
      !MRI.hasOneNonDBGUse(Cmp2->getReg(0)))
    return false;

  // The fcmps must have the same shape.
  if (CmpOperandTy != MRI.getType(Cmp2->getLHSReg()))
    return false;

  CmpInst::Predicate PredL = Cmp1->getCond();
  CmpInst::Predicate PredR = Cmp2->getCond();
  Register LHS0 = Cmp1->getLHSReg();
  Register LHS1 = Cmp1->getRHSReg();
  Register RHS0 = Cmp2->getLHSReg();
  Register RHS1 = Cmp2->getRHSReg();

  if (LHS0 == RHS1 && LHS1 == RHS0) {
    // The second compare uses the operands in reverse order: swap them (and
    // the predicate accordingly) so both compares read (LHS0, LHS1).
    PredR = CmpInst::getSwappedPredicate(PredR);
    std::swap(RHS0, RHS1);
  }

  // Both compares must now look at exactly the same operand pair.
  if (LHS0 != RHS0 || LHS1 != RHS1)
    return false;

  // We determine the new predicate by combining the fcmp codes of both
  // predicates with the same bitwise operation as the logic op.
  unsigned CmpCodeL = getFCmpCode(PredL);
  unsigned CmpCodeR = getFCmpCode(PredR);
  unsigned NewPred = IsAnd ? CmpCodeL & CmpCodeR : CmpCodeL | CmpCodeR;
  // The fcmp predicates fill the lower part of the enum.
  FCmpInst::Predicate Pred = static_cast<FCmpInst::Predicate>(NewPred);
  // The replacement compare carries the union of the flags of both compares.
  unsigned Flags = Cmp1->getFlags() | Cmp2->getFlags();

  MatchInfo = [=](MachineIRBuilder &B) {
    if (Pred == FCmpInst::FCMP_FALSE &&
        isConstantLegalOrBeforeLegalizer(CmpTy)) {
      auto False = B.buildConstant(CmpTy, 0);
      B.buildZExtOrTrunc(DestReg, False);
    } else if (Pred == FCmpInst::FCMP_TRUE &&
               isConstantLegalOrBeforeLegalizer(CmpTy)) {
      auto True =
          B.buildConstant(CmpTy, getICmpTrueVal(getTargetLowering(),
                                                CmpTy.isVector() /*isVector*/,
                                                true /*isFP*/));
      B.buildZExtOrTrunc(DestReg, True);
    } else { // We take the predicate without predicate optimizations.
      auto Cmp = B.buildFCmp(Pred, CmpTy, LHS0, LHS1, Flags);
      B.buildZExtOrTrunc(DestReg, Cmp);
    }
  };
  return true;
}
6895+
68206896
/// Match entry point for G_AND: tries the available and/or-of-compare folds
/// in order and stops at the first one that reports a match.
///
/// \returns true if one of the folds matched, false otherwise.
bool CombinerHelper::matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) {
  GAnd *AndMI = cast<GAnd>(&MI);

  // Short-circuit evaluation keeps the original order: integer compare
  // ranges first, then logic of floating point compares.
  return tryFoldAndOrOrICmpsUsingRanges(AndMI, MatchInfo) ||
         tryFoldLogicOfFCmps(AndMI, MatchInfo);
}
68286907

@@ -6832,5 +6911,8 @@ bool CombinerHelper::matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) {
68326911
if (tryFoldAndOrOrICmpsUsingRanges(Or, MatchInfo))
68336912
return true;
68346913

6914+
if (tryFoldLogicOfFCmps(Or, MatchInfo))
6915+
return true;
6916+
68356917
return false;
68366918
}

llvm/test/CodeGen/AArch64/GlobalISel/combine-logic-of-compare.mir

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,3 +260,149 @@ body: |
260260
%zext:_(<2 x s64>) = G_ZEXT %and(<2 x s1>)
261261
$q0 = COPY %zext
262262
...
263+
---
264+
# fcmp (x, y) || fcmp (x, y) -> fcmp(x, y)
265+
name: test_fcmp_or_fcmp_with_x_y
266+
body: |
267+
bb.1:
268+
liveins: $x0, $x1
269+
; CHECK-LABEL: name: test_fcmp_or_fcmp_with_x_y
270+
; CHECK: liveins: $x0, $x1
271+
; CHECK-NEXT: {{ $}}
272+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
273+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
274+
; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ueq), [[COPY]](s64), [[COPY1]]
275+
; CHECK-NEXT: %zext:_(s64) = G_ZEXT [[FCMP]](s1)
276+
; CHECK-NEXT: $x0 = COPY %zext(s64)
277+
%0:_(s64) = COPY $x0
278+
%1:_(s64) = COPY $x1
279+
%cmp1:_(s1) = G_FCMP floatpred(oeq), %0(s64), %1
280+
%cmp2:_(s1) = G_FCMP floatpred(uno), %0(s64), %1
281+
%or:_(s1) = G_OR %cmp1, %cmp2
282+
%zext:_(s64) = G_ZEXT %or(s1)
283+
$x0 = COPY %zext
284+
...
285+
---
286+
# fcmp one (x, 5.0) || fcmp uno (x, 5.0) -> fcmp une (x, 5.0)
287+
name: test_fcmp_or_fcmp_with_5_y
288+
body: |
289+
bb.1:
290+
liveins: $x0, $x1
291+
; CHECK-LABEL: name: test_fcmp_or_fcmp_with_5_y
292+
; CHECK: liveins: $x0, $x1
293+
; CHECK-NEXT: {{ $}}
294+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
295+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e+00
296+
; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(une), [[COPY]](s64), [[C]]
297+
; CHECK-NEXT: %zext:_(s64) = G_ZEXT [[FCMP]](s1)
298+
; CHECK-NEXT: $x0 = COPY %zext(s64)
299+
%0:_(s64) = COPY $x0
300+
%1:_(s64) = COPY $x1
301+
%2:_(s64) = G_FCONSTANT double 5.0
302+
%cmp1:_(s1) = G_FCMP floatpred(one), %0(s64), %2
303+
%cmp2:_(s1) = G_FCMP floatpred(uno), %0(s64), %2
304+
%or:_(s1) = G_OR %cmp1, %cmp2
305+
%zext:_(s64) = G_ZEXT %or(s1)
306+
$x0 = COPY %zext
307+
...
308+
---
309+
# fcmp one (y, x) || fcmp uno (x, y) -> fcmp une (y, x)
310+
name: test_fcmp_or_fcmp_with_anti
311+
body: |
312+
bb.1:
313+
liveins: $x0, $x1
314+
; CHECK-LABEL: name: test_fcmp_or_fcmp_with_anti
315+
; CHECK: liveins: $x0, $x1
316+
; CHECK-NEXT: {{ $}}
317+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
318+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
319+
; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(une), [[COPY1]](s64), [[COPY]]
320+
; CHECK-NEXT: %zext:_(s64) = G_ZEXT [[FCMP]](s1)
321+
; CHECK-NEXT: $x0 = COPY %zext(s64)
322+
%0:_(s64) = COPY $x0
323+
%1:_(s64) = COPY $x1
324+
%cmp1:_(s1) = G_FCMP floatpred(one), %1(s64), %0
325+
%cmp2:_(s1) = G_FCMP floatpred(uno), %0(s64), %1
326+
%or:_(s1) = G_OR %cmp1, %cmp2
327+
%zext:_(s64) = G_ZEXT %or(s1)
328+
$x0 = COPY %zext
329+
...
330+
---
331+
# fcmp une (y, x) && fcmp uno (x, y) -> fcmp uno (y, x)
332+
name: test_fcmp_and_fcmp_with_x_y
333+
body: |
334+
bb.1:
335+
liveins: $x0, $x1
336+
; CHECK-LABEL: name: test_fcmp_and_fcmp_with_x_y
337+
; CHECK: liveins: $x0, $x1
338+
; CHECK-NEXT: {{ $}}
339+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
340+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
341+
; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(uno), [[COPY1]](s64), [[COPY]]
342+
; CHECK-NEXT: %zext:_(s64) = G_ZEXT [[FCMP]](s1)
343+
; CHECK-NEXT: $x0 = COPY %zext(s64)
344+
%0:_(s64) = COPY $x0
345+
%1:_(s64) = COPY $x1
346+
%cmp1:_(s1) = G_FCMP floatpred(une), %1(s64), %0
347+
%cmp2:_(s1) = G_FCMP floatpred(uno), %0(s64), %1
348+
%and:_(s1) = G_AND %cmp1, %cmp2
349+
%zext:_(s64) = G_ZEXT %and(s1)
350+
$x0 = COPY %zext
351+
...
352+
---
353+
# fcmp ogt (y, x) && fcmp ugt (x, y) is not folded: the G_AND result has more than one use
354+
name: test_fcmp_and_fcmp_with_x_y_multi_use
355+
body: |
356+
bb.1:
357+
liveins: $x0, $x1
358+
; CHECK-LABEL: name: test_fcmp_and_fcmp_with_x_y_multi_use
359+
; CHECK: liveins: $x0, $x1
360+
; CHECK-NEXT: {{ $}}
361+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
362+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
363+
; CHECK-NEXT: %cmp1:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s64), [[COPY]]
364+
; CHECK-NEXT: %cmp2:_(s1) = G_FCMP floatpred(ugt), [[COPY]](s64), [[COPY1]]
365+
; CHECK-NEXT: %and:_(s1) = G_AND %cmp1, %cmp2
366+
; CHECK-NEXT: %zext:_(s64) = G_ZEXT %and(s1)
367+
; CHECK-NEXT: %zext2:_(s64) = G_ZEXT %and(s1)
368+
; CHECK-NEXT: $x0 = COPY %zext(s64)
369+
; CHECK-NEXT: $x2 = COPY %zext2(s64)
370+
%0:_(s64) = COPY $x0
371+
%1:_(s64) = COPY $x1
372+
%cmp1:_(s1) = G_FCMP floatpred(ogt), %1(s64), %0
373+
%cmp2:_(s1) = G_FCMP floatpred(ugt), %0(s64), %1
374+
%and:_(s1) = G_AND %cmp1, %cmp2
375+
%zext:_(s64) = G_ZEXT %and(s1)
376+
%zext2:_(s64) = G_ZEXT %and(s1)
377+
$x0 = COPY %zext
378+
$x2 = COPY %zext2
379+
...
380+
---
381+
# vector fcmp oeq (x, y) && fcmp olt (x, y) -> constant false vector
382+
name: test_fcmp_and_fcmp_with_vectors
383+
body: |
384+
bb.1:
385+
liveins: $x0, $x1
386+
; CHECK-LABEL: name: test_fcmp_and_fcmp_with_vectors
387+
; CHECK: liveins: $x0, $x1
388+
; CHECK-NEXT: {{ $}}
389+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
390+
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s1>) = G_BUILD_VECTOR [[C]](s1), [[C]](s1)
391+
; CHECK-NEXT: %zext:_(<2 x s64>) = G_ZEXT [[BUILD_VECTOR]](<2 x s1>)
392+
; CHECK-NEXT: $q0 = COPY %zext(<2 x s64>)
393+
%0:_(s64) = COPY $x0
394+
%1:_(s64) = COPY $x1
395+
%2:_(s64) = COPY $x2
396+
%3:_(s64) = COPY $x3
397+
%4:_(s64) = COPY $x4
398+
%5:_(s64) = COPY $x5
399+
%6:_(s64) = COPY $x6
400+
%7:_(s64) = COPY $x7
401+
%v8:_(<2 x s64>) = G_BUILD_VECTOR %0(s64), %1(s64)
402+
%v9:_(<2 x s64>) = G_BUILD_VECTOR %2(s64), %3(s64)
403+
%cmp1:_(<2 x s1>) = G_FCMP floatpred(oeq), %v8(<2 x s64>), %v9
404+
%cmp2:_(<2 x s1>) = G_FCMP floatpred(olt), %v8(<2 x s64>), %v9
405+
%and:_(<2 x s1>) = G_AND %cmp1, %cmp2
406+
%zext:_(<2 x s64>) = G_ZEXT %and(<2 x s1>)
407+
$q0 = COPY %zext
408+
...

0 commit comments

Comments
 (0)