Skip to content

Commit bf268a0

Browse files
committed
[AArch64] Emit vector FP cmp when LE is used with fast-math
Reviewed By: paulwalker-arm Differential Revision: https://reviews.llvm.org/D130093
1 parent a8de8ca commit bf268a0

File tree

3 files changed

+43
-319
lines changed

3 files changed

+43
-319
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11843,6 +11843,9 @@ SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
1184311843
EVT N00VT = N00.getValueType();
1184411844
SDLoc DL(N);
1184511845

11846+
// Propagate fast-math-flags.
11847+
SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
11848+
1184611849
// On some architectures (such as SSE/NEON/etc) the SETCC result type is
1184711850
// the same size as the compared operands. Try to optimize sext(setcc())
1184811851
// if this is the case.
@@ -12384,6 +12387,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
1238412387
return V;
1238512388

1238612389
if (N0.getOpcode() == ISD::SETCC) {
12390+
// Propagate fast-math-flags.
12391+
SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
12392+
1238712393
// Only do this before legalize for now.
1238812394
if (!LegalOperations && VT.isVector() &&
1238912395
N0.getValueType().getVectorElementType() == MVT::i1) {
@@ -12575,6 +12581,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
1257512581
}
1257612582

1257712583
if (N0.getOpcode() == ISD::SETCC) {
12584+
// Propagate fast-math-flags.
12585+
SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
12586+
1257812587
// For vectors:
1257912588
// aext(setcc) -> vsetcc
1258012589
// aext(setcc) -> truncate(vsetcc)

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11975,6 +11975,11 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
1197511975
if (IsZero)
1197611976
return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS);
1197711977
return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS);
11978+
case AArch64CC::LE:
11979+
if (!NoNans)
11980+
return SDValue();
11981+
// If we ignore NaNs then we can use the LS implementation.
11982+
LLVM_FALLTHROUGH;
1197811983
case AArch64CC::LS:
1197911984
if (IsZero)
1198011985
return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS);
@@ -12079,7 +12084,7 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
1207912084
bool ShouldInvert;
1208012085
changeVectorFPCCToAArch64CC(CC, CC1, CC2, ShouldInvert);
1208112086

12082-
bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath;
12087+
bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs();
1208312088
SDValue Cmp =
1208412089
EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG);
1208512090
if (!Cmp.getNode())

0 commit comments

Comments
 (0)