Skip to content

Commit d43270b

Browse files
c-rhodes authored and fhahn committed
[AArch64] Emit vector FP cmp when LE is used with fast-math
Reviewed By: paulwalker-arm Differential Revision: https://reviews.llvm.org/D130093 (cherry-picked from bf268a0)
1 parent 749f165 commit d43270b

File tree

3 files changed

+43
-319
lines changed

3 files changed

+43
-319
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11697,6 +11697,9 @@ SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
1169711697
EVT N00VT = N00.getValueType();
1169811698
SDLoc DL(N);
1169911699

11700+
// Propagate fast-math-flags.
11701+
SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
11702+
1170011703
// On some architectures (such as SSE/NEON/etc) the SETCC result type is
1170111704
// the same size as the compared operands. Try to optimize sext(setcc())
1170211705
// if this is the case.
@@ -12230,6 +12233,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
1223012233
return V;
1223112234

1223212235
if (N0.getOpcode() == ISD::SETCC) {
12236+
// Propagate fast-math-flags.
12237+
SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
12238+
1223312239
// Only do this before legalize for now.
1223412240
if (!LegalOperations && VT.isVector() &&
1223512241
N0.getValueType().getVectorElementType() == MVT::i1) {
@@ -12417,6 +12423,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
1241712423
}
1241812424

1241912425
if (N0.getOpcode() == ISD::SETCC) {
12426+
// Propagate fast-math-flags.
12427+
SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
12428+
1242012429
// For vectors:
1242112430
// aext(setcc) -> vsetcc
1242212431
// aext(setcc) -> truncate(vsetcc)

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12087,6 +12087,11 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
1208712087
if (IsZero)
1208812088
return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS);
1208912089
return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS);
12090+
case AArch64CC::LE:
12091+
if (!NoNans)
12092+
return SDValue();
12093+
// If we ignore NaNs then we can use the LS implementation.
12094+
LLVM_FALLTHROUGH;
1209012095
case AArch64CC::LS:
1209112096
if (IsZero)
1209212097
return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS);
@@ -12192,7 +12197,7 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
1219212197
bool ShouldInvert;
1219312198
changeVectorFPCCToAArch64CC(CC, CC1, CC2, ShouldInvert);
1219412199

12195-
bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath;
12200+
bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs();
1219612201
SDValue Cmp =
1219712202
EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG);
1219812203
if (!Cmp.getNode())

0 commit comments

Comments
 (0)