Skip to content

Commit 3abffed

Browse files
paulwalker-arm authored and IanWood1 committed
[LLVM][ISel][AArch64] Remove AArch64ISD::FCM##z nodes. (llvm#135817)
We can easily select compare-to-zero instructions without dedicated nodes. The test changes show opportunities that were previously missed because of the redundant complexity.
1 parent cf7b439 commit 3abffed

File tree

11 files changed

+93
-167
lines changed

11 files changed

+93
-167
lines changed

llvm/lib/CodeGen/GlobalISel/Utils.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1385,7 +1385,8 @@ bool llvm::isBuildVectorConstantSplat(const Register Reg,
13851385
const MachineRegisterInfo &MRI,
13861386
int64_t SplatValue, bool AllowUndef) {
13871387
if (auto SplatValAndReg = getAnyConstantSplat(Reg, MRI, AllowUndef))
1388-
return mi_match(SplatValAndReg->VReg, MRI, m_SpecificICst(SplatValue));
1388+
return SplatValAndReg->Value.getSExtValue() == SplatValue;
1389+
13891390
return false;
13901391
}
13911392

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 1 addition & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -2595,11 +2595,6 @@ unsigned AArch64TargetLowering::ComputeNumSignBitsForTargetNode(
25952595
case AArch64ISD::FCMEQ:
25962596
case AArch64ISD::FCMGE:
25972597
case AArch64ISD::FCMGT:
2598-
case AArch64ISD::FCMEQz:
2599-
case AArch64ISD::FCMGEz:
2600-
case AArch64ISD::FCMGTz:
2601-
case AArch64ISD::FCMLEz:
2602-
case AArch64ISD::FCMLTz:
26032598
// Compares return either 0 or all-ones
26042599
return VTBits;
26052600
case AArch64ISD::VASHR: {
@@ -2816,11 +2811,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
28162811
MAKE_CASE(AArch64ISD::FCMEQ)
28172812
MAKE_CASE(AArch64ISD::FCMGE)
28182813
MAKE_CASE(AArch64ISD::FCMGT)
2819-
MAKE_CASE(AArch64ISD::FCMEQz)
2820-
MAKE_CASE(AArch64ISD::FCMGEz)
2821-
MAKE_CASE(AArch64ISD::FCMGTz)
2822-
MAKE_CASE(AArch64ISD::FCMLEz)
2823-
MAKE_CASE(AArch64ISD::FCMLTz)
28242814
MAKE_CASE(AArch64ISD::SADDV)
28252815
MAKE_CASE(AArch64ISD::UADDV)
28262816
MAKE_CASE(AArch64ISD::UADDLV)
@@ -15829,58 +15819,33 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
1582915819
assert(VT.getSizeInBits() == SrcVT.getSizeInBits() &&
1583015820
"function only supposed to emit natural comparisons");
1583115821

15832-
APInt SplatValue;
15833-
APInt SplatUndef;
15834-
unsigned SplatBitSize = 0;
15835-
bool HasAnyUndefs;
15836-
15837-
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
15838-
bool IsCnst = BVN && BVN->isConstantSplat(SplatValue, SplatUndef,
15839-
SplatBitSize, HasAnyUndefs);
15840-
15841-
bool IsZero = IsCnst && SplatValue == 0;
15842-
1584315822
if (SrcVT.getVectorElementType().isFloatingPoint()) {
1584415823
switch (CC) {
1584515824
default:
1584615825
return SDValue();
1584715826
case AArch64CC::NE: {
15848-
SDValue Fcmeq;
15849-
if (IsZero)
15850-
Fcmeq = DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
15851-
else
15852-
Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
15827+
SDValue Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
1585315828
return DAG.getNOT(dl, Fcmeq, VT);
1585415829
}
1585515830
case AArch64CC::EQ:
15856-
if (IsZero)
15857-
return DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
1585815831
return DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
1585915832
case AArch64CC::GE:
15860-
if (IsZero)
15861-
return DAG.getNode(AArch64ISD::FCMGEz, dl, VT, LHS);
1586215833
return DAG.getNode(AArch64ISD::FCMGE, dl, VT, LHS, RHS);
1586315834
case AArch64CC::GT:
15864-
if (IsZero)
15865-
return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS);
1586615835
return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS);
1586715836
case AArch64CC::LE:
1586815837
if (!NoNans)
1586915838
return SDValue();
1587015839
// If we ignore NaNs then we can use to the LS implementation.
1587115840
[[fallthrough]];
1587215841
case AArch64CC::LS:
15873-
if (IsZero)
15874-
return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS);
1587515842
return DAG.getNode(AArch64ISD::FCMGE, dl, VT, RHS, LHS);
1587615843
case AArch64CC::LT:
1587715844
if (!NoNans)
1587815845
return SDValue();
1587915846
// If we ignore NaNs then we can use to the MI implementation.
1588015847
[[fallthrough]];
1588115848
case AArch64CC::MI:
15882-
if (IsZero)
15883-
return DAG.getNode(AArch64ISD::FCMLTz, dl, VT, LHS);
1588415849
return DAG.getNode(AArch64ISD::FCMGT, dl, VT, RHS, LHS);
1588515850
}
1588615851
}

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -245,13 +245,6 @@ enum NodeType : unsigned {
245245
FCMGE,
246246
FCMGT,
247247

248-
// Vector zero comparisons
249-
FCMEQz,
250-
FCMGEz,
251-
FCMGTz,
252-
FCMLEz,
253-
FCMLTz,
254-
255248
// Round wide FP to narrow FP with inexact results to odd.
256249
FCVTXN,
257250

llvm/lib/Target/AArch64/AArch64InstrFormats.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7136,7 +7136,7 @@ multiclass SIMDCmpTwoVector<bit U, bits<5> opc, string asm,
71367136

71377137
// FP Comparisons support only S and D element sizes (and H for v8.2a).
71387138
multiclass SIMDFPCmpTwoVector<bit U, bit S, bits<5> opc,
7139-
string asm, SDNode OpNode> {
7139+
string asm, SDPatternOperator OpNode> {
71407140

71417141
let mayRaiseFPException = 1, Uses = [FPCR] in {
71427142
let Predicates = [HasNEON, HasFullFP16] in {

llvm/lib/Target/AArch64/AArch64InstrGISel.td

Lines changed: 0 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -179,36 +179,6 @@ def G_FCMGT : AArch64GenericInstruction {
179179
let hasSideEffects = 0;
180180
}
181181

182-
def G_FCMEQZ : AArch64GenericInstruction {
183-
let OutOperandList = (outs type0:$dst);
184-
let InOperandList = (ins type0:$src);
185-
let hasSideEffects = 0;
186-
}
187-
188-
def G_FCMGEZ : AArch64GenericInstruction {
189-
let OutOperandList = (outs type0:$dst);
190-
let InOperandList = (ins type0:$src);
191-
let hasSideEffects = 0;
192-
}
193-
194-
def G_FCMGTZ : AArch64GenericInstruction {
195-
let OutOperandList = (outs type0:$dst);
196-
let InOperandList = (ins type0:$src);
197-
let hasSideEffects = 0;
198-
}
199-
200-
def G_FCMLEZ : AArch64GenericInstruction {
201-
let OutOperandList = (outs type0:$dst);
202-
let InOperandList = (ins type0:$src);
203-
let hasSideEffects = 0;
204-
}
205-
206-
def G_FCMLTZ : AArch64GenericInstruction {
207-
let OutOperandList = (outs type0:$dst);
208-
let InOperandList = (ins type0:$src);
209-
let hasSideEffects = 0;
210-
}
211-
212182
def G_AARCH64_PREFETCH : AArch64GenericInstruction {
213183
let OutOperandList = (outs);
214184
let InOperandList = (ins type0:$imm, ptype0:$src1);
@@ -295,12 +265,6 @@ def : GINodeEquiv<G_FCMEQ, AArch64fcmeq>;
295265
def : GINodeEquiv<G_FCMGE, AArch64fcmge>;
296266
def : GINodeEquiv<G_FCMGT, AArch64fcmgt>;
297267

298-
def : GINodeEquiv<G_FCMEQZ, AArch64fcmeqz>;
299-
def : GINodeEquiv<G_FCMGEZ, AArch64fcmgez>;
300-
def : GINodeEquiv<G_FCMGTZ, AArch64fcmgtz>;
301-
def : GINodeEquiv<G_FCMLEZ, AArch64fcmlez>;
302-
def : GINodeEquiv<G_FCMLTZ, AArch64fcmltz>;
303-
304268
def : GINodeEquiv<G_BSP, AArch64bsp>;
305269

306270
def : GINodeEquiv<G_UMULL, AArch64umull>;

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -882,11 +882,20 @@ def AArch64cmltz : PatFrag<(ops node:$lhs),
882882
def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS),
883883
(vnot (AArch64cmeqz (and node:$LHS, node:$RHS)))>;
884884

885-
def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>;
886-
def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>;
887-
def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>;
888-
def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>;
889-
def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>;
885+
def AArch64fcmeqz : PatFrag<(ops node:$lhs),
886+
(AArch64fcmeq node:$lhs, immAllZerosV)>;
887+
888+
def AArch64fcmgez : PatFrag<(ops node:$lhs),
889+
(AArch64fcmge node:$lhs, immAllZerosV)>;
890+
891+
def AArch64fcmgtz : PatFrag<(ops node:$lhs),
892+
(AArch64fcmgt node:$lhs, immAllZerosV)>;
893+
894+
def AArch64fcmlez : PatFrag<(ops node:$lhs),
895+
(AArch64fcmge immAllZerosV, node:$lhs)>;
896+
897+
def AArch64fcmltz : PatFrag<(ops node:$lhs),
898+
(AArch64fcmgt immAllZerosV, node:$lhs)>;
890899

891900
def AArch64fcvtxn_n: SDNode<"AArch64ISD::FCVTXN", SDTFPRoundOp>;
892901
def AArch64fcvtxnsdr: PatFrags<(ops node:$Rn),

llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp

Lines changed: 22 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -808,16 +808,14 @@ void applyScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
808808

809809
bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
810810
assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
811-
auto Splat = getAArch64VectorSplat(MI, MRI);
812-
if (!Splat)
813-
return false;
814-
if (Splat->isReg())
815-
return true;
811+
816812
// Later, during selection, we'll try to match imported patterns using
817813
// immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR. Don't lower
818814
// G_BUILD_VECTORs which could match those patterns.
819-
int64_t Cst = Splat->getCst();
820-
return (Cst != 0 && Cst != -1);
815+
if (isBuildVectorAllZeros(MI, MRI) || isBuildVectorAllOnes(MI, MRI))
816+
return false;
817+
818+
return getAArch64VectorSplat(MI, MRI).has_value();
821819
}
822820

823821
void applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI,
@@ -933,58 +931,40 @@ void applySwapICmpOperands(MachineInstr &MI, GISelChangeObserver &Observer) {
933931

934932
/// \returns a function which builds a vector floating point compare instruction
935933
/// for a condition code \p CC.
936-
/// \param [in] IsZero - True if the comparison is against 0.
937934
/// \param [in] NoNans - True if the target has NoNansFPMath.
938935
std::function<Register(MachineIRBuilder &)>
939-
getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool IsZero,
940-
bool NoNans, MachineRegisterInfo &MRI) {
936+
getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool NoNans,
937+
MachineRegisterInfo &MRI) {
941938
LLT DstTy = MRI.getType(LHS);
942939
assert(DstTy.isVector() && "Expected vector types only?");
943940
assert(DstTy == MRI.getType(RHS) && "Src and Dst types must match!");
944941
switch (CC) {
945942
default:
946943
llvm_unreachable("Unexpected condition code!");
947944
case AArch64CC::NE:
948-
return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
949-
auto FCmp = IsZero
950-
? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS})
951-
: MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
945+
return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
946+
auto FCmp = MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
952947
return MIB.buildNot(DstTy, FCmp).getReg(0);
953948
};
954949
case AArch64CC::EQ:
955-
return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
956-
return IsZero
957-
? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS}).getReg(0)
958-
: MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS})
959-
.getReg(0);
950+
return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
951+
return MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS}).getReg(0);
960952
};
961953
case AArch64CC::GE:
962-
return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
963-
return IsZero
964-
? MIB.buildInstr(AArch64::G_FCMGEZ, {DstTy}, {LHS}).getReg(0)
965-
: MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {LHS, RHS})
966-
.getReg(0);
954+
return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
955+
return MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {LHS, RHS}).getReg(0);
967956
};
968957
case AArch64CC::GT:
969-
return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
970-
return IsZero
971-
? MIB.buildInstr(AArch64::G_FCMGTZ, {DstTy}, {LHS}).getReg(0)
972-
: MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {LHS, RHS})
973-
.getReg(0);
958+
return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
959+
return MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {LHS, RHS}).getReg(0);
974960
};
975961
case AArch64CC::LS:
976-
return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
977-
return IsZero
978-
? MIB.buildInstr(AArch64::G_FCMLEZ, {DstTy}, {LHS}).getReg(0)
979-
: MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {RHS, LHS})
980-
.getReg(0);
962+
return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
963+
return MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {RHS, LHS}).getReg(0);
981964
};
982965
case AArch64CC::MI:
983-
return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
984-
return IsZero
985-
? MIB.buildInstr(AArch64::G_FCMLTZ, {DstTy}, {LHS}).getReg(0)
986-
: MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {RHS, LHS})
987-
.getReg(0);
966+
return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
967+
return MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {RHS, LHS}).getReg(0);
988968
};
989969
}
990970
}
@@ -1024,23 +1004,17 @@ void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
10241004

10251005
LLT DstTy = MRI.getType(Dst);
10261006

1027-
auto Splat = getAArch64VectorSplat(*MRI.getVRegDef(RHS), MRI);
1028-
1029-
// Compares against 0 have special target-specific pseudos.
1030-
bool IsZero = Splat && Splat->isCst() && Splat->getCst() == 0;
1031-
10321007
bool Invert = false;
10331008
AArch64CC::CondCode CC, CC2 = AArch64CC::AL;
10341009
if ((Pred == CmpInst::Predicate::FCMP_ORD ||
10351010
Pred == CmpInst::Predicate::FCMP_UNO) &&
1036-
IsZero) {
1011+
isBuildVectorAllZeros(*MRI.getVRegDef(RHS), MRI)) {
10371012
// The special case "fcmp ord %a, 0" is the canonical check that LHS isn't
10381013
// NaN, so equivalent to a == a and doesn't need the two comparisons an
10391014
// "ord" normally would.
10401015
// Similarly, "fcmp uno %a, 0" is the canonical check that LHS is NaN and is
10411016
// thus equivalent to a != a.
10421017
RHS = LHS;
1043-
IsZero = false;
10441018
CC = Pred == CmpInst::Predicate::FCMP_ORD ? AArch64CC::EQ : AArch64CC::NE;
10451019
} else
10461020
changeVectorFCMPPredToAArch64CC(Pred, CC, CC2, Invert);
@@ -1051,12 +1025,12 @@ void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
10511025
const bool NoNans =
10521026
ST.getTargetLowering()->getTargetMachine().Options.NoNaNsFPMath;
10531027

1054-
auto Cmp = getVectorFCMP(CC, LHS, RHS, IsZero, NoNans, MRI);
1028+
auto Cmp = getVectorFCMP(CC, LHS, RHS, NoNans, MRI);
10551029
Register CmpRes;
10561030
if (CC2 == AArch64CC::AL)
10571031
CmpRes = Cmp(MIB);
10581032
else {
1059-
auto Cmp2 = getVectorFCMP(CC2, LHS, RHS, IsZero, NoNans, MRI);
1033+
auto Cmp2 = getVectorFCMP(CC2, LHS, RHS, NoNans, MRI);
10601034
auto Cmp2Dst = Cmp2(MIB);
10611035
auto Cmp1Dst = Cmp(MIB);
10621036
CmpRes = MIB.buildOr(DstTy, Cmp1Dst, Cmp2Dst).getReg(0);

0 commit comments

Comments
 (0)