Skip to content

Commit b296e09

Browse files
[LLVM][ISel][AArch64] Remove AArch64ISD::FCM##z nodes.
We can easily select compare-to-zero instructions without dedicated nodes. The test changes show opportunities that were previously missed because of the redundant complexity. The global-isel changes are due to isBuildVectorAllZeros not identifying all-zero floating-point vectors. Despite the use of getAnyConstantSplat, which does work, its result is wrapped inside m_SpecificICst, which pushes us down an integer-only path. I am new to global-isel, so is it safe to assume the result of getAnyConstantSplat can be tested directly?
1 parent 637f352 commit b296e09

File tree

11 files changed

+93
-183
lines changed

11 files changed

+93
-183
lines changed

llvm/lib/CodeGen/GlobalISel/Utils.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1385,7 +1385,8 @@ bool llvm::isBuildVectorConstantSplat(const Register Reg,
13851385
const MachineRegisterInfo &MRI,
13861386
int64_t SplatValue, bool AllowUndef) {
13871387
if (auto SplatValAndReg = getAnyConstantSplat(Reg, MRI, AllowUndef))
1388-
return mi_match(SplatValAndReg->VReg, MRI, m_SpecificICst(SplatValue));
1388+
return SplatValAndReg->Value.getSExtValue() == SplatValue;
1389+
13891390
return false;
13901391
}
13911392

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 1 addition & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -2592,11 +2592,6 @@ unsigned AArch64TargetLowering::ComputeNumSignBitsForTargetNode(
25922592
case AArch64ISD::FCMEQ:
25932593
case AArch64ISD::FCMGE:
25942594
case AArch64ISD::FCMGT:
2595-
case AArch64ISD::FCMEQz:
2596-
case AArch64ISD::FCMGEz:
2597-
case AArch64ISD::FCMGTz:
2598-
case AArch64ISD::FCMLEz:
2599-
case AArch64ISD::FCMLTz:
26002595
// Compares return either 0 or all-ones
26012596
return VTBits;
26022597
case AArch64ISD::VASHR: {
@@ -2813,11 +2808,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
28132808
MAKE_CASE(AArch64ISD::FCMEQ)
28142809
MAKE_CASE(AArch64ISD::FCMGE)
28152810
MAKE_CASE(AArch64ISD::FCMGT)
2816-
MAKE_CASE(AArch64ISD::FCMEQz)
2817-
MAKE_CASE(AArch64ISD::FCMGEz)
2818-
MAKE_CASE(AArch64ISD::FCMGTz)
2819-
MAKE_CASE(AArch64ISD::FCMLEz)
2820-
MAKE_CASE(AArch64ISD::FCMLTz)
28212811
MAKE_CASE(AArch64ISD::SADDV)
28222812
MAKE_CASE(AArch64ISD::UADDV)
28232813
MAKE_CASE(AArch64ISD::UADDLV)
@@ -15821,58 +15811,33 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
1582115811
assert(VT.getSizeInBits() == SrcVT.getSizeInBits() &&
1582215812
"function only supposed to emit natural comparisons");
1582315813

15824-
APInt SplatValue;
15825-
APInt SplatUndef;
15826-
unsigned SplatBitSize = 0;
15827-
bool HasAnyUndefs;
15828-
15829-
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
15830-
bool IsCnst = BVN && BVN->isConstantSplat(SplatValue, SplatUndef,
15831-
SplatBitSize, HasAnyUndefs);
15832-
15833-
bool IsZero = IsCnst && SplatValue == 0;
15834-
1583515814
if (SrcVT.getVectorElementType().isFloatingPoint()) {
1583615815
switch (CC) {
1583715816
default:
1583815817
return SDValue();
1583915818
case AArch64CC::NE: {
15840-
SDValue Fcmeq;
15841-
if (IsZero)
15842-
Fcmeq = DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
15843-
else
15844-
Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
15819+
SDValue Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
1584515820
return DAG.getNOT(dl, Fcmeq, VT);
1584615821
}
1584715822
case AArch64CC::EQ:
15848-
if (IsZero)
15849-
return DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
1585015823
return DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
1585115824
case AArch64CC::GE:
15852-
if (IsZero)
15853-
return DAG.getNode(AArch64ISD::FCMGEz, dl, VT, LHS);
1585415825
return DAG.getNode(AArch64ISD::FCMGE, dl, VT, LHS, RHS);
1585515826
case AArch64CC::GT:
15856-
if (IsZero)
15857-
return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS);
1585815827
return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS);
1585915828
case AArch64CC::LE:
1586015829
if (!NoNans)
1586115830
return SDValue();
1586215831
// If we ignore NaNs then we can use to the LS implementation.
1586315832
[[fallthrough]];
1586415833
case AArch64CC::LS:
15865-
if (IsZero)
15866-
return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS);
1586715834
return DAG.getNode(AArch64ISD::FCMGE, dl, VT, RHS, LHS);
1586815835
case AArch64CC::LT:
1586915836
if (!NoNans)
1587015837
return SDValue();
1587115838
// If we ignore NaNs then we can use to the MI implementation.
1587215839
[[fallthrough]];
1587315840
case AArch64CC::MI:
15874-
if (IsZero)
15875-
return DAG.getNode(AArch64ISD::FCMLTz, dl, VT, LHS);
1587615841
return DAG.getNode(AArch64ISD::FCMGT, dl, VT, RHS, LHS);
1587715842
}
1587815843
}

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -245,13 +245,6 @@ enum NodeType : unsigned {
245245
FCMGE,
246246
FCMGT,
247247

248-
// Vector zero comparisons
249-
FCMEQz,
250-
FCMGEz,
251-
FCMGTz,
252-
FCMLEz,
253-
FCMLTz,
254-
255248
// Round wide FP to narrow FP with inexact results to odd.
256249
FCVTXN,
257250

llvm/lib/Target/AArch64/AArch64InstrFormats.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7136,7 +7136,7 @@ multiclass SIMDCmpTwoVector<bit U, bits<5> opc, string asm,
71367136

71377137
// FP Comparisons support only S and D element sizes (and H for v8.2a).
71387138
multiclass SIMDFPCmpTwoVector<bit U, bit S, bits<5> opc,
7139-
string asm, SDNode OpNode> {
7139+
string asm, SDPatternOperator OpNode> {
71407140

71417141
let mayRaiseFPException = 1, Uses = [FPCR] in {
71427142
let Predicates = [HasNEON, HasFullFP16] in {

llvm/lib/Target/AArch64/AArch64InstrGISel.td

Lines changed: 0 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -179,36 +179,6 @@ def G_FCMGT : AArch64GenericInstruction {
179179
let hasSideEffects = 0;
180180
}
181181

182-
def G_FCMEQZ : AArch64GenericInstruction {
183-
let OutOperandList = (outs type0:$dst);
184-
let InOperandList = (ins type0:$src);
185-
let hasSideEffects = 0;
186-
}
187-
188-
def G_FCMGEZ : AArch64GenericInstruction {
189-
let OutOperandList = (outs type0:$dst);
190-
let InOperandList = (ins type0:$src);
191-
let hasSideEffects = 0;
192-
}
193-
194-
def G_FCMGTZ : AArch64GenericInstruction {
195-
let OutOperandList = (outs type0:$dst);
196-
let InOperandList = (ins type0:$src);
197-
let hasSideEffects = 0;
198-
}
199-
200-
def G_FCMLEZ : AArch64GenericInstruction {
201-
let OutOperandList = (outs type0:$dst);
202-
let InOperandList = (ins type0:$src);
203-
let hasSideEffects = 0;
204-
}
205-
206-
def G_FCMLTZ : AArch64GenericInstruction {
207-
let OutOperandList = (outs type0:$dst);
208-
let InOperandList = (ins type0:$src);
209-
let hasSideEffects = 0;
210-
}
211-
212182
def G_AARCH64_PREFETCH : AArch64GenericInstruction {
213183
let OutOperandList = (outs);
214184
let InOperandList = (ins type0:$imm, ptype0:$src1);
@@ -295,12 +265,6 @@ def : GINodeEquiv<G_FCMEQ, AArch64fcmeq>;
295265
def : GINodeEquiv<G_FCMGE, AArch64fcmge>;
296266
def : GINodeEquiv<G_FCMGT, AArch64fcmgt>;
297267

298-
def : GINodeEquiv<G_FCMEQZ, AArch64fcmeqz>;
299-
def : GINodeEquiv<G_FCMGEZ, AArch64fcmgez>;
300-
def : GINodeEquiv<G_FCMGTZ, AArch64fcmgtz>;
301-
def : GINodeEquiv<G_FCMLEZ, AArch64fcmlez>;
302-
def : GINodeEquiv<G_FCMLTZ, AArch64fcmltz>;
303-
304268
def : GINodeEquiv<G_BSP, AArch64bsp>;
305269

306270
def : GINodeEquiv<G_UMULL, AArch64umull>;

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -876,11 +876,20 @@ def AArch64cmltz : PatFrag<(ops node:$lhs),
876876
def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS),
877877
(vnot (AArch64cmeqz (and node:$LHS, node:$RHS)))>;
878878

879-
def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>;
880-
def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>;
881-
def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>;
882-
def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>;
883-
def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>;
879+
def AArch64fcmeqz : PatFrag<(ops node:$lhs),
880+
(AArch64fcmeq node:$lhs, immAllZerosV)>;
881+
882+
def AArch64fcmgez : PatFrag<(ops node:$lhs),
883+
(AArch64fcmge node:$lhs, immAllZerosV)>;
884+
885+
def AArch64fcmgtz : PatFrag<(ops node:$lhs),
886+
(AArch64fcmgt node:$lhs, immAllZerosV)>;
887+
888+
def AArch64fcmlez : PatFrag<(ops node:$lhs),
889+
(AArch64fcmge immAllZerosV, node:$lhs)>;
890+
891+
def AArch64fcmltz : PatFrag<(ops node:$lhs),
892+
(AArch64fcmgt immAllZerosV, node:$lhs)>;
884893

885894
def AArch64fcvtxn_n: SDNode<"AArch64ISD::FCVTXN", SDTFPRoundOp>;
886895
def AArch64fcvtxnsdr: PatFrags<(ops node:$Rn),

llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp

Lines changed: 22 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -808,16 +808,14 @@ void applyScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
808808

809809
bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
810810
assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
811-
auto Splat = getAArch64VectorSplat(MI, MRI);
812-
if (!Splat)
813-
return false;
814-
if (Splat->isReg())
815-
return true;
811+
816812
// Later, during selection, we'll try to match imported patterns using
817813
// immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR. Don't lower
818814
// G_BUILD_VECTORs which could match those patterns.
819-
int64_t Cst = Splat->getCst();
820-
return (Cst != 0 && Cst != -1);
815+
if (isBuildVectorAllZeros(MI, MRI) || isBuildVectorAllOnes(MI, MRI))
816+
return false;
817+
818+
return getAArch64VectorSplat(MI, MRI).has_value();
821819
}
822820

823821
void applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI,
@@ -936,55 +934,38 @@ void applySwapICmpOperands(MachineInstr &MI, GISelChangeObserver &Observer) {
936934
/// \param [in] IsZero - True if the comparison is against 0.
937935
/// \param [in] NoNans - True if the target has NoNansFPMath.
938936
std::function<Register(MachineIRBuilder &)>
939-
getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool IsZero,
940-
bool NoNans, MachineRegisterInfo &MRI) {
937+
getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool NoNans,
938+
MachineRegisterInfo &MRI) {
941939
LLT DstTy = MRI.getType(LHS);
942940
assert(DstTy.isVector() && "Expected vector types only?");
943941
assert(DstTy == MRI.getType(RHS) && "Src and Dst types must match!");
944942
switch (CC) {
945943
default:
946944
llvm_unreachable("Unexpected condition code!");
947945
case AArch64CC::NE:
948-
return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
949-
auto FCmp = IsZero
950-
? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS})
951-
: MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
946+
return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
947+
auto FCmp = MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
952948
return MIB.buildNot(DstTy, FCmp).getReg(0);
953949
};
954950
case AArch64CC::EQ:
955-
return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
956-
return IsZero
957-
? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS}).getReg(0)
958-
: MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS})
959-
.getReg(0);
951+
return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
952+
return MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS}).getReg(0);
960953
};
961954
case AArch64CC::GE:
962-
return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
963-
return IsZero
964-
? MIB.buildInstr(AArch64::G_FCMGEZ, {DstTy}, {LHS}).getReg(0)
965-
: MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {LHS, RHS})
966-
.getReg(0);
955+
return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
956+
return MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {LHS, RHS}).getReg(0);
967957
};
968958
case AArch64CC::GT:
969-
return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
970-
return IsZero
971-
? MIB.buildInstr(AArch64::G_FCMGTZ, {DstTy}, {LHS}).getReg(0)
972-
: MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {LHS, RHS})
973-
.getReg(0);
959+
return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
960+
return MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {LHS, RHS}).getReg(0);
974961
};
975962
case AArch64CC::LS:
976-
return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
977-
return IsZero
978-
? MIB.buildInstr(AArch64::G_FCMLEZ, {DstTy}, {LHS}).getReg(0)
979-
: MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {RHS, LHS})
980-
.getReg(0);
963+
return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
964+
return MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {RHS, LHS}).getReg(0);
981965
};
982966
case AArch64CC::MI:
983-
return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
984-
return IsZero
985-
? MIB.buildInstr(AArch64::G_FCMLTZ, {DstTy}, {LHS}).getReg(0)
986-
: MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {RHS, LHS})
987-
.getReg(0);
967+
return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
968+
return MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {RHS, LHS}).getReg(0);
988969
};
989970
}
990971
}
@@ -1024,23 +1005,17 @@ void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
10241005

10251006
LLT DstTy = MRI.getType(Dst);
10261007

1027-
auto Splat = getAArch64VectorSplat(*MRI.getVRegDef(RHS), MRI);
1028-
1029-
// Compares against 0 have special target-specific pseudos.
1030-
bool IsZero = Splat && Splat->isCst() && Splat->getCst() == 0;
1031-
10321008
bool Invert = false;
10331009
AArch64CC::CondCode CC, CC2 = AArch64CC::AL;
10341010
if ((Pred == CmpInst::Predicate::FCMP_ORD ||
10351011
Pred == CmpInst::Predicate::FCMP_UNO) &&
1036-
IsZero) {
1012+
isBuildVectorAllZeros(*MRI.getVRegDef(RHS), MRI)) {
10371013
// The special case "fcmp ord %a, 0" is the canonical check that LHS isn't
10381014
// NaN, so equivalent to a == a and doesn't need the two comparisons an
10391015
// "ord" normally would.
10401016
// Similarly, "fcmp uno %a, 0" is the canonical check that LHS is NaN and is
10411017
// thus equivalent to a != a.
10421018
RHS = LHS;
1043-
IsZero = false;
10441019
CC = Pred == CmpInst::Predicate::FCMP_ORD ? AArch64CC::EQ : AArch64CC::NE;
10451020
} else
10461021
changeVectorFCMPPredToAArch64CC(Pred, CC, CC2, Invert);
@@ -1051,12 +1026,12 @@ void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
10511026
const bool NoNans =
10521027
ST.getTargetLowering()->getTargetMachine().Options.NoNaNsFPMath;
10531028

1054-
auto Cmp = getVectorFCMP(CC, LHS, RHS, IsZero, NoNans, MRI);
1029+
auto Cmp = getVectorFCMP(CC, LHS, RHS, NoNans, MRI);
10551030
Register CmpRes;
10561031
if (CC2 == AArch64CC::AL)
10571032
CmpRes = Cmp(MIB);
10581033
else {
1059-
auto Cmp2 = getVectorFCMP(CC2, LHS, RHS, IsZero, NoNans, MRI);
1034+
auto Cmp2 = getVectorFCMP(CC2, LHS, RHS, NoNans, MRI);
10601035
auto Cmp2Dst = Cmp2(MIB);
10611036
auto Cmp1Dst = Cmp(MIB);
10621037
CmpRes = MIB.buildOr(DstTy, Cmp1Dst, Cmp2Dst).getReg(0);

0 commit comments

Comments
 (0)