Skip to content

Commit d7f3c31

Browse files
Reapply "[LLVM][ISel][AArch64] Remove AArch64ISD::FCM##z nodes. (llvm#135817)"
This reverts commit 427b644. The original patch has been updated to include a fix to ensure AArch64InstructionSelector::emitConstantVector supports all the cases where isBuildVectorAllOnes returns true.
1 parent 8b2d269 commit d7f3c31

13 files changed

+185
-194
lines changed

llvm/lib/CodeGen/GlobalISel/Utils.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1385,7 +1385,8 @@ bool llvm::isBuildVectorConstantSplat(const Register Reg,
13851385
const MachineRegisterInfo &MRI,
13861386
int64_t SplatValue, bool AllowUndef) {
13871387
if (auto SplatValAndReg = getAnyConstantSplat(Reg, MRI, AllowUndef))
1388-
return mi_match(SplatValAndReg->VReg, MRI, m_SpecificICst(SplatValue));
1388+
return SplatValAndReg->Value.getSExtValue() == SplatValue;
1389+
13891390
return false;
13901391
}
13911392

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 1 addition & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -2602,11 +2602,6 @@ unsigned AArch64TargetLowering::ComputeNumSignBitsForTargetNode(
26022602
case AArch64ISD::FCMEQ:
26032603
case AArch64ISD::FCMGE:
26042604
case AArch64ISD::FCMGT:
2605-
case AArch64ISD::FCMEQz:
2606-
case AArch64ISD::FCMGEz:
2607-
case AArch64ISD::FCMGTz:
2608-
case AArch64ISD::FCMLEz:
2609-
case AArch64ISD::FCMLTz:
26102605
// Compares return either 0 or all-ones
26112606
return VTBits;
26122607
case AArch64ISD::VASHR: {
@@ -2823,11 +2818,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
28232818
MAKE_CASE(AArch64ISD::FCMEQ)
28242819
MAKE_CASE(AArch64ISD::FCMGE)
28252820
MAKE_CASE(AArch64ISD::FCMGT)
2826-
MAKE_CASE(AArch64ISD::FCMEQz)
2827-
MAKE_CASE(AArch64ISD::FCMGEz)
2828-
MAKE_CASE(AArch64ISD::FCMGTz)
2829-
MAKE_CASE(AArch64ISD::FCMLEz)
2830-
MAKE_CASE(AArch64ISD::FCMLTz)
28312821
MAKE_CASE(AArch64ISD::SADDV)
28322822
MAKE_CASE(AArch64ISD::UADDV)
28332823
MAKE_CASE(AArch64ISD::UADDLV)
@@ -15840,58 +15830,33 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
1584015830
assert(VT.getSizeInBits() == SrcVT.getSizeInBits() &&
1584115831
"function only supposed to emit natural comparisons");
1584215832

15843-
APInt SplatValue;
15844-
APInt SplatUndef;
15845-
unsigned SplatBitSize = 0;
15846-
bool HasAnyUndefs;
15847-
15848-
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
15849-
bool IsCnst = BVN && BVN->isConstantSplat(SplatValue, SplatUndef,
15850-
SplatBitSize, HasAnyUndefs);
15851-
15852-
bool IsZero = IsCnst && SplatValue == 0;
15853-
1585415833
if (SrcVT.getVectorElementType().isFloatingPoint()) {
1585515834
switch (CC) {
1585615835
default:
1585715836
return SDValue();
1585815837
case AArch64CC::NE: {
15859-
SDValue Fcmeq;
15860-
if (IsZero)
15861-
Fcmeq = DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
15862-
else
15863-
Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
15838+
SDValue Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
1586415839
return DAG.getNOT(dl, Fcmeq, VT);
1586515840
}
1586615841
case AArch64CC::EQ:
15867-
if (IsZero)
15868-
return DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
1586915842
return DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
1587015843
case AArch64CC::GE:
15871-
if (IsZero)
15872-
return DAG.getNode(AArch64ISD::FCMGEz, dl, VT, LHS);
1587315844
return DAG.getNode(AArch64ISD::FCMGE, dl, VT, LHS, RHS);
1587415845
case AArch64CC::GT:
15875-
if (IsZero)
15876-
return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS);
1587715846
return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS);
1587815847
case AArch64CC::LE:
1587915848
if (!NoNans)
1588015849
return SDValue();
1588115850
// If we ignore NaNs then we can use to the LS implementation.
1588215851
[[fallthrough]];
1588315852
case AArch64CC::LS:
15884-
if (IsZero)
15885-
return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS);
1588615853
return DAG.getNode(AArch64ISD::FCMGE, dl, VT, RHS, LHS);
1588715854
case AArch64CC::LT:
1588815855
if (!NoNans)
1588915856
return SDValue();
1589015857
// If we ignore NaNs then we can use to the MI implementation.
1589115858
[[fallthrough]];
1589215859
case AArch64CC::MI:
15893-
if (IsZero)
15894-
return DAG.getNode(AArch64ISD::FCMLTz, dl, VT, LHS);
1589515860
return DAG.getNode(AArch64ISD::FCMGT, dl, VT, RHS, LHS);
1589615861
}
1589715862
}

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -245,13 +245,6 @@ enum NodeType : unsigned {
245245
FCMGE,
246246
FCMGT,
247247

248-
// Vector zero comparisons
249-
FCMEQz,
250-
FCMGEz,
251-
FCMGTz,
252-
FCMLEz,
253-
FCMLTz,
254-
255248
// Round wide FP to narrow FP with inexact results to odd.
256249
FCVTXN,
257250

llvm/lib/Target/AArch64/AArch64InstrFormats.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7136,7 +7136,7 @@ multiclass SIMDCmpTwoVector<bit U, bits<5> opc, string asm,
71367136

71377137
// FP Comparisons support only S and D element sizes (and H for v8.2a).
71387138
multiclass SIMDFPCmpTwoVector<bit U, bit S, bits<5> opc,
7139-
string asm, SDNode OpNode> {
7139+
string asm, SDPatternOperator OpNode> {
71407140

71417141
let mayRaiseFPException = 1, Uses = [FPCR] in {
71427142
let Predicates = [HasNEON, HasFullFP16] in {

llvm/lib/Target/AArch64/AArch64InstrGISel.td

Lines changed: 0 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -179,36 +179,6 @@ def G_FCMGT : AArch64GenericInstruction {
179179
let hasSideEffects = 0;
180180
}
181181

182-
def G_FCMEQZ : AArch64GenericInstruction {
183-
let OutOperandList = (outs type0:$dst);
184-
let InOperandList = (ins type0:$src);
185-
let hasSideEffects = 0;
186-
}
187-
188-
def G_FCMGEZ : AArch64GenericInstruction {
189-
let OutOperandList = (outs type0:$dst);
190-
let InOperandList = (ins type0:$src);
191-
let hasSideEffects = 0;
192-
}
193-
194-
def G_FCMGTZ : AArch64GenericInstruction {
195-
let OutOperandList = (outs type0:$dst);
196-
let InOperandList = (ins type0:$src);
197-
let hasSideEffects = 0;
198-
}
199-
200-
def G_FCMLEZ : AArch64GenericInstruction {
201-
let OutOperandList = (outs type0:$dst);
202-
let InOperandList = (ins type0:$src);
203-
let hasSideEffects = 0;
204-
}
205-
206-
def G_FCMLTZ : AArch64GenericInstruction {
207-
let OutOperandList = (outs type0:$dst);
208-
let InOperandList = (ins type0:$src);
209-
let hasSideEffects = 0;
210-
}
211-
212182
def G_AARCH64_PREFETCH : AArch64GenericInstruction {
213183
let OutOperandList = (outs);
214184
let InOperandList = (ins type0:$imm, ptype0:$src1);
@@ -295,12 +265,6 @@ def : GINodeEquiv<G_FCMEQ, AArch64fcmeq>;
295265
def : GINodeEquiv<G_FCMGE, AArch64fcmge>;
296266
def : GINodeEquiv<G_FCMGT, AArch64fcmgt>;
297267

298-
def : GINodeEquiv<G_FCMEQZ, AArch64fcmeqz>;
299-
def : GINodeEquiv<G_FCMGEZ, AArch64fcmgez>;
300-
def : GINodeEquiv<G_FCMGTZ, AArch64fcmgtz>;
301-
def : GINodeEquiv<G_FCMLEZ, AArch64fcmlez>;
302-
def : GINodeEquiv<G_FCMLTZ, AArch64fcmltz>;
303-
304268
def : GINodeEquiv<G_BSP, AArch64bsp>;
305269

306270
def : GINodeEquiv<G_UMULL, AArch64umull>;

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -882,11 +882,20 @@ def AArch64cmltz : PatFrag<(ops node:$lhs),
882882
def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS),
883883
(vnot (AArch64cmeqz (and node:$LHS, node:$RHS)))>;
884884

885-
def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>;
886-
def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>;
887-
def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>;
888-
def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>;
889-
def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>;
885+
def AArch64fcmeqz : PatFrag<(ops node:$lhs),
886+
(AArch64fcmeq node:$lhs, immAllZerosV)>;
887+
888+
def AArch64fcmgez : PatFrag<(ops node:$lhs),
889+
(AArch64fcmge node:$lhs, immAllZerosV)>;
890+
891+
def AArch64fcmgtz : PatFrag<(ops node:$lhs),
892+
(AArch64fcmgt node:$lhs, immAllZerosV)>;
893+
894+
def AArch64fcmlez : PatFrag<(ops node:$lhs),
895+
(AArch64fcmge immAllZerosV, node:$lhs)>;
896+
897+
def AArch64fcmltz : PatFrag<(ops node:$lhs),
898+
(AArch64fcmgt immAllZerosV, node:$lhs)>;
890899

891900
def AArch64fcvtxn_n: SDNode<"AArch64ISD::FCVTXN", SDTFPRoundOp>;
892901
def AArch64fcvtxnsdr: PatFrags<(ops node:$Rn),

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5743,9 +5743,13 @@ AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
57435743
}
57445744
}
57455745

5746-
if (CV->getSplatValue()) {
5746+
if (Constant *SplatValue = CV->getSplatValue()) {
5747+
APInt SplatValueAsInt =
5748+
isa<ConstantFP>(SplatValue)
5749+
? cast<ConstantFP>(SplatValue)->getValueAPF().bitcastToAPInt()
5750+
: SplatValue->getUniqueInteger();
57475751
APInt DefBits = APInt::getSplat(
5748-
DstSize, CV->getUniqueInteger().trunc(DstTy.getScalarSizeInBits()));
5752+
DstSize, SplatValueAsInt.trunc(DstTy.getScalarSizeInBits()));
57495753
auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
57505754
MachineInstr *NewOp;
57515755
bool Inv = false;

llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp

Lines changed: 22 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -808,16 +808,14 @@ void applyScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
808808

809809
bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
810810
assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
811-
auto Splat = getAArch64VectorSplat(MI, MRI);
812-
if (!Splat)
813-
return false;
814-
if (Splat->isReg())
815-
return true;
811+
816812
// Later, during selection, we'll try to match imported patterns using
817813
// immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR. Don't lower
818814
// G_BUILD_VECTORs which could match those patterns.
819-
int64_t Cst = Splat->getCst();
820-
return (Cst != 0 && Cst != -1);
815+
if (isBuildVectorAllZeros(MI, MRI) || isBuildVectorAllOnes(MI, MRI))
816+
return false;
817+
818+
return getAArch64VectorSplat(MI, MRI).has_value();
821819
}
822820

823821
void applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI,
@@ -933,58 +931,40 @@ void applySwapICmpOperands(MachineInstr &MI, GISelChangeObserver &Observer) {
933931

934932
/// \returns a function which builds a vector floating point compare instruction
935933
/// for a condition code \p CC.
936-
/// \param [in] IsZero - True if the comparison is against 0.
937934
/// \param [in] NoNans - True if the target has NoNansFPMath.
938935
std::function<Register(MachineIRBuilder &)>
939-
getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool IsZero,
940-
bool NoNans, MachineRegisterInfo &MRI) {
936+
getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool NoNans,
937+
MachineRegisterInfo &MRI) {
941938
LLT DstTy = MRI.getType(LHS);
942939
assert(DstTy.isVector() && "Expected vector types only?");
943940
assert(DstTy == MRI.getType(RHS) && "Src and Dst types must match!");
944941
switch (CC) {
945942
default:
946943
llvm_unreachable("Unexpected condition code!");
947944
case AArch64CC::NE:
948-
return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
949-
auto FCmp = IsZero
950-
? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS})
951-
: MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
945+
return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
946+
auto FCmp = MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
952947
return MIB.buildNot(DstTy, FCmp).getReg(0);
953948
};
954949
case AArch64CC::EQ:
955-
return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
956-
return IsZero
957-
? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS}).getReg(0)
958-
: MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS})
959-
.getReg(0);
950+
return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
951+
return MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS}).getReg(0);
960952
};
961953
case AArch64CC::GE:
962-
return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
963-
return IsZero
964-
? MIB.buildInstr(AArch64::G_FCMGEZ, {DstTy}, {LHS}).getReg(0)
965-
: MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {LHS, RHS})
966-
.getReg(0);
954+
return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
955+
return MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {LHS, RHS}).getReg(0);
967956
};
968957
case AArch64CC::GT:
969-
return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
970-
return IsZero
971-
? MIB.buildInstr(AArch64::G_FCMGTZ, {DstTy}, {LHS}).getReg(0)
972-
: MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {LHS, RHS})
973-
.getReg(0);
958+
return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
959+
return MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {LHS, RHS}).getReg(0);
974960
};
975961
case AArch64CC::LS:
976-
return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
977-
return IsZero
978-
? MIB.buildInstr(AArch64::G_FCMLEZ, {DstTy}, {LHS}).getReg(0)
979-
: MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {RHS, LHS})
980-
.getReg(0);
962+
return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
963+
return MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {RHS, LHS}).getReg(0);
981964
};
982965
case AArch64CC::MI:
983-
return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
984-
return IsZero
985-
? MIB.buildInstr(AArch64::G_FCMLTZ, {DstTy}, {LHS}).getReg(0)
986-
: MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {RHS, LHS})
987-
.getReg(0);
966+
return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
967+
return MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {RHS, LHS}).getReg(0);
988968
};
989969
}
990970
}
@@ -1024,23 +1004,17 @@ void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
10241004

10251005
LLT DstTy = MRI.getType(Dst);
10261006

1027-
auto Splat = getAArch64VectorSplat(*MRI.getVRegDef(RHS), MRI);
1028-
1029-
// Compares against 0 have special target-specific pseudos.
1030-
bool IsZero = Splat && Splat->isCst() && Splat->getCst() == 0;
1031-
10321007
bool Invert = false;
10331008
AArch64CC::CondCode CC, CC2 = AArch64CC::AL;
10341009
if ((Pred == CmpInst::Predicate::FCMP_ORD ||
10351010
Pred == CmpInst::Predicate::FCMP_UNO) &&
1036-
IsZero) {
1011+
isBuildVectorAllZeros(*MRI.getVRegDef(RHS), MRI)) {
10371012
// The special case "fcmp ord %a, 0" is the canonical check that LHS isn't
10381013
// NaN, so equivalent to a == a and doesn't need the two comparisons an
10391014
// "ord" normally would.
10401015
// Similarly, "fcmp uno %a, 0" is the canonical check that LHS is NaN and is
10411016
// thus equivalent to a != a.
10421017
RHS = LHS;
1043-
IsZero = false;
10441018
CC = Pred == CmpInst::Predicate::FCMP_ORD ? AArch64CC::EQ : AArch64CC::NE;
10451019
} else
10461020
changeVectorFCMPPredToAArch64CC(Pred, CC, CC2, Invert);
@@ -1051,12 +1025,12 @@ void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
10511025
const bool NoNans =
10521026
ST.getTargetLowering()->getTargetMachine().Options.NoNaNsFPMath;
10531027

1054-
auto Cmp = getVectorFCMP(CC, LHS, RHS, IsZero, NoNans, MRI);
1028+
auto Cmp = getVectorFCMP(CC, LHS, RHS, NoNans, MRI);
10551029
Register CmpRes;
10561030
if (CC2 == AArch64CC::AL)
10571031
CmpRes = Cmp(MIB);
10581032
else {
1059-
auto Cmp2 = getVectorFCMP(CC2, LHS, RHS, IsZero, NoNans, MRI);
1033+
auto Cmp2 = getVectorFCMP(CC2, LHS, RHS, NoNans, MRI);
10601034
auto Cmp2Dst = Cmp2(MIB);
10611035
auto Cmp1Dst = Cmp(MIB);
10621036
CmpRes = MIB.buildOr(DstTy, Cmp1Dst, Cmp2Dst).getReg(0);

0 commit comments

Comments
 (0)