
[LLVM][ISel][AArch64] Remove AArch64ISD::FCM##z nodes. #135817


Merged
3 changes: 2 additions & 1 deletion llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1385,7 +1385,8 @@ bool llvm::isBuildVectorConstantSplat(const Register Reg,
                                       const MachineRegisterInfo &MRI,
                                       int64_t SplatValue, bool AllowUndef) {
   if (auto SplatValAndReg = getAnyConstantSplat(Reg, MRI, AllowUndef))
-    return mi_match(SplatValAndReg->VReg, MRI, m_SpecificICst(SplatValue));
+    return SplatValAndReg->Value.getSExtValue() == SplatValue;
+
   return false;
 }

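Note: the new return in `isBuildVectorConstantSplat` relies on `APInt` sign-extension, so narrow all-ones splats still compare equal to a negative `SplatValue`. A minimal standalone sketch of that semantics (illustrative C++ against `llvm::APInt`; the v4i16 lane is an assumed example, not from the patch):

```cpp
// Why comparing via getSExtValue() keeps matching narrow all-ones splats:
// a 16-bit lane of 0xFFFF sign-extends to int64_t -1, so a query with
// SplatValue == -1 still succeeds, as the old mi_match-based check did.
#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  llvm::APInt Lane(/*numBits=*/16, /*val=*/0xFFFF); // one lane of a v4i16 splat
  int64_t SplatValue = -1;
  assert(Lane.getSExtValue() == SplatValue);
  return 0;
}
```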
37 changes: 1 addition & 36 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2592,11 +2592,6 @@ unsigned AArch64TargetLowering::ComputeNumSignBitsForTargetNode(
   case AArch64ISD::FCMEQ:
   case AArch64ISD::FCMGE:
   case AArch64ISD::FCMGT:
-  case AArch64ISD::FCMEQz:
-  case AArch64ISD::FCMGEz:
-  case AArch64ISD::FCMGTz:
-  case AArch64ISD::FCMLEz:
-  case AArch64ISD::FCMLTz:
     // Compares return either 0 or all-ones
     return VTBits;
   case AArch64ISD::VASHR: {
@@ -2813,11 +2808,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
     MAKE_CASE(AArch64ISD::FCMEQ)
     MAKE_CASE(AArch64ISD::FCMGE)
     MAKE_CASE(AArch64ISD::FCMGT)
-    MAKE_CASE(AArch64ISD::FCMEQz)
-    MAKE_CASE(AArch64ISD::FCMGEz)
-    MAKE_CASE(AArch64ISD::FCMGTz)
-    MAKE_CASE(AArch64ISD::FCMLEz)
-    MAKE_CASE(AArch64ISD::FCMLTz)
     MAKE_CASE(AArch64ISD::SADDV)
     MAKE_CASE(AArch64ISD::UADDV)
     MAKE_CASE(AArch64ISD::UADDLV)
@@ -15821,58 +15811,33 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
   assert(VT.getSizeInBits() == SrcVT.getSizeInBits() &&
          "function only supposed to emit natural comparisons");
 
-  APInt SplatValue;
-  APInt SplatUndef;
-  unsigned SplatBitSize = 0;
-  bool HasAnyUndefs;
-
-  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
-  bool IsCnst = BVN && BVN->isConstantSplat(SplatValue, SplatUndef,
-                                            SplatBitSize, HasAnyUndefs);
-
-  bool IsZero = IsCnst && SplatValue == 0;
-
   if (SrcVT.getVectorElementType().isFloatingPoint()) {
     switch (CC) {
     default:
       return SDValue();
     case AArch64CC::NE: {
-      SDValue Fcmeq;
-      if (IsZero)
-        Fcmeq = DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
-      else
-        Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
+      SDValue Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
       return DAG.getNOT(dl, Fcmeq, VT);
     }
     case AArch64CC::EQ:
-      if (IsZero)
-        return DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
       return DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
     case AArch64CC::GE:
-      if (IsZero)
-        return DAG.getNode(AArch64ISD::FCMGEz, dl, VT, LHS);
       return DAG.getNode(AArch64ISD::FCMGE, dl, VT, LHS, RHS);
     case AArch64CC::GT:
-      if (IsZero)
-        return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS);
       return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS);
     case AArch64CC::LE:
       if (!NoNans)
         return SDValue();
       // If we ignore NaNs then we can use the LS implementation.
       [[fallthrough]];
     case AArch64CC::LS:
-      if (IsZero)
-        return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS);
       return DAG.getNode(AArch64ISD::FCMGE, dl, VT, RHS, LHS);
     case AArch64CC::LT:
       if (!NoNans)
         return SDValue();
       // If we ignore NaNs then we can use the MI implementation.
       [[fallthrough]];
     case AArch64CC::MI:
-      if (IsZero)
-        return DAG.getNode(AArch64ISD::FCMLTz, dl, VT, LHS);
       return DAG.getNode(AArch64ISD::FCMGT, dl, VT, RHS, LHS);
     }
   }
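With the `IsZero` special cases gone, `EmitVectorComparison` always emits the two-operand nodes and leaves a zero RHS in place; the zero-operand machine forms are recovered later by the PatFrags added in AArch64InstrInfo.td below. A hedged sketch of the resulting DAG shape (assuming `DAG`, `dl`, `VT`, `SrcVT`, and `LHS` from the surrounding function; not literal patch code):

```cpp
// Lowering "fcmp oeq <4 x float> %x, zeroinitializer" after this change:
// the zero splat stays as an ordinary RHS operand.
SDValue ZeroSplat = DAG.getConstantFP(0.0, dl, SrcVT); // +0.0 in every lane
SDValue Cmp = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, ZeroSplat);
// Instruction selection then folds the zero RHS through the AArch64fcmeqz
// PatFrag and still produces the single-operand "fcmeq v0.4s, v1.4s, #0.0".
```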
7 changes: 0 additions & 7 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -245,13 +245,6 @@ enum NodeType : unsigned {
   FCMGE,
   FCMGT,
 
-  // Vector zero comparisons
-  FCMEQz,
-  FCMGEz,
-  FCMGTz,
-  FCMLEz,
-  FCMLTz,
-
   // Round wide FP to narrow FP with inexact results to odd.
   FCVTXN,
 
2 changes: 1 addition & 1 deletion llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -7136,7 +7136,7 @@ multiclass SIMDCmpTwoVector<bit U, bits<5> opc, string asm,
 
 // FP Comparisons support only S and D element sizes (and H for v8.2a).
 multiclass SIMDFPCmpTwoVector<bit U, bit S, bits<5> opc,
-                              string asm, SDNode OpNode> {
+                              string asm, SDPatternOperator OpNode> {
 
   let mayRaiseFPException = 1, Uses = [FPCR] in {
   let Predicates = [HasNEON, HasFullFP16] in {
36 changes: 0 additions & 36 deletions llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -179,36 +179,6 @@ def G_FCMGT : AArch64GenericInstruction {
   let hasSideEffects = 0;
 }
 
-def G_FCMEQZ : AArch64GenericInstruction {
-  let OutOperandList = (outs type0:$dst);
-  let InOperandList = (ins type0:$src);
-  let hasSideEffects = 0;
-}
-
-def G_FCMGEZ : AArch64GenericInstruction {
-  let OutOperandList = (outs type0:$dst);
-  let InOperandList = (ins type0:$src);
-  let hasSideEffects = 0;
-}
-
-def G_FCMGTZ : AArch64GenericInstruction {
-  let OutOperandList = (outs type0:$dst);
-  let InOperandList = (ins type0:$src);
-  let hasSideEffects = 0;
-}
-
-def G_FCMLEZ : AArch64GenericInstruction {
-  let OutOperandList = (outs type0:$dst);
-  let InOperandList = (ins type0:$src);
-  let hasSideEffects = 0;
-}
-
-def G_FCMLTZ : AArch64GenericInstruction {
-  let OutOperandList = (outs type0:$dst);
-  let InOperandList = (ins type0:$src);
-  let hasSideEffects = 0;
-}
-
 def G_AARCH64_PREFETCH : AArch64GenericInstruction {
   let OutOperandList = (outs);
   let InOperandList = (ins type0:$imm, ptype0:$src1);
@@ -295,12 +265,6 @@ def : GINodeEquiv<G_FCMEQ, AArch64fcmeq>;
 def : GINodeEquiv<G_FCMGE, AArch64fcmge>;
 def : GINodeEquiv<G_FCMGT, AArch64fcmgt>;
 
-def : GINodeEquiv<G_FCMEQZ, AArch64fcmeqz>;
-def : GINodeEquiv<G_FCMGEZ, AArch64fcmgez>;
-def : GINodeEquiv<G_FCMGTZ, AArch64fcmgtz>;
-def : GINodeEquiv<G_FCMLEZ, AArch64fcmlez>;
-def : GINodeEquiv<G_FCMLTZ, AArch64fcmltz>;
-
 def : GINodeEquiv<G_BSP, AArch64bsp>;
 
 def : GINodeEquiv<G_UMULL, AArch64umull>;
19 changes: 14 additions & 5 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -876,11 +876,20 @@ def AArch64cmltz : PatFrag<(ops node:$lhs),
 def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS),
                            (vnot (AArch64cmeqz (and node:$LHS, node:$RHS)))>;
 
-def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>;
-def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>;
-def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>;
-def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>;
-def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>;
+def AArch64fcmeqz : PatFrag<(ops node:$lhs),
+                            (AArch64fcmeq node:$lhs, immAllZerosV)>;
+
+def AArch64fcmgez : PatFrag<(ops node:$lhs),
+                            (AArch64fcmge node:$lhs, immAllZerosV)>;
+
+def AArch64fcmgtz : PatFrag<(ops node:$lhs),
+                            (AArch64fcmgt node:$lhs, immAllZerosV)>;
+
+def AArch64fcmlez : PatFrag<(ops node:$lhs),
+                            (AArch64fcmge immAllZerosV, node:$lhs)>;
+
+def AArch64fcmltz : PatFrag<(ops node:$lhs),
+                            (AArch64fcmgt immAllZerosV, node:$lhs)>;
 
 def AArch64fcvtxn_n: SDNode<"AArch64ISD::FCVTXN", SDTFPRoundOp>;
 def AArch64fcvtxnsdr: PatFrags<(ops node:$Rn),
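The LE/LT-against-zero fragments are expressed by swapping operands of the surviving GE/GT nodes: `x <= 0` becomes `0 >= x` and `x < 0` becomes `0 > x`. A scalar sanity check of those identities, including the NaN case (plain C++ sketch, not part of the patch):

```cpp
#include <cassert>
#include <cmath>

// Stand-ins for the two-operand compares the new patterns are built from.
static bool fcmge(float A, float B) { return A >= B; }
static bool fcmgt(float A, float B) { return A > B; }

int main() {
  const float Vals[] = {-2.5f, -0.0f, 0.0f, 3.0f, NAN};
  for (float X : Vals) {
    assert((X <= 0.0f) == fcmge(0.0f, X)); // AArch64fcmlez == fcmge(0, x)
    assert((X < 0.0f) == fcmgt(0.0f, X));  // AArch64fcmltz == fcmgt(0, x)
  }
  return 0;
}
```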
70 changes: 22 additions & 48 deletions llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -808,16 +808,14 @@ void applyScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
 
 bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
   assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
-  auto Splat = getAArch64VectorSplat(MI, MRI);
-  if (!Splat)
-    return false;
-  if (Splat->isReg())
-    return true;
 
   // Later, during selection, we'll try to match imported patterns using
   // immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR. Don't lower
   // G_BUILD_VECTORs which could match those patterns.
-  int64_t Cst = Splat->getCst();
-  return (Cst != 0 && Cst != -1);
+  if (isBuildVectorAllZeros(MI, MRI) || isBuildVectorAllOnes(MI, MRI))
+    return false;
+
+  return getAArch64VectorSplat(MI, MRI).has_value();
 }
 
 void applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI,
@@ -933,58 +931,40 @@ void applySwapICmpOperands(MachineInstr &MI, GISelChangeObserver &Observer) {
 
 /// \returns a function which builds a vector floating point compare instruction
 /// for a condition code \p CC.
-/// \param [in] IsZero - True if the comparison is against 0.
 /// \param [in] NoNans - True if the target has NoNansFPMath.
 std::function<Register(MachineIRBuilder &)>
-getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool IsZero,
-              bool NoNans, MachineRegisterInfo &MRI) {
+getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool NoNans,
+              MachineRegisterInfo &MRI) {
   LLT DstTy = MRI.getType(LHS);
   assert(DstTy.isVector() && "Expected vector types only?");
   assert(DstTy == MRI.getType(RHS) && "Src and Dst types must match!");
   switch (CC) {
   default:
     llvm_unreachable("Unexpected condition code!");
   case AArch64CC::NE:
-    return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
-      auto FCmp = IsZero
-                      ? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS})
-                      : MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
+    return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
+      auto FCmp = MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
       return MIB.buildNot(DstTy, FCmp).getReg(0);
     };
   case AArch64CC::EQ:
-    return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
-      return IsZero
-                 ? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS}).getReg(0)
-                 : MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS})
-                       .getReg(0);
+    return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
+      return MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS}).getReg(0);
     };
   case AArch64CC::GE:
-    return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
-      return IsZero
-                 ? MIB.buildInstr(AArch64::G_FCMGEZ, {DstTy}, {LHS}).getReg(0)
-                 : MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {LHS, RHS})
-                       .getReg(0);
+    return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
+      return MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {LHS, RHS}).getReg(0);
     };
   case AArch64CC::GT:
-    return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
-      return IsZero
-                 ? MIB.buildInstr(AArch64::G_FCMGTZ, {DstTy}, {LHS}).getReg(0)
-                 : MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {LHS, RHS})
-                       .getReg(0);
+    return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
+      return MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {LHS, RHS}).getReg(0);
     };
   case AArch64CC::LS:
-    return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
-      return IsZero
-                 ? MIB.buildInstr(AArch64::G_FCMLEZ, {DstTy}, {LHS}).getReg(0)
-                 : MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {RHS, LHS})
-                       .getReg(0);
+    return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
+      return MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {RHS, LHS}).getReg(0);
    };
   case AArch64CC::MI:
-    return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
-      return IsZero
-                 ? MIB.buildInstr(AArch64::G_FCMLTZ, {DstTy}, {LHS}).getReg(0)
-                 : MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {RHS, LHS})
-                       .getReg(0);
+    return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
+      return MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {RHS, LHS}).getReg(0);
     };
   }
 }
@@ -1024,23 +1004,17 @@ void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
 
   LLT DstTy = MRI.getType(Dst);
 
-  auto Splat = getAArch64VectorSplat(*MRI.getVRegDef(RHS), MRI);
-
-  // Compares against 0 have special target-specific pseudos.
-  bool IsZero = Splat && Splat->isCst() && Splat->getCst() == 0;
-
   bool Invert = false;
   AArch64CC::CondCode CC, CC2 = AArch64CC::AL;
   if ((Pred == CmpInst::Predicate::FCMP_ORD ||
        Pred == CmpInst::Predicate::FCMP_UNO) &&
-      IsZero) {
+      isBuildVectorAllZeros(*MRI.getVRegDef(RHS), MRI)) {
     // The special case "fcmp ord %a, 0" is the canonical check that LHS isn't
     // NaN, so equivalent to a == a and doesn't need the two comparisons an
     // "ord" normally would.
     // Similarly, "fcmp uno %a, 0" is the canonical check that LHS is NaN and is
     // thus equivalent to a != a.
     RHS = LHS;
-    IsZero = false;
     CC = Pred == CmpInst::Predicate::FCMP_ORD ? AArch64CC::EQ : AArch64CC::NE;
   } else
     changeVectorFCMPPredToAArch64CC(Pred, CC, CC2, Invert);
@@ -1051,12 +1025,12 @@
   const bool NoNans =
       ST.getTargetLowering()->getTargetMachine().Options.NoNaNsFPMath;
 
-  auto Cmp = getVectorFCMP(CC, LHS, RHS, IsZero, NoNans, MRI);
+  auto Cmp = getVectorFCMP(CC, LHS, RHS, NoNans, MRI);
   Register CmpRes;
   if (CC2 == AArch64CC::AL)
     CmpRes = Cmp(MIB);
   else {
-    auto Cmp2 = getVectorFCMP(CC2, LHS, RHS, IsZero, NoNans, MRI);
+    auto Cmp2 = getVectorFCMP(CC2, LHS, RHS, NoNans, MRI);
     auto Cmp2Dst = Cmp2(MIB);
     auto Cmp1Dst = Cmp(MIB);
     CmpRes = MIB.buildOr(DstTy, Cmp1Dst, Cmp2Dst).getReg(0);
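On the GlobalISel side the shape is the same: `getVectorFCMP` now always builds the two-operand generic nodes, and a zero RHS survives as a `G_BUILD_VECTOR` of zeros that the imported patterns match via `immAllZerosV`. A hedged sketch of a call site under the new signature (register names and setup are assumed, not from the patch):

```cpp
// Building "fcmp ogt <2 x double> %x, zeroinitializer" with the simplified
// helper: IsZero is gone, the zero splat is just another vreg on the RHS.
auto Cmp = getVectorFCMP(AArch64CC::GT, LHS, ZeroSplatReg,
                         /*NoNans=*/false, MRI);
Register Res = Cmp(MIB); // emits G_FCMGT %LHS, %ZeroSplatReg
// Selection later matches the zero build vector and emits
// "fcmgt v0.2d, v1.2d, #0.0" via the AArch64fcmgtz PatFrag.
```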