-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[LLVM][ISel][AArch64 Remove AArch64ISD::FCM##z nodes. #135817
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[LLVM][ISel][AArch64 Remove AArch64ISD::FCM##z nodes. #135817
Conversation
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-llvm-globalisel Author: Paul Walker (paulwalker-arm) ChangesWe can easily select compare-to-zero instructions without dedicated nodes. The test changes show opportunities that were previous missed because of the redundant complexity. The global-isel changes are due to isBuildVectorAllZeros not identifying all zero floating point vectors. Despite the use of getAnyConstantSplat, which does work, its result is wrapped inside m_SpecificICst that pushes us down an integer only path. I am new to global-isel so is it safe to assume the result of getAnyConstantSplat can be tested directly? Patch is 29.19 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/135817.diff 11 Files Affected:
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 223d69c362185..d8cc86b34a819 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1385,7 +1385,8 @@ bool llvm::isBuildVectorConstantSplat(const Register Reg,
const MachineRegisterInfo &MRI,
int64_t SplatValue, bool AllowUndef) {
if (auto SplatValAndReg = getAnyConstantSplat(Reg, MRI, AllowUndef))
- return mi_match(SplatValAndReg->VReg, MRI, m_SpecificICst(SplatValue));
+ return SplatValAndReg->Value.getSExtValue() == SplatValue;
+
return false;
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 830ec6886e6bc..eb58dd2b5b1d8 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2592,11 +2592,6 @@ unsigned AArch64TargetLowering::ComputeNumSignBitsForTargetNode(
case AArch64ISD::FCMEQ:
case AArch64ISD::FCMGE:
case AArch64ISD::FCMGT:
- case AArch64ISD::FCMEQz:
- case AArch64ISD::FCMGEz:
- case AArch64ISD::FCMGTz:
- case AArch64ISD::FCMLEz:
- case AArch64ISD::FCMLTz:
// Compares return either 0 or all-ones
return VTBits;
case AArch64ISD::VASHR: {
@@ -2813,11 +2808,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::FCMEQ)
MAKE_CASE(AArch64ISD::FCMGE)
MAKE_CASE(AArch64ISD::FCMGT)
- MAKE_CASE(AArch64ISD::FCMEQz)
- MAKE_CASE(AArch64ISD::FCMGEz)
- MAKE_CASE(AArch64ISD::FCMGTz)
- MAKE_CASE(AArch64ISD::FCMLEz)
- MAKE_CASE(AArch64ISD::FCMLTz)
MAKE_CASE(AArch64ISD::SADDV)
MAKE_CASE(AArch64ISD::UADDV)
MAKE_CASE(AArch64ISD::UADDLV)
@@ -15821,40 +15811,19 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
assert(VT.getSizeInBits() == SrcVT.getSizeInBits() &&
"function only supposed to emit natural comparisons");
- APInt SplatValue;
- APInt SplatUndef;
- unsigned SplatBitSize = 0;
- bool HasAnyUndefs;
-
- BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
- bool IsCnst = BVN && BVN->isConstantSplat(SplatValue, SplatUndef,
- SplatBitSize, HasAnyUndefs);
-
- bool IsZero = IsCnst && SplatValue == 0;
-
if (SrcVT.getVectorElementType().isFloatingPoint()) {
switch (CC) {
default:
return SDValue();
case AArch64CC::NE: {
- SDValue Fcmeq;
- if (IsZero)
- Fcmeq = DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
- else
- Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
+ SDValue Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
return DAG.getNOT(dl, Fcmeq, VT);
}
case AArch64CC::EQ:
- if (IsZero)
- return DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
case AArch64CC::GE:
- if (IsZero)
- return DAG.getNode(AArch64ISD::FCMGEz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMGE, dl, VT, LHS, RHS);
case AArch64CC::GT:
- if (IsZero)
- return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS);
case AArch64CC::LE:
if (!NoNans)
@@ -15862,8 +15831,6 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
// If we ignore NaNs then we can use to the LS implementation.
[[fallthrough]];
case AArch64CC::LS:
- if (IsZero)
- return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMGE, dl, VT, RHS, LHS);
case AArch64CC::LT:
if (!NoNans)
@@ -15871,8 +15838,6 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
// If we ignore NaNs then we can use to the MI implementation.
[[fallthrough]];
case AArch64CC::MI:
- if (IsZero)
- return DAG.getNode(AArch64ISD::FCMLTz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMGT, dl, VT, RHS, LHS);
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 0d51ef2be8631..adbe7e9d0a0f3 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -245,13 +245,6 @@ enum NodeType : unsigned {
FCMGE,
FCMGT,
- // Vector zero comparisons
- FCMEQz,
- FCMGEz,
- FCMGTz,
- FCMLEz,
- FCMLTz,
-
// Round wide FP to narrow FP with inexact results to odd.
FCVTXN,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 9bbcb6f3aedf5..2a0da9a1373ee 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -7136,7 +7136,7 @@ multiclass SIMDCmpTwoVector<bit U, bits<5> opc, string asm,
// FP Comparisons support only S and D element sizes (and H for v8.2a).
multiclass SIMDFPCmpTwoVector<bit U, bit S, bits<5> opc,
- string asm, SDNode OpNode> {
+ string asm, SDPatternOperator OpNode> {
let mayRaiseFPException = 1, Uses = [FPCR] in {
let Predicates = [HasNEON, HasFullFP16] in {
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index a99019d72b4ac..7322212c5bb24 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -179,36 +179,6 @@ def G_FCMGT : AArch64GenericInstruction {
let hasSideEffects = 0;
}
-def G_FCMEQZ : AArch64GenericInstruction {
- let OutOperandList = (outs type0:$dst);
- let InOperandList = (ins type0:$src);
- let hasSideEffects = 0;
-}
-
-def G_FCMGEZ : AArch64GenericInstruction {
- let OutOperandList = (outs type0:$dst);
- let InOperandList = (ins type0:$src);
- let hasSideEffects = 0;
-}
-
-def G_FCMGTZ : AArch64GenericInstruction {
- let OutOperandList = (outs type0:$dst);
- let InOperandList = (ins type0:$src);
- let hasSideEffects = 0;
-}
-
-def G_FCMLEZ : AArch64GenericInstruction {
- let OutOperandList = (outs type0:$dst);
- let InOperandList = (ins type0:$src);
- let hasSideEffects = 0;
-}
-
-def G_FCMLTZ : AArch64GenericInstruction {
- let OutOperandList = (outs type0:$dst);
- let InOperandList = (ins type0:$src);
- let hasSideEffects = 0;
-}
-
def G_AARCH64_PREFETCH : AArch64GenericInstruction {
let OutOperandList = (outs);
let InOperandList = (ins type0:$imm, ptype0:$src1);
@@ -295,12 +265,6 @@ def : GINodeEquiv<G_FCMEQ, AArch64fcmeq>;
def : GINodeEquiv<G_FCMGE, AArch64fcmge>;
def : GINodeEquiv<G_FCMGT, AArch64fcmgt>;
-def : GINodeEquiv<G_FCMEQZ, AArch64fcmeqz>;
-def : GINodeEquiv<G_FCMGEZ, AArch64fcmgez>;
-def : GINodeEquiv<G_FCMGTZ, AArch64fcmgtz>;
-def : GINodeEquiv<G_FCMLEZ, AArch64fcmlez>;
-def : GINodeEquiv<G_FCMLTZ, AArch64fcmltz>;
-
def : GINodeEquiv<G_BSP, AArch64bsp>;
def : GINodeEquiv<G_UMULL, AArch64umull>;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index b90792d60d102..319d5b0519487 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -876,11 +876,20 @@ def AArch64cmltz : PatFrag<(ops node:$lhs),
def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS),
(vnot (AArch64cmeqz (and node:$LHS, node:$RHS)))>;
-def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>;
-def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>;
-def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>;
-def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>;
-def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>;
+def AArch64fcmeqz : PatFrag<(ops node:$lhs),
+ (AArch64fcmeq node:$lhs, immAllZerosV)>;
+
+def AArch64fcmgez : PatFrag<(ops node:$lhs),
+ (AArch64fcmge node:$lhs, immAllZerosV)>;
+
+def AArch64fcmgtz : PatFrag<(ops node:$lhs),
+ (AArch64fcmgt node:$lhs, immAllZerosV)>;
+
+def AArch64fcmlez : PatFrag<(ops node:$lhs),
+ (AArch64fcmge immAllZerosV, node:$lhs)>;
+
+def AArch64fcmltz : PatFrag<(ops node:$lhs),
+ (AArch64fcmgt immAllZerosV, node:$lhs)>;
def AArch64fcvtxn_n: SDNode<"AArch64ISD::FCVTXN", SDTFPRoundOp>;
def AArch64fcvtxnsdr: PatFrags<(ops node:$Rn),
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 4785c7b68d94d..3e242c1b9a114 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -808,16 +808,14 @@ void applyScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
- auto Splat = getAArch64VectorSplat(MI, MRI);
- if (!Splat)
- return false;
- if (Splat->isReg())
- return true;
+
// Later, during selection, we'll try to match imported patterns using
// immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR. Don't lower
// G_BUILD_VECTORs which could match those patterns.
- int64_t Cst = Splat->getCst();
- return (Cst != 0 && Cst != -1);
+ if (isBuildVectorAllZeros(MI, MRI) || isBuildVectorAllOnes(MI, MRI))
+ return false;
+
+ return getAArch64VectorSplat(MI, MRI).has_value();
}
void applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI,
@@ -936,8 +934,8 @@ void applySwapICmpOperands(MachineInstr &MI, GISelChangeObserver &Observer) {
/// \param [in] IsZero - True if the comparison is against 0.
/// \param [in] NoNans - True if the target has NoNansFPMath.
std::function<Register(MachineIRBuilder &)>
-getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool IsZero,
- bool NoNans, MachineRegisterInfo &MRI) {
+getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool NoNans,
+ MachineRegisterInfo &MRI) {
LLT DstTy = MRI.getType(LHS);
assert(DstTy.isVector() && "Expected vector types only?");
assert(DstTy == MRI.getType(RHS) && "Src and Dst types must match!");
@@ -945,46 +943,29 @@ getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool IsZero,
default:
llvm_unreachable("Unexpected condition code!");
case AArch64CC::NE:
- return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
- auto FCmp = IsZero
- ? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS})
- : MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
+ return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
+ auto FCmp = MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
return MIB.buildNot(DstTy, FCmp).getReg(0);
};
case AArch64CC::EQ:
- return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
- return IsZero
- ? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS}).getReg(0)
- : MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS})
- .getReg(0);
+ return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
+ return MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS}).getReg(0);
};
case AArch64CC::GE:
- return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
- return IsZero
- ? MIB.buildInstr(AArch64::G_FCMGEZ, {DstTy}, {LHS}).getReg(0)
- : MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {LHS, RHS})
- .getReg(0);
+ return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
+ return MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {LHS, RHS}).getReg(0);
};
case AArch64CC::GT:
- return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
- return IsZero
- ? MIB.buildInstr(AArch64::G_FCMGTZ, {DstTy}, {LHS}).getReg(0)
- : MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {LHS, RHS})
- .getReg(0);
+ return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
+ return MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {LHS, RHS}).getReg(0);
};
case AArch64CC::LS:
- return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
- return IsZero
- ? MIB.buildInstr(AArch64::G_FCMLEZ, {DstTy}, {LHS}).getReg(0)
- : MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {RHS, LHS})
- .getReg(0);
+ return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
+ return MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {RHS, LHS}).getReg(0);
};
case AArch64CC::MI:
- return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
- return IsZero
- ? MIB.buildInstr(AArch64::G_FCMLTZ, {DstTy}, {LHS}).getReg(0)
- : MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {RHS, LHS})
- .getReg(0);
+ return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
+ return MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {RHS, LHS}).getReg(0);
};
}
}
@@ -1024,23 +1005,17 @@ void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
LLT DstTy = MRI.getType(Dst);
- auto Splat = getAArch64VectorSplat(*MRI.getVRegDef(RHS), MRI);
-
- // Compares against 0 have special target-specific pseudos.
- bool IsZero = Splat && Splat->isCst() && Splat->getCst() == 0;
-
bool Invert = false;
AArch64CC::CondCode CC, CC2 = AArch64CC::AL;
if ((Pred == CmpInst::Predicate::FCMP_ORD ||
Pred == CmpInst::Predicate::FCMP_UNO) &&
- IsZero) {
+ isBuildVectorAllZeros(*MRI.getVRegDef(RHS), MRI)) {
// The special case "fcmp ord %a, 0" is the canonical check that LHS isn't
// NaN, so equivalent to a == a and doesn't need the two comparisons an
// "ord" normally would.
// Similarly, "fcmp uno %a, 0" is the canonical check that LHS is NaN and is
// thus equivalent to a != a.
RHS = LHS;
- IsZero = false;
CC = Pred == CmpInst::Predicate::FCMP_ORD ? AArch64CC::EQ : AArch64CC::NE;
} else
changeVectorFCMPPredToAArch64CC(Pred, CC, CC2, Invert);
@@ -1051,12 +1026,12 @@ void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
const bool NoNans =
ST.getTargetLowering()->getTargetMachine().Options.NoNaNsFPMath;
- auto Cmp = getVectorFCMP(CC, LHS, RHS, IsZero, NoNans, MRI);
+ auto Cmp = getVectorFCMP(CC, LHS, RHS, NoNans, MRI);
Register CmpRes;
if (CC2 == AArch64CC::AL)
CmpRes = Cmp(MIB);
else {
- auto Cmp2 = getVectorFCMP(CC2, LHS, RHS, IsZero, NoNans, MRI);
+ auto Cmp2 = getVectorFCMP(CC2, LHS, RHS, NoNans, MRI);
auto Cmp2Dst = Cmp2(MIB);
auto Cmp1Dst = Cmp(MIB);
CmpRes = MIB.buildOr(DstTy, Cmp1Dst, Cmp2Dst).getReg(0);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/lower-neon-vector-fcmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/lower-neon-vector-fcmp.mir
index 1f5fb892df582..afc4e9eac7547 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/lower-neon-vector-fcmp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/lower-neon-vector-fcmp.mir
@@ -31,14 +31,15 @@ body: |
bb.0:
liveins: $q0, $q1
- ; Should be inverted. Needs two compares.
; CHECK-LABEL: name: oeq_zero
; CHECK: liveins: $q0, $q1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %lhs:_(<2 x s64>) = COPY $q0
- ; CHECK-NEXT: [[FCMEQZ:%[0-9]+]]:_(<2 x s64>) = G_FCMEQZ %lhs
- ; CHECK-NEXT: $q0 = COPY [[FCMEQZ]](<2 x s64>)
+ ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: %zero_vec:_(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
+ ; CHECK-NEXT: [[FCMEQ:%[0-9]+]]:_(<2 x s64>) = G_FCMEQ %lhs, %zero_vec(<2 x s64>)
+ ; CHECK-NEXT: $q0 = COPY [[FCMEQ]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%lhs:_(<2 x s64>) = COPY $q0
%zero:_(s64) = G_CONSTANT i64 0
@@ -82,8 +83,10 @@ body: |
; CHECK: liveins: $q0, $q1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %lhs:_(<2 x s64>) = COPY $q0
- ; CHECK-NEXT: [[FCMGTZ:%[0-9]+]]:_(<2 x s64>) = G_FCMGTZ %lhs
- ; CHECK-NEXT: $q0 = COPY [[FCMGTZ]](<2 x s64>)
+ ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: %zero_vec:_(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
+ ; CHECK-NEXT: [[FCMGT:%[0-9]+]]:_(<2 x s64>) = G_FCMGT %lhs, %zero_vec(<2 x s64>)
+ ; CHECK-NEXT: $q0 = COPY [[FCMGT]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%lhs:_(<2 x s64>) = COPY $q0
%zero:_(s64) = G_CONSTANT i64 0
@@ -123,14 +126,15 @@ body: |
bb.0:
liveins: $q0, $q1
- ; Should be inverted. Needs two compares.
; CHECK-LABEL: name: oge_zero
; CHECK: liveins: $q0, $q1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %lhs:_(<2 x s64>) = COPY $q0
- ; CHECK-NEXT: [[FCMGEZ:%[0-9]+]]:_(<2 x s64>) = G_FCMGEZ %lhs
- ; CHECK-NEXT: $q0 = COPY [[FCMGEZ]](<2 x s64>)
+ ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: %zero_vec:_(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
+ ; CHECK-NEXT: [[FCMGE:%[0-9]+]]:_(<2 x s64>) = G_FCMGE %lhs, %zero_vec(<2 x s64>)
+ ; CHECK-NEXT: $q0 = COPY [[FCMGE]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%lhs:_(<2 x s64>) = COPY $q0
%zero:_(s64) = G_CONSTANT i64 0
@@ -174,8 +178,10 @@ body: |
; CHECK: liveins: $q0, $q1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %lhs:_(<2 x s64>) = COPY $q0
- ; CHECK-NEXT: [[FCMLTZ:%[0-9]+]]:_(<2 x s64>) = G_FCMLTZ %lhs
- ; CHECK-NEXT: $q0 = COPY [[FCMLTZ]](<2 x s64>)
+ ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: %zero_vec:_(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
+ ; CHECK-NEXT: [[FCMGT:%[0-9]+]]:_(<2 x s64>) = G_FCMGT %zero_vec, %lhs(<2 x s64>)
+ ; CHECK-NEXT: $q0 = COPY [[FCMGT]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%lhs:_(<2 x s64>) = COPY $q0
%zero:_(s64) = G_CONSTANT i64 0
@@ -218,8 +224,10 @@ body: |
; CHECK: liveins: $q0, $q1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %lhs:_(<2 x s64>) = COPY $q0
- ; CHECK-NEXT: [[FCMLEZ:%[0-9]+]]:_(<2 x s64>) = G_FCMLEZ %lhs
- ; CHECK-NEXT: $q0 = COPY [[FCMLEZ]](<2 x s64>)
+ ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: %zero_vec:_(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
+ ; CHECK-NEXT: [[FCMGE:%[0-9]+]]:_(<2 x s64>) = G_FCMGE %zero_vec, %lhs(<2 x s64>)
+ ; CHECK-NEXT: $q0 = COPY [[FCMGE]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%lhs:_(<2 x s64>) = COPY $q0
%zero:_(s64) = G_CONSTANT i64 0
@@ -237,7 +245,6 @@ body: |
bb.0:
liveins: $q0, $q1
- ; Two compares.
; CHECK-LABEL: name: one
; CHECK: liveins: $q0, $q1
@@ -264,15 +271,16 @@ body: |
bb.0:
liveins: $q0, $q1
- ; Two compares.
; CHECK-LABEL: name: one_zero
; CHECK: liveins: $q0, $q1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %lhs:_(<2 x s64>) = COPY $q0
- ; CHECK-NEXT: [[FCMGTZ:%[0-9]+]]:_(<2 x s64>) = G_FCMGTZ %lhs
- ; CHECK-NEXT: [[FCMLTZ:%[0-9]+]]:_(<2 x s64>) = G_FCMLTZ %lhs
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s64>) = G_OR [[FCMLTZ]], [[FCMGTZ]]
+ ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: %zero_vec:_(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
+ ; CHECK-NEXT: [[FCMGT:%[0-9]+]]:_(<2 x s64>) = G_FCMGT %lhs, %zero_vec(<2 x s64>)
+ ; CHECK-NEXT: [[FCMGT1:%[0-9]+]]:_(<2 x s64>) = G_FCMGT %zero_vec, %lhs(<2 x s64>)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s64>) = G_OR [[FCMGT1]], [[FCMGT]]
; CHECK-NEXT: $q0 = COPY [[OR]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%lhs:_(<2 x s64>) = COPY $q0
@@ -291,7 +299,6 @@ body: |
bb.0:
liveins: $q0, $q1
- ; Should be inverted. Needs two compares.
; CHECK-LABEL: name: uno
; CHECK: liveins: $q0, $q1
@@ -348,7 +355,6 @@ body: |
bb.0:
liveins: $q0, $q1
- ; Needs two compares. No invert.
; CHECK-LABEL: name: ord
; CHECK: liveins: $q0, $q1
@@ -375,7 +381,6 @@ body: |
bb.0:
liveins: $q0, $q1
- ; Needs two compares. No invert.
; CHECK-LABEL: name: ord_zero
; CHECK: liv...
[truncated]
|
llvm/test/CodeGen/AArch64/GlobalISel/lower-neon-vector-fcmp.mir
Outdated
Show resolved
Hide resolved
We can easily select compare-to-zero instructions without dedicated nodes. The test changes show opportunities that were previous missed because of the redundant complexity. The global-isel changes are due to isBuildVectorAllZeros not identifying all zero floating point vectors. Despite the use of getAnyConstantSplat, which does work, its result is wrapped inside m_SpecificICst that pushes us down an integer only path. I am new to global-isel so is it safe to assume the result of getAnyConstantSplat can be tested directly?
b296e09
to
447b338
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seems like a nice clean up. For at least the GISel part LGTM.
@@ -936,55 +934,38 @@ void applySwapICmpOperands(MachineInstr &MI, GISelChangeObserver &Observer) { | |||
/// \param [in] IsZero - True if the comparison is against 0. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Obsolete comment line
We started seeing an assertion failure, and I bisected it to this commit:
|
I'm attaching a reproducer: |
This reverts commit 15d8b3c.
@gulfemsavrun: Sorry for the failure and thanks for the reproducer. I've confirmed my PR is responsible and have landed a revert whilst I investigate further. |
…)" This reverts commit 427b644. Original patch has been updated to include a fix to esnure AArch64InstructionSelector::emitConstantVector supports all the cases where isBuildVectorAllOnes returns true.
I've landed a fixed version, turns out emitConstantVector did not support floating-point vector constants. |
Thanks for the fix, and I verified that your fix resolved the issue. |
We can easily select compare-to-zero instructions without dedicated nodes. The test changes show opportunities that were previous missed because of the redundant complexity.
We can easily select compare-to-zero instructions without dedicated nodes. The test changes show opportunities that were previous missed because of the redundant complexity.
We can easily select compare-to-zero instructions without dedicated nodes. The test changes show opportunities that were previous missed because of the redundant complexity.
We can easily select compare-to-zero instructions without dedicated nodes. The test changes show opportunities that were previous missed because of the redundant complexity.
The global-isel changes are due to isBuildVectorAllZeros not identifying all zero floating point vectors. Despite the use of getAnyConstantSplat, which does work, its result is wrapped inside m_SpecificICst that pushes us down an integer only path. I am new to global-isel so is it safe to assume the result of getAnyConstantSplat can be tested directly?