Skip to content

Commit 73e14de

Browse files
authored
[X86] combineConcatVectorOps - recursively call combineConcatVectorOps instead of predicting when ops will freely concat (#130275)
The IsConcatFree helper can only estimate where concatenating the subvector operands is beneficial. This patch replaces the FADD/FSUB/FMUL concatenation checks with a recursive call to combineConcatVectorOps to see whether the operands will profitably concatenate further up the chain. Other opcodes can be moved over to the CombineSubOperand helper in future patches.
1 parent f3390fc commit 73e14de

File tree

2 files changed

+153
-355
lines changed

2 files changed

+153
-355
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -41903,7 +41903,8 @@ static SDValue canonicalizeLaneShuffleWithRepeatedOps(SDValue V,
4190341903
static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
4190441904
ArrayRef<SDValue> Ops, SelectionDAG &DAG,
4190541905
TargetLowering::DAGCombinerInfo &DCI,
41906-
const X86Subtarget &Subtarget);
41906+
const X86Subtarget &Subtarget,
41907+
unsigned Depth = 0);
4190741908

4190841909
/// Try to combine x86 target specific shuffles.
4190941910
static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
@@ -57791,7 +57792,8 @@ CastIntSETCCtoFP(MVT VT, ISD::CondCode CC, unsigned NumSignificantBitsLHS,
5779157792
static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5779257793
ArrayRef<SDValue> Ops, SelectionDAG &DAG,
5779357794
TargetLowering::DAGCombinerInfo &DCI,
57794-
const X86Subtarget &Subtarget) {
57795+
const X86Subtarget &Subtarget,
57796+
unsigned Depth) {
5779557797
assert(Subtarget.hasAVX() && "AVX assumed for concat_vectors");
5779657798
unsigned EltSizeInBits = VT.getScalarSizeInBits();
5779757799

@@ -57803,6 +57805,9 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5780357805
}))
5780457806
return getZeroVector(VT, Subtarget, DAG, DL);
5780557807

57808+
if (Depth >= SelectionDAG::MaxRecursionDepth)
57809+
return SDValue(); // Limit search depth.
57810+
5780657811
SDValue Op0 = Ops[0];
5780757812
bool IsSplat = llvm::all_equal(Ops);
5780857813
unsigned NumOps = Ops.size();
@@ -57933,6 +57938,20 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5793357938
}
5793457939
return AllConstants || AllSubs;
5793557940
};
57941+
// Try to build a single VT-wide value from operand I of every node in SubOps.
// If each of those operands (looking through bitcasts) is a build vector of
// integer or FP constants, a CONCAT_VECTORS node is emitted directly.
// Otherwise the result is whatever the recursive combineConcatVectorOps call
// (at Depth + 1) returns; the FADD/FSUB/FMUL caller null-checks that result
// before using it.
auto CombineSubOperand = [&](MVT VT, ArrayRef<SDValue> SubOps, unsigned I) {
57942+
bool AllConstants = true;
57943+
SmallVector<SDValue> Subs;
57944+
for (SDValue SubOp : SubOps) {
57945+
// Bitcasts are only stripped for the constant check below.
SDValue BC = peekThroughBitcasts(SubOp.getOperand(I));
57946+
AllConstants &= ISD::isBuildVectorOfConstantSDNodes(BC.getNode()) ||
57947+
ISD::isBuildVectorOfConstantFPSDNodes(BC.getNode());
57948+
// Collect the operand itself, not the bitcast-stripped BC.
Subs.push_back(SubOp.getOperand(I));
57949+
}
57950+
if (AllConstants)
57951+
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs);
57952+
// Not all constants: recurse, bumping Depth for the recursion-depth guard.
return combineConcatVectorOps(DL, VT, Subs, DAG, DCI, Subtarget,
57953+
Depth + 1);
57954+
};
5793657955

5793757956
switch (Op0.getOpcode()) {
5793857957
case ISD::VECTOR_SHUFFLE: {
@@ -58354,14 +58373,17 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5835458373
case ISD::FADD:
5835558374
case ISD::FSUB:
5835658375
case ISD::FMUL:
58357-
if (!IsSplat && (IsConcatFree(VT, Ops, 0) || IsConcatFree(VT, Ops, 1)) &&
58358-
(VT.is256BitVector() ||
58359-
(VT.is512BitVector() && Subtarget.useAVX512Regs()))) {
58360-
return DAG.getNode(Op0.getOpcode(), DL, VT,
58361-
ConcatSubOperand(VT, Ops, 0),
58362-
ConcatSubOperand(VT, Ops, 1));
58376+
if (!IsSplat && (VT.is256BitVector() ||
58377+
(VT.is512BitVector() && Subtarget.useAVX512Regs()))) {
58378+
SDValue Concat0 = CombineSubOperand(VT, Ops, 0);
58379+
SDValue Concat1 = CombineSubOperand(VT, Ops, 1);
58380+
if (Concat0 || Concat1)
58381+
return DAG.getNode(Op0.getOpcode(), DL, VT,
58382+
Concat0 ? Concat0 : ConcatSubOperand(VT, Ops, 0),
58383+
Concat1 ? Concat1 : ConcatSubOperand(VT, Ops, 1));
5836358384
}
5836458385
break;
58386+
// Always prefer to concatenate high latency FDIV instructions.
5836558387
case ISD::FDIV:
5836658388
if (!IsSplat && (VT.is256BitVector() ||
5836758389
(VT.is512BitVector() && Subtarget.useAVX512Regs()))) {

0 commit comments

Comments
 (0)