@@ -41903,7 +41903,8 @@ static SDValue canonicalizeLaneShuffleWithRepeatedOps(SDValue V,
 static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
                                       ArrayRef<SDValue> Ops, SelectionDAG &DAG,
                                       TargetLowering::DAGCombinerInfo &DCI,
-                                      const X86Subtarget &Subtarget);
+                                      const X86Subtarget &Subtarget,
+                                      unsigned Depth = 0);
 
 /// Try to combine x86 target specific shuffles.
 static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
@@ -57791,7 +57792,8 @@ CastIntSETCCtoFP(MVT VT, ISD::CondCode CC, unsigned NumSignificantBitsLHS,
 static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
                                       ArrayRef<SDValue> Ops, SelectionDAG &DAG,
                                       TargetLowering::DAGCombinerInfo &DCI,
-                                      const X86Subtarget &Subtarget) {
+                                      const X86Subtarget &Subtarget,
+                                      unsigned Depth) {
   assert(Subtarget.hasAVX() && "AVX assumed for concat_vectors");
   unsigned EltSizeInBits = VT.getScalarSizeInBits();
 
@@ -57803,6 +57805,9 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
       }))
     return getZeroVector(VT, Subtarget, DAG, DL);
 
+  if (Depth >= SelectionDAG::MaxRecursionDepth)
+    return SDValue(); // Limit search depth.
+
   SDValue Op0 = Ops[0];
   bool IsSplat = llvm::all_equal(Ops);
   unsigned NumOps = Ops.size();
@@ -57933,6 +57938,20 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
     }
     return AllConstants || AllSubs;
   };
+  auto CombineSubOperand = [&](MVT VT, ArrayRef<SDValue> SubOps, unsigned I) {
+    bool AllConstants = true;
+    SmallVector<SDValue> Subs;
+    for (SDValue SubOp : SubOps) {
+      SDValue BC = peekThroughBitcasts(SubOp.getOperand(I));
+      AllConstants &= ISD::isBuildVectorOfConstantSDNodes(BC.getNode()) ||
+                      ISD::isBuildVectorOfConstantFPSDNodes(BC.getNode());
+      Subs.push_back(SubOp.getOperand(I));
+    }
+    if (AllConstants)
+      return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs);
+    return combineConcatVectorOps(DL, VT, Subs, DAG, DCI, Subtarget,
+                                  Depth + 1);
+  };
 
   switch (Op0.getOpcode()) {
   case ISD::VECTOR_SHUFFLE: {
@@ -58354,14 +58373,17 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
   case ISD::FADD:
   case ISD::FSUB:
   case ISD::FMUL:
-    if (!IsSplat && (IsConcatFree(VT, Ops, 0) || IsConcatFree(VT, Ops, 1)) &&
-        (VT.is256BitVector() ||
-         (VT.is512BitVector() && Subtarget.useAVX512Regs()))) {
-      return DAG.getNode(Op0.getOpcode(), DL, VT,
-                         ConcatSubOperand(VT, Ops, 0),
-                         ConcatSubOperand(VT, Ops, 1));
+    if (!IsSplat && (VT.is256BitVector() ||
+                     (VT.is512BitVector() && Subtarget.useAVX512Regs()))) {
+      SDValue Concat0 = CombineSubOperand(VT, Ops, 0);
+      SDValue Concat1 = CombineSubOperand(VT, Ops, 1);
+      if (Concat0 || Concat1)
+        return DAG.getNode(Op0.getOpcode(), DL, VT,
+                           Concat0 ? Concat0 : ConcatSubOperand(VT, Ops, 0),
+                           Concat1 ? Concat1 : ConcatSubOperand(VT, Ops, 1));
     }
     break;
+  // Always prefer to concatenate high latency FDIV instructions.
   case ISD::FDIV:
     if (!IsSplat && (VT.is256BitVector() ||
                      (VT.is512BitVector() && Subtarget.useAVX512Regs()))) {
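
For readers unfamiliar with the pattern, here is a minimal standalone sketch of the depth-capped recursion this patch introduces: each recursive call threads Depth + 1, and the combine bails out cleanly once a fixed cap is reached instead of recursing without bound. This is not LLVM code; Node, tryCombine, and the local MaxRecursionDepth constant are illustrative stand-ins for SDNode, combineConcatVectorOps, and SelectionDAG::MaxRecursionDepth.

    #include <iostream>
    #include <optional>
    #include <vector>

    // Stand-in for SelectionDAG::MaxRecursionDepth.
    constexpr unsigned MaxRecursionDepth = 6;

    struct Node {
      int Value = 0;
      std::vector<Node> Ops; // sub-operands to recurse into
    };

    // Returns a folded value, or std::nullopt when no fold applies,
    // mirroring how combineConcatVectorOps returns an empty SDValue.
    std::optional<int> tryCombine(const Node &N, unsigned Depth = 0) {
      if (Depth >= MaxRecursionDepth)
        return std::nullopt; // Limit search depth, as in the patch.

      if (N.Ops.empty())
        return N.Value;

      // Recurse into each sub-operand, bumping Depth by one per level.
      int Sum = 0;
      for (const Node &Op : N.Ops) {
        std::optional<int> Sub = tryCombine(Op, Depth + 1);
        if (!Sub)
          return std::nullopt; // a sub-combine hit the cap: give up cleanly
        Sum += *Sub;
      }
      return Sum;
    }

    int main() {
      // Build a chain deeper than the cap; tryCombine refuses to fold it.
      Node Root;
      Node *Cur = &Root;
      for (int I = 0; I < 8; ++I) {
        Cur->Ops.push_back(Node{I + 1, {}});
        Cur = &Cur->Ops.back();
      }
      std::cout << (tryCombine(Root) ? "combined" : "depth limit hit") << '\n';
    }

Because a failed sub-combine simply returns an empty value, callers like the FADD/FSUB/FMUL case above can fall back to ConcatSubOperand when only one operand folds, rather than abandoning the whole combine.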