Skip to content

Commit c753533

Browse files
committed
[X86] combineConcatVectorOps - add 256-bit concat(shuffle(),shuffle()) handling
Improve IsConcatFree detection to handle splatted vector loads, i.e. the case where every sub-operand (after peeking through bitcasts) is the same load, which can be folded as X86ISD::SUBV_BROADCAST_LOAD. Fixes #114959
1 parent 9f8c3d3 commit c753533

File tree

2 files changed

+31
-34
lines changed

2 files changed

+31
-34
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56933,6 +56933,11 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5693356933
bool AllConstants = true;
5693456934
bool AllSubs = true;
5693556935
unsigned VecSize = VT.getSizeInBits();
56936+
SDValue BC0 = peekThroughBitcasts(SubOps[0].getOperand(Op));
56937+
if (isa<LoadSDNode>(BC0) && all_of(SubOps, [&](SDValue SubOp) {
56938+
return BC0 == peekThroughBitcasts(SubOp.getOperand(Op));
56939+
}))
56940+
return true;
5693656941
for (unsigned I = 0, E = SubOps.size(); I != E; ++I) {
5693756942
SDValue BC = peekThroughBitcasts(SubOps[I].getOperand(Op));
5693856943
unsigned SubSize = BC.getValueSizeInBits();
@@ -56947,6 +56952,26 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5694756952
};
5694856953

5694956954
switch (Op0.getOpcode()) {
56955+
case ISD::VECTOR_SHUFFLE: {
56956+
if (NumOps == 2 && VT.is256BitVector() &&
56957+
(EltSizeInBits >= 32 || Subtarget.hasInt256()) &&
56958+
(IsConcatFree(VT, Ops, 0) || IsConcatFree(VT, Ops, 1))) {
56959+
int NumSubElts = Op0.getValueType().getVectorNumElements();
56960+
SmallVector<int> NewMask;
56961+
for (int M : cast<ShuffleVectorSDNode>(Ops[0])->getMask()) {
56962+
M = M >= NumSubElts ? M + NumSubElts : M;
56963+
NewMask.push_back(M);
56964+
}
56965+
for (int M : cast<ShuffleVectorSDNode>(Ops[1])->getMask()) {
56966+
if (0 <= M)
56967+
M = (M >= NumSubElts ? M + NumSubElts : M) + NumSubElts;
56968+
NewMask.push_back(M);
56969+
}
56970+
return DAG.getVectorShuffle(VT, DL, ConcatSubOperand(VT, Ops, 0),
56971+
ConcatSubOperand(VT, Ops, 1), NewMask);
56972+
}
56973+
break;
56974+
}
5695056975
case X86ISD::VBROADCAST: {
5695156976
if (!IsSplat && llvm::all_of(Ops, [](SDValue Op) {
5695256977
return Op.getOperand(0).getValueType().is128BitVector();

llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll

Lines changed: 6 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1810,40 +1810,12 @@ define <4 x double> @broadcast_v4f64_0000_from_v2i64(<2 x i64> %a0) {
18101810

18111811
; PR114959
18121812
define <4 x double> @concat_v4f64_0213_broadcasts(ptr %src) {
1813-
; AVX1OR2-LABEL: concat_v4f64_0213_broadcasts:
1814-
; AVX1OR2: # %bb.0:
1815-
; AVX1OR2-NEXT: vmovups (%rdi), %xmm0
1816-
; AVX1OR2-NEXT: vmovups 32(%rdi), %xmm1
1817-
; AVX1OR2-NEXT: vmovlhps {{.*#+}} xmm2 = xmm0[0],xmm1[0]
1818-
; AVX1OR2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1819-
; AVX1OR2-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
1820-
; AVX1OR2-NEXT: retq
1821-
;
1822-
; AVX512VL-SLOW-LABEL: concat_v4f64_0213_broadcasts:
1823-
; AVX512VL-SLOW: # %bb.0:
1824-
; AVX512VL-SLOW-NEXT: vmovups (%rdi), %xmm0
1825-
; AVX512VL-SLOW-NEXT: vmovups 32(%rdi), %xmm1
1826-
; AVX512VL-SLOW-NEXT: vmovlhps {{.*#+}} xmm2 = xmm0[0],xmm1[0]
1827-
; AVX512VL-SLOW-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1828-
; AVX512VL-SLOW-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
1829-
; AVX512VL-SLOW-NEXT: retq
1830-
;
1831-
; AVX512VL-FAST-ALL-LABEL: concat_v4f64_0213_broadcasts:
1832-
; AVX512VL-FAST-ALL: # %bb.0:
1833-
; AVX512VL-FAST-ALL-NEXT: vmovupd (%rdi), %xmm1
1834-
; AVX512VL-FAST-ALL-NEXT: vmovupd 32(%rdi), %xmm2
1835-
; AVX512VL-FAST-ALL-NEXT: vmovapd {{.*#+}} ymm0 = [0,4,1,5]
1836-
; AVX512VL-FAST-ALL-NEXT: vpermi2pd %ymm2, %ymm1, %ymm0
1837-
; AVX512VL-FAST-ALL-NEXT: retq
1838-
;
1839-
; AVX512VL-FAST-PERLANE-LABEL: concat_v4f64_0213_broadcasts:
1840-
; AVX512VL-FAST-PERLANE: # %bb.0:
1841-
; AVX512VL-FAST-PERLANE-NEXT: vmovups (%rdi), %xmm0
1842-
; AVX512VL-FAST-PERLANE-NEXT: vmovups 32(%rdi), %xmm1
1843-
; AVX512VL-FAST-PERLANE-NEXT: vmovlhps {{.*#+}} xmm2 = xmm0[0],xmm1[0]
1844-
; AVX512VL-FAST-PERLANE-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1845-
; AVX512VL-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
1846-
; AVX512VL-FAST-PERLANE-NEXT: retq
1813+
; ALL-LABEL: concat_v4f64_0213_broadcasts:
1814+
; ALL: # %bb.0:
1815+
; ALL-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
1816+
; ALL-NEXT: vbroadcastf128 {{.*#+}} ymm1 = mem[0,1,0,1]
1817+
; ALL-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[3],ymm0[3]
1818+
; ALL-NEXT: retq
18471819
%src.hi = getelementptr inbounds i8, ptr %src, i64 32
18481820
%lo = load <2 x double>, ptr %src, align 1
18491821
%hi = load <2 x double>, ptr %src.hi, align 1

0 commit comments

Comments
 (0)