Skip to content

Commit e03044f

Browse files
authored
[X86] splitVector - use collectConcatOps to find pre-split subvectors (#142774)
Don't just match ISD::CONCAT_VECTORS - using collectConcatOps brings splitVector more closely in line with isFreeToSplitVector.
1 parent: 89cea0d; commit: e03044f

11 files changed

+551
-524
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -4349,13 +4349,13 @@ static std::pair<SDValue, SDValue> splitVector(SDValue Op, SelectionDAG &DAG,
43494349
assert((NumElems % 2) == 0 && (SizeInBits % 2) == 0 &&
43504350
"Can't split odd sized vector");
43514351

4352-
if (Op.getOpcode() == ISD::CONCAT_VECTORS) {
4353-
assert((Op.getNumOperands() % 2) == 0 &&
4354-
"Can't split odd sized vector concat");
4355-
unsigned HalfOps = Op.getNumOperands() / 2;
4352+
SmallVector<SDValue, 4> SubOps;
4353+
if (collectConcatOps(Op.getNode(), SubOps, DAG)) {
4354+
assert((SubOps.size() % 2) == 0 && "Can't split odd sized vector concat");
4355+
unsigned HalfOps = SubOps.size() / 2;
43564356
EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
4357-
SmallVector<SDValue, 2> LoOps(Op->op_begin(), Op->op_begin() + HalfOps);
4358-
SmallVector<SDValue, 2> HiOps(Op->op_begin() + HalfOps, Op->op_end());
4357+
SmallVector<SDValue, 2> LoOps(SubOps.begin(), SubOps.begin() + HalfOps);
4358+
SmallVector<SDValue, 2> HiOps(SubOps.begin() + HalfOps, SubOps.end());
43594359
SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, LoOps);
43604360
SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, HiOps);
43614361
return std::make_pair(Lo, Hi);

llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-2.ll

Lines changed: 40 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -1105,19 +1105,18 @@ define void @load_i16_stride2_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1) no
11051105
; AVX512-VL-NEXT: vmovdqa64 64(%rdi), %zmm1
11061106
; AVX512-VL-NEXT: vmovdqa64 128(%rdi), %zmm2
11071107
; AVX512-VL-NEXT: vmovdqa64 192(%rdi), %zmm3
1108-
; AVX512-VL-NEXT: vpmovdw %zmm1, %ymm4
1109-
; AVX512-VL-NEXT: vpsrld $16, %zmm1, %zmm1
1110-
; AVX512-VL-NEXT: vpsrld $16, %zmm0, %zmm5
1111-
; AVX512-VL-NEXT: vpsrld $16, %zmm3, %zmm6
1112-
; AVX512-VL-NEXT: vpsrld $16, %zmm2, %zmm7
1108+
; AVX512-VL-NEXT: vpsrld $16, %zmm0, %zmm4
1109+
; AVX512-VL-NEXT: vpsrld $16, %zmm1, %zmm5
1110+
; AVX512-VL-NEXT: vpsrld $16, %zmm2, %zmm6
1111+
; AVX512-VL-NEXT: vpsrld $16, %zmm3, %zmm7
1112+
; AVX512-VL-NEXT: vpmovdw %zmm1, 32(%rsi)
11131113
; AVX512-VL-NEXT: vpmovdw %zmm0, (%rsi)
1114-
; AVX512-VL-NEXT: vmovdqa %ymm4, 32(%rsi)
1115-
; AVX512-VL-NEXT: vpmovdw %zmm2, 64(%rsi)
11161114
; AVX512-VL-NEXT: vpmovdw %zmm3, 96(%rsi)
1117-
; AVX512-VL-NEXT: vpmovdw %zmm7, 64(%rdx)
1118-
; AVX512-VL-NEXT: vpmovdw %zmm6, 96(%rdx)
1119-
; AVX512-VL-NEXT: vpmovdw %zmm5, (%rdx)
1120-
; AVX512-VL-NEXT: vpmovdw %zmm1, 32(%rdx)
1115+
; AVX512-VL-NEXT: vpmovdw %zmm2, 64(%rsi)
1116+
; AVX512-VL-NEXT: vpmovdw %zmm7, 96(%rdx)
1117+
; AVX512-VL-NEXT: vpmovdw %zmm6, 64(%rdx)
1118+
; AVX512-VL-NEXT: vpmovdw %zmm5, 32(%rdx)
1119+
; AVX512-VL-NEXT: vpmovdw %zmm4, (%rdx)
11211120
; AVX512-VL-NEXT: vzeroupper
11221121
; AVX512-VL-NEXT: retq
11231122
;
@@ -1127,19 +1126,18 @@ define void @load_i16_stride2_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1) no
11271126
; AVX512-FCP-NEXT: vmovdqa64 64(%rdi), %zmm1
11281127
; AVX512-FCP-NEXT: vmovdqa64 128(%rdi), %zmm2
11291128
; AVX512-FCP-NEXT: vmovdqa64 192(%rdi), %zmm3
1130-
; AVX512-FCP-NEXT: vpmovdw %zmm1, %ymm4
1131-
; AVX512-FCP-NEXT: vpsrld $16, %zmm1, %zmm1
1132-
; AVX512-FCP-NEXT: vpsrld $16, %zmm0, %zmm5
1133-
; AVX512-FCP-NEXT: vpsrld $16, %zmm3, %zmm6
1134-
; AVX512-FCP-NEXT: vpsrld $16, %zmm2, %zmm7
1129+
; AVX512-FCP-NEXT: vpsrld $16, %zmm0, %zmm4
1130+
; AVX512-FCP-NEXT: vpsrld $16, %zmm1, %zmm5
1131+
; AVX512-FCP-NEXT: vpsrld $16, %zmm2, %zmm6
1132+
; AVX512-FCP-NEXT: vpsrld $16, %zmm3, %zmm7
1133+
; AVX512-FCP-NEXT: vpmovdw %zmm1, 32(%rsi)
11351134
; AVX512-FCP-NEXT: vpmovdw %zmm0, (%rsi)
1136-
; AVX512-FCP-NEXT: vmovdqa %ymm4, 32(%rsi)
1137-
; AVX512-FCP-NEXT: vpmovdw %zmm2, 64(%rsi)
11381135
; AVX512-FCP-NEXT: vpmovdw %zmm3, 96(%rsi)
1139-
; AVX512-FCP-NEXT: vpmovdw %zmm7, 64(%rdx)
1140-
; AVX512-FCP-NEXT: vpmovdw %zmm6, 96(%rdx)
1141-
; AVX512-FCP-NEXT: vpmovdw %zmm5, (%rdx)
1142-
; AVX512-FCP-NEXT: vpmovdw %zmm1, 32(%rdx)
1136+
; AVX512-FCP-NEXT: vpmovdw %zmm2, 64(%rsi)
1137+
; AVX512-FCP-NEXT: vpmovdw %zmm7, 96(%rdx)
1138+
; AVX512-FCP-NEXT: vpmovdw %zmm6, 64(%rdx)
1139+
; AVX512-FCP-NEXT: vpmovdw %zmm5, 32(%rdx)
1140+
; AVX512-FCP-NEXT: vpmovdw %zmm4, (%rdx)
11431141
; AVX512-FCP-NEXT: vzeroupper
11441142
; AVX512-FCP-NEXT: retq
11451143
;
@@ -1149,19 +1147,18 @@ define void @load_i16_stride2_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1) no
11491147
; AVX512DQ-NEXT: vmovdqa64 64(%rdi), %zmm1
11501148
; AVX512DQ-NEXT: vmovdqa64 128(%rdi), %zmm2
11511149
; AVX512DQ-NEXT: vmovdqa64 192(%rdi), %zmm3
1152-
; AVX512DQ-NEXT: vpmovdw %zmm1, %ymm4
1153-
; AVX512DQ-NEXT: vpsrld $16, %zmm1, %zmm1
1154-
; AVX512DQ-NEXT: vpsrld $16, %zmm0, %zmm5
1155-
; AVX512DQ-NEXT: vpsrld $16, %zmm3, %zmm6
1156-
; AVX512DQ-NEXT: vpsrld $16, %zmm2, %zmm7
1150+
; AVX512DQ-NEXT: vpsrld $16, %zmm0, %zmm4
1151+
; AVX512DQ-NEXT: vpsrld $16, %zmm1, %zmm5
1152+
; AVX512DQ-NEXT: vpsrld $16, %zmm2, %zmm6
1153+
; AVX512DQ-NEXT: vpsrld $16, %zmm3, %zmm7
1154+
; AVX512DQ-NEXT: vpmovdw %zmm1, 32(%rsi)
11571155
; AVX512DQ-NEXT: vpmovdw %zmm0, (%rsi)
1158-
; AVX512DQ-NEXT: vmovdqa %ymm4, 32(%rsi)
1159-
; AVX512DQ-NEXT: vpmovdw %zmm2, 64(%rsi)
11601156
; AVX512DQ-NEXT: vpmovdw %zmm3, 96(%rsi)
1161-
; AVX512DQ-NEXT: vpmovdw %zmm7, 64(%rdx)
1162-
; AVX512DQ-NEXT: vpmovdw %zmm6, 96(%rdx)
1163-
; AVX512DQ-NEXT: vpmovdw %zmm5, (%rdx)
1164-
; AVX512DQ-NEXT: vpmovdw %zmm1, 32(%rdx)
1157+
; AVX512DQ-NEXT: vpmovdw %zmm2, 64(%rsi)
1158+
; AVX512DQ-NEXT: vpmovdw %zmm7, 96(%rdx)
1159+
; AVX512DQ-NEXT: vpmovdw %zmm6, 64(%rdx)
1160+
; AVX512DQ-NEXT: vpmovdw %zmm5, 32(%rdx)
1161+
; AVX512DQ-NEXT: vpmovdw %zmm4, (%rdx)
11651162
; AVX512DQ-NEXT: vzeroupper
11661163
; AVX512DQ-NEXT: retq
11671164
;
@@ -1171,19 +1168,18 @@ define void @load_i16_stride2_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1) no
11711168
; AVX512DQ-FCP-NEXT: vmovdqa64 64(%rdi), %zmm1
11721169
; AVX512DQ-FCP-NEXT: vmovdqa64 128(%rdi), %zmm2
11731170
; AVX512DQ-FCP-NEXT: vmovdqa64 192(%rdi), %zmm3
1174-
; AVX512DQ-FCP-NEXT: vpmovdw %zmm1, %ymm4
1175-
; AVX512DQ-FCP-NEXT: vpsrld $16, %zmm1, %zmm1
1176-
; AVX512DQ-FCP-NEXT: vpsrld $16, %zmm0, %zmm5
1177-
; AVX512DQ-FCP-NEXT: vpsrld $16, %zmm3, %zmm6
1178-
; AVX512DQ-FCP-NEXT: vpsrld $16, %zmm2, %zmm7
1171+
; AVX512DQ-FCP-NEXT: vpsrld $16, %zmm0, %zmm4
1172+
; AVX512DQ-FCP-NEXT: vpsrld $16, %zmm1, %zmm5
1173+
; AVX512DQ-FCP-NEXT: vpsrld $16, %zmm2, %zmm6
1174+
; AVX512DQ-FCP-NEXT: vpsrld $16, %zmm3, %zmm7
1175+
; AVX512DQ-FCP-NEXT: vpmovdw %zmm1, 32(%rsi)
11791176
; AVX512DQ-FCP-NEXT: vpmovdw %zmm0, (%rsi)
1180-
; AVX512DQ-FCP-NEXT: vmovdqa %ymm4, 32(%rsi)
1181-
; AVX512DQ-FCP-NEXT: vpmovdw %zmm2, 64(%rsi)
11821177
; AVX512DQ-FCP-NEXT: vpmovdw %zmm3, 96(%rsi)
1183-
; AVX512DQ-FCP-NEXT: vpmovdw %zmm7, 64(%rdx)
1184-
; AVX512DQ-FCP-NEXT: vpmovdw %zmm6, 96(%rdx)
1185-
; AVX512DQ-FCP-NEXT: vpmovdw %zmm5, (%rdx)
1186-
; AVX512DQ-FCP-NEXT: vpmovdw %zmm1, 32(%rdx)
1178+
; AVX512DQ-FCP-NEXT: vpmovdw %zmm2, 64(%rsi)
1179+
; AVX512DQ-FCP-NEXT: vpmovdw %zmm7, 96(%rdx)
1180+
; AVX512DQ-FCP-NEXT: vpmovdw %zmm6, 64(%rdx)
1181+
; AVX512DQ-FCP-NEXT: vpmovdw %zmm5, 32(%rdx)
1182+
; AVX512DQ-FCP-NEXT: vpmovdw %zmm4, (%rdx)
11871183
; AVX512DQ-FCP-NEXT: vzeroupper
11881184
; AVX512DQ-FCP-NEXT: retq
11891185
;

0 commit comments

Comments
 (0)