Skip to content

Commit 5dba4ed

Browse files
committed
[X86][AVX] Combine INSERT_SUBVECTOR(SRC0, EXTRACT_SUBVECTOR(SRC1)) as shuffle
Subvector shuffling often ends up as insert/extract subvector.

llvm-svn: 364090
1 parent e4956d2 commit 5dba4ed

File tree

6 files changed

+148
-86
lines changed

6 files changed

+148
-86
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6706,20 +6706,33 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
67066706
return true;
67076707
}
67086708
case ISD::INSERT_SUBVECTOR: {
6709-
// Handle INSERT_SUBVECTOR(SRC0, SHUFFLE(SRC1)).
67106709
SDValue Src = N.getOperand(0);
67116710
SDValue Sub = N.getOperand(1);
67126711
EVT SubVT = Sub.getValueType();
67136712
unsigned NumSubElts = SubVT.getVectorNumElements();
67146713
if (!isa<ConstantSDNode>(N.getOperand(2)) ||
67156714
!N->isOnlyUserOf(Sub.getNode()))
67166715
return false;
6716+
int InsertIdx = N.getConstantOperandVal(2);
6717+
// Handle INSERT_SUBVECTOR(SRC0, EXTRACT_SUBVECTOR(SRC1)).
6718+
if (Sub.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
6719+
Sub.getOperand(0).getValueType() == VT &&
6720+
isa<ConstantSDNode>(Sub.getOperand(1))) {
6721+
int ExtractIdx = Sub.getConstantOperandVal(1);
6722+
for (int i = 0; i != (int)NumElts; ++i)
6723+
Mask.push_back(i);
6724+
for (int i = 0; i != (int)NumSubElts; ++i)
6725+
Mask[InsertIdx + i] = NumElts + ExtractIdx + i;
6726+
Ops.push_back(Src);
6727+
Ops.push_back(Sub.getOperand(0));
6728+
return true;
6729+
}
6730+
// Handle INSERT_SUBVECTOR(SRC0, SHUFFLE(SRC1)).
67176731
SmallVector<int, 64> SubMask;
67186732
SmallVector<SDValue, 2> SubInputs;
67196733
if (!resolveTargetShuffleInputs(peekThroughOneUseBitcasts(Sub), SubInputs,
67206734
SubMask, DAG))
67216735
return false;
6722-
int InsertIdx = N.getConstantOperandVal(2);
67236736
if (SubMask.size() != NumSubElts) {
67246737
assert(((SubMask.size() % NumSubElts) == 0 ||
67256738
(NumSubElts % SubMask.size()) == 0) && "Illegal submask scale");

llvm/test/CodeGen/X86/var-permute-128.ll

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1027,36 +1027,36 @@ define <16 x i8> @var_shuffle_v16i8_from_v32i8_v16i8(<32 x i8> %v, <16 x i8> %in
10271027
; AVX2-LABEL: var_shuffle_v16i8_from_v32i8_v16i8:
10281028
; AVX2: # %bb.0:
10291029
; AVX2-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
1030-
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
1031-
; AVX2-NEXT: vpshufb %xmm1, %xmm2, %xmm2
1030+
; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm2
1031+
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
10321032
; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm0
10331033
; AVX2-NEXT: vpcmpgtb {{.*}}(%rip), %ymm1, %ymm1
1034-
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
1034+
; AVX2-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
10351035
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
10361036
; AVX2-NEXT: vzeroupper
10371037
; AVX2-NEXT: retq
10381038
;
10391039
; AVX512-LABEL: var_shuffle_v16i8_from_v32i8_v16i8:
10401040
; AVX512: # %bb.0:
10411041
; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
1042-
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm2
1043-
; AVX512-NEXT: vpshufb %xmm1, %xmm2, %xmm2
1042+
; AVX512-NEXT: vpshufb %xmm1, %xmm0, %xmm2
1043+
; AVX512-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
10441044
; AVX512-NEXT: vpshufb %xmm1, %xmm0, %xmm0
10451045
; AVX512-NEXT: vpcmpgtb {{.*}}(%rip), %ymm1, %ymm1
1046-
; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
1046+
; AVX512-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
10471047
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
10481048
; AVX512-NEXT: vzeroupper
10491049
; AVX512-NEXT: retq
10501050
;
10511051
; AVX512VLBW-LABEL: var_shuffle_v16i8_from_v32i8_v16i8:
10521052
; AVX512VLBW: # %bb.0:
10531053
; AVX512VLBW-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
1054-
; AVX512VLBW-NEXT: vextracti128 $1, %ymm0, %xmm2
1055-
; AVX512VLBW-NEXT: vpshufb %xmm1, %xmm2, %xmm2
1054+
; AVX512VLBW-NEXT: vpshufb %xmm1, %xmm0, %xmm2
1055+
; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
10561056
; AVX512VLBW-NEXT: vpshufb %xmm1, %xmm0, %xmm0
10571057
; AVX512VLBW-NEXT: vpcmpgtb {{.*}}(%rip), %ymm1, %k1
1058-
; AVX512VLBW-NEXT: vmovdqu8 %ymm2, %ymm0 {%k1}
1059-
; AVX512VLBW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1058+
; AVX512VLBW-NEXT: vmovdqu8 %ymm0, %ymm2 {%k1}
1059+
; AVX512VLBW-NEXT: vmovdqa %xmm2, %xmm0
10601060
; AVX512VLBW-NEXT: vzeroupper
10611061
; AVX512VLBW-NEXT: retq
10621062
;

llvm/test/CodeGen/X86/var-permute-256.ll

Lines changed: 23 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -170,39 +170,36 @@ define <16 x i16> @var_shuffle_v16i16(<16 x i16> %v, <16 x i16> %indices) nounwi
170170
; AVX2: # %bb.0:
171171
; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
172172
; AVX2-NEXT: vpaddw {{.*}}(%rip), %ymm1, %ymm1
173-
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2
173+
; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,3,2,3]
174174
; AVX2-NEXT: vpshufb %ymm1, %ymm2, %ymm2
175-
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
176-
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3],ymm0[4,5,6,7]
175+
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
177176
; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm0
178177
; AVX2-NEXT: vpcmpgtb {{.*}}(%rip), %ymm1, %ymm1
179-
; AVX2-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
178+
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
180179
; AVX2-NEXT: retq
181180
;
182181
; AVX512-LABEL: var_shuffle_v16i16:
183182
; AVX512: # %bb.0:
184183
; AVX512-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
185184
; AVX512-NEXT: vpaddw {{.*}}(%rip), %ymm1, %ymm1
186-
; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2
185+
; AVX512-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,3,2,3]
187186
; AVX512-NEXT: vpshufb %ymm1, %ymm2, %ymm2
188-
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm3
189-
; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3],ymm0[4,5,6,7]
187+
; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
190188
; AVX512-NEXT: vpshufb %ymm1, %ymm0, %ymm0
191189
; AVX512-NEXT: vpcmpgtb {{.*}}(%rip), %ymm1, %ymm1
192-
; AVX512-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
190+
; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
193191
; AVX512-NEXT: retq
194192
;
195193
; AVX512VLDQ-LABEL: var_shuffle_v16i16:
196194
; AVX512VLDQ: # %bb.0:
197195
; AVX512VLDQ-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
198196
; AVX512VLDQ-NEXT: vpaddw {{.*}}(%rip), %ymm1, %ymm1
199-
; AVX512VLDQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2
197+
; AVX512VLDQ-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,3,2,3]
200198
; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm2, %ymm2
201-
; AVX512VLDQ-NEXT: vextracti128 $1, %ymm0, %xmm3
202-
; AVX512VLDQ-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3],ymm0[4,5,6,7]
199+
; AVX512VLDQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
203200
; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm0
204201
; AVX512VLDQ-NEXT: vpcmpgtb {{.*}}(%rip), %ymm1, %ymm1
205-
; AVX512VLDQ-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
202+
; AVX512VLDQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
206203
; AVX512VLDQ-NEXT: retq
207204
;
208205
; AVX512VLBW-LABEL: var_shuffle_v16i16:
@@ -293,45 +290,42 @@ define <32 x i8> @var_shuffle_v32i8(<32 x i8> %v, <32 x i8> %indices) nounwind {
293290
;
294291
; AVX2-LABEL: var_shuffle_v32i8:
295292
; AVX2: # %bb.0:
296-
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2
293+
; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,3,2,3]
297294
; AVX2-NEXT: vpshufb %ymm1, %ymm2, %ymm2
298-
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
299-
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3],ymm0[4,5,6,7]
295+
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
300296
; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm0
301297
; AVX2-NEXT: vpcmpgtb {{.*}}(%rip), %ymm1, %ymm1
302-
; AVX2-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
298+
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
303299
; AVX2-NEXT: retq
304300
;
305301
; AVX512-LABEL: var_shuffle_v32i8:
306302
; AVX512: # %bb.0:
307-
; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2
303+
; AVX512-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,3,2,3]
308304
; AVX512-NEXT: vpshufb %ymm1, %ymm2, %ymm2
309-
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm3
310-
; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3],ymm0[4,5,6,7]
305+
; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
311306
; AVX512-NEXT: vpshufb %ymm1, %ymm0, %ymm0
312307
; AVX512-NEXT: vpcmpgtb {{.*}}(%rip), %ymm1, %ymm1
313-
; AVX512-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
308+
; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
314309
; AVX512-NEXT: retq
315310
;
316311
; AVX512VLDQ-LABEL: var_shuffle_v32i8:
317312
; AVX512VLDQ: # %bb.0:
318-
; AVX512VLDQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2
313+
; AVX512VLDQ-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,3,2,3]
319314
; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm2, %ymm2
320-
; AVX512VLDQ-NEXT: vextracti128 $1, %ymm0, %xmm3
321-
; AVX512VLDQ-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3],ymm0[4,5,6,7]
315+
; AVX512VLDQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
322316
; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm0
323317
; AVX512VLDQ-NEXT: vpcmpgtb {{.*}}(%rip), %ymm1, %ymm1
324-
; AVX512VLDQ-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
318+
; AVX512VLDQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
325319
; AVX512VLDQ-NEXT: retq
326320
;
327321
; AVX512VLBW-LABEL: var_shuffle_v32i8:
328322
; AVX512VLBW: # %bb.0:
329-
; AVX512VLBW-NEXT: vextracti128 $1, %ymm0, %xmm2
330-
; AVX512VLBW-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm0[4,5,6,7]
331-
; AVX512VLBW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
332-
; AVX512VLBW-NEXT: vpshufb %ymm1, %ymm0, %ymm0
323+
; AVX512VLBW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2
324+
; AVX512VLBW-NEXT: vpshufb %ymm1, %ymm2, %ymm2
325+
; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
333326
; AVX512VLBW-NEXT: vpcmpgtb {{.*}}(%rip), %ymm1, %k1
334-
; AVX512VLBW-NEXT: vpshufb %ymm1, %ymm2, %ymm0 {%k1}
327+
; AVX512VLBW-NEXT: vpshufb %ymm1, %ymm0, %ymm2 {%k1}
328+
; AVX512VLBW-NEXT: vmovdqa %ymm2, %ymm0
335329
; AVX512VLBW-NEXT: retq
336330
;
337331
; VLVBMI-LABEL: var_shuffle_v32i8:

llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll

Lines changed: 33 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -447,11 +447,23 @@ define <4 x double> @shuffle_v4f64_0167(<4 x double> %a, <4 x double> %b) {
447447
}
448448

449449
define <4 x double> @shuffle_v4f64_1054(<4 x double> %a, <4 x double> %b) {
450-
; ALL-LABEL: shuffle_v4f64_1054:
451-
; ALL: # %bb.0:
452-
; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
453-
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
454-
; ALL-NEXT: retq
450+
; AVX1OR2-LABEL: shuffle_v4f64_1054:
451+
; AVX1OR2: # %bb.0:
452+
; AVX1OR2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
453+
; AVX1OR2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
454+
; AVX1OR2-NEXT: retq
455+
;
456+
; AVX512VL-SLOW-LABEL: shuffle_v4f64_1054:
457+
; AVX512VL-SLOW: # %bb.0:
458+
; AVX512VL-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
459+
; AVX512VL-SLOW-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
460+
; AVX512VL-SLOW-NEXT: retq
461+
;
462+
; AVX512VL-FAST-LABEL: shuffle_v4f64_1054:
463+
; AVX512VL-FAST: # %bb.0:
464+
; AVX512VL-FAST-NEXT: vmovapd {{.*#+}} ymm2 = [1,0,5,4]
465+
; AVX512VL-FAST-NEXT: vpermt2pd %ymm1, %ymm2, %ymm0
466+
; AVX512VL-FAST-NEXT: retq
455467
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
456468
ret <4 x double> %shuffle
457469
}
@@ -990,19 +1002,11 @@ define <4 x i64> @shuffle_v4i64_0142(<4 x i64> %a, <4 x i64> %b) {
9901002
; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
9911003
; AVX2-NEXT: retq
9921004
;
993-
; AVX512VL-SLOW-LABEL: shuffle_v4i64_0142:
994-
; AVX512VL-SLOW: # %bb.0:
995-
; AVX512VL-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
996-
; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,2,2]
997-
; AVX512VL-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
998-
; AVX512VL-SLOW-NEXT: retq
999-
;
1000-
; AVX512VL-FAST-LABEL: shuffle_v4i64_0142:
1001-
; AVX512VL-FAST: # %bb.0:
1002-
; AVX512VL-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
1003-
; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,6,2]
1004-
; AVX512VL-FAST-NEXT: vpermt2q %ymm1, %ymm2, %ymm0
1005-
; AVX512VL-FAST-NEXT: retq
1005+
; AVX512VL-LABEL: shuffle_v4i64_0142:
1006+
; AVX512VL: # %bb.0:
1007+
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,2]
1008+
; AVX512VL-NEXT: vpermt2q %ymm1, %ymm2, %ymm0
1009+
; AVX512VL-NEXT: retq
10061010
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
10071011
ret <4 x i64> %shuffle
10081012
}
@@ -1198,11 +1202,17 @@ define <4 x i64> @shuffle_v4i64_1054(<4 x i64> %a, <4 x i64> %b) {
11981202
; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
11991203
; AVX2-NEXT: retq
12001204
;
1201-
; AVX512VL-LABEL: shuffle_v4i64_1054:
1202-
; AVX512VL: # %bb.0:
1203-
; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1204-
; AVX512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
1205-
; AVX512VL-NEXT: retq
1205+
; AVX512VL-SLOW-LABEL: shuffle_v4i64_1054:
1206+
; AVX512VL-SLOW: # %bb.0:
1207+
; AVX512VL-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1208+
; AVX512VL-SLOW-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
1209+
; AVX512VL-SLOW-NEXT: retq
1210+
;
1211+
; AVX512VL-FAST-LABEL: shuffle_v4i64_1054:
1212+
; AVX512VL-FAST: # %bb.0:
1213+
; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [1,0,5,4]
1214+
; AVX512VL-FAST-NEXT: vpermt2q %ymm1, %ymm2, %ymm0
1215+
; AVX512VL-FAST-NEXT: retq
12061216
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
12071217
ret <4 x i64> %shuffle
12081218
}

llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll

Lines changed: 52 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -935,11 +935,23 @@ define <8 x float> @shuffle_v8f32_76543210(<8 x float> %a, <8 x float> %b) {
935935
}
936936

937937
define <8 x float> @shuffle_v8f32_3210ba98(<8 x float> %a, <8 x float> %b) {
938-
; ALL-LABEL: shuffle_v8f32_3210ba98:
939-
; ALL: # %bb.0:
940-
; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
941-
; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
942-
; ALL-NEXT: retq
938+
; AVX1OR2-LABEL: shuffle_v8f32_3210ba98:
939+
; AVX1OR2: # %bb.0:
940+
; AVX1OR2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
941+
; AVX1OR2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
942+
; AVX1OR2-NEXT: retq
943+
;
944+
; AVX512VL-SLOW-LABEL: shuffle_v8f32_3210ba98:
945+
; AVX512VL-SLOW: # %bb.0:
946+
; AVX512VL-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
947+
; AVX512VL-SLOW-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
948+
; AVX512VL-SLOW-NEXT: retq
949+
;
950+
; AVX512VL-FAST-LABEL: shuffle_v8f32_3210ba98:
951+
; AVX512VL-FAST: # %bb.0:
952+
; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm2 = [3,2,1,0,11,10,9,8]
953+
; AVX512VL-FAST-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0
954+
; AVX512VL-FAST-NEXT: retq
943955
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
944956
ret <8 x float> %shuffle
945957
}
@@ -1064,11 +1076,24 @@ define <8 x float> @shuffle_v8f32_ba987654(<8 x float> %a, <8 x float> %b) {
10641076
}
10651077

10661078
define <8 x float> @shuffle_v8f32_ba983210(<8 x float> %a, <8 x float> %b) {
1067-
; ALL-LABEL: shuffle_v8f32_ba983210:
1068-
; ALL: # %bb.0:
1069-
; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1070-
; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1071-
; ALL-NEXT: retq
1079+
; AVX1OR2-LABEL: shuffle_v8f32_ba983210:
1080+
; AVX1OR2: # %bb.0:
1081+
; AVX1OR2-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1082+
; AVX1OR2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1083+
; AVX1OR2-NEXT: retq
1084+
;
1085+
; AVX512VL-SLOW-LABEL: shuffle_v8f32_ba983210:
1086+
; AVX512VL-SLOW: # %bb.0:
1087+
; AVX512VL-SLOW-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1088+
; AVX512VL-SLOW-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1089+
; AVX512VL-SLOW-NEXT: retq
1090+
;
1091+
; AVX512VL-FAST-LABEL: shuffle_v8f32_ba983210:
1092+
; AVX512VL-FAST: # %bb.0:
1093+
; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm2 = [3,2,1,0,11,10,9,8]
1094+
; AVX512VL-FAST-NEXT: vpermi2ps %ymm0, %ymm1, %ymm2
1095+
; AVX512VL-FAST-NEXT: vmovaps %ymm2, %ymm0
1096+
; AVX512VL-FAST-NEXT: retq
10721097
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0>
10731098
ret <8 x float> %shuffle
10741099
}
@@ -2240,11 +2265,23 @@ define <8 x i32> @shuffle_v8i32_76543210(<8 x i32> %a, <8 x i32> %b) {
22402265
}
22412266

22422267
define <8 x i32> @shuffle_v8i32_3210ba98(<8 x i32> %a, <8 x i32> %b) {
2243-
; ALL-LABEL: shuffle_v8i32_3210ba98:
2244-
; ALL: # %bb.0:
2245-
; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2246-
; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
2247-
; ALL-NEXT: retq
2268+
; AVX1OR2-LABEL: shuffle_v8i32_3210ba98:
2269+
; AVX1OR2: # %bb.0:
2270+
; AVX1OR2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2271+
; AVX1OR2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
2272+
; AVX1OR2-NEXT: retq
2273+
;
2274+
; AVX512VL-SLOW-LABEL: shuffle_v8i32_3210ba98:
2275+
; AVX512VL-SLOW: # %bb.0:
2276+
; AVX512VL-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2277+
; AVX512VL-SLOW-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
2278+
; AVX512VL-SLOW-NEXT: retq
2279+
;
2280+
; AVX512VL-FAST-LABEL: shuffle_v8i32_3210ba98:
2281+
; AVX512VL-FAST: # %bb.0:
2282+
; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [3,2,1,0,11,10,9,8]
2283+
; AVX512VL-FAST-NEXT: vpermt2d %ymm1, %ymm2, %ymm0
2284+
; AVX512VL-FAST-NEXT: retq
22482285
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
22492286
ret <8 x i32> %shuffle
22502287
}

llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll

Lines changed: 15 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2252,13 +2252,21 @@ define <8 x double> @shuffle_v2f64_v8f64_01010101(<2 x double> %a) {
22522252

22532253
;FIXME: compressp
22542254
define <4 x double> @test_v8f64_2346 (<8 x double> %v) {
2255-
; ALL-LABEL: test_v8f64_2346:
2256-
; ALL: # %bb.0:
2257-
; ALL-NEXT: vextractf128 $1, %ymm0, %xmm1
2258-
; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
2259-
; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,2]
2260-
; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
2261-
; ALL-NEXT: ret{{[l|q]}}
2255+
; AVX512F-LABEL: test_v8f64_2346:
2256+
; AVX512F: # %bb.0:
2257+
; AVX512F-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [2,3,4,6,2,3,4,6]
2258+
; AVX512F-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
2259+
; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
2260+
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
2261+
; AVX512F-NEXT: retq
2262+
;
2263+
; AVX512F-32-LABEL: test_v8f64_2346:
2264+
; AVX512F-32: # %bb.0:
2265+
; AVX512F-32-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [2,0,3,0,4,0,6,0,2,0,3,0,4,0,6,0]
2266+
; AVX512F-32-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
2267+
; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
2268+
; AVX512F-32-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
2269+
; AVX512F-32-NEXT: retl
22622270
%res = shufflevector <8 x double> %v, <8 x double> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 6>
22632271
ret <4 x double> %res
22642272
}

0 commit comments

Comments
 (0)