Commit b2bf017
[X86] X86FixupInstTuning - prefer VPBLENDD to VPBLENDW shuffles on AVX2+ targets (#144269)
On many Intel AVX2 targets (Haswell+), VPBLENDD has notably better throughput than VPBLENDW, and the remaining Intel/AMD targets have no preference. This patch replaces VPBLENDW shuffles with VPBLENDD when the shuffle mask can be safely widened from vXi16 to vXi32 and the scheduler model doesn't consider the change a regression (I haven't found any target where it does, but we should retain the model check). Noticed while working on #142972, where VMOVSS nodes were regressing to VPBLENDW nodes during domain switching.
1 parent eddab9b

10 files changed (+89, -54)
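The core of the change is a pure immediate-mask computation: each VPBLENDD immediate bit selects one dword, which corresponds to a pair of adjacent VPBLENDW word bits, so the rewrite is only legal when every pair is 00 or 11. A minimal standalone sketch of that rule (the helper name and driver are hypothetical; the patch itself uses APInt and APIntOps::ScaleBitMask, as the diff below shows):

```cpp
#include <cassert>
#include <cstdint>

// Hypothetical helper, not the patch's actual code: widen an 8-bit VPBLENDW
// immediate to a 4-bit VPBLENDD immediate, or return -1 if some word pair is
// mixed (half a dword from each source), which VPBLENDD cannot express.
int widenBlendWMask(uint8_t MaskW) {
  int MaskD = 0;
  for (int I = 0; I != 4; ++I) {
    unsigned Pair = (MaskW >> (2 * I)) & 0x3; // two word bits per dword
    if (Pair == 0x3)
      MaskD |= 1 << I; // both words come from the second source
    else if (Pair != 0x0)
      return -1; // mixed pair: keep the VPBLENDW
  }
  return MaskD;
}

int main() {
  // vpblendw $0xFC (words 2-7 from src2) -> vpblendd $0xE (dwords 1-3),
  // the exact rewrite seen throughout the updated tests below.
  assert(widenBlendWMask(0xFC) == 0xE);
  // 0x1F splits dword 2 (word 4 from src2, word 5 from src1): rejected.
  assert(widenBlendWMask(0x1F) == -1);
  return 0;
}
```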

llvm/lib/Target/X86/X86FixupInstTuning.cpp (26 additions, 0 deletions)

```diff
@@ -242,6 +242,26 @@ bool X86FixupInstTuningPass::processInstruction(
     return ProcessUNPCKToIntDomain(NewOpc);
   };
 
+  auto ProcessBLENDWToBLENDD = [&](unsigned MovOpc, unsigned NumElts) -> bool {
+    if (!ST->hasAVX2() || !NewOpcPreferable(MovOpc))
+      return false;
+    // Convert to VPBLENDD if scaling the VPBLENDW mask down/up loses no bits.
+    APInt MaskW =
+        APInt(8, MI.getOperand(NumOperands - 1).getImm(), /*IsSigned=*/false);
+    APInt MaskD = APIntOps::ScaleBitMask(MaskW, 4, /*MatchAllBits=*/true);
+    if (MaskW != APIntOps::ScaleBitMask(MaskD, 8, /*MatchAllBits=*/true))
+      return false;
+    APInt NewMaskD = APInt::getSplat(NumElts, MaskD);
+    LLVM_DEBUG(dbgs() << "Replacing: " << MI);
+    {
+      MI.setDesc(TII->get(MovOpc));
+      MI.removeOperand(NumOperands - 1);
+      MI.addOperand(MachineOperand::CreateImm(NewMaskD.getZExtValue()));
+    }
+    LLVM_DEBUG(dbgs() << " With: " << MI);
+    return true;
+  };
+
   auto ProcessBLENDToMOV = [&](unsigned MovOpc, unsigned Mask,
                                unsigned MovImm) -> bool {
     if ((MI.getOperand(NumOperands - 1).getImm() & Mask) != MovImm)
@@ -270,6 +290,12 @@ bool X86FixupInstTuningPass::processInstruction(
     return ProcessBLENDToMOV(X86::VMOVSSrr, 0xF, 0x1) ||
            ProcessBLENDToMOV(X86::VMOVSDrr, 0xF, 0x3);
 
+  case X86::VPBLENDWrri:
+    // TODO: Add X86::VPBLENDWrmi handling
+    // TODO: Add X86::VPBLENDWYrri handling
+    // TODO: Add X86::VPBLENDWYrmi handling
+    return ProcessBLENDWToBLENDD(X86::VPBLENDDrri, 4);
+
   case X86::VPERMILPDri:
     return ProcessVPERMILPDri(X86::VSHUFPDrri);
   case X86::VPERMILPDYri:
```
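The NumElts argument (4 for the 128-bit form above, where the APInt::getSplat call is effectively a no-op) already anticipates the Y-form TODOs: VPBLENDWYrri applies its 8-bit word mask to each 128-bit lane independently, whereas VPBLENDDYrri's immediate covers all eight dwords at once. A hedged sketch of the extra step the 256-bit handling would presumably need, assuming it reuses the same lambda with NumElts = 8:

```cpp
#include "llvm/ADT/APInt.h"
#include <cassert>
#include <cstdint>

using namespace llvm;

// Hypothetical helper (the Y forms are still TODOs in this patch): replicate
// the widened per-lane 4-bit dword mask into the 8-bit VPBLENDDYrri
// immediate, e.g. 0b0111 -> 0b01110111.
uint64_t buildBlendDYImm(const APInt &MaskD) {
  assert(MaskD.getBitWidth() == 4 && "expected a per-lane dword mask");
  return APInt::getSplat(/*NewLen=*/8, MaskD).getZExtValue();
}
```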

llvm/test/CodeGen/X86/combine-or-shuffle.ll (1 addition, 1 deletion)

```diff
@@ -424,7 +424,7 @@ define <4 x i32> @test18(<4 x i32> %a, <4 x i32> %b) {
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero
 ; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX512-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; AVX512-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: retq
   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 4>
```

llvm/test/CodeGen/X86/dpbusd.ll (6 additions, 6 deletions)

```diff
@@ -317,8 +317,8 @@ define i32 @vpdpbusd_128(ptr%a, ptr%b, i32 %c, i32 %n) {
 ; AVXVNNI-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
 ; AVXVNNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
 ; AVXVNNI-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVXVNNI-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
-; AVXVNNI-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
+; AVXVNNI-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
+; AVXVNNI-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
 ; AVXVNNI-NEXT: {vex} vpdpbusd %xmm1, %xmm0, %xmm2
 ; AVXVNNI-NEXT: vmovd %xmm2, %eax
 ; AVXVNNI-NEXT: addl %edx, %eax
@@ -328,9 +328,9 @@ define i32 @vpdpbusd_128(ptr%a, ptr%b, i32 %c, i32 %n) {
 ; AVX512VNNI: # %bb.0: # %entry
 ; AVX512VNNI-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
 ; AVX512VNNI-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VNNI-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; AVX512VNNI-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; AVX512VNNI-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
-; AVX512VNNI-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3,4,5,6,7]
+; AVX512VNNI-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3]
 ; AVX512VNNI-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; AVX512VNNI-NEXT: vpdpbusd %zmm0, %zmm1, %zmm2
 ; AVX512VNNI-NEXT: vmovd %xmm2, %eax
@@ -343,8 +343,8 @@ define i32 @vpdpbusd_128(ptr%a, ptr%b, i32 %c, i32 %n) {
 ; AVX512VLVNNI-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
 ; AVX512VLVNNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
 ; AVX512VLVNNI-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX512VLVNNI-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
-; AVX512VLVNNI-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
+; AVX512VLVNNI-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
+; AVX512VLVNNI-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
 ; AVX512VLVNNI-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; AVX512VLVNNI-NEXT: vpdpbusd %xmm1, %xmm0, %xmm2
 ; AVX512VLVNNI-NEXT: vmovd %xmm2, %eax
```

llvm/test/CodeGen/X86/dpbusd_const.ll (8 additions, 8 deletions)

```diff
@@ -27,7 +27,7 @@ define i32 @mul_4xi8_zc(<4 x i8> %a, i32 %c) {
 ; AVXVNNI-LABEL: mul_4xi8_zc:
 ; AVXVNNI: # %bb.0: # %entry
 ; AVXVNNI-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVXVNNI-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; AVXVNNI-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; AVXVNNI-NEXT: {vex} vpdpbusd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
 ; AVXVNNI-NEXT: vmovd %xmm1, %eax
 ; AVXVNNI-NEXT: addl %edi, %eax
@@ -36,7 +36,7 @@ define i32 @mul_4xi8_zc(<4 x i8> %a, i32 %c) {
 ; AVX512VNNI-LABEL: mul_4xi8_zc:
 ; AVX512VNNI: # %bb.0: # %entry
 ; AVX512VNNI-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VNNI-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; AVX512VNNI-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; AVX512VNNI-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; AVX512VNNI-NEXT: vpdpbusd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1
 ; AVX512VNNI-NEXT: vmovd %xmm1, %eax
@@ -47,7 +47,7 @@ define i32 @mul_4xi8_zc(<4 x i8> %a, i32 %c) {
 ; AVX512VLVNNI-LABEL: mul_4xi8_zc:
 ; AVX512VLVNNI: # %bb.0: # %entry
 ; AVX512VLVNNI-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VLVNNI-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; AVX512VLVNNI-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; AVX512VLVNNI-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; AVX512VLVNNI-NEXT: vpdpbusd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
 ; AVX512VLVNNI-NEXT: vmovd %xmm1, %eax
@@ -67,7 +67,7 @@ define i32 @mul_4xi4_cz(<4 x i4> %a, i32 %c) {
 ; AVXVNNI-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
 ; AVXVNNI-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; AVXVNNI-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVXVNNI-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; AVXVNNI-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; AVXVNNI-NEXT: {vex} vpdpbusd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
 ; AVXVNNI-NEXT: vmovd %xmm1, %eax
 ; AVXVNNI-NEXT: addl %edi, %eax
@@ -78,7 +78,7 @@ define i32 @mul_4xi4_cz(<4 x i4> %a, i32 %c) {
 ; AVX512VNNI-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
 ; AVX512VNNI-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; AVX512VNNI-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VNNI-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; AVX512VNNI-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; AVX512VNNI-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; AVX512VNNI-NEXT: vpdpbusd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1
 ; AVX512VNNI-NEXT: vmovd %xmm1, %eax
@@ -107,7 +107,7 @@ define i32 @mul_4xi8_cs(<4 x i8> %a, i32 %c) {
 ; AVXVNNI-LABEL: mul_4xi8_cs:
 ; AVXVNNI: # %bb.0: # %entry
 ; AVXVNNI-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVXVNNI-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; AVXVNNI-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; AVXVNNI-NEXT: vmovd {{.*#+}} xmm2 = [16,1,2,255,0,0,0,0,0,0,0,0,0,0,0,0]
 ; AVXVNNI-NEXT: {vex} vpdpbusd %xmm0, %xmm2, %xmm1
 ; AVXVNNI-NEXT: vmovd %xmm1, %eax
@@ -117,7 +117,7 @@ define i32 @mul_4xi8_cs(<4 x i8> %a, i32 %c) {
 ; AVX512VNNI-LABEL: mul_4xi8_cs:
 ; AVX512VNNI: # %bb.0: # %entry
 ; AVX512VNNI-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VNNI-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; AVX512VNNI-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; AVX512VNNI-NEXT: vmovd {{.*#+}} xmm1 = [16,1,2,255,0,0,0,0,0,0,0,0,0,0,0,0]
 ; AVX512VNNI-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; AVX512VNNI-NEXT: vpdpbusd %zmm0, %zmm1, %zmm2
@@ -129,7 +129,7 @@ define i32 @mul_4xi8_cs(<4 x i8> %a, i32 %c) {
 ; AVX512VLVNNI-LABEL: mul_4xi8_cs:
 ; AVX512VLVNNI: # %bb.0: # %entry
 ; AVX512VLVNNI-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VLVNNI-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; AVX512VLVNNI-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; AVX512VLVNNI-NEXT: vmovd {{.*#+}} xmm1 = [16,1,2,255,0,0,0,0,0,0,0,0,0,0,0,0]
 ; AVX512VLVNNI-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; AVX512VLVNNI-NEXT: vpdpbusd %xmm0, %xmm1, %xmm2
```

llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll (3 additions, 3 deletions)

```diff
@@ -1014,7 +1014,7 @@ define <16 x i8> @trunc_v4i64_to_v4i8_return_v16i8(<4 x i64> %vec) nounwind {
 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
 ;
@@ -1023,7 +1023,7 @@ define <16 x i8> @trunc_v4i64_to_v4i8_return_v16i8(<4 x i64> %vec) nounwind {
 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512F-NEXT: vpmovqb %zmm0, %xmm0
 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; AVX512F-NEXT: vzeroupper
 ; AVX512F-NEXT: retq
 ;
@@ -1038,7 +1038,7 @@ define <16 x i8> @trunc_v4i64_to_v4i8_return_v16i8(<4 x i64> %vec) nounwind {
 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512BW-NEXT: vpmovqb %zmm0, %xmm0
 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512BW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; AVX512BW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; AVX512BW-NEXT: vzeroupper
 ; AVX512BW-NEXT: retq
 ;
```

llvm/test/CodeGen/X86/vector-reduce-add-mask.ll (1 addition, 1 deletion)

```diff
@@ -112,7 +112,7 @@ define i64 @test_v4i64_v4i16(<4 x i64> %a0) {
 ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; AVX512BW-NEXT: vpmovqb %zmm0, %xmm0
 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512BW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; AVX512BW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
 ; AVX512BW-NEXT: vmovq %xmm0, %rax
```

llvm/test/CodeGen/X86/vector-reduce-add-zext.ll (2 additions, 2 deletions)

```diff
@@ -231,15 +231,15 @@ define i32 @test_v4i32(<4 x i8> %a0) {
 ; AVX2-LABEL: test_v4i32:
 ; AVX2: # %bb.0:
 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT: vmovd %xmm0, %eax
 ; AVX2-NEXT: retq
 ;
 ; AVX512-LABEL: test_v4i32:
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; AVX512-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; AVX512-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vmovd %xmm0, %eax
```

llvm/test/CodeGen/X86/vector-reduce-add.ll (18 additions, 9 deletions)

```diff
@@ -1025,19 +1025,28 @@ define i8 @test_v4i8(<4 x i8> %a0) {
 ; SSE41-NEXT: # kill: def $al killed $al killed $eax
 ; SSE41-NEXT: retq
 ;
-; AVX-LABEL: test_v4i8:
-; AVX: # %bb.0:
-; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
-; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmovd %xmm0, %eax
-; AVX-NEXT: # kill: def $al killed $al killed $eax
-; AVX-NEXT: retq
+; AVX1-LABEL: test_v4i8:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: # kill: def $al killed $al killed $eax
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test_v4i8:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: # kill: def $al killed $al killed $eax
+; AVX2-NEXT: retq
 ;
 ; AVX512-LABEL: test_v4i8:
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; AVX512-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; AVX512-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vmovd %xmm0, %eax
```
