Skip to content

Commit 5f8cf33

Browse files
authored
[X86] IsElementEquivalent - add handling for X86ISD::VPERMI nodes. (#142767)
On AVX2+ targets these are often used to splat subvectors.
1 parent fdb11c1 commit 5f8cf33

6 files changed: +39 -30 lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9830,6 +9830,17 @@ static bool IsElementEquivalent(int MaskSize, SDValue Op, SDValue ExpectedOp,
98309830
return (Idx % NumMemElts) == (ExpectedIdx % NumMemElts);
98319831
}
98329832
break;
9833+
case X86ISD::VPERMI: {
9834+
if (Op == ExpectedOp && (int)VT.getVectorNumElements() == MaskSize) {
9835+
SmallVector<int, 8> Mask;
9836+
DecodeVPERMMask(MaskSize, Op.getConstantOperandVal(1), Mask);
9837+
SDValue Src = Op.getOperand(0);
9838+
return (Mask[Idx] == Mask[ExpectedIdx]) ||
9839+
IsElementEquivalent(MaskSize, Src, Src, Mask[Idx],
9840+
Mask[ExpectedIdx]);
9841+
}
9842+
break;
9843+
}
98339844
case X86ISD::HADD:
98349845
case X86ISD::HSUB:
98359846
case X86ISD::FHADD:

llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll

Lines changed: 10 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1952,7 +1952,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
19521952
; AVX2-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
19531953
; AVX2-NEXT: vmovdqa (%rdi), %xmm1
19541954
; AVX2-NEXT: vpaddb (%rsi), %xmm1, %xmm1
1955-
; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
1955+
; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
19561956
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
19571957
; AVX2-NEXT: vpaddb (%rdx), %ymm0, %ymm0
19581958
; AVX2-NEXT: vmovdqa %ymm0, (%rcx)
@@ -1965,7 +1965,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
19651965
; AVX512F-SLOW-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
19661966
; AVX512F-SLOW-NEXT: vmovdqa (%rdi), %xmm1
19671967
; AVX512F-SLOW-NEXT: vpaddb (%rsi), %xmm1, %xmm1
1968-
; AVX512F-SLOW-NEXT: vpbroadcastq %xmm1, %ymm1
1968+
; AVX512F-SLOW-NEXT: vpbroadcastd %xmm1, %ymm1
19691969
; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
19701970
; AVX512F-SLOW-NEXT: vpaddb (%rdx), %ymm0, %ymm0
19711971
; AVX512F-SLOW-NEXT: vmovdqa %ymm0, (%rcx)
@@ -1991,7 +1991,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
19911991
; AVX512DQ-SLOW-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
19921992
; AVX512DQ-SLOW-NEXT: vmovdqa (%rdi), %xmm1
19931993
; AVX512DQ-SLOW-NEXT: vpaddb (%rsi), %xmm1, %xmm1
1994-
; AVX512DQ-SLOW-NEXT: vpbroadcastq %xmm1, %ymm1
1994+
; AVX512DQ-SLOW-NEXT: vpbroadcastd %xmm1, %ymm1
19951995
; AVX512DQ-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
19961996
; AVX512DQ-SLOW-NEXT: vpaddb (%rdx), %ymm0, %ymm0
19971997
; AVX512DQ-SLOW-NEXT: vmovdqa %ymm0, (%rcx)
@@ -2016,7 +2016,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
20162016
; AVX512BW-SLOW-NEXT: vmovdqa64 (%rdi), %zmm0
20172017
; AVX512BW-SLOW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
20182018
; AVX512BW-SLOW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
2019-
; AVX512BW-SLOW-NEXT: vpbroadcastq %xmm0, %ymm0
2019+
; AVX512BW-SLOW-NEXT: vpbroadcastd %xmm0, %ymm0
20202020
; AVX512BW-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
20212021
; AVX512BW-SLOW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
20222022
; AVX512BW-SLOW-NEXT: vmovdqa64 %zmm0, (%rcx)
@@ -4209,14 +4209,13 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in.
42094209
; AVX2-NEXT: vmovdqa (%rdi), %xmm0
42104210
; AVX2-NEXT: vmovdqa 48(%rdi), %xmm1
42114211
; AVX2-NEXT: vpaddb (%rsi), %xmm0, %xmm0
4212-
; AVX2-NEXT: vpbroadcastd %xmm0, %xmm2
42134212
; AVX2-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
4214-
; AVX2-NEXT: vpbroadcastq %xmm0, %ymm0
4215-
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4,5,6,7]
4216-
; AVX2-NEXT: vpaddb (%rdx), %ymm0, %ymm0
4217-
; AVX2-NEXT: vpaddb 32(%rdx), %ymm2, %ymm1
4218-
; AVX2-NEXT: vmovdqa %ymm0, (%rcx)
4219-
; AVX2-NEXT: vmovdqa %ymm1, 32(%rcx)
4213+
; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
4214+
; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4,5,6,7]
4215+
; AVX2-NEXT: vpaddb (%rdx), %ymm1, %ymm1
4216+
; AVX2-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
4217+
; AVX2-NEXT: vmovdqa %ymm0, 32(%rcx)
4218+
; AVX2-NEXT: vmovdqa %ymm1, (%rcx)
42204219
; AVX2-NEXT: vzeroupper
42214220
; AVX2-NEXT: retq
42224221
;

llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1569,7 +1569,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
15691569
;
15701570
; AVX512F-LABEL: vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4:
15711571
; AVX512F: # %bb.0:
1572-
; AVX512F-NEXT: vpbroadcastq (%rdi), %ymm0
1572+
; AVX512F-NEXT: vpbroadcastd (%rdi), %ymm0
15731573
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],mem[1],ymm0[2],mem[3],ymm0[4],mem[5],ymm0[6],mem[7]
15741574
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
15751575
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
@@ -1578,7 +1578,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
15781578
;
15791579
; AVX512DQ-LABEL: vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4:
15801580
; AVX512DQ: # %bb.0:
1581-
; AVX512DQ-NEXT: vpbroadcastq (%rdi), %ymm0
1581+
; AVX512DQ-NEXT: vpbroadcastd (%rdi), %ymm0
15821582
; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],mem[1],ymm0[2],mem[3],ymm0[4],mem[5],ymm0[6],mem[7]
15831583
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
15841584
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
@@ -1587,7 +1587,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
15871587
;
15881588
; AVX512BW-LABEL: vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4:
15891589
; AVX512BW: # %bb.0:
1590-
; AVX512BW-NEXT: vpbroadcastq (%rdi), %ymm0
1590+
; AVX512BW-NEXT: vpbroadcastd (%rdi), %ymm0
15911591
; AVX512BW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],mem[1],ymm0[2],mem[3],ymm0[4],mem[5],ymm0[6],mem[7]
15921592
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
15931593
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx)

llvm/test/CodeGen/X86/copy-low-subvec-elt-to-high-subvec-elt.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -342,7 +342,7 @@ define <4 x i64> @vec256_eltty_i64_source_subvec_1_target_subvec_mask_3_binary(<
342342
; CHECK-LABEL: vec256_eltty_i64_source_subvec_1_target_subvec_mask_3_binary:
343343
; CHECK: # %bb.0:
344344
; CHECK-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,2,2,2]
345-
; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
345+
; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
346346
; CHECK-NEXT: retq
347347
%r = shufflevector <4 x i64> %x, <4 x i64> %y, <4 x i32> <i32 0, i32 6, i32 2, i32 6>
348348
ret <4 x i64> %r
@@ -597,8 +597,8 @@ define <8 x i32> @vec256_eltty_i32_source_subvec_1_target_subvec_mask_3_unary(<8
597597
define <8 x i32> @vec256_eltty_i32_source_subvec_1_target_subvec_mask_3_binary(<8 x i32> %x, <8 x i32> %y) nounwind {
598598
; CHECK-SLOW-LABEL: vec256_eltty_i32_source_subvec_1_target_subvec_mask_3_binary:
599599
; CHECK-SLOW: # %bb.0:
600-
; CHECK-SLOW-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,3,2,3]
601-
; CHECK-SLOW-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
600+
; CHECK-SLOW-NEXT: vextractf128 $1, %ymm1, %xmm1
601+
; CHECK-SLOW-NEXT: vbroadcastss %xmm1, %ymm1
602602
; CHECK-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6],ymm1[7]
603603
; CHECK-SLOW-NEXT: retq
604604
;

llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll

Lines changed: 9 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1952,7 +1952,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
19521952
; AVX2-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
19531953
; AVX2-NEXT: vmovdqa (%rdi), %xmm1
19541954
; AVX2-NEXT: vpaddb (%rsi), %xmm1, %xmm1
1955-
; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
1955+
; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
19561956
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
19571957
; AVX2-NEXT: vpaddb (%rdx), %ymm0, %ymm0
19581958
; AVX2-NEXT: vmovdqa %ymm0, (%rcx)
@@ -1965,7 +1965,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
19651965
; AVX512F-SLOW-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
19661966
; AVX512F-SLOW-NEXT: vmovdqa (%rdi), %xmm1
19671967
; AVX512F-SLOW-NEXT: vpaddb (%rsi), %xmm1, %xmm1
1968-
; AVX512F-SLOW-NEXT: vpbroadcastq %xmm1, %ymm1
1968+
; AVX512F-SLOW-NEXT: vpbroadcastd %xmm1, %ymm1
19691969
; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
19701970
; AVX512F-SLOW-NEXT: vpaddb (%rdx), %ymm0, %ymm0
19711971
; AVX512F-SLOW-NEXT: vmovdqa %ymm0, (%rcx)
@@ -1991,7 +1991,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
19911991
; AVX512DQ-SLOW-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
19921992
; AVX512DQ-SLOW-NEXT: vmovdqa (%rdi), %xmm1
19931993
; AVX512DQ-SLOW-NEXT: vpaddb (%rsi), %xmm1, %xmm1
1994-
; AVX512DQ-SLOW-NEXT: vpbroadcastq %xmm1, %ymm1
1994+
; AVX512DQ-SLOW-NEXT: vpbroadcastd %xmm1, %ymm1
19951995
; AVX512DQ-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
19961996
; AVX512DQ-SLOW-NEXT: vpaddb (%rdx), %ymm0, %ymm0
19971997
; AVX512DQ-SLOW-NEXT: vmovdqa %ymm0, (%rcx)
@@ -2016,7 +2016,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
20162016
; AVX512BW-SLOW-NEXT: vmovdqa64 (%rdi), %zmm0
20172017
; AVX512BW-SLOW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
20182018
; AVX512BW-SLOW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
2019-
; AVX512BW-SLOW-NEXT: vpbroadcastq %xmm0, %ymm0
2019+
; AVX512BW-SLOW-NEXT: vpbroadcastd %xmm0, %ymm0
20202020
; AVX512BW-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
20212021
; AVX512BW-SLOW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
20222022
; AVX512BW-SLOW-NEXT: vmovdqa64 %zmm0, (%rcx)
@@ -4649,11 +4649,10 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in.
46494649
; AVX2-SLOW-NEXT: vmovdqa 48(%rdi), %xmm1
46504650
; AVX2-SLOW-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
46514651
; AVX2-SLOW-NEXT: vpaddb (%rsi), %xmm0, %xmm0
4652-
; AVX2-SLOW-NEXT: vpbroadcastq %xmm0, %ymm2
4653-
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4,5,6,7]
4652+
; AVX2-SLOW-NEXT: vpbroadcastd %xmm0, %ymm0
4653+
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4,5,6,7]
46544654
; AVX2-SLOW-NEXT: vpxor %xmm2, %xmm2, %xmm2
46554655
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm2[5],ymm1[6],ymm2[7]
4656-
; AVX2-SLOW-NEXT: vpbroadcastd %xmm0, %xmm0
46574656
; AVX2-SLOW-NEXT: vpxor %xmm2, %xmm2, %xmm2
46584657
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
46594658
; AVX2-SLOW-NEXT: vpaddb (%rdx), %ymm1, %ymm1
@@ -4669,7 +4668,7 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in.
46694668
; AVX2-FAST-PERLANE-NEXT: vmovdqa 48(%rdi), %xmm1
46704669
; AVX2-FAST-PERLANE-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
46714670
; AVX2-FAST-PERLANE-NEXT: vpaddb (%rsi), %xmm0, %xmm0
4672-
; AVX2-FAST-PERLANE-NEXT: vpbroadcastq %xmm0, %ymm2
4671+
; AVX2-FAST-PERLANE-NEXT: vpbroadcastd %xmm0, %ymm2
46734672
; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4,5,6,7]
46744673
; AVX2-FAST-PERLANE-NEXT: vpxor %xmm2, %xmm2, %xmm2
46754674
; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm2[5],ymm1[6],ymm2[7]
@@ -4687,7 +4686,7 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in.
46874686
; AVX2-FAST-NEXT: vmovdqa 48(%rdi), %xmm1
46884687
; AVX2-FAST-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
46894688
; AVX2-FAST-NEXT: vpaddb (%rsi), %xmm0, %xmm0
4690-
; AVX2-FAST-NEXT: vpbroadcastq %xmm0, %ymm2
4689+
; AVX2-FAST-NEXT: vpbroadcastd %xmm0, %ymm2
46914690
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4,5,6,7]
46924691
; AVX2-FAST-NEXT: vpxor %xmm2, %xmm2, %xmm2
46934692
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm2[5],ymm1[6],ymm2[7]
@@ -6582,7 +6581,7 @@ define void @vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8(ptr %in.
65826581
; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0
65836582
; AVX2-NEXT: vmovdqa (%rdi), %xmm1
65846583
; AVX2-NEXT: vpaddb (%rsi), %xmm1, %xmm1
6585-
; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
6584+
; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
65866585
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
65876586
; AVX2-NEXT: vpaddb 32(%rdx), %ymm0, %ymm1
65886587
; AVX2-NEXT: vpaddb (%rdx), %ymm0, %ymm0

llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1569,7 +1569,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
15691569
;
15701570
; AVX512F-LABEL: vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4:
15711571
; AVX512F: # %bb.0:
1572-
; AVX512F-NEXT: vpbroadcastq (%rdi), %ymm0
1572+
; AVX512F-NEXT: vpbroadcastd (%rdi), %ymm0
15731573
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],mem[1],ymm0[2],mem[3],ymm0[4],mem[5],ymm0[6],mem[7]
15741574
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
15751575
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
@@ -1578,7 +1578,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
15781578
;
15791579
; AVX512DQ-LABEL: vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4:
15801580
; AVX512DQ: # %bb.0:
1581-
; AVX512DQ-NEXT: vpbroadcastq (%rdi), %ymm0
1581+
; AVX512DQ-NEXT: vpbroadcastd (%rdi), %ymm0
15821582
; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],mem[1],ymm0[2],mem[3],ymm0[4],mem[5],ymm0[6],mem[7]
15831583
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
15841584
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
@@ -1587,7 +1587,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
15871587
;
15881588
; AVX512BW-LABEL: vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4:
15891589
; AVX512BW: # %bb.0:
1590-
; AVX512BW-NEXT: vpbroadcastq (%rdi), %ymm0
1590+
; AVX512BW-NEXT: vpbroadcastd (%rdi), %ymm0
15911591
; AVX512BW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],mem[1],ymm0[2],mem[3],ymm0[4],mem[5],ymm0[6],mem[7]
15921592
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
15931593
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx)

0 commit comments

Comments
 (0)