@@ -1754,10 +1754,9 @@ define void @vec256_i16_widen_to_i64_factor4_broadcast_to_v4i64_factor4(ptr %in.
1754
1754
; AVX512BW: # %bb.0:
1755
1755
; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
1756
1756
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
1757
- ; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1758
- ; AVX512BW-NEXT: vpmovsxbw {{.*#+}} ymm2 = [16,1,2,3,16,5,6,7,16,9,10,11,16,13,14,15]
1759
- ; AVX512BW-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
1760
- ; AVX512BW-NEXT: vpaddb (%rdx), %zmm2, %zmm0
1757
+ ; AVX512BW-NEXT: vpmovsxbw {{.*#+}} ymm1 = [0,17,18,19,0,21,22,23,0,25,26,27,0,29,30,31]
1758
+ ; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm0
1759
+ ; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
1761
1760
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
1762
1761
; AVX512BW-NEXT: vzeroupper
1763
1762
; AVX512BW-NEXT: retq
@@ -1870,10 +1869,9 @@ define void @vec256_i16_widen_to_i128_factor8_broadcast_to_v2i128_factor2(ptr %i
1870
1869
; AVX512BW: # %bb.0:
1871
1870
; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
1872
1871
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
1873
- ; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1874
- ; AVX512BW-NEXT: vpmovsxbw {{.*#+}} ymm2 = [16,1,2,3,4,5,6,7,16,9,10,11,12,13,14,15]
1875
- ; AVX512BW-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
1876
- ; AVX512BW-NEXT: vpaddb (%rdx), %zmm2, %zmm0
1872
+ ; AVX512BW-NEXT: vpmovsxbw {{.*#+}} ymm1 = [0,17,18,19,20,21,22,23,0,25,26,27,28,29,30,31]
1873
+ ; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm0
1874
+ ; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
1877
1875
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
1878
1876
; AVX512BW-NEXT: vzeroupper
1879
1877
; AVX512BW-NEXT: retq
@@ -3776,12 +3774,11 @@ define void @vec384_i16_widen_to_i64_factor4_broadcast_to_v6i64_factor6(ptr %in.
3776
3774
; AVX512BW: # %bb.0:
3777
3775
; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
3778
3776
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
3779
- ; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
3780
- ; AVX512BW-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [16,9,10,11,16,13,14,15,16,9,10,11,16,13,14,15]
3781
- ; AVX512BW-NEXT: # ymm2 = mem[0,1,0,1]
3782
- ; AVX512BW-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
3777
+ ; AVX512BW-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,25,26,27,0,29,30,31,0,25,26,27,0,29,30,31]
3778
+ ; AVX512BW-NEXT: # ymm1 = mem[0,1,0,1]
3779
+ ; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm1
3783
3780
; AVX512BW-NEXT: vpbroadcastw %xmm0, %ymm0
3784
- ; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
3781
+ ; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
3785
3782
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
3786
3783
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
3787
3784
; AVX512BW-NEXT: vzeroupper
@@ -3911,11 +3908,10 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
3911
3908
; AVX512BW: # %bb.0:
3912
3909
; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
3913
3910
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
3914
- ; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
3915
- ; AVX512BW-NEXT: vpmovsxbw {{.*#+}} ymm2 = [16,9,10,11,12,13,16,15,0,0,0,0,16,0,0,0]
3916
- ; AVX512BW-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
3911
+ ; AVX512BW-NEXT: vpmovsxbw {{.*#+}} ymm1 = [0,25,26,27,28,29,0,31,0,0,0,0,0,0,0,0]
3912
+ ; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm1
3917
3913
; AVX512BW-NEXT: vpbroadcastw %xmm0, %ymm0
3918
- ; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
3914
+ ; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
3919
3915
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
3920
3916
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
3921
3917
; AVX512BW-NEXT: vzeroupper
@@ -4037,11 +4033,10 @@ define void @vec384_i16_widen_to_i128_factor8_broadcast_to_v3i128_factor3(ptr %i
4037
4033
; AVX512BW: # %bb.0:
4038
4034
; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
4039
4035
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
4040
- ; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
4041
- ; AVX512BW-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [16,9,10,11,12,13,14,15,16,9,10,11,12,13,14,15]
4042
- ; AVX512BW-NEXT: # ymm2 = mem[0,1,0,1]
4043
- ; AVX512BW-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
4044
- ; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
4036
+ ; AVX512BW-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,25,26,27,28,29,30,31,0,25,26,27,28,29,30,31]
4037
+ ; AVX512BW-NEXT: # ymm1 = mem[0,1,0,1]
4038
+ ; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm1
4039
+ ; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
4045
4040
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
4046
4041
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
4047
4042
; AVX512BW-NEXT: vzeroupper
@@ -4151,10 +4146,9 @@ define void @vec384_i16_widen_to_i192_factor12_broadcast_to_v2i192_factor2(ptr %
4151
4146
; AVX512BW: # %bb.0:
4152
4147
; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
4153
4148
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
4154
- ; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
4155
- ; AVX512BW-NEXT: vpmovsxbw {{.*#+}} ymm2 = [16,9,10,11,12,13,14,15,0,0,0,0,16,0,0,0]
4156
- ; AVX512BW-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
4157
- ; AVX512BW-NEXT: vpaddb (%rdx), %zmm2, %zmm0
4149
+ ; AVX512BW-NEXT: vpmovsxbw {{.*#+}} ymm1 = [0,25,26,27,28,29,30,31,0,0,0,0,0,0,0,0]
4150
+ ; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm0
4151
+ ; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
4158
4152
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
4159
4153
; AVX512BW-NEXT: vzeroupper
4160
4154
; AVX512BW-NEXT: retq
0 commit comments