@@ -1837,7 +1837,7 @@ define void @vec384_i8_widen_to_i16_factor2_broadcast_to_v24i16_factor24(ptr %in
1837
1837
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1838
1838
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1839
1839
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1840
- ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,0 ,0,0 ]
1840
+ ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,1 ,0,1 ]
1841
1841
; SSE2-NEXT: paddb (%rsi), %xmm0
1842
1842
; SSE2-NEXT: movdqa 16(%rsi), %xmm2
1843
1843
; SSE2-NEXT: paddb %xmm1, %xmm2
@@ -1856,7 +1856,7 @@ define void @vec384_i8_widen_to_i16_factor2_broadcast_to_v24i16_factor24(ptr %in
1856
1856
; SSE42-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1857
1857
; SSE42-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1858
1858
; SSE42-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1859
- ; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,0 ,0,0 ]
1859
+ ; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,1 ,0,1 ]
1860
1860
; SSE42-NEXT: paddb (%rsi), %xmm0
1861
1861
; SSE42-NEXT: movdqa 16(%rsi), %xmm2
1862
1862
; SSE42-NEXT: paddb %xmm1, %xmm2
@@ -1875,9 +1875,9 @@ define void @vec384_i8_widen_to_i16_factor2_broadcast_to_v24i16_factor24(ptr %in
1875
1875
; AVX-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
1876
1876
; AVX-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
1877
1877
; AVX-NEXT: vpshuflw {{.*#+}} xmm2 = mem[0,0,0,0,4,5,6,7]
1878
- ; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0 ,0,0 ]
1878
+ ; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1 ,0,1 ]
1879
1879
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1880
- ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0 ,0,0 ]
1880
+ ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1 ,0,1 ]
1881
1881
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
1882
1882
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
1883
1883
; AVX-NEXT: vpaddb 32(%rsi), %xmm2, %xmm2
@@ -1954,7 +1954,7 @@ define void @vec384_i8_widen_to_i24_factor3_broadcast_to_v16i24_factor16(ptr %in
1954
1954
; SSE2-NEXT: pand %xmm1, %xmm2
1955
1955
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1956
1956
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1957
- ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0 ,0,0 ]
1957
+ ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1 ,0,1 ]
1958
1958
; SSE2-NEXT: pandn %xmm0, %xmm1
1959
1959
; SSE2-NEXT: por %xmm2, %xmm1
1960
1960
; SSE2-NEXT: paddb (%rsi), %xmm1
@@ -2174,7 +2174,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.e
2174
2174
; SSE2-NEXT: movdqa 48(%rdi), %xmm1
2175
2175
; SSE2-NEXT: pand %xmm0, %xmm1
2176
2176
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = mem[0,0,0,0,4,5,6,7]
2177
- ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0 ,0,0 ]
2177
+ ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,1 ,0,1 ]
2178
2178
; SSE2-NEXT: pandn %xmm2, %xmm0
2179
2179
; SSE2-NEXT: por %xmm1, %xmm0
2180
2180
; SSE2-NEXT: paddb (%rsi), %xmm0
@@ -2192,7 +2192,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.e
2192
2192
; SSE42-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
2193
2193
; SSE42-NEXT: palignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
2194
2194
; SSE42-NEXT: pshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
2195
- ; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0 ,0,0 ]
2195
+ ; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1 ,0,1 ]
2196
2196
; SSE42-NEXT: movdqa 16(%rsi), %xmm2
2197
2197
; SSE42-NEXT: paddb %xmm1, %xmm2
2198
2198
; SSE42-NEXT: paddb (%rsi), %xmm0
@@ -2208,9 +2208,9 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.e
2208
2208
; AVX-NEXT: vpalignr {{.*#+}} xmm1 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
2209
2209
; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
2210
2210
; AVX-NEXT: vpshuflw {{.*#+}} xmm2 = mem[0,0,0,0,4,5,6,7]
2211
- ; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0 ,0,0 ]
2211
+ ; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1 ,0,1 ]
2212
2212
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2213
- ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0 ,0,0 ]
2213
+ ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1 ,0,1 ]
2214
2214
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
2215
2215
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
2216
2216
; AVX-NEXT: vpaddb 32(%rsi), %xmm2, %xmm2
@@ -2835,7 +2835,7 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
2835
2835
; SSE2-NEXT: movdqa 48(%rdi), %xmm1
2836
2836
; SSE2-NEXT: pand %xmm0, %xmm1
2837
2837
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = mem[0,0,0,0,4,5,6,7]
2838
- ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0 ,0,0 ]
2838
+ ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,1 ,0,1 ]
2839
2839
; SSE2-NEXT: pandn %xmm2, %xmm0
2840
2840
; SSE2-NEXT: por %xmm1, %xmm0
2841
2841
; SSE2-NEXT: paddb (%rsi), %xmm0
@@ -2850,7 +2850,7 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
2850
2850
; SSE42-LABEL: vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8:
2851
2851
; SSE42: # %bb.0:
2852
2852
; SSE42-NEXT: pshuflw {{.*#+}} xmm0 = mem[0,0,0,0,4,5,6,7]
2853
- ; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0 ,0,0 ]
2853
+ ; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1 ,0,1 ]
2854
2854
; SSE42-NEXT: movdqa 48(%rdi), %xmm1
2855
2855
; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2],xmm0[3],xmm1[4,5],xmm0[6],xmm1[7]
2856
2856
; SSE42-NEXT: paddb (%rsi), %xmm1
@@ -2865,7 +2865,7 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
2865
2865
; AVX-LABEL: vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8:
2866
2866
; AVX: # %bb.0:
2867
2867
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = mem[0,0,0,0,4,5,6,7]
2868
- ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0 ,0,0 ]
2868
+ ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1 ,0,1 ]
2869
2869
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1,2],xmm0[3],mem[4,5],xmm0[6],mem[7]
2870
2870
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
2871
2871
; AVX-NEXT: vpaddb 32(%rsi), %xmm0, %xmm2
@@ -3913,7 +3913,7 @@ define void @vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32(ptr %in
3913
3913
; SSE-LABEL: vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32:
3914
3914
; SSE: # %bb.0:
3915
3915
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[0,0,0,0,4,5,6,7]
3916
- ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0 ,0,0 ]
3916
+ ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1 ,0,1 ]
3917
3917
; SSE-NEXT: movdqa 16(%rsi), %xmm1
3918
3918
; SSE-NEXT: paddb %xmm0, %xmm1
3919
3919
; SSE-NEXT: movdqa (%rsi), %xmm2
@@ -3930,7 +3930,7 @@ define void @vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32(ptr %in
3930
3930
; AVX-LABEL: vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32:
3931
3931
; AVX: # %bb.0:
3932
3932
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = mem[0,0,0,0,4,5,6,7]
3933
- ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0 ,0,0 ]
3933
+ ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1 ,0,1 ]
3934
3934
; AVX-NEXT: vpaddb 48(%rsi), %xmm0, %xmm1
3935
3935
; AVX-NEXT: vpaddb 32(%rsi), %xmm0, %xmm2
3936
3936
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm3
0 commit comments