@@ -5684,10 +5684,9 @@ define void @vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32(ptr %in
5684
5684
;
5685
5685
; AVX512BW-LABEL: vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32:
5686
5686
; AVX512BW: # %bb.0:
5687
- ; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0
5688
- ; AVX512BW-NEXT: vpaddb (%rsi), %ymm0, %ymm0
5689
- ; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
5690
- ; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
5687
+ ; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
5688
+ ; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
5689
+ ; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
5691
5690
; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0],zero,zmm0[0],zero,zmm0[0],zero,zmm0[0],zero,zmm0[0],zero,zmm0[0],zero,zmm0[0],zero,zmm0[0],zero,zmm0[16],zero,zmm0[16],zero,zmm0[16],zero,zmm0[16],zero,zmm0[16],zero,zmm0[16],zero,zmm0[16],zero,zmm0[16],zero,zmm0[32],zero,zmm0[32],zero,zmm0[32],zero,zmm0[32],zero,zmm0[32],zero,zmm0[32],zero,zmm0[32],zero,zmm0[32],zero,zmm0[48],zero,zmm0[48],zero,zmm0[48],zero,zmm0[48],zero,zmm0[48],zero,zmm0[48],zero,zmm0[48],zero,zmm0[48],zero
5692
5691
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
5693
5692
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
@@ -5797,10 +5796,9 @@ define void @vec512_i8_widen_to_i32_factor4_broadcast_to_v16i32_factor16(ptr %in
5797
5796
;
5798
5797
; AVX512BW-LABEL: vec512_i8_widen_to_i32_factor4_broadcast_to_v16i32_factor16:
5799
5798
; AVX512BW: # %bb.0:
5800
- ; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0
5801
- ; AVX512BW-NEXT: vpaddb (%rsi), %ymm0, %ymm0
5802
- ; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
5803
- ; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
5799
+ ; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
5800
+ ; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
5801
+ ; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
5804
5802
; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0],zero,zero,zero,zmm0[0],zero,zero,zero,zmm0[0],zero,zero,zero,zmm0[0],zero,zero,zero,zmm0[16],zero,zero,zero,zmm0[16],zero,zero,zero,zmm0[16],zero,zero,zero,zmm0[16],zero,zero,zero,zmm0[32],zero,zero,zero,zmm0[32],zero,zero,zero,zmm0[32],zero,zero,zero,zmm0[32],zero,zero,zero,zmm0[48],zero,zero,zero,zmm0[48],zero,zero,zero,zmm0[48],zero,zero,zero,zmm0[48],zero,zero,zero
5805
5803
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
5806
5804
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
@@ -5910,10 +5908,9 @@ define void @vec512_i8_widen_to_i64_factor8_broadcast_to_v8i64_factor8(ptr %in.v
5910
5908
;
5911
5909
; AVX512BW-LABEL: vec512_i8_widen_to_i64_factor8_broadcast_to_v8i64_factor8:
5912
5910
; AVX512BW: # %bb.0:
5913
- ; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0
5914
- ; AVX512BW-NEXT: vpaddb (%rsi), %ymm0, %ymm0
5915
- ; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
5916
- ; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
5911
+ ; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
5912
+ ; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
5913
+ ; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
5917
5914
; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0],zero,zero,zero,zero,zero,zero,zero,zmm0[0],zero,zero,zero,zero,zero,zero,zero,zmm0[16],zero,zero,zero,zero,zero,zero,zero,zmm0[16],zero,zero,zero,zero,zero,zero,zero,zmm0[32],zero,zero,zero,zero,zero,zero,zero,zmm0[32],zero,zero,zero,zero,zero,zero,zero,zmm0[48],zero,zero,zero,zero,zero,zero,zero,zmm0[48],zero,zero,zero,zero,zero,zero,zero
5918
5915
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
5919
5916
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
@@ -6004,10 +6001,9 @@ define void @vec512_i8_widen_to_i128_factor16_broadcast_to_v4i128_factor4(ptr %i
6004
6001
;
6005
6002
; AVX512BW-LABEL: vec512_i8_widen_to_i128_factor16_broadcast_to_v4i128_factor4:
6006
6003
; AVX512BW: # %bb.0:
6007
- ; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0
6008
- ; AVX512BW-NEXT: vpaddb (%rsi), %ymm0, %ymm0
6009
- ; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
6010
- ; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
6004
+ ; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
6005
+ ; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
6006
+ ; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
6011
6007
; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
6012
6008
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
6013
6009
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
@@ -6211,12 +6207,12 @@ define void @vec512_i16_widen_to_i32_factor2_broadcast_to_v16i32_factor16(ptr %i
6211
6207
;
6212
6208
; AVX512BW-LABEL: vec512_i16_widen_to_i32_factor2_broadcast_to_v16i32_factor16:
6213
6209
; AVX512BW: # %bb.0:
6214
- ; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0
6215
- ; AVX512BW-NEXT: vpaddb (%rsi), %ymm0, %ymm0
6216
- ; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
6217
- ; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
6218
- ; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,1],zero,zero,zmm0[0,1],zero,zero,zmm0[0,1],zero,zero,zmm0[0,1],zero,zero,zmm0[16,17],zero,zero,zmm0[16,17],zero,zero,zmm0[16,17],zero,zero,zmm0[16,17],zero,zero,zmm0[32,33],zero,zero,zmm0[32,33],zero,zero,zmm0[32,33],zero,zero,zmm0[32,33],zero,zero,zmm0[48,49],zero,zero,zmm0[48,49],zero,zero,zmm0[48,49],zero,zero,zmm0[48,49],zero,zero
6219
- ; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
6210
+ ; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
6211
+ ; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
6212
+ ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
6213
+ ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,33,0,35,0,37,0,39,0,41,0,43,0,45,0,47,0,49,0,51,0,53,0,55,0,57,0,59,0,61,0,63]
6214
+ ; AVX512BW-NEXT: vpermi2w %zmm1, %zmm0, %zmm2
6215
+ ; AVX512BW-NEXT: vpaddb (%rdx), %zmm2, %zmm0
6220
6216
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
6221
6217
; AVX512BW-NEXT: vzeroupper
6222
6218
; AVX512BW-NEXT: retq
@@ -6330,12 +6326,12 @@ define void @vec512_i16_widen_to_i64_factor4_broadcast_to_v8i64_factor8(ptr %in.
6330
6326
;
6331
6327
; AVX512BW-LABEL: vec512_i16_widen_to_i64_factor4_broadcast_to_v8i64_factor8:
6332
6328
; AVX512BW: # %bb.0:
6333
- ; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0
6334
- ; AVX512BW-NEXT: vpaddb (%rsi), %ymm0, %ymm0
6335
- ; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
6336
- ; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
6337
- ; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,1],zero,zero,zero,zero,zero,zero,zmm0[0,1],zero,zero,zero,zero,zero,zero,zmm0[16,17],zero,zero,zero,zero,zero,zero,zmm0[16,17],zero,zero,zero,zero,zero,zero,zmm0[32,33],zero,zero,zero,zero,zero,zero,zmm0[32,33],zero,zero,zero,zero,zero,zero,zmm0[48,49],zero,zero,zero,zero,zero,zero,zmm0[48,49],zero,zero,zero,zero,zero,zero
6338
- ; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
6329
+ ; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
6330
+ ; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
6331
+ ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
6332
+ ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,33,34,35,0,37,38,39,0,41,42,43,0,45,46,47,0,49,50,51,0,53,54,55,0,57,58,59,0,61,62,63]
6333
+ ; AVX512BW-NEXT: vpermi2w %zmm1, %zmm0, %zmm2
6334
+ ; AVX512BW-NEXT: vpaddb (%rdx), %zmm2, %zmm0
6339
6335
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
6340
6336
; AVX512BW-NEXT: vzeroupper
6341
6337
; AVX512BW-NEXT: retq
@@ -6449,12 +6445,12 @@ define void @vec512_i16_widen_to_i128_factor8_broadcast_to_v4i128_factor4(ptr %i
6449
6445
;
6450
6446
; AVX512BW-LABEL: vec512_i16_widen_to_i128_factor8_broadcast_to_v4i128_factor4:
6451
6447
; AVX512BW: # %bb.0:
6452
- ; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0
6453
- ; AVX512BW-NEXT: vpaddb (%rsi), %ymm0, %ymm0
6454
- ; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
6455
- ; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
6456
- ; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
6457
- ; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
6448
+ ; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
6449
+ ; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
6450
+ ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
6451
+ ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,33,34,35,36,37,38,39,0,41,42,43,44,45,46,47,0,49,50,51,52,53,54,55,0,57,58,59,60,61,62,63]
6452
+ ; AVX512BW-NEXT: vpermi2w %zmm1, %zmm0, %zmm2
6453
+ ; AVX512BW-NEXT: vpaddb (%rdx), %zmm2, %zmm0
6458
6454
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
6459
6455
; AVX512BW-NEXT: vzeroupper
6460
6456
; AVX512BW-NEXT: retq
0 commit comments