@@ -4895,11 +4895,10 @@ define void @vec512_i8_widen_to_i256_factor32_broadcast_to_v2i256_factor2(ptr %i
4895
4895
;
4896
4896
; AVX512BW-LABEL: vec512_i8_widen_to_i256_factor32_broadcast_to_v2i256_factor2:
4897
4897
; AVX512BW: # %bb.0:
4898
- ; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
4898
+ ; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,0,2,0,8,0,6,0]
4899
4899
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
4900
- ; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,0,10,0,0,0,14,0]
4901
- ; AVX512BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
4902
- ; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0
4900
+ ; AVX512BW-NEXT: vpermt2q (%rdi), %zmm0, %zmm1
4901
+ ; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
4903
4902
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
4904
4903
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx)
4905
4904
; AVX512BW-NEXT: vzeroupper
@@ -4997,11 +4996,10 @@ define void @vec512_i16_widen_to_i32_factor2_broadcast_to_v16i32_factor16(ptr %i
4997
4996
;
4998
4997
; AVX512BW-LABEL: vec512_i16_widen_to_i32_factor2_broadcast_to_v16i32_factor16:
4999
4998
; AVX512BW: # %bb.0:
5000
- ; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
4999
+ ; AVX512BW-NEXT: vpmovsxbw {{.*#+}} zmm0 = [32,1,32,3,32,5,32,7,32,9,32,11,32,13,32,15,32,17,32,19,32,21,32,23,32,25,32,27,32,29,32,31]
5001
5000
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
5002
- ; AVX512BW-NEXT: vpmovsxbw {{.*#+}} zmm2 = [0,33,0,35,0,37,0,39,0,41,0,43,0,45,0,47,0,49,0,51,0,53,0,55,0,57,0,59,0,61,0,63]
5003
- ; AVX512BW-NEXT: vpermi2w %zmm1, %zmm0, %zmm2
5004
- ; AVX512BW-NEXT: vpaddb (%rsi), %zmm2, %zmm0
5001
+ ; AVX512BW-NEXT: vpermt2w (%rdi), %zmm0, %zmm1
5002
+ ; AVX512BW-NEXT: vpaddb (%rsi), %zmm1, %zmm0
5005
5003
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx)
5006
5004
; AVX512BW-NEXT: vzeroupper
5007
5005
; AVX512BW-NEXT: retq
@@ -5411,39 +5409,36 @@ define void @vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8(ptr %in.
5411
5409
;
5412
5410
; AVX512F-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
5413
5411
; AVX512F: # %bb.0:
5414
- ; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm0
5412
+ ; AVX512F-NEXT: vpmovsxbd {{.*#+}} zmm0 = [16,1,16,3,16,5,16,7,16,9,16,11,16,13,16,15]
5415
5413
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
5416
- ; AVX512F-NEXT: vpmovsxbd {{.*#+}} zmm2 = [0,17,0,19,0,21,0,23,0,25,0,27,0,29,0,31]
5417
- ; AVX512F-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
5418
- ; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm0
5414
+ ; AVX512F-NEXT: vpermt2d (%rdi), %zmm0, %zmm1
5415
+ ; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm0
5419
5416
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
5420
- ; AVX512F-NEXT: vpaddb (%rsi), %ymm2, %ymm1
5417
+ ; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm1
5421
5418
; AVX512F-NEXT: vmovdqa %ymm1, (%rdx)
5422
5419
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rdx)
5423
5420
; AVX512F-NEXT: vzeroupper
5424
5421
; AVX512F-NEXT: retq
5425
5422
;
5426
5423
; AVX512DQ-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
5427
5424
; AVX512DQ: # %bb.0:
5428
- ; AVX512DQ-NEXT: vmovdqa64 (%rdi), %zmm0
5425
+ ; AVX512DQ-NEXT: vpmovsxbd {{.*#+}} zmm0 = [16,1,16,3,16,5,16,7,16,9,16,11,16,13,16,15]
5429
5426
; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
5430
- ; AVX512DQ-NEXT: vpmovsxbd {{.*#+}} zmm2 = [0,17,0,19,0,21,0,23,0,25,0,27,0,29,0,31]
5431
- ; AVX512DQ-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
5432
- ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm0
5427
+ ; AVX512DQ-NEXT: vpermt2d (%rdi), %zmm0, %zmm1
5428
+ ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm0
5433
5429
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
5434
- ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm2, %ymm1
5430
+ ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm1
5435
5431
; AVX512DQ-NEXT: vmovdqa %ymm1, (%rdx)
5436
5432
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rdx)
5437
5433
; AVX512DQ-NEXT: vzeroupper
5438
5434
; AVX512DQ-NEXT: retq
5439
5435
;
5440
5436
; AVX512BW-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
5441
5437
; AVX512BW: # %bb.0:
5442
- ; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
5438
+ ; AVX512BW-NEXT: vpmovsxbd {{.*#+}} zmm0 = [16,1,16,3,16,5,16,7,16,9,16,11,16,13,16,15]
5443
5439
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
5444
- ; AVX512BW-NEXT: vpmovsxbd {{.*#+}} zmm2 = [0,17,0,19,0,21,0,23,0,25,0,27,0,29,0,31]
5445
- ; AVX512BW-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
5446
- ; AVX512BW-NEXT: vpaddb (%rsi), %zmm2, %zmm0
5440
+ ; AVX512BW-NEXT: vpermt2d (%rdi), %zmm0, %zmm1
5441
+ ; AVX512BW-NEXT: vpaddb (%rsi), %zmm1, %zmm0
5447
5442
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx)
5448
5443
; AVX512BW-NEXT: vzeroupper
5449
5444
; AVX512BW-NEXT: retq
@@ -5679,39 +5674,36 @@ define void @vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4(ptr %i
5679
5674
;
5680
5675
; AVX512F-LABEL: vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4:
5681
5676
; AVX512F: # %bb.0:
5682
- ; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm0
5677
+ ; AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,1,8,3,8,5,8,7]
5683
5678
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
5684
- ; AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,9,0,11,0,13,0,15]
5685
- ; AVX512F-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
5686
- ; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm0
5679
+ ; AVX512F-NEXT: vpermt2q (%rdi), %zmm0, %zmm1
5680
+ ; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm0
5687
5681
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
5688
- ; AVX512F-NEXT: vpaddb (%rsi), %ymm2, %ymm1
5682
+ ; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm1
5689
5683
; AVX512F-NEXT: vmovdqa %ymm1, (%rdx)
5690
5684
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rdx)
5691
5685
; AVX512F-NEXT: vzeroupper
5692
5686
; AVX512F-NEXT: retq
5693
5687
;
5694
5688
; AVX512DQ-LABEL: vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4:
5695
5689
; AVX512DQ: # %bb.0:
5696
- ; AVX512DQ-NEXT: vmovdqa64 (%rdi), %zmm0
5690
+ ; AVX512DQ-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,1,8,3,8,5,8,7]
5697
5691
; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
5698
- ; AVX512DQ-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,9,0,11,0,13,0,15]
5699
- ; AVX512DQ-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
5700
- ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm0
5692
+ ; AVX512DQ-NEXT: vpermt2q (%rdi), %zmm0, %zmm1
5693
+ ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm0
5701
5694
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
5702
- ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm2, %ymm1
5695
+ ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm1
5703
5696
; AVX512DQ-NEXT: vmovdqa %ymm1, (%rdx)
5704
5697
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rdx)
5705
5698
; AVX512DQ-NEXT: vzeroupper
5706
5699
; AVX512DQ-NEXT: retq
5707
5700
;
5708
5701
; AVX512BW-LABEL: vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4:
5709
5702
; AVX512BW: # %bb.0:
5710
- ; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
5703
+ ; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,1,8,3,8,5,8,7]
5711
5704
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
5712
- ; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,9,0,11,0,13,0,15]
5713
- ; AVX512BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
5714
- ; AVX512BW-NEXT: vpaddb (%rsi), %zmm2, %zmm0
5705
+ ; AVX512BW-NEXT: vpermt2q (%rdi), %zmm0, %zmm1
5706
+ ; AVX512BW-NEXT: vpaddb (%rsi), %zmm1, %zmm0
5715
5707
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx)
5716
5708
; AVX512BW-NEXT: vzeroupper
5717
5709
; AVX512BW-NEXT: retq
@@ -5938,39 +5930,36 @@ define void @vec512_i128_widen_to_i256_factor2_broadcast_to_v2i256_factor2(ptr %
5938
5930
;
5939
5931
; AVX512F-LABEL: vec512_i128_widen_to_i256_factor2_broadcast_to_v2i256_factor2:
5940
5932
; AVX512F: # %bb.0:
5941
- ; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm0
5933
+ ; AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,9,2,3,8,9,6,7]
5942
5934
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
5943
- ; AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,1,10,11,0,1,14,15]
5944
- ; AVX512F-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
5945
- ; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm0
5935
+ ; AVX512F-NEXT: vpermt2q (%rdi), %zmm0, %zmm1
5936
+ ; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm0
5946
5937
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
5947
- ; AVX512F-NEXT: vpaddb (%rsi), %ymm2, %ymm1
5938
+ ; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm1
5948
5939
; AVX512F-NEXT: vmovdqa %ymm1, (%rdx)
5949
5940
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rdx)
5950
5941
; AVX512F-NEXT: vzeroupper
5951
5942
; AVX512F-NEXT: retq
5952
5943
;
5953
5944
; AVX512DQ-LABEL: vec512_i128_widen_to_i256_factor2_broadcast_to_v2i256_factor2:
5954
5945
; AVX512DQ: # %bb.0:
5955
- ; AVX512DQ-NEXT: vmovdqa64 (%rdi), %zmm0
5946
+ ; AVX512DQ-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,9,2,3,8,9,6,7]
5956
5947
; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
5957
- ; AVX512DQ-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,1,10,11,0,1,14,15]
5958
- ; AVX512DQ-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
5959
- ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm0
5948
+ ; AVX512DQ-NEXT: vpermt2q (%rdi), %zmm0, %zmm1
5949
+ ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm0
5960
5950
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
5961
- ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm2, %ymm1
5951
+ ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm1
5962
5952
; AVX512DQ-NEXT: vmovdqa %ymm1, (%rdx)
5963
5953
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rdx)
5964
5954
; AVX512DQ-NEXT: vzeroupper
5965
5955
; AVX512DQ-NEXT: retq
5966
5956
;
5967
5957
; AVX512BW-LABEL: vec512_i128_widen_to_i256_factor2_broadcast_to_v2i256_factor2:
5968
5958
; AVX512BW: # %bb.0:
5969
- ; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
5959
+ ; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,9,2,3,8,9,6,7]
5970
5960
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
5971
- ; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,1,10,11,0,1,14,15]
5972
- ; AVX512BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
5973
- ; AVX512BW-NEXT: vpaddb (%rsi), %zmm2, %zmm0
5961
+ ; AVX512BW-NEXT: vpermt2q (%rdi), %zmm0, %zmm1
5962
+ ; AVX512BW-NEXT: vpaddb (%rsi), %zmm1, %zmm0
5974
5963
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx)
5975
5964
; AVX512BW-NEXT: vzeroupper
5976
5965
; AVX512BW-NEXT: retq
0 commit comments