@@ -1560,8 +1560,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
;
; AVX2-LABEL: vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4:
; AVX2: # %bb.0:
- ; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
- ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
+ ; AVX2-NEXT: vpbroadcastd (%rdi), %ymm0
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],mem[1],ymm0[2],mem[3],ymm0[4],mem[5],ymm0[6],mem[7]
; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX2-NEXT: vmovdqa %ymm0, (%rdx)
@@ -3723,53 +3722,49 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in.
;
; AVX2-SLOW-LABEL: vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6:
; AVX2-SLOW: # %bb.0:
- ; AVX2-SLOW-NEXT: vpbroadcastd (%rdi), %xmm0
- ; AVX2-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1
- ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
- ; AVX2-SLOW-NEXT: vmovdqa 48(%rdi), %xmm1
- ; AVX2-SLOW-NEXT: vbroadcasti128 {{.*#+}} ymm2 = mem[0,1,0,1]
- ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,1,0,1,4,5,4,5]
- ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4,5,6,7]
+ ; AVX2-SLOW-NEXT: vmovdqa 48(%rdi), %xmm0
+ ; AVX2-SLOW-NEXT: vpbroadcastd (%rdi), %ymm1
+ ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4,5,6,7]
; AVX2-SLOW-NEXT: vpxor %xmm2, %xmm2, %xmm2
- ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm2[5],ymm1[6],ymm2[7]
- ; AVX2-SLOW-NEXT: vpaddb (%rsi), %ymm1, %ymm1
- ; AVX2-SLOW-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
- ; AVX2-SLOW-NEXT: vmovdqa %ymm0, 32(%rdx)
- ; AVX2-SLOW-NEXT: vmovdqa %ymm1, (%rdx)
+ ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm2[5],ymm0[6],ymm2[7]
+ ; AVX2-SLOW-NEXT: vpxor %xmm2, %xmm2, %xmm2
+ ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
+ ; AVX2-SLOW-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
+ ; AVX2-SLOW-NEXT: vpaddb (%rsi), %ymm0, %ymm0
+ ; AVX2-SLOW-NEXT: vmovdqa %ymm0, (%rdx)
+ ; AVX2-SLOW-NEXT: vmovdqa %ymm1, 32(%rdx)
; AVX2-SLOW-NEXT: vzeroupper
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-PERLANE-LABEL: vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6:
; AVX2-FAST-PERLANE: # %bb.0:
; AVX2-FAST-PERLANE-NEXT: vmovdqa (%rdi), %xmm0
; AVX2-FAST-PERLANE-NEXT: vmovdqa 48(%rdi), %xmm1
- ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3],zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero
- ; AVX2-FAST-PERLANE-NEXT: vbroadcasti128 {{.*#+}} ymm2 = mem[0,1,0,1]
- ; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,1,0,1,4,5,4,5]
- ; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4,5,6,7]
- ; AVX2-FAST-PERLANE-NEXT: vpxor %xmm2, %xmm2, %xmm2
- ; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm2[5],ymm1[6],ymm2[7]
- ; AVX2-FAST-PERLANE-NEXT: vpaddb (%rsi), %ymm1, %ymm1
- ; AVX2-FAST-PERLANE-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
- ; AVX2-FAST-PERLANE-NEXT: vmovdqa %ymm0, 32(%rdx)
- ; AVX2-FAST-PERLANE-NEXT: vmovdqa %ymm1, (%rdx)
+ ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[0,1,2,3],zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero
+ ; AVX2-FAST-PERLANE-NEXT: vpbroadcastd %xmm0, %ymm0
+ ; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4,5,6,7]
+ ; AVX2-FAST-PERLANE-NEXT: vpxor %xmm1, %xmm1, %xmm1
+ ; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5],ymm0[6],ymm1[7]
+ ; AVX2-FAST-PERLANE-NEXT: vpaddb (%rsi), %ymm0, %ymm0
+ ; AVX2-FAST-PERLANE-NEXT: vpaddb 32(%rsi), %ymm2, %ymm1
+ ; AVX2-FAST-PERLANE-NEXT: vmovdqa %ymm1, 32(%rdx)
+ ; AVX2-FAST-PERLANE-NEXT: vmovdqa %ymm0, (%rdx)
; AVX2-FAST-PERLANE-NEXT: vzeroupper
; AVX2-FAST-PERLANE-NEXT: retq
;
; AVX2-FAST-LABEL: vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vmovdqa (%rdi), %xmm0
; AVX2-FAST-NEXT: vmovdqa 48(%rdi), %xmm1
- ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3],zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero
- ; AVX2-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm2 = mem[0,1,0,1]
- ; AVX2-FAST-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,1,0,1,4,5,4,5]
- ; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4,5,6,7]
- ; AVX2-FAST-NEXT: vpxor %xmm2, %xmm2, %xmm2
- ; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm2[5],ymm1[6],ymm2[7]
- ; AVX2-FAST-NEXT: vpaddb (%rsi), %ymm1, %ymm1
- ; AVX2-FAST-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
- ; AVX2-FAST-NEXT: vmovdqa %ymm0, 32(%rdx)
- ; AVX2-FAST-NEXT: vmovdqa %ymm1, (%rdx)
+ ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[0,1,2,3],zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero
+ ; AVX2-FAST-NEXT: vpbroadcastd %xmm0, %ymm0
+ ; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4,5,6,7]
+ ; AVX2-FAST-NEXT: vpxor %xmm1, %xmm1, %xmm1
+ ; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5],ymm0[6],ymm1[7]
+ ; AVX2-FAST-NEXT: vpaddb (%rsi), %ymm0, %ymm0
+ ; AVX2-FAST-NEXT: vpaddb 32(%rsi), %ymm2, %ymm1
+ ; AVX2-FAST-NEXT: vmovdqa %ymm1, 32(%rdx)
+ ; AVX2-FAST-NEXT: vmovdqa %ymm0, (%rdx)
; AVX2-FAST-NEXT: vzeroupper
; AVX2-FAST-NEXT: retq
;
@@ -5317,40 +5312,17 @@ define void @vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8(ptr %in.
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
- ; AVX2-SLOW-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
- ; AVX2-SLOW: # %bb.0:
- ; AVX2-SLOW-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
- ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
- ; AVX2-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1
- ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
- ; AVX2-SLOW-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
- ; AVX2-SLOW-NEXT: vpaddb (%rsi), %ymm0, %ymm0
- ; AVX2-SLOW-NEXT: vmovdqa %ymm0, (%rdx)
- ; AVX2-SLOW-NEXT: vmovdqa %ymm1, 32(%rdx)
- ; AVX2-SLOW-NEXT: vzeroupper
- ; AVX2-SLOW-NEXT: retq
- ;
- ; AVX2-FAST-PERLANE-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
- ; AVX2-FAST-PERLANE: # %bb.0:
- ; AVX2-FAST-PERLANE-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
- ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3],zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,ymm0[16,17,18,19],zero,zero,zero,zero,ymm0[16,17,18,19],zero,zero,zero,zero
- ; AVX2-FAST-PERLANE-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
- ; AVX2-FAST-PERLANE-NEXT: vpaddb (%rsi), %ymm0, %ymm0
- ; AVX2-FAST-PERLANE-NEXT: vmovdqa %ymm0, (%rdx)
- ; AVX2-FAST-PERLANE-NEXT: vmovdqa %ymm1, 32(%rdx)
- ; AVX2-FAST-PERLANE-NEXT: vzeroupper
- ; AVX2-FAST-PERLANE-NEXT: retq
- ;
- ; AVX2-FAST-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
- ; AVX2-FAST: # %bb.0:
- ; AVX2-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
- ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3],zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,ymm0[16,17,18,19],zero,zero,zero,zero,ymm0[16,17,18,19],zero,zero,zero,zero
- ; AVX2-FAST-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
- ; AVX2-FAST-NEXT: vpaddb (%rsi), %ymm0, %ymm0
- ; AVX2-FAST-NEXT: vmovdqa %ymm0, (%rdx)
- ; AVX2-FAST-NEXT: vmovdqa %ymm1, 32(%rdx)
- ; AVX2-FAST-NEXT: vzeroupper
- ; AVX2-FAST-NEXT: retq
+ ; AVX2-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
+ ; AVX2: # %bb.0:
+ ; AVX2-NEXT: vpbroadcastd (%rdi), %ymm0
+ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+ ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+ ; AVX2-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
+ ; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
+ ; AVX2-NEXT: vmovdqa %ymm0, (%rdx)
+ ; AVX2-NEXT: vmovdqa %ymm1, 32(%rdx)
+ ; AVX2-NEXT: vzeroupper
+ ; AVX2-NEXT: retq
;
; AVX512F-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
; AVX512F: # %bb.0: