@@ -3567,14 +3567,13 @@ define void @vec384_i16_widen_to_i32_factor2_broadcast_to_v12i32_factor12(ptr %i
3567
3567
; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,6,7,10,11,14,15,u,u,u,u,u,u,u,u]
3568
3568
; AVX-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,0,0,0,4,5,6,7]
3569
3569
; AVX-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
3570
- ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3571
3570
; AVX-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
3572
3571
; AVX-NEXT: vpaddb (%rdx), %xmm1, %xmm1
3573
3572
; AVX-NEXT: vpaddb 32(%rdx), %xmm2, %xmm2
3574
- ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
3573
+ ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[0,1],zero,zero,xmm0[0,1],zero,zero,xmm0[0,1],zero,zero
3575
3574
; AVX-NEXT: vpaddb 16(%rdx), %xmm0, %xmm0
3576
- ; AVX-NEXT: vmovdqa %xmm1, (%rcx)
3577
3575
; AVX-NEXT: vmovdqa %xmm0, 16(%rcx)
3576
+ ; AVX-NEXT: vmovdqa %xmm1, (%rcx)
3578
3577
; AVX-NEXT: vmovdqa %xmm2, 32(%rcx)
3579
3578
; AVX-NEXT: retq
3580
3579
;
@@ -3757,14 +3756,14 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
3757
3756
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
3758
3757
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2],xmm0[3],xmm1[4,5],xmm0[6],xmm1[7]
3759
3758
; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
3760
- ; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm0[2],xmm2[3,4],xmm0[5],xmm2[6,7]
3759
+ ; AVX-NEXT: vpblendw {{.*#+}} xmm3 = xmm2[0,1],xmm0[2],xmm2[3,4],xmm0[5],xmm2[6,7]
3761
3760
; AVX-NEXT: vpaddb (%rdx), %xmm1, %xmm1
3762
- ; AVX-NEXT: vpaddb 32(%rdx), %xmm2, %xmm2
3763
- ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
3761
+ ; AVX-NEXT: vpaddb 32(%rdx), %xmm3, %xmm3
3762
+ ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2,3],xmm0[4],xmm2[5,6],xmm0[7]
3764
3763
; AVX-NEXT: vpaddb 16(%rdx), %xmm0, %xmm0
3765
3764
; AVX-NEXT: vmovdqa %xmm1, (%rcx)
3766
3765
; AVX-NEXT: vmovdqa %xmm0, 16(%rcx)
3767
- ; AVX-NEXT: vmovdqa %xmm2, 32(%rcx)
3766
+ ; AVX-NEXT: vmovdqa %xmm3, 32(%rcx)
3768
3767
; AVX-NEXT: retq
3769
3768
;
3770
3769
; AVX2-LABEL: vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8:
@@ -3955,10 +3954,9 @@ define void @vec384_i16_widen_to_i64_factor4_broadcast_to_v6i64_factor6(ptr %in.
3955
3954
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
3956
3955
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3],xmm0[4],xmm1[5,6,7]
3957
3956
; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
3958
- ; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
3957
+ ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
3959
3958
; AVX-NEXT: vpaddb (%rdx), %xmm1, %xmm1
3960
- ; AVX-NEXT: vpaddb 32(%rdx), %xmm2, %xmm2
3961
- ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
3959
+ ; AVX-NEXT: vpaddb 32(%rdx), %xmm0, %xmm2
3962
3960
; AVX-NEXT: vpaddb 16(%rdx), %xmm0, %xmm0
3963
3961
; AVX-NEXT: vmovdqa %xmm1, (%rcx)
3964
3962
; AVX-NEXT: vmovdqa %xmm0, 16(%rcx)
@@ -4181,17 +4179,16 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
4181
4179
; AVX-NEXT: vpaddb (%rsi), %xmm0, %xmm0
4182
4180
; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[0,0,0,0]
4183
4181
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3,4,5],xmm2[6],xmm1[7]
4184
- ; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
4185
- ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
4182
+ ; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[0,0,1,1]
4186
4183
; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3
4187
- ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm3[0,1],xmm0[2],xmm3[3,4,5,6,7]
4184
+ ; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2],xmm3[3,4,5,6,7]
4188
4185
; AVX-NEXT: vpaddb (%rdx), %xmm1, %xmm1
4189
- ; AVX-NEXT: vpaddb 32(%rdx), %xmm0, %xmm0
4190
- ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm2, %xmm2
4191
- ; AVX-NEXT: vpaddb 16(%rdx), %xmm2, %xmm2
4186
+ ; AVX-NEXT: vpaddb 32(%rdx), %xmm2, %xmm2
4187
+ ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1],zero,zero,zero,zero,zero,zero
4188
+ ; AVX-NEXT: vpaddb 16(%rdx), %xmm0, %xmm0
4189
+ ; AVX-NEXT: vmovdqa %xmm0, 16(%rcx)
4192
4190
; AVX-NEXT: vmovdqa %xmm1, (%rcx)
4193
- ; AVX-NEXT: vmovdqa %xmm2, 16(%rcx)
4194
- ; AVX-NEXT: vmovdqa %xmm0, 32(%rcx)
4191
+ ; AVX-NEXT: vmovdqa %xmm2, 32(%rcx)
4195
4192
; AVX-NEXT: retq
4196
4193
;
4197
4194
; AVX2-LABEL: vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4:
@@ -4379,10 +4376,9 @@ define void @vec384_i16_widen_to_i128_factor8_broadcast_to_v3i128_factor3(ptr %i
4379
4376
; AVX-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
4380
4377
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
4381
4378
; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
4382
- ; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3,4,5,6,7]
4379
+ ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3,4,5,6,7]
4383
4380
; AVX-NEXT: vpaddb (%rdx), %xmm1, %xmm1
4384
- ; AVX-NEXT: vpaddb 32(%rdx), %xmm2, %xmm2
4385
- ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
4381
+ ; AVX-NEXT: vpaddb 32(%rdx), %xmm0, %xmm2
4386
4382
; AVX-NEXT: vpaddb 16(%rdx), %xmm0, %xmm0
4387
4383
; AVX-NEXT: vmovdqa %xmm1, (%rcx)
4388
4384
; AVX-NEXT: vmovdqa %xmm0, 16(%rcx)
@@ -4517,10 +4513,9 @@ define void @vec384_i16_widen_to_i192_factor12_broadcast_to_v2i192_factor2(ptr %
4517
4513
; AVX-NEXT: vpaddb (%rsi), %xmm0, %xmm0
4518
4514
; AVX-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
4519
4515
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
4520
- ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
4521
4516
; AVX-NEXT: vmovaps 32(%rdx), %ymm2
4522
4517
; AVX-NEXT: vpaddb (%rdx), %xmm1, %xmm1
4523
- ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
4518
+ ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1],zero,zero,zero,zero,zero,zero
4524
4519
; AVX-NEXT: vpaddb 16(%rdx), %xmm0, %xmm0
4525
4520
; AVX-NEXT: vmovaps %ymm2, 32(%rcx)
4526
4521
; AVX-NEXT: vmovdqa %xmm1, (%rcx)
0 commit comments