@@ -1478,10 +1478,8 @@ define void @vec256_i8_widen_to_i128_factor16_broadcast_to_v2i128_factor2(ptr %i
1478
1478
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
1479
1479
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
1480
1480
; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1481
- ; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
1482
- ; AVX512F-NEXT: # ymm2 = mem[0,1,0,1]
1483
- ; AVX512F-NEXT: vpternlogq {{.*#+}} ymm2 = ymm0 ^ (ymm2 & (ymm1 ^ ymm0))
1484
- ; AVX512F-NEXT: vpaddb (%rdx), %ymm2, %ymm0
1481
+ ; AVX512F-NEXT: vpternlogq {{.*#+}} ymm0 = ymm0 ^ (mem & (ymm0 ^ ymm1))
1482
+ ; AVX512F-NEXT: vpaddb (%rdx), %ymm0, %ymm0
1485
1483
; AVX512F-NEXT: vmovdqa %ymm0, (%rcx)
1486
1484
; AVX512F-NEXT: vzeroupper
1487
1485
; AVX512F-NEXT: retq
@@ -1493,10 +1491,8 @@ define void @vec256_i8_widen_to_i128_factor16_broadcast_to_v2i128_factor2(ptr %i
1493
1491
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
1494
1492
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
1495
1493
; AVX512DQ-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1496
- ; AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
1497
- ; AVX512DQ-NEXT: # ymm2 = mem[0,1,0,1]
1498
- ; AVX512DQ-NEXT: vpternlogq {{.*#+}} ymm2 = ymm0 ^ (ymm2 & (ymm1 ^ ymm0))
1499
- ; AVX512DQ-NEXT: vpaddb (%rdx), %ymm2, %ymm0
1494
+ ; AVX512DQ-NEXT: vpternlogq {{.*#+}} ymm0 = ymm0 ^ (mem & (ymm0 ^ ymm1))
1495
+ ; AVX512DQ-NEXT: vpaddb (%rdx), %ymm0, %ymm0
1500
1496
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rcx)
1501
1497
; AVX512DQ-NEXT: vzeroupper
1502
1498
; AVX512DQ-NEXT: retq
@@ -3235,10 +3231,8 @@ define void @vec384_i8_widen_to_i128_factor16_broadcast_to_v3i128_factor3(ptr %i
3235
3231
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm1
3236
3232
; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
3237
3233
; AVX512F-NEXT: vpermq {{.*#+}} ymm2 = ymm0[0,1,0,1]
3238
- ; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
3239
- ; AVX512F-NEXT: # ymm3 = mem[0,1,0,1]
3240
- ; AVX512F-NEXT: vpternlogq {{.*#+}} ymm3 = ymm2 ^ (ymm3 & (ymm1 ^ ymm2))
3241
- ; AVX512F-NEXT: vpaddb (%rdx), %ymm3, %ymm1
3234
+ ; AVX512F-NEXT: vpternlogq {{.*#+}} ymm2 = ymm2 ^ (mem & (ymm2 ^ ymm1))
3235
+ ; AVX512F-NEXT: vpaddb (%rdx), %ymm2, %ymm1
3242
3236
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
3243
3237
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
3244
3238
; AVX512F-NEXT: vmovdqa %ymm1, (%rcx)
@@ -3252,10 +3246,8 @@ define void @vec384_i8_widen_to_i128_factor16_broadcast_to_v3i128_factor3(ptr %i
3252
3246
; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm1
3253
3247
; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
3254
3248
; AVX512DQ-NEXT: vpermq {{.*#+}} ymm2 = ymm0[0,1,0,1]
3255
- ; AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
3256
- ; AVX512DQ-NEXT: # ymm3 = mem[0,1,0,1]
3257
- ; AVX512DQ-NEXT: vpternlogq {{.*#+}} ymm3 = ymm2 ^ (ymm3 & (ymm1 ^ ymm2))
3258
- ; AVX512DQ-NEXT: vpaddb (%rdx), %ymm3, %ymm1
3249
+ ; AVX512DQ-NEXT: vpternlogq {{.*#+}} ymm2 = ymm2 ^ (mem & (ymm2 ^ ymm1))
3250
+ ; AVX512DQ-NEXT: vpaddb (%rdx), %ymm2, %ymm1
3259
3251
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
3260
3252
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
3261
3253
; AVX512DQ-NEXT: vmovdqa %ymm1, (%rcx)
@@ -3762,8 +3754,7 @@ define void @vec384_i16_widen_to_i64_factor4_broadcast_to_v6i64_factor6(ptr %in.
3762
3754
; AVX512BW: # %bb.0:
3763
3755
; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
3764
3756
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
3765
- ; AVX512BW-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,25,26,27,0,29,30,31,0,25,26,27,0,29,30,31]
3766
- ; AVX512BW-NEXT: # ymm1 = mem[0,1,0,1]
3757
+ ; AVX512BW-NEXT: vpmovsxbw {{.*#+}} xmm1 = [0,25,26,27,0,29,30,31]
3767
3758
; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm0
3768
3759
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
3769
3760
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
@@ -4015,8 +4006,7 @@ define void @vec384_i16_widen_to_i128_factor8_broadcast_to_v3i128_factor3(ptr %i
4015
4006
; AVX512BW: # %bb.0:
4016
4007
; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
4017
4008
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
4018
- ; AVX512BW-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,25,26,27,28,29,30,31,0,25,26,27,28,29,30,31]
4019
- ; AVX512BW-NEXT: # ymm1 = mem[0,1,0,1]
4009
+ ; AVX512BW-NEXT: vpmovsxbw {{.*#+}} xmm1 = [0,25,26,27,28,29,30,31]
4020
4010
; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm1
4021
4011
; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
4022
4012
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
0 commit comments