@@ -139,7 +139,7 @@ define <2 x i32> @smulo_v2i32(<2 x i32> %a0, <2 x i32> %a1, ptr %p2) nounwind {
139
139
; AVX512-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
140
140
; AVX512-NEXT: vpsrad $31, %xmm2, %xmm0
141
141
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0
142
- ; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
142
+ ; AVX512-NEXT: vpternlogq {{.*#+}} xmm0 = ~xmm0
143
143
; AVX512-NEXT: vmovq %xmm2, (%rdi)
144
144
; AVX512-NEXT: retq
145
145
%t = call {<2 x i32>, <2 x i1>} @llvm.smul.with.overflow.v2i32(<2 x i32> %a0, <2 x i32> %a1)
@@ -1234,7 +1234,7 @@ define <16 x i32> @smulo_v16i32(<16 x i32> %a0, <16 x i32> %a1, ptr %p2) nounwin
1234
1234
; AVX512-NEXT: vpmulld %zmm1, %zmm0, %zmm1
1235
1235
; AVX512-NEXT: vpsrad $31, %zmm1, %zmm0
1236
1236
; AVX512-NEXT: vpcmpneqd %zmm0, %zmm4, %k1
1237
- ; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1237
+ ; AVX512-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
1238
1238
; AVX512-NEXT: vmovdqa64 %zmm1, (%rdi)
1239
1239
; AVX512-NEXT: retq
1240
1240
%t = call {<16 x i32>, <16 x i1>} @llvm.smul.with.overflow.v16i32(<16 x i32> %a0, <16 x i32> %a1)
@@ -1443,7 +1443,7 @@ define <16 x i32> @smulo_v16i8(<16 x i8> %a0, <16 x i8> %a1, ptr %p2) nounwind {
1443
1443
; AVX512F-NEXT: vpsraw $15, %ymm2, %ymm2
1444
1444
; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2
1445
1445
; AVX512F-NEXT: vpcmpneqd %zmm0, %zmm2, %k1
1446
- ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1446
+ ; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
1447
1447
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
1448
1448
; AVX512F-NEXT: vpmovdb %zmm1, (%rdi)
1449
1449
; AVX512F-NEXT: retq
@@ -1457,7 +1457,7 @@ define <16 x i32> @smulo_v16i8(<16 x i8> %a0, <16 x i8> %a1, ptr %p2) nounwind {
1457
1457
; AVX512BW-NEXT: vpsllw $8, %ymm1, %ymm2
1458
1458
; AVX512BW-NEXT: vpsraw $15, %ymm2, %ymm2
1459
1459
; AVX512BW-NEXT: vpcmpneqw %ymm0, %ymm2, %k1
1460
- ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1460
+ ; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
1461
1461
; AVX512BW-NEXT: vpmovwb %ymm1, (%rdi)
1462
1462
; AVX512BW-NEXT: retq
1463
1463
%t = call {<16 x i8>, <16 x i1>} @llvm.smul.with.overflow.v16i8(<16 x i8> %a0, <16 x i8> %a1)
@@ -1853,8 +1853,8 @@ define <32 x i32> @smulo_v32i8(<32 x i8> %a0, <32 x i8> %a1, ptr %p2) nounwind {
1853
1853
; AVX512F-NEXT: vpsraw $15, %ymm1, %ymm1
1854
1854
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
1855
1855
; AVX512F-NEXT: vpcmpneqd %zmm0, %zmm1, %k2
1856
- ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
1857
- ; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
1856
+ ; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 {%k2} {z} = -1
1857
+ ; AVX512F-NEXT: vpternlogd {{.*#+}} zmm1 {%k1} {z} = -1
1858
1858
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
1859
1859
; AVX512F-NEXT: vpmovdb %zmm2, 16(%rdi)
1860
1860
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero
@@ -1870,9 +1870,9 @@ define <32 x i32> @smulo_v32i8(<32 x i8> %a0, <32 x i8> %a1, ptr %p2) nounwind {
1870
1870
; AVX512BW-NEXT: vpsllw $8, %zmm2, %zmm1
1871
1871
; AVX512BW-NEXT: vpsraw $15, %zmm1, %zmm1
1872
1872
; AVX512BW-NEXT: vpcmpneqw %zmm0, %zmm1, %k1
1873
- ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1873
+ ; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
1874
1874
; AVX512BW-NEXT: kshiftrd $16, %k1, %k1
1875
- ; AVX512BW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
1875
+ ; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm1 {%k1} {z} = -1
1876
1876
; AVX512BW-NEXT: vpmovwb %zmm2, (%rdi)
1877
1877
; AVX512BW-NEXT: retq
1878
1878
%t = call {<32 x i8>, <32 x i1>} @llvm.smul.with.overflow.v32i8(<32 x i8> %a0, <32 x i8> %a1)
@@ -2637,10 +2637,10 @@ define <64 x i32> @smulo_v64i8(<64 x i8> %a0, <64 x i8> %a1, ptr %p2) nounwind {
2637
2637
; AVX512F-NEXT: vpsraw $15, %ymm1, %ymm1
2638
2638
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
2639
2639
; AVX512F-NEXT: vpcmpneqd %zmm0, %zmm1, %k4
2640
- ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k4} {z}
2641
- ; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k3} {z}
2642
- ; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
2643
- ; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z}
2640
+ ; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 {%k4} {z} = -1
2641
+ ; AVX512F-NEXT: vpternlogd {{.*#+}} zmm1 {%k3} {z} = -1
2642
+ ; AVX512F-NEXT: vpternlogd {{.*#+}} zmm2 {%k2} {z} = -1
2643
+ ; AVX512F-NEXT: vpternlogd {{.*#+}} zmm3 {%k1} {z} = -1
2644
2644
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero
2645
2645
; AVX512F-NEXT: vpmovdb %zmm4, 48(%rdi)
2646
2646
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm5[0],zero,ymm5[1],zero,ymm5[2],zero,ymm5[3],zero,ymm5[4],zero,ymm5[5],zero,ymm5[6],zero,ymm5[7],zero,ymm5[8],zero,ymm5[9],zero,ymm5[10],zero,ymm5[11],zero,ymm5[12],zero,ymm5[13],zero,ymm5[14],zero,ymm5[15],zero
@@ -2670,13 +2670,13 @@ define <64 x i32> @smulo_v64i8(<64 x i8> %a0, <64 x i8> %a1, ptr %p2) nounwind {
2670
2670
; AVX512BW-NEXT: vpmovb2m %zmm4, %k0
2671
2671
; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
2672
2672
; AVX512BW-NEXT: vpcmpneqb %zmm1, %zmm0, %k1
2673
- ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2673
+ ; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
2674
2674
; AVX512BW-NEXT: kshiftrd $16, %k1, %k2
2675
- ; AVX512BW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
2675
+ ; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm1 {%k2} {z} = -1
2676
2676
; AVX512BW-NEXT: kshiftrq $32, %k1, %k1
2677
- ; AVX512BW-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
2677
+ ; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm2 {%k1} {z} = -1
2678
2678
; AVX512BW-NEXT: kshiftrd $16, %k1, %k1
2679
- ; AVX512BW-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z}
2679
+ ; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm3 {%k1} {z} = -1
2680
2680
; AVX512BW-NEXT: vmovdqa64 %zmm4, (%rdi)
2681
2681
; AVX512BW-NEXT: retq
2682
2682
%t = call {<64 x i8>, <64 x i1>} @llvm.smul.with.overflow.v64i8(<64 x i8> %a0, <64 x i8> %a1)
@@ -2770,7 +2770,7 @@ define <8 x i32> @smulo_v8i16(<8 x i16> %a0, <8 x i16> %a1, ptr %p2) nounwind {
2770
2770
; AVX512F-NEXT: vpmullw %xmm1, %xmm0, %xmm1
2771
2771
; AVX512F-NEXT: vpsraw $15, %xmm1, %xmm0
2772
2772
; AVX512F-NEXT: vpcmpeqw %xmm0, %xmm2, %xmm0
2773
- ; AVX512F-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
2773
+ ; AVX512F-NEXT: vpternlogq {{.*#+}} xmm0 = ~xmm0
2774
2774
; AVX512F-NEXT: vpmovsxwd %xmm0, %ymm0
2775
2775
; AVX512F-NEXT: vptestmd %ymm0, %ymm0, %k1
2776
2776
; AVX512F-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
0 commit comments