Skip to content

Commit 247016a

Browse files
committed
[X86] Use vptestm/vptestnm for comparisons with zero to avoid creating a zero vector.
We can use the same input for both operands to get a free compare with zero. We already use this trick in a couple of places where we explicitly create PTESTM with the same input twice; this change generalizes it. I'm hoping to remove the ISD opcodes and move this to isel patterns, as we do for scalar cmp/test. llvm-svn: 323605
1 parent 513d3fa commit 247016a

30 files changed

+2560
-4993
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17777,6 +17777,13 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
1777717777
return DAG.getNode(SSECC == 0 ? X86ISD::TESTNM : X86ISD::TESTM,
1777817778
dl, VT, RHS, LHS);
1777917779
}
17780+
17781+
// If this is just a comparison with 0 without an AND, we can just use
17782+
// the same input twice to avoid creating a zero vector.
17783+
if (ISD::isBuildVectorAllZeros(Op1.getNode())) {
17784+
return DAG.getNode(SSECC == 0 ? X86ISD::TESTNM : X86ISD::TESTM,
17785+
dl, VT, Op0, Op0);
17786+
}
1778017787
}
1778117788

1778217789
unsigned Opc = ISD::isUnsignedIntSetCC(SetCCOpcode) ? X86ISD::CMPMU

llvm/test/CodeGen/X86/avx512-arith.ll

Lines changed: 28 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -401,8 +401,7 @@ define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
401401
define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
402402
; CHECK-LABEL: vpaddd_mask_test:
403403
; CHECK: # %bb.0:
404-
; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
405-
; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
404+
; CHECK-NEXT: vptestmd %zmm2, %zmm2, %k1
406405
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1}
407406
; CHECK-NEXT: retq
408407
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
@@ -414,8 +413,7 @@ define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %ma
414413
define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
415414
; CHECK-LABEL: vpaddd_maskz_test:
416415
; CHECK: # %bb.0:
417-
; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
418-
; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
416+
; CHECK-NEXT: vptestmd %zmm2, %zmm2, %k1
419417
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z}
420418
; CHECK-NEXT: retq
421419
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
@@ -427,8 +425,7 @@ define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %m
427425
define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
428426
; CHECK-LABEL: vpaddd_mask_fold_test:
429427
; CHECK: # %bb.0:
430-
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
431-
; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
428+
; CHECK-NEXT: vptestmd %zmm1, %zmm1, %k1
432429
; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1}
433430
; CHECK-NEXT: retq
434431
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
@@ -441,8 +438,7 @@ define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16
441438
define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
442439
; CHECK-LABEL: vpaddd_mask_broadcast_test:
443440
; CHECK: # %bb.0:
444-
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
445-
; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
441+
; CHECK-NEXT: vptestmd %zmm1, %zmm1, %k1
446442
; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1}
447443
; CHECK-NEXT: retq
448444
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
@@ -454,8 +450,7 @@ define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1)
454450
define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
455451
; CHECK-LABEL: vpaddd_maskz_fold_test:
456452
; CHECK: # %bb.0:
457-
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
458-
; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
453+
; CHECK-NEXT: vptestmd %zmm1, %zmm1, %k1
459454
; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z}
460455
; CHECK-NEXT: retq
461456
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
@@ -468,8 +463,7 @@ define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16
468463
define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
469464
; CHECK-LABEL: vpaddd_maskz_broadcast_test:
470465
; CHECK: # %bb.0:
471-
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
472-
; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
466+
; CHECK-NEXT: vptestmd %zmm1, %zmm1, %k1
473467
; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z}
474468
; CHECK-NEXT: retq
475469
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
@@ -671,8 +665,7 @@ entry:
671665
define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i,
672666
; CHECK-LABEL: test_mask_vaddps:
673667
; CHECK: # %bb.0:
674-
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
675-
; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
668+
; CHECK-NEXT: vptestmd %zmm3, %zmm3, %k1
676669
; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1}
677670
; CHECK-NEXT: retq
678671
<16 x float> %j, <16 x i32> %mask1)
@@ -686,8 +679,7 @@ define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i,
686679
define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i,
687680
; CHECK-LABEL: test_mask_vmulps:
688681
; CHECK: # %bb.0:
689-
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
690-
; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
682+
; CHECK-NEXT: vptestmd %zmm3, %zmm3, %k1
691683
; CHECK-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1}
692684
; CHECK-NEXT: retq
693685
<16 x float> %j, <16 x i32> %mask1)
@@ -701,8 +693,7 @@ define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i,
701693
define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i,
702694
; CHECK-LABEL: test_mask_vminps:
703695
; CHECK: # %bb.0:
704-
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
705-
; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
696+
; CHECK-NEXT: vptestmd %zmm3, %zmm3, %k1
706697
; CHECK-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1}
707698
; CHECK-NEXT: retq
708699
<16 x float> %j, <16 x i32> %mask1)
@@ -718,38 +709,33 @@ define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i,
718709
; AVX512F-LABEL: test_mask_vminpd:
719710
; AVX512F: # %bb.0:
720711
; AVX512F-NEXT: # kill: def %ymm3 killed %ymm3 def %zmm3
721-
; AVX512F-NEXT: vpxor %xmm4, %xmm4, %xmm4
722-
; AVX512F-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
712+
; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k1
723713
; AVX512F-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
724714
; AVX512F-NEXT: retq
725715
;
726716
; AVX512VL-LABEL: test_mask_vminpd:
727717
; AVX512VL: # %bb.0:
728-
; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
729-
; AVX512VL-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
718+
; AVX512VL-NEXT: vptestmd %ymm3, %ymm3, %k1
730719
; AVX512VL-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
731720
; AVX512VL-NEXT: retq
732721
;
733722
; AVX512BW-LABEL: test_mask_vminpd:
734723
; AVX512BW: # %bb.0:
735724
; AVX512BW-NEXT: # kill: def %ymm3 killed %ymm3 def %zmm3
736-
; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
737-
; AVX512BW-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
725+
; AVX512BW-NEXT: vptestmd %zmm3, %zmm3, %k1
738726
; AVX512BW-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
739727
; AVX512BW-NEXT: retq
740728
;
741729
; AVX512DQ-LABEL: test_mask_vminpd:
742730
; AVX512DQ: # %bb.0:
743731
; AVX512DQ-NEXT: # kill: def %ymm3 killed %ymm3 def %zmm3
744-
; AVX512DQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
745-
; AVX512DQ-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
732+
; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k1
746733
; AVX512DQ-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
747734
; AVX512DQ-NEXT: retq
748735
;
749736
; SKX-LABEL: test_mask_vminpd:
750737
; SKX: # %bb.0:
751-
; SKX-NEXT: vpxor %xmm4, %xmm4, %xmm4
752-
; SKX-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
738+
; SKX-NEXT: vptestmd %ymm3, %ymm3, %k1
753739
; SKX-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
754740
; SKX-NEXT: retq
755741
<8 x double> %j, <8 x i32> %mask1)
@@ -764,8 +750,7 @@ define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i,
764750
define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i,
765751
; CHECK-LABEL: test_mask_vmaxps:
766752
; CHECK: # %bb.0:
767-
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
768-
; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
753+
; CHECK-NEXT: vptestmd %zmm3, %zmm3, %k1
769754
; CHECK-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1}
770755
; CHECK-NEXT: retq
771756
<16 x float> %j, <16 x i32> %mask1)
@@ -781,38 +766,33 @@ define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i,
781766
; AVX512F-LABEL: test_mask_vmaxpd:
782767
; AVX512F: # %bb.0:
783768
; AVX512F-NEXT: # kill: def %ymm3 killed %ymm3 def %zmm3
784-
; AVX512F-NEXT: vpxor %xmm4, %xmm4, %xmm4
785-
; AVX512F-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
769+
; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k1
786770
; AVX512F-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
787771
; AVX512F-NEXT: retq
788772
;
789773
; AVX512VL-LABEL: test_mask_vmaxpd:
790774
; AVX512VL: # %bb.0:
791-
; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
792-
; AVX512VL-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
775+
; AVX512VL-NEXT: vptestmd %ymm3, %ymm3, %k1
793776
; AVX512VL-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
794777
; AVX512VL-NEXT: retq
795778
;
796779
; AVX512BW-LABEL: test_mask_vmaxpd:
797780
; AVX512BW: # %bb.0:
798781
; AVX512BW-NEXT: # kill: def %ymm3 killed %ymm3 def %zmm3
799-
; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
800-
; AVX512BW-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
782+
; AVX512BW-NEXT: vptestmd %zmm3, %zmm3, %k1
801783
; AVX512BW-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
802784
; AVX512BW-NEXT: retq
803785
;
804786
; AVX512DQ-LABEL: test_mask_vmaxpd:
805787
; AVX512DQ: # %bb.0:
806788
; AVX512DQ-NEXT: # kill: def %ymm3 killed %ymm3 def %zmm3
807-
; AVX512DQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
808-
; AVX512DQ-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
789+
; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k1
809790
; AVX512DQ-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
810791
; AVX512DQ-NEXT: retq
811792
;
812793
; SKX-LABEL: test_mask_vmaxpd:
813794
; SKX: # %bb.0:
814-
; SKX-NEXT: vpxor %xmm4, %xmm4, %xmm4
815-
; SKX-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
795+
; SKX-NEXT: vptestmd %ymm3, %ymm3, %k1
816796
; SKX-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
817797
; SKX-NEXT: retq
818798
<8 x double> %j, <8 x i32> %mask1)
@@ -827,8 +807,7 @@ define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i,
827807
define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i,
828808
; CHECK-LABEL: test_mask_vsubps:
829809
; CHECK: # %bb.0:
830-
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
831-
; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
810+
; CHECK-NEXT: vptestmd %zmm3, %zmm3, %k1
832811
; CHECK-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1}
833812
; CHECK-NEXT: retq
834813
<16 x float> %j, <16 x i32> %mask1)
@@ -842,8 +821,7 @@ define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i,
842821
define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i,
843822
; CHECK-LABEL: test_mask_vdivps:
844823
; CHECK: # %bb.0:
845-
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
846-
; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
824+
; CHECK-NEXT: vptestmd %zmm3, %zmm3, %k1
847825
; CHECK-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1}
848826
; CHECK-NEXT: retq
849827
<16 x float> %j, <16 x i32> %mask1)
@@ -857,8 +835,7 @@ define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i,
857835
define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i,
858836
; CHECK-LABEL: test_mask_vaddpd:
859837
; CHECK: # %bb.0:
860-
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
861-
; CHECK-NEXT: vpcmpneqq %zmm4, %zmm3, %k1
838+
; CHECK-NEXT: vptestmq %zmm3, %zmm3, %k1
862839
; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1}
863840
; CHECK-NEXT: retq
864841
<8 x double> %j, <8 x i64> %mask1)
@@ -872,8 +849,7 @@ define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i,
872849
define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j,
873850
; CHECK-LABEL: test_maskz_vaddpd:
874851
; CHECK: # %bb.0:
875-
; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
876-
; CHECK-NEXT: vpcmpneqq %zmm3, %zmm2, %k1
852+
; CHECK-NEXT: vptestmq %zmm2, %zmm2, %k1
877853
; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z}
878854
; CHECK-NEXT: retq
879855
<8 x i64> %mask1) nounwind readnone {
@@ -886,8 +862,7 @@ define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j,
886862
define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i,
887863
; CHECK-LABEL: test_mask_fold_vaddpd:
888864
; CHECK: # %bb.0:
889-
; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
890-
; CHECK-NEXT: vpcmpneqq %zmm3, %zmm2, %k1
865+
; CHECK-NEXT: vptestmq %zmm2, %zmm2, %k1
891866
; CHECK-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1}
892867
; CHECK-NEXT: retq
893868
<8 x double>* %j, <8 x i64> %mask1)
@@ -902,8 +877,7 @@ define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i,
902877
define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j,
903878
; CHECK-LABEL: test_maskz_fold_vaddpd:
904879
; CHECK: # %bb.0:
905-
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
906-
; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1
880+
; CHECK-NEXT: vptestmq %zmm1, %zmm1, %k1
907881
; CHECK-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z}
908882
; CHECK-NEXT: retq
909883
<8 x i64> %mask1) nounwind {
@@ -930,8 +904,7 @@ define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind
930904
define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i,
931905
; CHECK-LABEL: test_mask_broadcast_vaddpd:
932906
; CHECK: # %bb.0:
933-
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
934-
; CHECK-NEXT: vpcmpneqq %zmm0, %zmm2, %k1
907+
; CHECK-NEXT: vptestmq %zmm2, %zmm2, %k1
935908
; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1}
936909
; CHECK-NEXT: vmovapd %zmm1, %zmm0
937910
; CHECK-NEXT: retq
@@ -949,8 +922,7 @@ define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double>
949922
define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
950923
; CHECK-LABEL: test_maskz_broadcast_vaddpd:
951924
; CHECK: # %bb.0:
952-
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
953-
; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1
925+
; CHECK-NEXT: vptestmq %zmm1, %zmm1, %k1
954926
; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
955927
; CHECK-NEXT: retq
956928
<8 x i64> %mask1) nounwind {

llvm/test/CodeGen/X86/avx512-mask-op.ll

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2413,8 +2413,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
24132413
define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) {
24142414
; KNL-LABEL: test_bitcast_v8i1_zext:
24152415
; KNL: ## %bb.0:
2416-
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
2417-
; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2416+
; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
24182417
; KNL-NEXT: kmovw %k0, %eax
24192418
; KNL-NEXT: movzbl %al, %eax
24202419
; KNL-NEXT: addl %eax, %eax
@@ -2423,17 +2422,15 @@ define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) {
24232422
;
24242423
; SKX-LABEL: test_bitcast_v8i1_zext:
24252424
; SKX: ## %bb.0:
2426-
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
2427-
; SKX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2425+
; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0
24282426
; SKX-NEXT: kmovb %k0, %eax
24292427
; SKX-NEXT: addl %eax, %eax
24302428
; SKX-NEXT: vzeroupper
24312429
; SKX-NEXT: retq
24322430
;
24332431
; AVX512BW-LABEL: test_bitcast_v8i1_zext:
24342432
; AVX512BW: ## %bb.0:
2435-
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
2436-
; AVX512BW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2433+
; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
24372434
; AVX512BW-NEXT: kmovd %k0, %eax
24382435
; AVX512BW-NEXT: movzbl %al, %eax
24392436
; AVX512BW-NEXT: addl %eax, %eax
@@ -2442,8 +2439,7 @@ define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) {
24422439
;
24432440
; AVX512DQ-LABEL: test_bitcast_v8i1_zext:
24442441
; AVX512DQ: ## %bb.0:
2445-
; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
2446-
; AVX512DQ-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2442+
; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0
24472443
; AVX512DQ-NEXT: kmovb %k0, %eax
24482444
; AVX512DQ-NEXT: addl %eax, %eax
24492445
; AVX512DQ-NEXT: vzeroupper
@@ -2459,8 +2455,7 @@ define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) {
24592455
define i32 @test_bitcast_v16i1_zext(<16 x i32> %a) {
24602456
; CHECK-LABEL: test_bitcast_v16i1_zext:
24612457
; CHECK: ## %bb.0:
2462-
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
2463-
; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2458+
; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k0
24642459
; CHECK-NEXT: kmovw %k0, %eax
24652460
; CHECK-NEXT: addl %eax, %eax
24662461
; CHECK-NEXT: vzeroupper

llvm/test/CodeGen/X86/avx512-masked-memop-64-32.ll

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,7 @@
55
define <16 x i32> @test1(<16 x i32> %trigger, <16 x i32>* %addr) {
66
; AVX512-LABEL: test1:
77
; AVX512: ## %bb.0:
8-
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
9-
; AVX512-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
8+
; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k1
109
; AVX512-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z}
1110
; AVX512-NEXT: retq
1211
%mask = icmp eq <16 x i32> %trigger, zeroinitializer
@@ -17,8 +16,7 @@ define <16 x i32> @test1(<16 x i32> %trigger, <16 x i32>* %addr) {
1716
define <16 x i32> @test2(<16 x i32> %trigger, <16 x i32>* %addr) {
1817
; AVX512-LABEL: test2:
1918
; AVX512: ## %bb.0:
20-
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
21-
; AVX512-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
19+
; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k1
2220
; AVX512-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z}
2321
; AVX512-NEXT: retq
2422
%mask = icmp eq <16 x i32> %trigger, zeroinitializer
@@ -29,8 +27,7 @@ define <16 x i32> @test2(<16 x i32> %trigger, <16 x i32>* %addr) {
2927
define void @test3(<16 x i32> %trigger, <16 x i32>* %addr, <16 x i32> %val) {
3028
; AVX512-LABEL: test3:
3129
; AVX512: ## %bb.0:
32-
; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
33-
; AVX512-NEXT: vpcmpeqd %zmm2, %zmm0, %k1
30+
; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k1
3431
; AVX512-NEXT: vmovdqu32 %zmm1, (%rdi) {%k1}
3532
; AVX512-NEXT: vzeroupper
3633
; AVX512-NEXT: retq
@@ -42,8 +39,7 @@ define void @test3(<16 x i32> %trigger, <16 x i32>* %addr, <16 x i32> %val) {
4239
define <16 x float> @test4(<16 x i32> %trigger, <16 x float>* %addr, <16 x float> %dst) {
4340
; AVX512-LABEL: test4:
4441
; AVX512: ## %bb.0:
45-
; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
46-
; AVX512-NEXT: vpcmpeqd %zmm2, %zmm0, %k1
42+
; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k1
4743
; AVX512-NEXT: vblendmps (%rdi), %zmm1, %zmm0 {%k1}
4844
; AVX512-NEXT: retq
4945
%mask = icmp eq <16 x i32> %trigger, zeroinitializer
@@ -54,8 +50,7 @@ define <16 x float> @test4(<16 x i32> %trigger, <16 x float>* %addr, <16 x float
5450
define void @test13(<16 x i32> %trigger, <16 x float>* %addr, <16 x float> %val) {
5551
; AVX512-LABEL: test13:
5652
; AVX512: ## %bb.0:
57-
; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
58-
; AVX512-NEXT: vpcmpeqd %zmm2, %zmm0, %k1
53+
; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k1
5954
; AVX512-NEXT: vmovups %zmm1, (%rdi) {%k1}
6055
; AVX512-NEXT: vzeroupper
6156
; AVX512-NEXT: retq
@@ -99,9 +94,8 @@ declare <16 x i32*> @llvm.masked.load.v16p0i32.p0v16p0i32(<16 x i32*>*, i32, <16
9994
define <16 x i32*> @test23(<16 x i32*> %trigger, <16 x i32*>* %addr) {
10095
; AVX512-LABEL: test23:
10196
; AVX512: ## %bb.0:
102-
; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
103-
; AVX512-NEXT: vpcmpeqq %zmm2, %zmm0, %k1
104-
; AVX512-NEXT: vpcmpeqq %zmm2, %zmm1, %k2
97+
; AVX512-NEXT: vptestnmq %zmm0, %zmm0, %k1
98+
; AVX512-NEXT: vptestnmq %zmm1, %zmm1, %k2
10599
; AVX512-NEXT: vmovdqu64 64(%rdi), %zmm1 {%k2} {z}
106100
; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z}
107101
; AVX512-NEXT: retq

0 commit comments

Comments
 (0)