@@ -755,3 +755,85 @@ define <4 x i1> @boolvec_udiv(<4 x i1> %x, <4 x i1> %y) {
   %r = udiv <4 x i1> %x, %y
   ret <4 x i1> %r
 }
+
+define <4 x i32> @vector_div_leading_zeros(<4 x i32> %x) {
+; SSE2-LABEL: vector_div_leading_zeros:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    pmuludq %xmm1, %xmm2
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; SSE2-NEXT:    pmuludq %xmm1, %xmm3
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT:    psubd %xmm2, %xmm0
+; SSE2-NEXT:    psrld $1, %xmm0
+; SSE2-NEXT:    paddd %xmm2, %xmm0
+; SSE2-NEXT:    psrld $2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: vector_div_leading_zeros:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
+; SSE41-NEXT:    pmuludq %xmm2, %xmm1
+; SSE41-NEXT:    pmuludq %xmm0, %xmm2
+; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; SSE41-NEXT:    psubd %xmm2, %xmm0
+; SSE41-NEXT:    psrld $1, %xmm0
+; SSE41-NEXT:    paddd %xmm2, %xmm0
+; SSE41-NEXT:    psrld $2, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: vector_div_leading_zeros:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
+; AVX1-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrld $1, %xmm0, %xmm0
+; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrld $2, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: vector_div_leading_zeros:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
+; AVX2-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
+; AVX2-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
+; AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpsrld $1, %xmm0, %xmm0
+; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpsrld $2, %xmm0, %xmm0
+; AVX2-NEXT:    retq
+;
+; XOP-LABEL: vector_div_leading_zeros:
+; XOP:       # %bb.0:
+; XOP-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; XOP-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; XOP-NEXT:    vbroadcastss {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
+; XOP-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
+; XOP-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
+; XOP-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; XOP-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; XOP-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; XOP-NEXT:    vpsrld $1, %xmm0, %xmm0
+; XOP-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; XOP-NEXT:    vpsrld $2, %xmm0, %xmm0
+; XOP-NEXT:    retq
+  %a = and <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
+  %b = udiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
+  ret <4 x i32> %b
+}
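All five CHECK prefixes above encode the same magic-number lowering: the `and` with 255 proves the dividend has 24 leading zero bits, so the `udiv` by 7 is replaced with a high 32-bit multiply by 613566757 plus a subtract/shift/add fixup (pmuludq, psubd, psrld $1, paddd, psrld $2). A minimal scalar sketch of that sequence in C, for reference only; the helper name `udiv7_magic` is illustrative and not part of this patch or of LLVM:

```c
#include <assert.h>
#include <stdint.h>

/* Scalar model of the vector sequence checked above: for x already masked
 * to 8 bits, x / 7 is computed without a divide instruction. */
static uint32_t udiv7_magic(uint32_t x) {
    uint32_t hi = (uint32_t)(((uint64_t)x * 613566757u) >> 32); /* pmuludq, keep high half */
    uint32_t t = (x - hi) >> 1;                                 /* psubd + psrld $1        */
    return (t + hi) >> 2;                                       /* paddd + psrld $2        */
}

int main(void) {
    /* Exhaustively check the masked range the test creates with `and ... 255`. */
    for (uint32_t x = 0; x < 256; ++x)
        assert(udiv7_magic(x) == x / 7);
    return 0;
}
```

For this range the multiplier acts as ceil(2^32 / 7), so the high half of the product is exactly floor(x / 7) and the fixup shifts discard the remainder; the known leading zeros are what let the constant stay small enough for this short sequence.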