; udiv of <4 x i1> vectors; signature recovered from the diff hunk header.
; NOTE(review): the autogenerated CHECK lines for this function live above
; this chunk and are not visible here — confirm against the full test file.
define <4 x i1> @boolvec_udiv(<4 x i1> %x, <4 x i1> %y) {
  %r = udiv <4 x i1> %x, %y
  ret <4 x i1> %r
}

; Masking with 255 proves the top 24 bits are zero, so udiv-by-7 lowers to a
; multiply-high by the magic constant 613566757 (= ceil(2^32/7)) with no
; post-shift needed on any subtarget.
define <4 x i32> @vector_div_leading_zeros(<4 x i32> %x) {
; SSE2-LABEL: vector_div_leading_zeros:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: vector_div_leading_zeros:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
; SSE41-NEXT:    pmuludq %xmm2, %xmm1
; SSE41-NEXT:    pmuludq %xmm2, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: vector_div_leading_zeros:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
; AVX1-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpmuludq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: vector_div_leading_zeros:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
; AVX2-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpmuludq %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX2-NEXT:    retq
;
; XOP-LABEL: vector_div_leading_zeros:
; XOP:       # %bb.0:
; XOP-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; XOP-NEXT:    vbroadcastss {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
; XOP-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
; XOP-NEXT:    vpmuludq %xmm2, %xmm0, %xmm0
; XOP-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; XOP-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; XOP-NEXT:    retq
  %a = and <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
  %b = udiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
  ret <4 x i32> %b
}