Skip to content

Commit ae13721

Browse files
committed
Pre-commit test (NFC)
1 parent 5c9fc3c commit ae13721

File tree

1 file changed

+82
-0
lines changed

1 file changed

+82
-0
lines changed

llvm/test/CodeGen/X86/combine-udiv.ll

Lines changed: 82 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -755,3 +755,85 @@ define <4 x i1> @boolvec_udiv(<4 x i1> %x, <4 x i1> %y) {
755755
%r = udiv <4 x i1> %x, %y
756756
ret <4 x i1> %r
757757
}
758+
759+
; Unsigned vector division by a splat constant where the dividend has known
; leading zero bits: every lane of %x is masked with 255 first, so only the low
; 8 bits of each i32 lane can be nonzero when it is divided by 7.
;
; The assertions below record, for each check prefix used here (SSE2, SSE41,
; AVX1, AVX2 and XOP), the lowering at the time the test was added: the mask is
; applied from a constant-pool operand, the lanes are multiplied by the splat
; constant 613566757 using two pmuludq operations whose high halves are
; recombined, and the result is finished with a subtract, a shift right by 1,
; an add and a shift right by 2. No division instruction appears.
;
; NOTE(review): presumably a baseline for a later codegen change that exploits
; the known-zero upper bits of the dividend - confirm against follow-up commits.
; NOTE(review): the assertions look autogenerated (update_llc_test_checks.py
; style); regenerate them rather than editing by hand - confirm against the
; header of the full test file.
define <4 x i32> @vector_div_leading_zeros(<4 x i32> %x) {
760+
; SSE2-LABEL: vector_div_leading_zeros:
761+
; SSE2: # %bb.0:
762+
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
763+
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
764+
; SSE2-NEXT: movdqa %xmm0, %xmm2
765+
; SSE2-NEXT: pmuludq %xmm1, %xmm2
766+
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
767+
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
768+
; SSE2-NEXT: pmuludq %xmm1, %xmm3
769+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
770+
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
771+
; SSE2-NEXT: psubd %xmm2, %xmm0
772+
; SSE2-NEXT: psrld $1, %xmm0
773+
; SSE2-NEXT: paddd %xmm2, %xmm0
774+
; SSE2-NEXT: psrld $2, %xmm0
775+
; SSE2-NEXT: retq
776+
;
777+
; SSE41-LABEL: vector_div_leading_zeros:
778+
; SSE41: # %bb.0:
779+
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
780+
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
781+
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
782+
; SSE41-NEXT: pmuludq %xmm2, %xmm1
783+
; SSE41-NEXT: pmuludq %xmm0, %xmm2
784+
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
785+
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
786+
; SSE41-NEXT: psubd %xmm2, %xmm0
787+
; SSE41-NEXT: psrld $1, %xmm0
788+
; SSE41-NEXT: paddd %xmm2, %xmm0
789+
; SSE41-NEXT: psrld $2, %xmm0
790+
; SSE41-NEXT: retq
791+
;
792+
; AVX1-LABEL: vector_div_leading_zeros:
793+
; AVX1: # %bb.0:
794+
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
795+
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
796+
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
797+
; AVX1-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
798+
; AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
799+
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
800+
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
801+
; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
802+
; AVX1-NEXT: vpsrld $1, %xmm0, %xmm0
803+
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
804+
; AVX1-NEXT: vpsrld $2, %xmm0, %xmm0
805+
; AVX1-NEXT: retq
806+
;
807+
; AVX2-LABEL: vector_div_leading_zeros:
808+
; AVX2: # %bb.0:
809+
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
810+
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
811+
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
812+
; AVX2-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
813+
; AVX2-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
814+
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
815+
; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
816+
; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
817+
; AVX2-NEXT: vpsrld $1, %xmm0, %xmm0
818+
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
819+
; AVX2-NEXT: vpsrld $2, %xmm0, %xmm0
820+
; AVX2-NEXT: retq
821+
;
822+
; XOP-LABEL: vector_div_leading_zeros:
823+
; XOP: # %bb.0:
824+
; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
825+
; XOP-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
826+
; XOP-NEXT: vbroadcastss {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
827+
; XOP-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
828+
; XOP-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
829+
; XOP-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
830+
; XOP-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
831+
; XOP-NEXT: vpsubd %xmm1, %xmm0, %xmm0
832+
; XOP-NEXT: vpsrld $1, %xmm0, %xmm0
833+
; XOP-NEXT: vpaddd %xmm1, %xmm0, %xmm0
834+
; XOP-NEXT: vpsrld $2, %xmm0, %xmm0
835+
; XOP-NEXT: retq
836+
%a = and <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
837+
%b = udiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
838+
ret <4 x i32> %b
839+
}

0 commit comments

Comments
 (0)