Skip to content

Commit 4262e05

Browse files
committed
[CodeGen] Remove checks for vectors in unsigned division prior to computing leading zeros
It turns out we can safely use DAG.computeKnownBits(N0).countMinLeadingZeros() with constant legal vectors, so remove the check for it.
1 parent ae13721 commit 4262e05

File tree

3 files changed

+23
-54
lines changed

3 files changed

+23
-54
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6483,15 +6483,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
64836483

64846484
// Try to use leading zeros of the dividend to reduce the multiplier and
64856485
// avoid expensive fixups.
6486-
// TODO: Support vectors.
6487-
unsigned LeadingZeros = 0;
6488-
if (!VT.isVector() && isa<ConstantSDNode>(N1)) {
6489-
assert(!isOneConstant(N1) && "Unexpected divisor");
6490-
LeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
6491-
// UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros in
6492-
// the dividend exceed the leading zeros for the divisor.
6493-
LeadingZeros = std::min(LeadingZeros, N1->getAsAPIntVal().countl_zero());
6494-
}
6486+
unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
64956487

64966488
bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
64976489
SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
@@ -6510,7 +6502,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
65106502
MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
65116503
} else {
65126504
UnsignedDivisionByConstantInfo magics =
6513-
UnsignedDivisionByConstantInfo::get(Divisor, LeadingZeros);
6505+
UnsignedDivisionByConstantInfo::get(
6506+
Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
65146507

65156508
MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);
65166509

llvm/test/CodeGen/X86/combine-udiv.ll

Lines changed: 17 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -761,17 +761,12 @@ define <4 x i32> @vector_div_leading_zeros(<4 x i32> %x) {
761761
; SSE2: # %bb.0:
762762
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
763763
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
764-
; SSE2-NEXT: movdqa %xmm0, %xmm2
764+
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
765+
; SSE2-NEXT: pmuludq %xmm1, %xmm0
766+
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
765767
; SSE2-NEXT: pmuludq %xmm1, %xmm2
766-
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
767-
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
768-
; SSE2-NEXT: pmuludq %xmm1, %xmm3
769-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
770-
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
771-
; SSE2-NEXT: psubd %xmm2, %xmm0
772-
; SSE2-NEXT: psrld $1, %xmm0
773-
; SSE2-NEXT: paddd %xmm2, %xmm0
774-
; SSE2-NEXT: psrld $2, %xmm0
768+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
769+
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
775770
; SSE2-NEXT: retq
776771
;
777772
; SSE41-LABEL: vector_div_leading_zeros:
@@ -780,13 +775,9 @@ define <4 x i32> @vector_div_leading_zeros(<4 x i32> %x) {
780775
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
781776
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
782777
; SSE41-NEXT: pmuludq %xmm2, %xmm1
783-
; SSE41-NEXT: pmuludq %xmm0, %xmm2
784-
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
785-
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
786-
; SSE41-NEXT: psubd %xmm2, %xmm0
787-
; SSE41-NEXT: psrld $1, %xmm0
788-
; SSE41-NEXT: paddd %xmm2, %xmm0
789-
; SSE41-NEXT: psrld $2, %xmm0
778+
; SSE41-NEXT: pmuludq %xmm2, %xmm0
779+
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
780+
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
790781
; SSE41-NEXT: retq
791782
;
792783
; AVX1-LABEL: vector_div_leading_zeros:
@@ -795,13 +786,9 @@ define <4 x i32> @vector_div_leading_zeros(<4 x i32> %x) {
795786
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
796787
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
797788
; AVX1-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
798-
; AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
799-
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
800-
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
801-
; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
802-
; AVX1-NEXT: vpsrld $1, %xmm0, %xmm0
803-
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
804-
; AVX1-NEXT: vpsrld $2, %xmm0, %xmm0
789+
; AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm0
790+
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
791+
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
805792
; AVX1-NEXT: retq
806793
;
807794
; AVX2-LABEL: vector_div_leading_zeros:
@@ -810,13 +797,9 @@ define <4 x i32> @vector_div_leading_zeros(<4 x i32> %x) {
810797
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
811798
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
812799
; AVX2-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
813-
; AVX2-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
814-
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
815-
; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
816-
; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
817-
; AVX2-NEXT: vpsrld $1, %xmm0, %xmm0
818-
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
819-
; AVX2-NEXT: vpsrld $2, %xmm0, %xmm0
800+
; AVX2-NEXT: vpmuludq %xmm2, %xmm0, %xmm0
801+
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
802+
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
820803
; AVX2-NEXT: retq
821804
;
822805
; XOP-LABEL: vector_div_leading_zeros:
@@ -825,13 +808,9 @@ define <4 x i32> @vector_div_leading_zeros(<4 x i32> %x) {
825808
; XOP-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
826809
; XOP-NEXT: vbroadcastss {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
827810
; XOP-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
828-
; XOP-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
829-
; XOP-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
830-
; XOP-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
831-
; XOP-NEXT: vpsubd %xmm1, %xmm0, %xmm0
832-
; XOP-NEXT: vpsrld $1, %xmm0, %xmm0
833-
; XOP-NEXT: vpaddd %xmm1, %xmm0, %xmm0
834-
; XOP-NEXT: vpsrld $2, %xmm0, %xmm0
811+
; XOP-NEXT: vpmuludq %xmm2, %xmm0, %xmm0
812+
; XOP-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
813+
; XOP-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
835814
; XOP-NEXT: retq
836815
%a = and <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
837816
%b = udiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>

llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -230,15 +230,14 @@ define <4 x i1> @p7_vector_urem_by_const__nonsplat_undef2(<4 x i32> %x, <4 x i32
230230
; SSE2-LABEL: p7_vector_urem_by_const__nonsplat_undef2:
231231
; SSE2: # %bb.0:
232232
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
233-
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531]
233+
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [715827883,715827883,715827883,715827883]
234234
; SSE2-NEXT: movdqa %xmm0, %xmm2
235235
; SSE2-NEXT: pmuludq %xmm1, %xmm2
236236
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
237237
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
238238
; SSE2-NEXT: pmuludq %xmm1, %xmm3
239239
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
240240
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
241-
; SSE2-NEXT: psrld $2, %xmm2
242241
; SSE2-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [6,0,6,0,6,0,6,0]
243242
; SSE2-NEXT: psubd %xmm2, %xmm0
244243
; SSE2-NEXT: pxor %xmm1, %xmm1
@@ -249,12 +248,11 @@ define <4 x i1> @p7_vector_urem_by_const__nonsplat_undef2(<4 x i32> %x, <4 x i32
249248
; SSE4: # %bb.0:
250249
; SSE4-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
251250
; SSE4-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
252-
; SSE4-NEXT: movdqa {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
251+
; SSE4-NEXT: movdqa {{.*#+}} xmm2 = [715827883,715827883,715827883,715827883]
253252
; SSE4-NEXT: pmuludq %xmm2, %xmm1
254253
; SSE4-NEXT: pmuludq %xmm0, %xmm2
255254
; SSE4-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
256255
; SSE4-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
257-
; SSE4-NEXT: psrld $2, %xmm2
258256
; SSE4-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [6,0,6,0,6,0,6,0]
259257
; SSE4-NEXT: psubd %xmm2, %xmm0
260258
; SSE4-NEXT: pxor %xmm1, %xmm1
@@ -266,12 +264,11 @@ define <4 x i1> @p7_vector_urem_by_const__nonsplat_undef2(<4 x i32> %x, <4 x i32
266264
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [128,128,128,128]
267265
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
268266
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
269-
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
267+
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [715827883,715827883,715827883,715827883]
270268
; AVX2-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
271269
; AVX2-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
272270
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
273271
; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
274-
; AVX2-NEXT: vpsrld $2, %xmm1, %xmm1
275272
; AVX2-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [6,0,6,0,6,0,6,0]
276273
; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
277274
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1

0 commit comments

Comments
 (0)