
Commit f672f27

AZero13 authored and yuxuanchen1997 committed
[CodeGen] Remove checks for vectors in unsigned division prior to computing leading zeros (#99524)
Summary: It turns out we can safely use DAG.computeKnownBits(N0).countMinLeadingZeros() with constant legal vectors, so remove the check for it.

Differential Revision: https://phabricator.intern.facebook.com/D60251562
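For intuition, a worked sketch of what the known leading zeros buy (my own example, not from the commit message): BuildUDIV expands a udiv by constant into a multiply by a "magic" factor plus shifts and, in the worst case, an NPQ fixup. When the dividend x is known to fit in 8 bits (24 leading zeros), the divisor 7 admits the reduced multiplier 613566757 = ceil(2^32 / 7), and the expansion needs no pre-shift, post-shift, or fixup at all:

    floor(x / 7) = floor((x * 613566757) / 2^32)   for 0 <= x <= 255,

because 7 * 613566757 = 2^32 + 3, so the product overshoots x/7 by only 3x/(7 * 2^32) < 1/7 on that range. Passing countMinLeadingZeros into UnsignedDivisionByConstantInfo::get is what lets it pick such reduced multipliers, now for vector dividends as well.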
1 parent ba3742d commit f672f27

File tree: 3 files changed (+67, -16 lines)

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 3 additions & 10 deletions
@@ -6483,15 +6483,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
 
   // Try to use leading zeros of the dividend to reduce the multiplier and
   // avoid expensive fixups.
-  // TODO: Support vectors.
-  unsigned LeadingZeros = 0;
-  if (!VT.isVector() && isa<ConstantSDNode>(N1)) {
-    assert(!isOneConstant(N1) && "Unexpected divisor");
-    LeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
-    // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros in
-    // the dividend exceeds the leading zeros for the divisor.
-    LeadingZeros = std::min(LeadingZeros, N1->getAsAPIntVal().countl_zero());
-  }
+  unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
 
   bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
   SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
@@ -6510,7 +6502,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
       MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
     } else {
       UnsignedDivisionByConstantInfo magics =
-          UnsignedDivisionByConstantInfo::get(Divisor, LeadingZeros);
+          UnsignedDivisionByConstantInfo::get(
+              Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
 
       MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);
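A standalone illustration of the clamp introduced above (a minimal sketch against LLVM's APInt API; the free function is hypothetical, extracted here only to mirror the changed call site):

#include "llvm/ADT/APInt.h"
#include <algorithm>

// UnsignedDivisionByConstantInfo does not work correctly if the leading
// zeros assumed for the dividend exceed the divisor's own leading zeros,
// so the known count is clamped against each (splat) divisor element.
static unsigned clampKnownLeadingZeros(unsigned KnownLeadingZeros,
                                       const llvm::APInt &Divisor) {
  return std::min(KnownLeadingZeros, Divisor.countl_zero());
}

This also shows why the old scalar-only guard could go: DAG.computeKnownBits already handles vector operands, and the divisor-side clamp now sits next to the per-element UnsignedDivisionByConstantInfo::get call, where each constant element is visited.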

llvm/test/CodeGen/X86/combine-udiv.ll

Lines changed: 61 additions & 0 deletions
@@ -755,3 +755,64 @@ define <4 x i1> @boolvec_udiv(<4 x i1> %x, <4 x i1> %y) {
   %r = udiv <4 x i1> %x, %y
   ret <4 x i1> %r
 }
+
+define <4 x i32> @vector_div_leading_zeros(<4 x i32> %x) {
+; SSE2-LABEL: vector_div_leading_zeros:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; SSE2-NEXT: pmuludq %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
+; SSE2-NEXT: pmuludq %xmm1, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: vector_div_leading_zeros:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
+; SSE41-NEXT: pmuludq %xmm2, %xmm1
+; SSE41-NEXT: pmuludq %xmm2, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: vector_div_leading_zeros:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
+; AVX1-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: vector_div_leading_zeros:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
+; AVX2-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpmuludq %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX2-NEXT: retq
+;
+; XOP-LABEL: vector_div_leading_zeros:
+; XOP: # %bb.0:
+; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; XOP-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; XOP-NEXT: vbroadcastss {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
+; XOP-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
+; XOP-NEXT: vpmuludq %xmm2, %xmm0, %xmm0
+; XOP-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; XOP-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; XOP-NEXT: retq
+  %a = and <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
+  %b = udiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
+  ret <4 x i32> %b
+}
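The splat 613566757 in the expansion above is the reduced magic factor ceil(2^32 / 7); the pand clamps every lane to 8 bits, which is what licenses dropping the usual post-shift and fixup. A throwaway scalar check of the identity (my own harness, not part of the test suite):

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t Magic = 613566757; // ceil(2^32 / 7)
  // Each lane is masked to 8 bits, so all possible dividends lie in [0, 255].
  for (uint64_t X = 0; X <= 255; ++X)
    assert(((X * Magic) >> 32) == X / 7); // high 32 bits of the product
  return 0;
}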

llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll

Lines changed: 3 additions & 6 deletions
@@ -230,15 +230,14 @@ define <4 x i1> @p7_vector_urem_by_const__nonsplat_undef2(<4 x i32> %x, <4 x i32
 ; SSE2-LABEL: p7_vector_urem_by_const__nonsplat_undef2:
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531]
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [715827883,715827883,715827883,715827883]
 ; SSE2-NEXT: movdqa %xmm0, %xmm2
 ; SSE2-NEXT: pmuludq %xmm1, %xmm2
 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
 ; SSE2-NEXT: pmuludq %xmm1, %xmm3
 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE2-NEXT: psrld $2, %xmm2
 ; SSE2-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [6,0,6,0,6,0,6,0]
 ; SSE2-NEXT: psubd %xmm2, %xmm0
 ; SSE2-NEXT: pxor %xmm1, %xmm1
@@ -249,12 +248,11 @@ define <4 x i1> @p7_vector_urem_by_const__nonsplat_undef2(<4 x i32> %x, <4 x i32
 ; SSE4: # %bb.0:
 ; SSE4-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE4-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE4-NEXT: movdqa {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
+; SSE4-NEXT: movdqa {{.*#+}} xmm2 = [715827883,715827883,715827883,715827883]
 ; SSE4-NEXT: pmuludq %xmm2, %xmm1
 ; SSE4-NEXT: pmuludq %xmm0, %xmm2
 ; SSE4-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
 ; SSE4-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
-; SSE4-NEXT: psrld $2, %xmm2
 ; SSE4-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [6,0,6,0,6,0,6,0]
 ; SSE4-NEXT: psubd %xmm2, %xmm0
 ; SSE4-NEXT: pxor %xmm1, %xmm1
@@ -266,12 +264,11 @@ define <4 x i1> @p7_vector_urem_by_const__nonsplat_undef2(<4 x i32> %x, <4 x i32
 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [128,128,128,128]
 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [715827883,715827883,715827883,715827883]
 ; AVX2-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
 ; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
 ; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
-; AVX2-NEXT: vpsrld $2, %xmm1, %xmm1
 ; AVX2-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [6,0,6,0,6,0,6,0]
 ; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
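The same reduction explains this diff (my reading; the derivation of the constants is not spelled out in the commit): the dividend is masked down to {0, 128}, so at least 24 leading zeros are known, and the urem-by-6 lowering can swap the old magic 2863311531 = ceil(2^33 / 3) followed by psrld $2 for the reduced 715827883 = ceil(2^32 / 6), whose high product half is already floor(x / 6):

    floor((x * 715827883) / 2^32) = floor(x / 6)   for 0 <= x <= 128,

since 6 * 715827883 = 2^32 + 2 makes the overshoot 2x/(6 * 2^32) negligible on that range. The pmaddwd by 6 and psubd that follow reconstruct the remainder exactly as before.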
