Skip to content

Commit 945eeb2

Browse files
authored
[InstCombine] Simplify (X / C0) * C1 + (X % C0) * C2 to (X / C0) * (C1 - C2 * C0) + X * C2 (#76285)
Since `DivRemPairPass` runs after `ReassociatePass` in the optimization pipeline, I decided to do this simplification in `InstCombine`. Alive2: https://alive2.llvm.org/ce/z/Jgsiqf Fixes #76128.
1 parent d149370 commit 945eeb2

File tree

3 files changed

+195
-0
lines changed

3 files changed

+195
-0
lines changed

llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1134,6 +1134,8 @@ static bool MulWillOverflow(APInt &C0, APInt &C1, bool IsSigned) {
11341134

11351135
// Simplifies X % C0 + (( X / C0 ) % C1) * C0 to X % (C0 * C1), where (C0 * C1)
11361136
// does not overflow.
1137+
// Simplifies (X / C0) * C1 + (X % C0) * C2 to
1138+
// (X / C0) * (C1 - C2 * C0) + X * C2
11371139
Value *InstCombinerImpl::SimplifyAddWithRemainder(BinaryOperator &I) {
11381140
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
11391141
Value *X, *MulOpV;
@@ -1161,6 +1163,33 @@ Value *InstCombinerImpl::SimplifyAddWithRemainder(BinaryOperator &I) {
11611163
}
11621164
}
11631165

1166+
// Match I = (X / C0) * C1 + (X % C0) * C2
1167+
Value *Div, *Rem;
1168+
APInt C1, C2;
1169+
if (!LHS->hasOneUse() || !MatchMul(LHS, Div, C1))
1170+
Div = LHS, C1 = APInt(I.getType()->getScalarSizeInBits(), 1);
1171+
if (!RHS->hasOneUse() || !MatchMul(RHS, Rem, C2))
1172+
Rem = RHS, C2 = APInt(I.getType()->getScalarSizeInBits(), 1);
1173+
if (match(Div, m_IRem(m_Value(), m_Value()))) {
1174+
std::swap(Div, Rem);
1175+
std::swap(C1, C2);
1176+
}
1177+
Value *DivOpV;
1178+
APInt DivOpC;
1179+
if (MatchRem(Rem, X, C0, IsSigned) &&
1180+
MatchDiv(Div, DivOpV, DivOpC, IsSigned) && X == DivOpV && C0 == DivOpC) {
1181+
APInt NewC = C1 - C2 * C0;
1182+
if (!NewC.isZero() && !Rem->hasOneUse())
1183+
return nullptr;
1184+
if (!isGuaranteedNotToBeUndef(X, &AC, &I, &DT))
1185+
return nullptr;
1186+
Value *MulXC2 = Builder.CreateMul(X, ConstantInt::get(X->getType(), C2));
1187+
if (NewC.isZero())
1188+
return MulXC2;
1189+
return Builder.CreateAdd(
1190+
Builder.CreateMul(Div, ConstantInt::get(X->getType(), NewC)), MulXC2);
1191+
}
1192+
11641193
return nullptr;
11651194
}
11661195

llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3958,6 +3958,10 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
39583958
/*SimplifyOnly*/ false, *this))
39593959
return BinaryOperator::CreateOr(Op0, V);
39603960

3961+
if (cast<PossiblyDisjointInst>(I).isDisjoint())
3962+
if (Value *V = SimplifyAddWithRemainder(I))
3963+
return replaceInstUsesWith(I, V);
3964+
39613965
return nullptr;
39623966
}
39633967

llvm/test/Transforms/InstCombine/add4.ll

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
22
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
33

4+
declare void @use(i32)
5+
46
define i64 @match_unsigned(i64 %x) {
57
; CHECK-LABEL: @match_unsigned(
68
; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[X:%.*]], 19136
@@ -127,3 +129,163 @@ define i32 @not_match_overflow(i32 %x) {
127129
%t4 = add i32 %t, %t3
128130
ret i32 %t4
129131
}
132+
133+
; Tests from PR76128.
134+
define i32 @fold_add_udiv_urem(i32 noundef %val) {
135+
; CHECK-LABEL: @fold_add_udiv_urem(
136+
; CHECK-NEXT: entry:
137+
; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[VAL:%.*]], 10
138+
; CHECK-NEXT: [[TMP0:%.*]] = mul nuw i32 [[DIV]], 6
139+
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[VAL]]
140+
; CHECK-NEXT: ret i32 [[ADD]]
141+
;
142+
entry:
143+
%div = udiv i32 %val, 10
144+
%shl = shl i32 %div, 4
145+
%rem = urem i32 %val, 10
146+
%add = add i32 %shl, %rem
147+
ret i32 %add
148+
}
149+
define i32 @fold_add_sdiv_srem(i32 noundef %val) {
150+
; CHECK-LABEL: @fold_add_sdiv_srem(
151+
; CHECK-NEXT: entry:
152+
; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[VAL:%.*]], 10
153+
; CHECK-NEXT: [[TMP0:%.*]] = mul nsw i32 [[DIV]], 6
154+
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[VAL]]
155+
; CHECK-NEXT: ret i32 [[ADD]]
156+
;
157+
entry:
158+
%div = sdiv i32 %val, 10
159+
%shl = shl i32 %div, 4
160+
%rem = srem i32 %val, 10
161+
%add = add i32 %shl, %rem
162+
ret i32 %add
163+
}
164+
define i32 @fold_add_udiv_urem_to_mul(i32 noundef %val) {
165+
; CHECK-LABEL: @fold_add_udiv_urem_to_mul(
166+
; CHECK-NEXT: entry:
167+
; CHECK-NEXT: [[ADD:%.*]] = mul i32 [[VAL:%.*]], 3
168+
; CHECK-NEXT: ret i32 [[ADD]]
169+
;
170+
entry:
171+
%div = udiv i32 %val, 7
172+
%mul1 = mul i32 %div, 21
173+
%rem = urem i32 %val, 7
174+
%mul2 = mul i32 %rem, 3
175+
%add = add i32 %mul1, %mul2
176+
ret i32 %add
177+
}
178+
define i32 @fold_add_udiv_urem_to_mul_multiuse(i32 noundef %val) {
179+
; CHECK-LABEL: @fold_add_udiv_urem_to_mul_multiuse(
180+
; CHECK-NEXT: entry:
181+
; CHECK-NEXT: [[REM:%.*]] = urem i32 [[VAL:%.*]], 7
182+
; CHECK-NEXT: call void @use(i32 [[REM]])
183+
; CHECK-NEXT: [[ADD:%.*]] = mul i32 [[VAL]], 3
184+
; CHECK-NEXT: ret i32 [[ADD]]
185+
;
186+
entry:
187+
%div = udiv i32 %val, 7
188+
%mul1 = mul i32 %div, 21
189+
%rem = urem i32 %val, 7
190+
call void @use(i32 %rem)
191+
%mul2 = mul i32 %rem, 3
192+
%add = add i32 %mul1, %mul2
193+
ret i32 %add
194+
}
195+
define i32 @fold_add_udiv_urem_commuted(i32 noundef %val) {
196+
; CHECK-LABEL: @fold_add_udiv_urem_commuted(
197+
; CHECK-NEXT: entry:
198+
; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[VAL:%.*]], 10
199+
; CHECK-NEXT: [[TMP0:%.*]] = mul nuw i32 [[DIV]], 6
200+
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[VAL]]
201+
; CHECK-NEXT: ret i32 [[ADD]]
202+
;
203+
entry:
204+
%div = udiv i32 %val, 10
205+
%shl = shl i32 %div, 4
206+
%rem = urem i32 %val, 10
207+
%add = add i32 %rem, %shl
208+
ret i32 %add
209+
}
210+
define i32 @fold_add_udiv_urem_or_disjoint(i32 noundef %val) {
211+
; CHECK-LABEL: @fold_add_udiv_urem_or_disjoint(
212+
; CHECK-NEXT: entry:
213+
; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[VAL:%.*]], 10
214+
; CHECK-NEXT: [[TMP0:%.*]] = mul nuw i32 [[DIV]], 6
215+
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[VAL]]
216+
; CHECK-NEXT: ret i32 [[ADD]]
217+
;
218+
entry:
219+
%div = udiv i32 %val, 10
220+
%shl = shl i32 %div, 4
221+
%rem = urem i32 %val, 10
222+
%add = or disjoint i32 %shl, %rem
223+
ret i32 %add
224+
}
225+
; Negative tests
226+
define i32 @fold_add_udiv_urem_without_noundef(i32 %val) {
227+
; CHECK-LABEL: @fold_add_udiv_urem_without_noundef(
228+
; CHECK-NEXT: entry:
229+
; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[VAL:%.*]], 10
230+
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[DIV]], 4
231+
; CHECK-NEXT: [[REM:%.*]] = urem i32 [[VAL]], 10
232+
; CHECK-NEXT: [[ADD:%.*]] = or disjoint i32 [[SHL]], [[REM]]
233+
; CHECK-NEXT: ret i32 [[ADD]]
234+
;
235+
entry:
236+
%div = udiv i32 %val, 10
237+
%shl = shl i32 %div, 4
238+
%rem = urem i32 %val, 10
239+
%add = add i32 %shl, %rem
240+
ret i32 %add
241+
}
242+
define i32 @fold_add_udiv_urem_multiuse_mul(i32 noundef %val) {
243+
; CHECK-LABEL: @fold_add_udiv_urem_multiuse_mul(
244+
; CHECK-NEXT: entry:
245+
; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[VAL:%.*]], 10
246+
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[DIV]], 4
247+
; CHECK-NEXT: call void @use(i32 [[SHL]])
248+
; CHECK-NEXT: [[REM:%.*]] = urem i32 [[VAL]], 10
249+
; CHECK-NEXT: [[ADD:%.*]] = or disjoint i32 [[SHL]], [[REM]]
250+
; CHECK-NEXT: ret i32 [[ADD]]
251+
;
252+
entry:
253+
%div = udiv i32 %val, 10
254+
%shl = shl i32 %div, 4
255+
call void @use(i32 %shl)
256+
%rem = urem i32 %val, 10
257+
%add = add i32 %shl, %rem
258+
ret i32 %add
259+
}
260+
define i32 @fold_add_udiv_srem(i32 noundef %val) {
261+
; CHECK-LABEL: @fold_add_udiv_srem(
262+
; CHECK-NEXT: entry:
263+
; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[VAL:%.*]], 10
264+
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[DIV]], 4
265+
; CHECK-NEXT: [[REM:%.*]] = srem i32 [[VAL]], 10
266+
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SHL]], [[REM]]
267+
; CHECK-NEXT: ret i32 [[ADD]]
268+
;
269+
entry:
270+
%div = udiv i32 %val, 10
271+
%shl = shl i32 %div, 4
272+
%rem = srem i32 %val, 10
273+
%add = add i32 %shl, %rem
274+
ret i32 %add
275+
}
276+
define i32 @fold_add_udiv_urem_non_constant(i32 noundef %val, i32 noundef %c) {
277+
; CHECK-LABEL: @fold_add_udiv_urem_non_constant(
278+
; CHECK-NEXT: entry:
279+
; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[VAL:%.*]], [[C:%.*]]
280+
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[DIV]], 4
281+
; CHECK-NEXT: [[REM:%.*]] = urem i32 [[VAL]], [[C]]
282+
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SHL]], [[REM]]
283+
; CHECK-NEXT: ret i32 [[ADD]]
284+
;
285+
entry:
286+
%div = udiv i32 %val, %c
287+
%shl = shl i32 %div, 4
288+
%rem = urem i32 %val, %c
289+
%add = add i32 %shl, %rem
290+
ret i32 %add
291+
}

0 commit comments

Comments
 (0)