Skip to content

Commit cb76896

Browse files
authored
[SCEVExpander] Recognize urem idiom during expansion (#96005)
If we have a urem expression, emitting it as a urem is significantly better than letting the full expansion kick in. We risk duplicating a udiv or mul which could previously have been shared, but losing that sharing seems like a reasonable tradeoff for being able to round-trip a urem without modification.
1 parent 1003f5b commit cb76896

File tree

4 files changed

+16
-9
lines changed

4 files changed

+16
-9
lines changed

llvm/lib/Analysis/ScalarEvolution.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14972,6 +14972,9 @@ void PredicatedScalarEvolution::print(raw_ostream &OS, unsigned Depth) const {
1497214972
// 4, A / B becomes X / 8).
1497314973
bool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS,
1497414974
const SCEV *&RHS) {
14975+
if (Expr->getType()->isPointerTy())
14976+
return false;
14977+
1497514978
// Try to match 'zext (trunc A to iB) to iY', which is used
1497614979
// for URem with constant power-of-2 second operands. Make sure the size of
1497714980
// the operand A matches the size of the whole expressions.

llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,16 @@ class LoopCompare {
491491
}
492492

493493
Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
494+
// Recognize the canonical representation of an unsimplified urem.
495+
const SCEV *URemLHS = nullptr;
496+
const SCEV *URemRHS = nullptr;
497+
if (SE.matchURem(S, URemLHS, URemRHS)) {
498+
Value *LHS = expand(URemLHS);
499+
Value *RHS = expand(URemRHS);
500+
return InsertBinop(Instruction::URem, LHS, RHS, SCEV::FlagAnyWrap,
501+
/*IsSafeToHoist*/ false);
502+
}
503+
494504
// Collect all the add operands in a loop, along with their associated loops.
495505
// Iterate in reverse so that constants are emitted last, all else equal, and
496506
// so that pointer operands are inserted first, which the code below relies on

llvm/test/Transforms/LoopStrengthReduce/X86/postinc-iv-used-by-urem-and-udiv.ll

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,7 @@ define i32 @test_pr38847() {
2222
; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i8 [[LSR]], -1
2323
; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP]], label [[EXIT:%.*]]
2424
; CHECK: exit:
25-
; CHECK-NEXT: [[TMP0:%.*]] = udiv i32 [[LSR_IV_NEXT2]], 9
26-
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i32 [[TMP0]], 9
27-
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[LSR_IV_NEXT2]], [[TMP1]]
25+
; CHECK-NEXT: [[TMP2:%.*]] = urem i32 [[LSR_IV_NEXT2]], 9
2826
; CHECK-NEXT: ret i32 [[TMP2]]
2927
;
3028
entry:
@@ -109,9 +107,7 @@ define i32 @test_pr62852() {
109107
; CHECK: exit:
110108
; CHECK-NEXT: call void @use(i64 [[LSR_IV_NEXT]])
111109
; CHECK-NEXT: call void @use(i64 [[LSR_IV_NEXT2]])
112-
; CHECK-NEXT: [[TMP1:%.*]] = udiv i32 [[DEC_1]], 53
113-
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i32 [[TMP1]], 53
114-
; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[DEC_1]], [[TMP2]]
110+
; CHECK-NEXT: [[TMP3:%.*]] = urem i32 [[DEC_1]], 53
115111
; CHECK-NEXT: ret i32 [[TMP3]]
116112
;
117113
entry:

llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -723,9 +723,7 @@ define i64 @multi_exit_4_exit_count_with_urem_by_constant_in_latch(ptr %dst, i64
723723
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
724724
; CHECK-NEXT: entry:
725725
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0)
726-
; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 [[N]], 42
727-
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 42
728-
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[N]], [[TMP1]]
726+
; CHECK-NEXT: [[TMP2:%.*]] = urem i64 [[N]], 42
729727
; CHECK-NEXT: [[SMAX1:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP2]], i64 0)
730728
; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[SMAX]], i64 [[SMAX1]])
731729
; CHECK-NEXT: [[TMP3:%.*]] = add nuw i64 [[UMIN]], 1

0 commit comments

Comments
 (0)