-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[IndVars] Fix high-cost-expand check in LFTR #125828
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Ramkumar Ramachandra (artagnon) ChangesGuard against high-cost expansions in genLoopLimit, by checking IVLimit against SCEVExpander::isHighCostExpansion. Full diff: https://github.com/llvm/llvm-project/pull/125828.diff 4 Files Affected:
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 9619dfdbf41231..c3a9fa969eb3d3 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -912,7 +912,8 @@ static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
/// is taken ExitCount times.
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
const SCEV *ExitCount, bool UsePostInc, Loop *L,
- SCEVExpander &Rewriter, ScalarEvolution *SE) {
+ SCEVExpander &Rewriter, ScalarEvolution *SE,
+ const TargetTransformInfo *TTI) {
assert(isLoopCounter(IndVar, L, SE));
assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
@@ -935,6 +936,9 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE);
assert(SE->isLoopInvariant(IVLimit, L) &&
"Computed iteration count is not loop invariant!");
+ if (Rewriter.isHighCostExpansion(IVLimit, L, SCEVCheapExpansionBudget, TTI,
+ IndVar))
+ return nullptr;
return Rewriter.expandCodeFor(IVLimit, ARBase->getType(),
ExitingBB->getTerminator());
}
@@ -995,8 +999,10 @@ linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
}
- Value *ExitCnt = genLoopLimit(
- IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
+ Value *ExitCnt = genLoopLimit(IndVar, ExitingBB, ExitCount, UsePostInc, L,
+ Rewriter, SE, TTI);
+ if (!ExitCnt)
+ return false;
assert(ExitCnt->getType()->isPointerTy() ==
IndVar->getType()->isPointerTy() &&
"genLoopLimit missed a cast");
diff --git a/llvm/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll b/llvm/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll
index cb0be523c70955..48e5c589797dc9 100644
--- a/llvm/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll
+++ b/llvm/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll
@@ -158,17 +158,14 @@ define i8 @testnullptrint(ptr %buf, ptr %end) nounwind {
; PTR64-NEXT: [[GUARD:%.*]] = icmp ult i32 0, [[CNT]]
; PTR64-NEXT: br i1 [[GUARD]], label [[PREHEADER:%.*]], label [[EXIT:%.*]]
; PTR64: preheader:
-; PTR64-NEXT: [[TMP1:%.*]] = add i32 [[EI]], -1
-; PTR64-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], [[BI]]
-; PTR64-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
-; PTR64-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1
-; PTR64-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP4]]
; PTR64-NEXT: br label [[LOOP:%.*]]
; PTR64: loop:
; PTR64-NEXT: [[P_01_US_US:%.*]] = phi ptr [ null, [[PREHEADER]] ], [ [[GEP:%.*]], [[LOOP]] ]
+; PTR64-NEXT: [[IV:%.*]] = phi i32 [ 0, [[PREHEADER]] ], [ [[IVNEXT:%.*]], [[LOOP]] ]
; PTR64-NEXT: [[GEP]] = getelementptr inbounds i8, ptr [[P_01_US_US]], i64 1
; PTR64-NEXT: [[SNEXT:%.*]] = load i8, ptr [[GEP]], align 1
-; PTR64-NEXT: [[EXITCOND:%.*]] = icmp ne ptr [[GEP]], [[SCEVGEP]]
+; PTR64-NEXT: [[IVNEXT]] = add nuw i32 [[IV]], 1
+; PTR64-NEXT: [[EXITCOND:%.*]] = icmp ult i32 [[IVNEXT]], [[CNT]]
; PTR64-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
; PTR64: exit.loopexit:
; PTR64-NEXT: [[SNEXT_LCSSA:%.*]] = phi i8 [ [[SNEXT]], [[LOOP]] ]
diff --git a/llvm/test/Transforms/IndVarSimplify/lftr-pr41998.ll b/llvm/test/Transforms/IndVarSimplify/lftr-pr41998.ll
index b7f4756b2757fd..636ea9f53042e5 100644
--- a/llvm/test/Transforms/IndVarSimplify/lftr-pr41998.ll
+++ b/llvm/test/Transforms/IndVarSimplify/lftr-pr41998.ll
@@ -41,17 +41,15 @@ end:
define void @test_ptr(i32 %start) {
; CHECK-LABEL: @test_ptr(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[START:%.*]] to i3
-; CHECK-NEXT: [[TMP1:%.*]] = sub i3 -1, [[TMP0]]
-; CHECK-NEXT: [[TMP2:%.*]] = zext i3 [[TMP1]] to i64
-; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
-; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr @data, i64 [[TMP3]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[P:%.*]] = phi ptr [ @data, [[ENTRY:%.*]] ], [ [[P_INC:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[I_INC:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[P:%.*]] = phi ptr [ @data, [[ENTRY]] ], [ [[P_INC:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[I_INC]] = add nuw i32 [[I]], 1
; CHECK-NEXT: [[P_INC]] = getelementptr inbounds i8, ptr [[P]], i64 1
; CHECK-NEXT: store volatile i8 0, ptr [[P_INC]], align 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq ptr [[P_INC]], [[UGLYGEP]]
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[I_INC]], 7
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[AND]], 0
; CHECK-NEXT: br i1 [[EXITCOND]], label [[END:%.*]], label [[LOOP]]
; CHECK: end:
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll b/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll
index 37bc67c23adb75..4112b85e43e20b 100644
--- a/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll
+++ b/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll
@@ -14,20 +14,20 @@ define dso_local void @hoge() local_unnamed_addr {
; CHECK-LABEL: @hoge(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[N:%.*]] = sdiv exact i64 undef, 40
-; CHECK-NEXT: [[TMP0:%.*]] = sub i64 undef, [[N]]
; CHECK-NEXT: br label [[HEADER:%.*]]
; CHECK: header:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LATCH:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ [[IDX_NEXT:%.*]], [[LATCH]] ], [ undef, [[ENTRY]] ]
+; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ [[IDX_NEXT:%.*]], [[LATCH:%.*]] ], [ undef, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[COND:%.*]] = icmp sgt i64 [[N]], [[IDX]]
; CHECK-NEXT: br i1 [[COND]], label [[END:%.*]], label [[INNER_PREHEADER:%.*]]
; CHECK: inner.preheader:
; CHECK-NEXT: br label [[INNER:%.*]]
; CHECK: inner:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[INNER]] ], [ 0, [[INNER_PREHEADER]] ]
+; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], [[INNER]] ], [ [[N]], [[INNER_PREHEADER]] ]
; CHECK-NEXT: [[I_NEXT]] = add nuw i64 [[I]], 1
+; CHECK-NEXT: [[J_NEXT]] = add nsw i64 [[J]], 1
; CHECK-NEXT: store i64 undef, ptr @ptr, align 8
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[I_NEXT]], [[INDVARS_IV]]
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp slt i64 [[J]], [[IDX]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[INNER]], label [[INNER_EXIT:%.*]]
; CHECK: inner_exit:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[I_NEXT]], [[INNER]] ]
@@ -35,7 +35,6 @@ define dso_local void @hoge() local_unnamed_addr {
; CHECK-NEXT: br label [[LATCH]]
; CHECK: latch:
; CHECK-NEXT: [[IDX_NEXT]] = add nsw i64 [[IDX]], -1
-; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], -1
; CHECK-NEXT: br label [[HEADER]]
; CHECK: end:
; CHECK-NEXT: ret void
|
✅ With the latest revision this PR passed the undef deprecator. |
Pending update of Transforms/PhaseOrdering/runtime-check-removal.ll. |
ea6d845
to
13f16d1
Compare
Rebased and ready to review. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hm, it seems like costing IVLimit instead of ExitCount ends up having the opposite effect and we do more LFTR? Is IVLimit usually a simpler expression than ExitCount?
It's neither strictly more or strictly less complex. In some cases, we do more LFTR; in others, we do less LFTR. Considering that we're trying to phase out LFTR gradually, maybe I can add a FIXME high-cost-check with ExitCount as well, to avoid regressions? |
13f16d1
to
c91471b
Compare
Gentle ping. |
Gentle ping. |
Gentle ping. |
Guard against high-cost expansions in genLoopLimit, by checking IVLimit against SCEVExpander::isHighCostExpansion.
c91471b
to
6d428b6
Compare
Gentle ping. I think we ultimately want #126086, although we're still undecided on it, but I think this PR will give us many small improvements, and I think it is a step in the right direction. |
Call SCEVExpander::isHighCostExpansion on the expression that's actually expanded.