Skip to content

Commit 756af2f

Browse files
committed
[SCEV] SCEVExpander::isHighCostExpansionHelper(): cost-model add/mul
Summary:
While this resolves the regression from D73722 in `llvm/test/Transforms/IndVarSimplify/exit_value_test2.ll`, it now regresses the `@nestedIV` test in `llvm/test/Transforms/IndVarSimplify/elim-extend.ll`: we can no longer perform that expansion within the default budget of `4`, but require a budget of `6`. That regression is being addressed by D73777.

The basic idea here is simple.
```
Op0, Op1, Op2 ...
 |    |    |
 \--+--/   |
    |      |
    \---+--/
```
I.e. given N operands, we will have N-1 operations, so we have to add the cost of an add (mul) for **every** Op processed, **except** the first one, plus we need to recurse into *every* Op.

I'm guessing there's already canonicalization that ensures we won't have `1` operand in `scMulExpr`, and no `0` in `scAddExpr`/`scMulExpr`.

Reviewers: reames, mkazantsev, wmi, sanjoy
Reviewed By: mkazantsev
Subscribers: hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D73728
1 parent cc29600 commit 756af2f

File tree

3 files changed

+38
-7
lines changed

3 files changed

+38
-7
lines changed

llvm/lib/Analysis/ScalarEvolutionExpander.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2219,6 +2219,40 @@ bool SCEVExpander::isHighCostExpansionHelper(
22192219
TTI, Processed);
22202220
}
22212221

2222+
if (S->getSCEVType() == scAddExpr || S->getSCEVType() == scMulExpr) {
2223+
const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(S);
2224+
2225+
unsigned Opcode;
2226+
switch (S->getSCEVType()) {
2227+
case scAddExpr:
2228+
Opcode = Instruction::Add;
2229+
break;
2230+
case scMulExpr:
2231+
Opcode = Instruction::Mul;
2232+
break;
2233+
default:
2234+
llvm_unreachable("There are no other variants here.");
2235+
}
2236+
2237+
Type *OpType = NAry->getType();
2238+
int PairCost = TTI.getOperationCost(Opcode, OpType);
2239+
// TODO: this is a very pessimistic cost modelling for Mul,
2240+
// because of Bin Pow algorithm actually used by the expander,
2241+
// see SCEVExpander::visitMulExpr(), ExpandOpBinPowN().
2242+
2243+
assert(NAry->getNumOperands() > 1 &&
2244+
"Nary expr should have more than 1 operand.");
2245+
for (const SCEV *Op : NAry->operands()) {
2246+
if (isHighCostExpansionHelper(Op, L, At, BudgetRemaining, TTI, Processed))
2247+
return true;
2248+
if (Op == *NAry->op_begin())
2249+
continue;
2250+
BudgetRemaining -= PairCost;
2251+
}
2252+
2253+
return BudgetRemaining < 0;
2254+
}
2255+
22222256
// HowManyLessThans uses a Max expression whenever the loop is not guarded by
22232257
// the exit condition.
22242258
if (isa<SCEVMinMaxExpr>(S))

llvm/test/Transforms/IndVarSimplify/elim-extend.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,8 +135,8 @@ define void @nestedIV(i8* %address, i32 %limit) nounwind {
135135
; CHECK-NEXT: store i8 0, i8* [[ADR2]]
136136
; CHECK-NEXT: [[ADR3:%.*]] = getelementptr i8, i8* [[ADDRESS]], i64 [[INDVARS_IV_NEXT]]
137137
; CHECK-NEXT: store i8 0, i8* [[ADR3]]
138-
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[TMP0]]
139-
; CHECK-NEXT: br i1 [[EXITCOND]], label [[INNERLOOP]], label [[INNEREXIT:%.*]]
138+
; CHECK-NEXT: [[INNERCMP:%.*]] = icmp sgt i64 [[TMP0]], [[INDVARS_IV_NEXT]]
139+
; CHECK-NEXT: br i1 [[INNERCMP]], label [[INNERLOOP]], label [[INNEREXIT:%.*]]
140140
; CHECK: innerexit:
141141
; CHECK-NEXT: [[INNERCOUNT_LCSSA_WIDE:%.*]] = phi i64 [ [[INDVARS_IV_NEXT]], [[INNERLOOP]] ]
142142
; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[INNERCOUNT_LCSSA_WIDE]] to i32

llvm/test/Transforms/IndVarSimplify/exit_value_test2.ll

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,6 @@ define i32 @_Z3fooPKcjj(i8* nocapture readonly %s, i32 %len, i32 %c) {
1919
; CHECK-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[LEN:%.*]], 11
2020
; CHECK-NEXT: br i1 [[CMP8]], label [[WHILE_BODY_LR_PH:%.*]], label [[WHILE_END:%.*]]
2121
; CHECK: while.body.lr.ph:
22-
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], -12
23-
; CHECK-NEXT: [[TMP1:%.*]] = udiv i32 [[TMP0]], 12
24-
; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], 12
2522
; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
2623
; CHECK: while.body:
2724
; CHECK-NEXT: [[KEYLEN_010:%.*]] = phi i32 [ [[LEN]], [[WHILE_BODY_LR_PH]] ], [ [[SUB:%.*]], [[WHILE_BODY]] ]
@@ -39,10 +36,10 @@ define i32 @_Z3fooPKcjj(i8* nocapture readonly %s, i32 %len, i32 %c) {
3936
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[SUB]], 11
4037
; CHECK-NEXT: br i1 [[CMP]], label [[WHILE_BODY]], label [[WHILE_COND_WHILE_END_CRIT_EDGE:%.*]]
4138
; CHECK: while.cond.while.end_crit_edge:
42-
; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[TMP0]], [[TMP2]]
39+
; CHECK-NEXT: [[SUB_LCSSA:%.*]] = phi i32 [ [[SUB]], [[WHILE_BODY]] ]
4340
; CHECK-NEXT: br label [[WHILE_END]]
4441
; CHECK: while.end:
45-
; CHECK-NEXT: [[KEYLEN_0_LCSSA:%.*]] = phi i32 [ [[TMP3]], [[WHILE_COND_WHILE_END_CRIT_EDGE]] ], [ [[LEN]], [[ENTRY:%.*]] ]
42+
; CHECK-NEXT: [[KEYLEN_0_LCSSA:%.*]] = phi i32 [ [[SUB_LCSSA]], [[WHILE_COND_WHILE_END_CRIT_EDGE]] ], [ [[LEN]], [[ENTRY:%.*]] ]
4643
; CHECK-NEXT: call void @_Z3mixRjj(i32* dereferenceable(4) [[A]], i32 [[KEYLEN_0_LCSSA]])
4744
; CHECK-NEXT: [[T4:%.*]] = load i32, i32* [[A]], align 4
4845
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[T]])

0 commit comments

Comments
 (0)