Skip to content

Commit 4ec4ac1

Browse files
authored
[SCEVExpander] Fix addrec cost model (#106704)
The current isHighCostExpansion cost model for addrecs computes the cost for some kind of polynomial expansion that does not appear to have any relation to addrec expansion whatsoever. A literal expansion of an affine addrec is a phi and add (plus the expansion of start and step). For a non-affine addrec, we get another phi+add for each additional addrec nested in the step recurrence. This partially fixes #53205 (the runtime unroll test case in this PR).
1 parent c18be32 commit 4ec4ac1

File tree

3 files changed

+96
-56
lines changed

3 files changed

+96
-56
lines changed

llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp

Lines changed: 11 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1911,43 +1911,17 @@ template<typename T> static InstructionCost costAndCollectOperands(
19111911
break;
19121912
}
19131913
case scAddRecExpr: {
1914-
// In this polynominal, we may have some zero operands, and we shouldn't
1915-
// really charge for those. So how many non-zero coefficients are there?
1916-
int NumTerms = llvm::count_if(S->operands(), [](const SCEV *Op) {
1917-
return !Op->isZero();
1918-
});
1919-
1920-
assert(NumTerms >= 1 && "Polynominal should have at least one term.");
1921-
assert(!(*std::prev(S->operands().end()))->isZero() &&
1922-
"Last operand should not be zero");
1923-
1924-
// Ignoring constant term (operand 0), how many of the coefficients are u> 1?
1925-
int NumNonZeroDegreeNonOneTerms =
1926-
llvm::count_if(S->operands(), [](const SCEV *Op) {
1927-
auto *SConst = dyn_cast<SCEVConstant>(Op);
1928-
return !SConst || SConst->getAPInt().ugt(1);
1929-
});
1930-
1931-
// Much like with normal add expr, the polynominal will require
1932-
// one less addition than the number of it's terms.
1933-
InstructionCost AddCost = ArithCost(Instruction::Add, NumTerms - 1,
1934-
/*MinIdx*/ 1, /*MaxIdx*/ 1);
1935-
// Here, *each* one of those will require a multiplication.
1936-
InstructionCost MulCost =
1937-
ArithCost(Instruction::Mul, NumNonZeroDegreeNonOneTerms);
1938-
Cost = AddCost + MulCost;
1939-
1940-
// What is the degree of this polynominal?
1941-
int PolyDegree = S->getNumOperands() - 1;
1942-
assert(PolyDegree >= 1 && "Should be at least affine.");
1943-
1944-
// The final term will be:
1945-
// Op_{PolyDegree} * x ^ {PolyDegree}
1946-
// Where x ^ {PolyDegree} will again require PolyDegree-1 mul operations.
1947-
// Note that x ^ {PolyDegree} = x * x ^ {PolyDegree-1} so charging for
1948-
// x ^ {PolyDegree} will give us x ^ {2} .. x ^ {PolyDegree-1} for free.
1949-
// FIXME: this is conservatively correct, but might be overly pessimistic.
1950-
Cost += MulCost * (PolyDegree - 1);
1914+
// Addrec expands to a phi and add per recurrence.
1915+
unsigned NumRecurrences = S->getNumOperands() - 1;
1916+
Cost += TTI.getCFInstrCost(Instruction::PHI, CostKind) * NumRecurrences;
1917+
Cost +=
1918+
TTI.getArithmeticInstrCost(Instruction::Add, S->getType(), CostKind) *
1919+
NumRecurrences;
1920+
// AR start is used in phi.
1921+
Worklist.emplace_back(Instruction::PHI, 0, S->getOperand(0));
1922+
// Other operands are used in add.
1923+
for (const SCEV *Op : S->operands().drop_front())
1924+
Worklist.emplace_back(Instruction::Add, 1, Op);
19511925
break;
19521926
}
19531927
}

llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,27 +14,28 @@ define dso_local void @hoge() local_unnamed_addr {
1414
; CHECK-LABEL: @hoge(
1515
; CHECK-NEXT: entry:
1616
; CHECK-NEXT: [[N:%.*]] = sdiv exact i64 undef, 40
17+
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 undef, [[N]]
1718
; CHECK-NEXT: br label [[HEADER:%.*]]
1819
; CHECK: header:
19-
; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ [[IDX_NEXT:%.*]], [[LATCH:%.*]] ], [ undef, [[ENTRY:%.*]] ]
20+
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LATCH:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
21+
; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ [[IDX_NEXT:%.*]], [[LATCH]] ], [ undef, [[ENTRY]] ]
2022
; CHECK-NEXT: [[COND:%.*]] = icmp sgt i64 [[N]], [[IDX]]
2123
; CHECK-NEXT: br i1 [[COND]], label [[END:%.*]], label [[INNER_PREHEADER:%.*]]
2224
; CHECK: inner.preheader:
2325
; CHECK-NEXT: br label [[INNER:%.*]]
2426
; CHECK: inner:
2527
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[INNER]] ], [ 0, [[INNER_PREHEADER]] ]
26-
; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], [[INNER]] ], [ [[N]], [[INNER_PREHEADER]] ]
27-
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
28-
; CHECK-NEXT: [[J_NEXT]] = add nsw i64 [[J]], 1
28+
; CHECK-NEXT: [[I_NEXT]] = add nuw i64 [[I]], 1
2929
; CHECK-NEXT: store i64 undef, ptr @ptr, align 8
30-
; CHECK-NEXT: [[COND1:%.*]] = icmp slt i64 [[J]], [[IDX]]
31-
; CHECK-NEXT: br i1 [[COND1]], label [[INNER]], label [[INNER_EXIT:%.*]]
30+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[I_NEXT]], [[INDVARS_IV]]
31+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[INNER]], label [[INNER_EXIT:%.*]]
3232
; CHECK: inner_exit:
3333
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[I_NEXT]], [[INNER]] ]
3434
; CHECK-NEXT: [[INDVAR_USE:%.*]] = add i64 [[INDVAR]], 1
3535
; CHECK-NEXT: br label [[LATCH]]
3636
; CHECK: latch:
3737
; CHECK-NEXT: [[IDX_NEXT]] = add nsw i64 [[IDX]], -1
38+
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], -1
3839
; CHECK-NEXT: br label [[HEADER]]
3940
; CHECK: end:
4041
; CHECK-NEXT: ret void

llvm/test/Transforms/LoopUnroll/X86/runtime-unroll-addrec-cost.ll

Lines changed: 78 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,27 @@ define void @selsort(ptr %array) #0 {
1313
; CHECK-NEXT: br i1 [[CMP21_NOT]], label %[[FOR_END18:.*]], label %[[FOR_BODY_LR_PH:.*]]
1414
; CHECK: [[FOR_BODY_LR_PH]]:
1515
; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARRAY]], align 8
16+
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP0]], -1
17+
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP0]], -2
1618
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
1719
; CHECK: [[FOR_BODY]]:
1820
; CHECK-NEXT: [[BASE_022:%.*]] = phi i64 [ 0, %[[FOR_BODY_LR_PH]] ], [ [[ADD:%.*]], %[[FOR_END:.*]] ]
21+
; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[BASE_022]], -1
22+
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP7]], [[TMP10]]
23+
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP8]], [[TMP10]]
1924
; CHECK-NEXT: [[ADD]] = add nuw i64 [[BASE_022]], 1
2025
; CHECK-NEXT: [[CMP318:%.*]] = icmp ult i64 [[ADD]], [[TMP0]]
2126
; CHECK-NEXT: br i1 [[CMP318]], label %[[FOR_BODY4_PREHEADER:.*]], label %[[FOR_END]]
2227
; CHECK: [[FOR_BODY4_PREHEADER]]:
23-
; CHECK-NEXT: br label %[[FOR_BODY4:.*]]
24-
; CHECK: [[FOR_BODY4]]:
25-
; CHECK-NEXT: [[MIN_020:%.*]] = phi i64 [ [[SPEC_SELECT:%.*]], %[[FOR_BODY4]] ], [ [[BASE_022]], %[[FOR_BODY4_PREHEADER]] ]
26-
; CHECK-NEXT: [[C_019:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY4]] ], [ [[ADD]], %[[FOR_BODY4_PREHEADER]] ]
28+
; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP11]], 3
29+
; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
30+
; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[FOR_BODY4_PROL_PREHEADER:.*]], label %[[FOR_BODY4_PROL_LOOPEXIT:.*]]
31+
; CHECK: [[FOR_BODY4_PROL_PREHEADER]]:
32+
; CHECK-NEXT: br label %[[FOR_BODY4_PROL:.*]]
33+
; CHECK: [[FOR_BODY4_PROL]]:
34+
; CHECK-NEXT: [[MIN_020:%.*]] = phi i64 [ [[SPEC_SELECT:%.*]], %[[FOR_BODY4_PROL]] ], [ [[BASE_022]], %[[FOR_BODY4_PROL_PREHEADER]] ]
35+
; CHECK-NEXT: [[C_019:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY4_PROL]] ], [ [[ADD]], %[[FOR_BODY4_PROL_PREHEADER]] ]
36+
; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ 0, %[[FOR_BODY4_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], %[[FOR_BODY4_PROL]] ]
2737
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[C_019]]
2838
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
2939
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[MIN_020]]
@@ -32,18 +42,69 @@ define void @selsort(ptr %array) #0 {
3242
; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[CMP7]], i64 [[C_019]], i64 [[MIN_020]]
3343
; CHECK-NEXT: [[INC]] = add nuw i64 [[C_019]], 1
3444
; CHECK-NEXT: [[CMP3:%.*]] = icmp ult i64 [[INC]], [[TMP0]]
35-
; CHECK-NEXT: br i1 [[CMP3]], label %[[FOR_BODY4]], label %[[FOR_END_LOOPEXIT:.*]]
36-
; CHECK: [[FOR_END_LOOPEXIT]]:
37-
; CHECK-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[FOR_BODY4]] ]
38-
; CHECK-NEXT: br label %[[FOR_END]]
39-
; CHECK: [[FOR_END]]:
40-
; CHECK-NEXT: [[MIN_0_LCSSA:%.*]] = phi i64 [ [[BASE_022]], %[[FOR_BODY]] ], [ [[SPEC_SELECT_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
45+
; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
46+
; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
47+
; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label %[[FOR_BODY4_PROL]], label %[[FOR_BODY4_PROL_LOOPEXIT_UNR_LCSSA:.*]], !llvm.loop [[LOOP0:![0-9]+]]
48+
; CHECK: [[FOR_BODY4_PROL_LOOPEXIT_UNR_LCSSA]]:
49+
; CHECK-NEXT: [[MIN_020_UNR_PH:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[FOR_BODY4_PROL]] ]
50+
; CHECK-NEXT: [[C_019_UNR_PH:%.*]] = phi i64 [ [[INC]], %[[FOR_BODY4_PROL]] ]
51+
; CHECK-NEXT: [[SPEC_SELECT_LCSSA_UNR_PH:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[FOR_BODY4_PROL]] ]
52+
; CHECK-NEXT: br label %[[FOR_BODY4_PROL_LOOPEXIT]]
53+
; CHECK: [[FOR_BODY4_PROL_LOOPEXIT]]:
54+
; CHECK-NEXT: [[MIN_020_UNR:%.*]] = phi i64 [ [[BASE_022]], %[[FOR_BODY4_PREHEADER]] ], [ [[MIN_020_UNR_PH]], %[[FOR_BODY4_PROL_LOOPEXIT_UNR_LCSSA]] ]
55+
; CHECK-NEXT: [[C_019_UNR:%.*]] = phi i64 [ [[ADD]], %[[FOR_BODY4_PREHEADER]] ], [ [[C_019_UNR_PH]], %[[FOR_BODY4_PROL_LOOPEXIT_UNR_LCSSA]] ]
56+
; CHECK-NEXT: [[SPEC_SELECT_LCSSA_UNR:%.*]] = phi i64 [ poison, %[[FOR_BODY4_PREHEADER]] ], [ [[SPEC_SELECT_LCSSA_UNR_PH]], %[[FOR_BODY4_PROL_LOOPEXIT_UNR_LCSSA]] ]
57+
; CHECK-NEXT: [[TMP9:%.*]] = icmp ult i64 [[TMP6]], 3
58+
; CHECK-NEXT: br i1 [[TMP9]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY4_PREHEADER_NEW:.*]]
59+
; CHECK: [[FOR_BODY4_PREHEADER_NEW]]:
60+
; CHECK-NEXT: br label %[[FOR_BODY4:.*]]
61+
; CHECK: [[FOR_BODY4]]:
62+
; CHECK-NEXT: [[MIN_20:%.*]] = phi i64 [ [[MIN_020_UNR]], %[[FOR_BODY4_PREHEADER_NEW]] ], [ [[SPEC_SELECT_3:%.*]], %[[FOR_BODY4]] ]
63+
; CHECK-NEXT: [[MIN_0_LCSSA:%.*]] = phi i64 [ [[C_019_UNR]], %[[FOR_BODY4_PREHEADER_NEW]] ], [ [[INC_3:%.*]], %[[FOR_BODY4]] ]
4164
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[MIN_0_LCSSA]]
4265
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX9]], align 4
43-
; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[BASE_022]]
66+
; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[MIN_20]]
4467
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX11]], align 4
45-
; CHECK-NEXT: store i32 [[TMP5]], ptr [[ARRAYIDX9]], align 4
46-
; CHECK-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX11]], align 4
68+
; CHECK-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP4]], [[TMP5]]
69+
; CHECK-NEXT: [[SPEC_SELECT1:%.*]] = select i1 [[CMP8]], i64 [[MIN_0_LCSSA]], i64 [[MIN_20]]
70+
; CHECK-NEXT: [[INC1:%.*]] = add nuw i64 [[MIN_0_LCSSA]], 1
71+
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[INC1]]
72+
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
73+
; CHECK-NEXT: [[ARRAYIDX6_1:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[SPEC_SELECT1]]
74+
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX6_1]], align 4
75+
; CHECK-NEXT: [[CMP7_1:%.*]] = icmp ult i32 [[TMP12]], [[TMP13]]
76+
; CHECK-NEXT: [[SPEC_SELECT_1:%.*]] = select i1 [[CMP7_1]], i64 [[INC1]], i64 [[SPEC_SELECT1]]
77+
; CHECK-NEXT: [[INC_1:%.*]] = add nuw i64 [[MIN_0_LCSSA]], 2
78+
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[INC_1]]
79+
; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
80+
; CHECK-NEXT: [[ARRAYIDX6_2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[SPEC_SELECT_1]]
81+
; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX6_2]], align 4
82+
; CHECK-NEXT: [[CMP7_2:%.*]] = icmp ult i32 [[TMP14]], [[TMP15]]
83+
; CHECK-NEXT: [[SPEC_SELECT_2:%.*]] = select i1 [[CMP7_2]], i64 [[INC_1]], i64 [[SPEC_SELECT_1]]
84+
; CHECK-NEXT: [[INC_2:%.*]] = add nuw i64 [[MIN_0_LCSSA]], 3
85+
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[INC_2]]
86+
; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
87+
; CHECK-NEXT: [[ARRAYIDX6_3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[SPEC_SELECT_2]]
88+
; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX6_3]], align 4
89+
; CHECK-NEXT: [[CMP7_3:%.*]] = icmp ult i32 [[TMP16]], [[TMP17]]
90+
; CHECK-NEXT: [[SPEC_SELECT_3]] = select i1 [[CMP7_3]], i64 [[INC_2]], i64 [[SPEC_SELECT_2]]
91+
; CHECK-NEXT: [[INC_3]] = add nuw i64 [[MIN_0_LCSSA]], 4
92+
; CHECK-NEXT: [[CMP3_3:%.*]] = icmp ult i64 [[INC_3]], [[TMP0]]
93+
; CHECK-NEXT: br i1 [[CMP3_3]], label %[[FOR_BODY4]], label %[[FOR_END_LOOPEXIT_UNR_LCSSA:.*]]
94+
; CHECK: [[FOR_END_LOOPEXIT_UNR_LCSSA]]:
95+
; CHECK-NEXT: [[SPEC_SELECT_LCSSA_PH:%.*]] = phi i64 [ [[SPEC_SELECT_3]], %[[FOR_BODY4]] ]
96+
; CHECK-NEXT: br label %[[FOR_END_LOOPEXIT]]
97+
; CHECK: [[FOR_END_LOOPEXIT]]:
98+
; CHECK-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT_LCSSA_UNR]], %[[FOR_BODY4_PROL_LOOPEXIT]] ], [ [[SPEC_SELECT_LCSSA_PH]], %[[FOR_END_LOOPEXIT_UNR_LCSSA]] ]
99+
; CHECK-NEXT: br label %[[FOR_END]]
100+
; CHECK: [[FOR_END]]:
101+
; CHECK-NEXT: [[MIN_0_LCSSA1:%.*]] = phi i64 [ [[BASE_022]], %[[FOR_BODY]] ], [ [[SPEC_SELECT_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
102+
; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[MIN_0_LCSSA1]]
103+
; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX10]], align 4
104+
; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[BASE_022]]
105+
; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4
106+
; CHECK-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX10]], align 4
107+
; CHECK-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX12]], align 4
47108
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[ADD]], [[TMP0]]
48109
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END18_LOOPEXIT:.*]], label %[[FOR_BODY]]
49110
; CHECK: [[FOR_END18_LOOPEXIT]]:
@@ -96,3 +157,7 @@ for.end18: ; preds = %for.end, %entry
96157
}
97158

98159
attributes #0 = { "tune-cpu"="generic" }
160+
;.
161+
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
162+
; CHECK: [[META1]] = !{!"llvm.loop.unroll.disable"}
163+
;.

0 commit comments

Comments
 (0)