Skip to content

Commit 9186df9

Browse files
[InlineCost] Simplify extractvalue across callsite (#145054)
Motivation: When using libc++, `std::bitset<64>::count()` doesn't optimize to a single popcount instruction on AArch64, because we fail to inline the library code completely. Inlining fails, because the internal bit_iterator struct is passed as a [2 x i64] %arg value on AArch64. The value is built using insertvalue instructions and only one of the array entries is constant. If we know that this entry is constant, we can prove that half the function becomes dead. However, InlineCost only considers operands for simplification if they are Constants, which %arg is not. Without this simplification the function is too expensive to inline. Therefore, we had to teach InlineCost to support non-Constant simplified values (PR #145083). This change enables that support for extractvalue, so that an extractvalue in the callee can be simplified against the insertvalue instructions that build the argument in the caller. This is enough to get bitset::count fully optimized. There are similar opportunities we can explore for BinOps in the future (e.g. cmp eq %arg1, %arg2 when the caller passes the same value into both arguments), but we need to be careful here, because InstSimplify isn't completely safe to use with operands owned by different functions.
1 parent e04c938 commit 9186df9

File tree

2 files changed

+79
-3
lines changed

2 files changed

+79
-3
lines changed

llvm/lib/Analysis/InlineCost.cpp

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2316,9 +2316,18 @@ bool CallAnalyzer::visitStore(StoreInst &I) {
23162316
}
23172317

23182318
bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
2319-
// Constant folding for extract value is trivial.
2320-
if (simplifyInstruction(I))
2321-
return true;
2319+
Value *Op = I.getAggregateOperand();
2320+
2321+
// Special handling, because we want to simplify extractvalue with a
2322+
// potential insertvalue from the caller.
2323+
if (Value *SimpleOp = getSimplifiedValueUnchecked(Op)) {
2324+
SimplifyQuery SQ(DL);
2325+
Value *SimpleV = simplifyExtractValueInst(SimpleOp, I.getIndices(), SQ);
2326+
if (SimpleV) {
2327+
SimplifiedValues[&I] = SimpleV;
2328+
return true;
2329+
}
2330+
}
23222331

23232332
// SROA can't look through these, but they may be free.
23242333
return Base::visitExtractValue(I);
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt < %s -S -passes=inline | FileCheck %s
3+
4+
; Self-recursive callee used by the caller_* tests below: it reads element 0
; of %agg and returns 0 when that element is zero; otherwise it calls itself
; again with the same aggregate and returns that result.
define i32 @callee([2 x i32] %agg) {
5+
; CHECK-LABEL: define i32 @callee(
6+
; CHECK-SAME: [2 x i32] [[AGG:%.*]]) {
7+
; CHECK-NEXT: [[V:%.*]] = extractvalue [2 x i32] [[AGG]], 0
8+
; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[V]], 0
9+
; CHECK-NEXT: br i1 [[C]], label %[[IS_NULL:.*]], label %[[NON_NULL:.*]]
10+
; CHECK: [[IS_NULL]]:
11+
; CHECK-NEXT: ret i32 0
12+
; CHECK: [[NON_NULL]]:
13+
; CHECK-NEXT: [[R:%.*]] = call i32 @callee([2 x i32] [[AGG]])
14+
; CHECK-NEXT: ret i32 [[R]]
15+
;
16+
%v = extractvalue [2 x i32] %agg, 0
17+
%c = icmp eq i32 %v, 0
18+
br i1 %c, label %is_null, label %non_null
19+
20+
is_null:
21+
ret i32 0
22+
23+
non_null:
24+
%r = call i32 @callee([2 x i32] %agg)
25+
ret i32 %r
26+
}
27+
28+
; Positive case: element 0 of the aggregate passed to @callee is the constant
; 0 and element 1 is %arg. The FileCheck assertions below expect the call to
; disappear and the function to return 0 directly.
define i32 @caller_simplified(i32 %arg) {
29+
; CHECK-LABEL: define i32 @caller_simplified(
30+
; CHECK-SAME: i32 [[ARG:%.*]]) {
31+
; CHECK-NEXT: [[AGG0:%.*]] = insertvalue [2 x i32] poison, i32 0, 0
32+
; CHECK-NEXT: [[AGG1:%.*]] = insertvalue [2 x i32] [[AGG0]], i32 [[ARG]], 1
33+
; CHECK-NEXT: ret i32 0
34+
;
35+
%agg0 = insertvalue [2 x i32] poison, i32 0, 0
36+
%agg1 = insertvalue [2 x i32] %agg0, i32 %arg, 1
37+
%v = call i32 @callee([2 x i32] %agg1)
38+
ret i32 %v
39+
}
40+
41+
; Negative case: element 0 of the aggregate passed to @callee is the constant
; 1 (non-zero) and element 1 is %arg. The FileCheck assertions below expect
; the call to @callee to remain in the output.
define i32 @caller_not_simplified(i32 %arg) {
42+
; CHECK-LABEL: define i32 @caller_not_simplified(
43+
; CHECK-SAME: i32 [[ARG:%.*]]) {
44+
; CHECK-NEXT: [[AGG0:%.*]] = insertvalue [2 x i32] poison, i32 1, 0
45+
; CHECK-NEXT: [[AGG1:%.*]] = insertvalue [2 x i32] [[AGG0]], i32 [[ARG]], 1
46+
; CHECK-NEXT: [[V:%.*]] = call i32 @callee([2 x i32] [[AGG1]])
47+
; CHECK-NEXT: ret i32 [[V]]
48+
;
49+
%agg0 = insertvalue [2 x i32] poison, i32 1, 0
50+
%agg1 = insertvalue [2 x i32] %agg0, i32 %arg, 1
51+
%v = call i32 @callee([2 x i32] %agg1)
52+
ret i32 %v
53+
}
54+
55+
; Negative case: the constant 0 is inserted at index 1 while %arg is inserted
; at index 0, so the element that @callee extracts (index 0) is not a
; constant. The FileCheck assertions below expect the call to @callee to
; remain in the output.
define i32 @caller_not_simplified2(i32 %arg) {
56+
; CHECK-LABEL: define i32 @caller_not_simplified2(
57+
; CHECK-SAME: i32 [[ARG:%.*]]) {
58+
; CHECK-NEXT: [[AGG0:%.*]] = insertvalue [2 x i32] poison, i32 0, 1
59+
; CHECK-NEXT: [[AGG1:%.*]] = insertvalue [2 x i32] [[AGG0]], i32 [[ARG]], 0
60+
; CHECK-NEXT: [[V:%.*]] = call i32 @callee([2 x i32] [[AGG1]])
61+
; CHECK-NEXT: ret i32 [[V]]
62+
;
63+
%agg0 = insertvalue [2 x i32] poison, i32 0, 1
64+
%agg1 = insertvalue [2 x i32] %agg0, i32 %arg, 0
65+
%v = call i32 @callee([2 x i32] %agg1)
66+
ret i32 %v
67+
}

0 commit comments

Comments
 (0)