Skip to content

Commit daa9af1

Browse files
authored
[FuncSpec] Handle ssa_copy intrinsic calls in InstCostVisitor (#114247)
Look through ssa_copy intrinsic calls when computing the codesize bonus for a specialization. Also remove the redundant logic that skipped computing the codesize bonus for ssa_copy intrinsics, since these are now considered zero-cost by TTI (in PR #75294).
1 parent 4f740f9 commit daa9af1

File tree

2 files changed

+121
-4
lines changed

2 files changed

+121
-4
lines changed

llvm/lib/Transforms/IPO/FunctionSpecialization.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -120,10 +120,6 @@ Cost InstCostVisitor::estimateBasicBlocks(
120120
continue;
121121

122122
for (Instruction &I : *BB) {
123-
// Disregard SSA copies.
124-
if (auto *II = dyn_cast<IntrinsicInst>(&I))
125-
if (II->getIntrinsicID() == Intrinsic::ssa_copy)
126-
continue;
127123
// If it's a known constant we have already accounted for it.
128124
if (KnownConstants.contains(&I))
129125
continue;
@@ -402,6 +398,14 @@ Constant *InstCostVisitor::visitFreezeInst(FreezeInst &I) {
402398
}
403399

404400
Constant *InstCostVisitor::visitCallBase(CallBase &I) {
401+
assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
402+
403+
// Look through calls to ssa_copy intrinsics.
404+
if (auto *II = dyn_cast<IntrinsicInst>(&I);
405+
II && II->getIntrinsicID() == Intrinsic::ssa_copy) {
406+
return LastVisited->second;
407+
}
408+
405409
Function *F = I.getCalledFunction();
406410
if (!F || !canConstantFoldCallTo(&I, F))
407411
return nullptr;
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
2+
; RUN: opt -passes=print-predicateinfo -disable-output -S < %s 2>&1 | FileCheck %s --check-prefix=PREDINF
3+
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=1 \
4+
; RUN: -funcspec-for-literal-constant=true \
5+
; RUN: -funcspec-min-codesize-savings=50 \
6+
; RUN: -funcspec-min-latency-savings=0 \
7+
; RUN: -S < %s | FileCheck %s --check-prefix=FUNCSPEC
8+
9+
; Verify that we are able to estimate the codesize savings by looking through
10+
; calls to ssa_copy intrinsics, which are inserted by PredicateInfo when IPSCCP
11+
; is run prior to FunctionSpecialization.
12+
; Caller: invokes @test_ssa_copy with the literal constant 0 and returns the
; call's result unchanged.
define i32 @main() {
13+
entry:
14+
%res = call i32 @test_ssa_copy(i32 0)
15+
ret i32 %res
16+
}
17+
18+
; Callee under test: %x is compared against 0 once in block1, and that same
; %cmp value is then used as the branch condition in block1, block2 and block3.
; The exit1, exit2 and exit3 paths return %x; only the path on which every
; branch takes its true edge reaches exit4, which returns the literal 999.
define i32 @test_ssa_copy(i32 %x) {
19+
entry:
20+
br label %block1
21+
22+
block1:
23+
%cmp = icmp eq i32 %x, 0
24+
br i1 %cmp, label %block2, label %exit1
25+
26+
block2:
27+
br i1 %cmp, label %block3, label %exit2
28+
29+
block3:
30+
br i1 %cmp, label %exit4, label %exit3
31+
32+
exit1:
33+
ret i32 %x
34+
35+
exit2:
36+
ret i32 %x
37+
38+
exit3:
39+
ret i32 %x
40+
41+
exit4:
42+
ret i32 999
43+
}
44+
; PREDINF-LABEL: define i32 @main() {
45+
; PREDINF-NEXT: [[ENTRY:.*:]]
46+
; PREDINF-NEXT: [[RES:%.*]] = call i32 @test_ssa_copy(i32 0)
47+
; PREDINF-NEXT: ret i32 [[RES]]
48+
;
49+
;
50+
; PREDINF-LABEL: define i32 @test_ssa_copy(
51+
; PREDINF-SAME: i32 [[X:%.*]]) {
52+
; PREDINF-NEXT: [[ENTRY:.*:]]
53+
; PREDINF-NEXT: br label %[[BLOCK1:.*]]
54+
; PREDINF: [[BLOCK1]]:
55+
; PREDINF-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 0
56+
; PREDINF: [[CMP_0:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[CMP]])
57+
; PREDINF: [[X_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
58+
; PREDINF: [[X_4:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
59+
; PREDINF-NEXT: br i1 [[CMP]], label %[[BLOCK2:.*]], label %[[EXIT1:.*]]
60+
; PREDINF: [[BLOCK2]]:
61+
; PREDINF: [[CMP_0_1:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[CMP_0]])
62+
; PREDINF: [[X_0_1:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X_0]])
63+
; PREDINF: [[X_0_3:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X_0]])
64+
; PREDINF-NEXT: br i1 [[CMP_0]], label %[[BLOCK3:.*]], label %[[EXIT2:.*]]
65+
; PREDINF: [[BLOCK3]]:
66+
; PREDINF: [[X_0_1_2:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X_0_1]])
67+
; PREDINF-NEXT: br i1 [[CMP_0_1]], label %[[EXIT4:.*]], label %[[EXIT3:.*]]
68+
; PREDINF: [[EXIT1]]:
69+
; PREDINF-NEXT: ret i32 [[X_4]]
70+
; PREDINF: [[EXIT2]]:
71+
; PREDINF-NEXT: ret i32 [[X_0_3]]
72+
; PREDINF: [[EXIT3]]:
73+
; PREDINF-NEXT: ret i32 [[X_0_1_2]]
74+
; PREDINF: [[EXIT4]]:
75+
; PREDINF-NEXT: ret i32 999
76+
;
77+
;
78+
; FUNCSPEC-LABEL: define i32 @main() {
79+
; FUNCSPEC-NEXT: [[ENTRY:.*:]]
80+
; FUNCSPEC-NEXT: [[RES:%.*]] = call i32 @test_ssa_copy.specialized.1(i32 0)
81+
; FUNCSPEC-NEXT: ret i32 999
82+
;
83+
;
84+
; FUNCSPEC-LABEL: define range(i32 1, 0) i32 @test_ssa_copy(
85+
; FUNCSPEC-SAME: i32 [[X:%.*]]) {
86+
; FUNCSPEC-NEXT: [[ENTRY:.*:]]
87+
; FUNCSPEC-NEXT: br label %[[BLOCK1:.*]]
88+
; FUNCSPEC: [[BLOCK1]]:
89+
; FUNCSPEC-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 0
90+
; FUNCSPEC-NEXT: br i1 [[CMP]], label %[[BLOCK2:.*]], label %[[EXIT1:.*]]
91+
; FUNCSPEC: [[BLOCK2]]:
92+
; FUNCSPEC-NEXT: br label %[[BLOCK3:.*]]
93+
; FUNCSPEC: [[BLOCK3]]:
94+
; FUNCSPEC-NEXT: br label %[[EXIT4:.*]]
95+
; FUNCSPEC: [[EXIT1]]:
96+
; FUNCSPEC-NEXT: ret i32 [[X]]
97+
; FUNCSPEC: [[EXIT4]]:
98+
; FUNCSPEC-NEXT: ret i32 999
99+
;
100+
;
101+
; FUNCSPEC-LABEL: define internal i32 @test_ssa_copy.specialized.1(
102+
; FUNCSPEC-SAME: i32 [[X:%.*]]) {
103+
; FUNCSPEC-NEXT: [[ENTRY:.*:]]
104+
; FUNCSPEC-NEXT: br label %[[BLOCK1:.*]]
105+
; FUNCSPEC: [[BLOCK1]]:
106+
; FUNCSPEC-NEXT: br label %[[BLOCK2:.*]]
107+
; FUNCSPEC: [[BLOCK2]]:
108+
; FUNCSPEC-NEXT: br label %[[BLOCK3:.*]]
109+
; FUNCSPEC: [[BLOCK3]]:
110+
; FUNCSPEC-NEXT: br label %[[EXIT4:.*]]
111+
; FUNCSPEC: [[EXIT4]]:
112+
; FUNCSPEC-NEXT: ret i32 poison
113+
;

0 commit comments

Comments
 (0)