Skip to content

Commit aa85400

Browse files
committed
[PseudoProbe] Extend to skip instrumenting probe into the dests of invoke
1 parent 75a1c4e commit aa85400

File tree

5 files changed

+202
-15
lines changed

5 files changed

+202
-15
lines changed

llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,11 @@ class SampleProfileProber {
8181
uint64_t getFunctionHash() const { return FunctionHash; }
8282
uint32_t getBlockId(const BasicBlock *BB) const;
8383
uint32_t getCallsiteId(const Instruction *Call) const;
84-
void computeCFGHash();
85-
void computeProbeIdForBlocks();
84+
// Adds the normal destination block of every invoke terminator in the
// function to InvokeNormalDests (out-parameter; existing entries are kept).
void findInvokeNormalDests(DenseSet<BasicBlock *> &InvokeNormalDests);
85+
// Computes the CFG hash. Blocks contained in InvokeNormalDests or
// KnownColdBlocks are not used as edge sources when hashing.
void computeCFGHash(const DenseSet<BasicBlock *> &InvokeNormalDests,
86+
const DenseSet<BasicBlock *> &KnownColdBlocks);
87+
// Assigns a block probe ID to every block that is in neither
// InvokeNormalDests nor KnownColdBlocks; all other blocks get no ID.
void computeProbeIdForBlocks(const DenseSet<BasicBlock *> &InvokeNormalDests,
88+
const DenseSet<BasicBlock *> &KnownColdBlocks);
8689
void computeProbeIdForCallsites();
8790

8891
Function *F;

llvm/lib/Transforms/IPO/SampleProfileProbe.cpp

Lines changed: 39 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -173,20 +173,49 @@ SampleProfileProber::SampleProfileProber(Function &Func,
173173
BlockProbeIds.clear();
174174
CallProbeIds.clear();
175175
LastProbeId = (uint32_t)PseudoProbeReservedId::Last;
176-
computeProbeIdForBlocks();
176+
177+
DenseSet<BasicBlock *> InvokeNormalDests;
178+
findInvokeNormalDests(InvokeNormalDests);
179+
DenseSet<BasicBlock *> KnownColdBlocks;
180+
computeEHOnlyBlocks(*F, KnownColdBlocks);
181+
182+
computeProbeIdForBlocks(InvokeNormalDests, KnownColdBlocks);
177183
computeProbeIdForCallsites();
178-
computeCFGHash();
184+
computeCFGHash(InvokeNormalDests, KnownColdBlocks);
185+
}
186+
187+
// Collect the normal (non-unwind) destination of every invoke in the function.
//
// Walks all basic blocks of F; for each block whose terminator is an
// InvokeInst, inserts that invoke's normal destination block into
// InvokeNormalDests. The set is only added to, never cleared, so any entries
// the caller already placed in it are preserved.
void SampleProfileProber::findInvokeNormalDests(
188+
DenseSet<BasicBlock *> &InvokeNormalDests) {
189+
for (auto &BB : *F) {
190+
auto *TI = BB.getTerminator();
191+
// Blocks ending in any other kind of terminator contribute nothing.
if (auto *II = dyn_cast<InvokeInst>(TI))
192+
InvokeNormalDests.insert(II->getNormalDest());
193+
}
179194
}
180195

181196
// Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
182197
// value of each BB in the CFG. The higher 32 bits record the number of edges
183198
// preceded by the number of indirect calls.
184199
// This is derived from FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash().
185-
void SampleProfileProber::computeCFGHash() {
200+
// Gather the successor block IDs that feed the CFG hash.
//
// Blocks found in InvokeNormalDests or KnownColdBlocks are skipped as edge
// sources. For every remaining block, the chain of invoke normal destinations
// is followed until a block with a non-invoke terminator is reached, and the
// successors of that final block are recorded instead of the block's own.
// Part of this function is not visible in this view, so only the successor
// collection is described here.
void SampleProfileProber::computeCFGHash(
201+
const DenseSet<BasicBlock *> &InvokeNormalDests,
202+
const DenseSet<BasicBlock *> &KnownColdBlocks) {
186203
std::vector<uint8_t> Indexes;
187204
JamCRC JC;
188205
for (auto &BB : *F) {
189-
for (BasicBlock *Succ : successors(&BB)) {
206+
// Skip the EH flow blocks.
207+
if (InvokeNormalDests.contains(&BB) || KnownColdBlocks.contains(&BB))
208+
continue;
209+
210+
// Find the original successors by skipping the EH flow succs.
211+
auto *BBPtr = &BB;
212+
auto *TI = BBPtr->getTerminator();
213+
// NOTE(review): this walk keeps no visited set. If invoke normal-destination
// edges can ever form a cycle (e.g. an invoke whose normal destination is its
// own block), the loop would not terminate -- confirm or add a guard.
while (auto *II = dyn_cast<InvokeInst>(TI)) {
214+
BBPtr = II->getNormalDest();
215+
TI = BBPtr->getTerminator();
216+
}
217+
218+
for (BasicBlock *Succ : successors(BBPtr)) {
190219
// NOTE(review): Succ may be a block that was given no probe ID; what
// getBlockId returns for such a block is not visible here -- verify.
auto Index = getBlockId(Succ);
191220
// Append the ID as four bytes, least-significant byte first.
for (int J = 0; J < 4; J++)
192221
Indexes.push_back((uint8_t)(Index >> (J * 8)));
@@ -207,15 +236,15 @@ void SampleProfileProber::computeCFGHash() {
207236
<< ", Hash = " << FunctionHash << "\n");
208237
}
209238

210-
void SampleProfileProber::computeProbeIdForBlocks() {
211-
DenseSet<BasicBlock *> KnownColdBlocks;
212-
computeEHOnlyBlocks(*F, KnownColdBlocks);
239+
// Assign probe IDs to the basic blocks of F.
//
// A block receives an ID only if it is in neither InvokeNormalDests nor
// KnownColdBlocks. The ID is stored in BlockProbeIds and comes from
// pre-incrementing LastProbeId.
void SampleProfileProber::computeProbeIdForBlocks(
240+
const DenseSet<BasicBlock *> &InvokeNormalDests,
241+
const DenseSet<BasicBlock *> &KnownColdBlocks) {
213242
// Insert pseudo probe to non-cold blocks only. This will reduce IR size as
214243
// well as the binary size while retaining the profile quality.
215244
for (auto &BB : *F) {
216-
++LastProbeId;
217-
if (!KnownColdBlocks.contains(&BB))
218-
BlockProbeIds[&BB] = LastProbeId;
245+
// Invoke normal destinations are skipped in addition to the cold blocks.
if (InvokeNormalDests.contains(&BB) || KnownColdBlocks.contains(&BB))
246+
continue;
247+
// LastProbeId advances only when an ID is actually assigned, so skipped
// blocks do not consume an ID and assigned IDs are consecutive.
BlockProbeIds[&BB] = ++LastProbeId;
219248
}
220249
}
221250

llvm/test/ThinLTO/X86/pseudo-probe-desc-import.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
; RUN: llvm-lto -thinlto-action=import %t3.bc -thinlto-index=%t3.index.bc -o /dev/null 2>&1 | FileCheck %s --check-prefix=WARN
1313

1414

15-
; CHECK-NOT: {i64 6699318081062747564, i64 4294967295, !"foo"
16-
; CHECK: !{i64 -2624081020897602054, i64 281479271677951, !"main"
15+
; CHECK-NOT: {i64 6699318081062747564, i64 [[#]], !"foo"
16+
; CHECK: !{i64 -2624081020897602054, i64 [[#]], !"main"
1717

1818
; WARN: warning: Pseudo-probe ignored: source module '{{.*}}' is compiled with -fpseudo-probe-for-profiling while destination module '{{.*}}' is not
1919

llvm/test/Transforms/SampleProfile/pseudo-probe-eh.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ entry:
1818
to label %ret unwind label %lpad
1919

2020
ret:
21-
; CHECK: call void @llvm.pseudoprobe
21+
; CHECK-NOT: call void @llvm.pseudoprobe
2222
ret void
2323

2424
lpad: ; preds = %entry
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
; REQUIRES: x86_64-linux
2+
; RUN: opt < %s -passes=pseudo-probe -S -o - | FileCheck %s
3+
4+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
5+
target triple = "x86_64-unknown-linux-gnu"
6+
7+
$__clang_call_terminate = comdat any
8+
9+
@x = dso_local global i32 0, align 4, !dbg !0
10+
11+
; Function Attrs: mustprogress noinline nounwind uwtable
12+
; Test input for probe placement around invoke instructions. The blocks
; if.then, invoke.cont, if.else and if.end each end in an invoke that unwinds
; to terminate.lpad. The FileCheck directives in the body expect probes 1-6 in
; entry, if.then, if.else, if.end, if.then5 and if.end6, and expect no probe in
; the invoke normal destinations (invoke.cont, invoke.cont1, invoke.cont2,
; invoke.cont3) or in terminate.lpad.
define dso_local void @_Z3barv() #0 personality ptr @__gxx_personality_v0 !dbg !14 {
13+
entry:
14+
; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 1
15+
%0 = load volatile i32, ptr @x, align 4, !dbg !17, !tbaa !19
16+
%tobool = icmp ne i32 %0, 0, !dbg !17
17+
br i1 %tobool, label %if.then, label %if.else, !dbg !23
18+
19+
if.then: ; preds = %entry
20+
; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 2
21+
invoke void @_Z3foov()
22+
to label %invoke.cont unwind label %terminate.lpad, !dbg !24
23+
24+
invoke.cont: ; preds = %if.then
25+
; CHECK-NOT: call void @llvm.pseudoprobe(i64 -1069303473483922844,
26+
invoke void @_Z3bazv()
27+
to label %invoke.cont1 unwind label %terminate.lpad, !dbg !26
28+
29+
invoke.cont1: ; preds = %invoke.cont
30+
; CHECK-NOT: call void @llvm.pseudoprobe(i64 -1069303473483922844,
31+
br label %if.end, !dbg !27
32+
33+
if.else: ; preds = %entry
34+
; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 3
35+
invoke void @_Z3foov()
36+
to label %invoke.cont2 unwind label %terminate.lpad, !dbg !28
37+
38+
invoke.cont2: ; preds = %if.else
39+
; CHECK-NOT: call void @llvm.pseudoprobe(i64 -1069303473483922844,
40+
br label %if.end
41+
42+
if.end: ; preds = %invoke.cont2, %invoke.cont1
43+
; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 4
44+
invoke void @_Z3foov()
45+
to label %invoke.cont3 unwind label %terminate.lpad, !dbg !29
46+
47+
invoke.cont3: ; preds = %if.end
48+
; CHECK-NOT: call void @llvm.pseudoprobe(i64 -1069303473483922844,
49+
%1 = load volatile i32, ptr @x, align 4, !dbg !30, !tbaa !19
50+
%tobool4 = icmp ne i32 %1, 0, !dbg !30
51+
br i1 %tobool4, label %if.then5, label %if.end6, !dbg !32
52+
53+
if.then5: ; preds = %invoke.cont3
54+
; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 5
55+
%2 = load volatile i32, ptr @x, align 4, !dbg !33, !tbaa !19
56+
%inc = add nsw i32 %2, 1, !dbg !33
57+
store volatile i32 %inc, ptr @x, align 4, !dbg !33, !tbaa !19
58+
br label %if.end6, !dbg !35
59+
60+
if.end6: ; preds = %if.then5, %invoke.cont3
61+
; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 6
62+
ret void, !dbg !36
63+
64+
terminate.lpad: ; preds = %if.end, %if.else, %invoke.cont, %if.then
65+
; CHECK-NOT: call void @llvm.pseudoprobe(i64 -1069303473483922844,
66+
%3 = landingpad { ptr, i32 }
67+
catch ptr null, !dbg !24
68+
%4 = extractvalue { ptr, i32 } %3, 0, !dbg !24
69+
call void @__clang_call_terminate(ptr %4) #3, !dbg !24
70+
unreachable, !dbg !24
71+
}
72+
73+
; Function Attrs: mustprogress noinline nounwind uwtable
74+
; Empty callee that only returns; it is the target of the invokes in the
; if.then, if.else and if.end blocks of @_Z3barv.
define dso_local void @_Z3foov() #0 !dbg !37 {
75+
entry:
76+
ret void, !dbg !38
77+
}
78+
79+
declare i32 @__gxx_personality_v0(...)
80+
81+
; Function Attrs: noinline noreturn nounwind uwtable
82+
; Terminate helper called from terminate.lpad in @_Z3barv: passes its argument
; to __cxa_begin_catch, calls _ZSt9terminatev, and ends in unreachable.
define linkonce_odr hidden void @__clang_call_terminate(ptr noundef %0) #1 comdat {
83+
%2 = call ptr @__cxa_begin_catch(ptr %0) #4
84+
call void @_ZSt9terminatev() #3
85+
unreachable
86+
}
87+
88+
declare ptr @__cxa_begin_catch(ptr)
89+
90+
declare void @_ZSt9terminatev()
91+
92+
; Function Attrs: mustprogress noinline nounwind uwtable
93+
; Empty callee that only returns; it is the target of the invoke in the
; invoke.cont block of @_Z3barv.
define dso_local void @_Z3bazv() #0 !dbg !39 {
94+
entry:
95+
ret void, !dbg !40
96+
}
97+
98+
; CHECK: ![[#]] = !{i64 -3270123626113159616, i64 4294967295, !"_Z3bazv"}
99+
100+
attributes #0 = { mustprogress noinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
101+
attributes #1 = { noinline noreturn nounwind uwtable "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
102+
attributes #2 = { mustprogress noinline norecurse nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
103+
attributes #3 = { noreturn nounwind }
104+
attributes #4 = { nounwind }
105+
106+
!llvm.dbg.cu = !{!2}
107+
!llvm.module.flags = !{!7, !8, !9, !10, !11, !12}
108+
!llvm.ident = !{!13}
109+
110+
!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
111+
!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true)
112+
!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 19.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None)
113+
!3 = !DIFile(filename: "test.cpp", directory: "/home", checksumkind: CSK_MD5, checksum: "a4c7b0392f3fd9c8ebb85065159dbb02")
114+
!4 = !{!0}
115+
!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6)
116+
!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
117+
!7 = !{i32 7, !"Dwarf Version", i32 5}
118+
!8 = !{i32 2, !"Debug Info Version", i32 3}
119+
!9 = !{i32 1, !"wchar_size", i32 4}
120+
!10 = !{i32 8, !"PIC Level", i32 2}
121+
!11 = !{i32 7, !"PIE Level", i32 2}
122+
!12 = !{i32 7, !"uwtable", i32 2}
123+
!13 = !{!"clang version 19.0.0"}
124+
!14 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", scope: !3, file: !3, line: 4, type: !15, scopeLine: 4, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
125+
!15 = !DISubroutineType(types: !16)
126+
!16 = !{null}
127+
!17 = !DILocation(line: 5, column: 6, scope: !18)
128+
!18 = distinct !DILexicalBlock(scope: !14, file: !3, line: 5, column: 6)
129+
!19 = !{!20, !20, i64 0}
130+
!20 = !{!"int", !21, i64 0}
131+
!21 = !{!"omnipotent char", !22, i64 0}
132+
!22 = !{!"Simple C++ TBAA"}
133+
!23 = !DILocation(line: 5, column: 6, scope: !14)
134+
!24 = !DILocation(line: 6, column: 5, scope: !25)
135+
!25 = distinct !DILexicalBlock(scope: !18, file: !3, line: 5, column: 9)
136+
!26 = !DILocation(line: 7, column: 5, scope: !25)
137+
!27 = !DILocation(line: 8, column: 3, scope: !25)
138+
!28 = !DILocation(line: 9, column: 5, scope: !18)
139+
!29 = !DILocation(line: 11, column: 3, scope: !14)
140+
!30 = !DILocation(line: 12, column: 6, scope: !31)
141+
!31 = distinct !DILexicalBlock(scope: !14, file: !3, line: 12, column: 6)
142+
!32 = !DILocation(line: 12, column: 6, scope: !14)
143+
!33 = !DILocation(line: 13, column: 5, scope: !34)
144+
!34 = distinct !DILexicalBlock(scope: !31, file: !3, line: 12, column: 9)
145+
!35 = !DILocation(line: 14, column: 5, scope: !34)
146+
!36 = !DILocation(line: 17, column: 1, scope: !14)
147+
!37 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !3, file: !3, line: 19, type: !15, scopeLine: 19, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
148+
!38 = !DILocation(line: 19, column: 13, scope: !37)
149+
!39 = distinct !DISubprogram(name: "baz", linkageName: "_Z3bazv", scope: !3, file: !3, line: 18, type: !15, scopeLine: 18, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
150+
!40 = !DILocation(line: 18, column: 13, scope: !39)
151+
!41 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 22, type: !42, scopeLine: 22, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
152+
!42 = !DISubroutineType(types: !43)
153+
!43 = !{!6}
154+
!44 = !DILocation(line: 23, column: 3, scope: !41)
155+
!45 = !DILocation(line: 24, column: 1, scope: !41)

0 commit comments

Comments
 (0)