Skip to content

Commit 1a71908

Browse files
committed
[CSSPGO][llvm-profgen] Always report dangling probes for frames with real samples.
Report dangling probes for frames that have real samples collected. Dangling probes are the probes associated to an empty block. When reported, sample count on a dangling probe will not be trusted by the compiler and we will rely on the counts inference algorithm to get the probe a reasonable count. This actually fixes a bug where previously only those dangling probes with samples collected were reported. This patch also fixes two existing issues. Pseudo probes are stored in `Address2ProbesMap` and their pointers are used in `PseudoProbeInlineTree`. Previously `std::vector` was used to store probes and the pointers to probes may get obsolete as the vector grows. I'm changing `std::vector` to `std::list` instead. The other issue is that all outlined functions shared the same inline frame previously due to the unchanged `Index` value as the dummy inlineSite identifier. Good results seen for SPEC2017 in general regarding profile quality. Reviewed By: wenlei, wlei Differential Revision: https://reviews.llvm.org/D100235
1 parent 3511022 commit 1a71908

File tree

7 files changed

+107
-30
lines changed

7 files changed

+107
-30
lines changed

llvm/include/llvm/MC/MCPseudoProbe.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@
3636
// A list of NUM_INLINED_FUNCTIONS entries describing each of the inlined
3737
// callees. Each record contains:
3838
// INLINE SITE
39-
// GUID of the inlinee (uint64)
4039
// ID of the callsite probe (ULEB128)
4140
// FUNCTION BODY
4241
// A FUNCTION BODY entry describing the inlined function.
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
PERF_RECORD_MMAP2 595196/595196: [0x201000(0x1000) @ 0 00:1d 224227621 1042948]: r-xp /home/inline-cs-pseudoprobe.perfbin
2+
3+
20180e
4+
5541f689495641d7
5+
0x201858/0x20180e/P/-/-/0 0x20182b/0x20184d/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
; RUN: llvm-profgen --perfscript=%S/Inputs/inline-cs-dangling-pseudoprobe.perfscript --binary=%S/Inputs/inline-cs-pseudoprobe.perfbin --output=%t --show-unwinder-output --csprof-cold-thres=0 | FileCheck %s --check-prefix=CHECK-UNWINDER
2+
; RUN: FileCheck %s --input-file %t
3+
4+
; CHECK: [main:2 @ foo]:58:0
5+
; CHECK-NEXT: 2: 15
6+
; CHECK-NEXT: 3: 14
7+
; CHECK-NEXT: 5: 14
8+
; CHECK-NEXT: 6: 15
9+
; CHECK-NEXT: !CFGChecksum: 138950591924
10+
; CHECK:[main:2 @ foo:8 @ bar]:1:0
11+
; CHECK-NEXT: 2: 18446744073709551615
12+
; CHECK-NEXT: 3: 18446744073709551615
13+
; CHECK-NEXT: 4: 1
14+
; CHECK-NEXT: !CFGChecksum: 72617220756
15+
16+
17+
; CHECK-UNWINDER: Binary(inline-cs-pseudoprobe.perfbin)'s Range Counter:
18+
; CHECK-UNWINDER-EMPTY:
19+
; CHECK-UNWINDER-NEXT: (800, 82b): 14
20+
; CHECK-UNWINDER-NEXT: (84d, 858): 1
21+
22+
; CHECK-UNWINDER: Binary(inline-cs-pseudoprobe.perfbin)'s Branch Counter:
23+
; CHECK-UNWINDER-EMPTY:
24+
; CHECK-UNWINDER-NEXT: (82b, 800): 14
25+
; CHECK-UNWINDER-NEXT: (82b, 84d): 1
26+
; CHECK-UNWINDER-NEXT: (858, 80e): 1
27+
28+
; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling
29+
; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls
30+
; -g test.c -o a.out
31+
32+
#include <stdio.h>
33+
34+
int bar(int x, int y) {
35+
if (x % 3) {
36+
return x - y;
37+
}
38+
return x + y;
39+
}
40+
41+
void foo() {
42+
int s, i = 0;
43+
while (i++ < 4000 * 4000)
44+
if (i % 91) s = bar(i, s); else s += 30;
45+
printf("sum is %d\n", s);
46+
}
47+
48+
int main() {
49+
foo();
50+
return 0;
51+
}

llvm/test/tools/llvm-profgen/merge-cold-profile.test

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,18 @@
11
; Used the data from recursion-compression.test, refer it for the unmerged output
2-
; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=-1 --csprof-cold-thres=8
3-
; RUN: FileCheck %s --input-file %t
2+
; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t1 --compress-recursion=-1 --csprof-cold-thres=8
3+
; RUN: FileCheck %s --input-file %t1
44

55
; Test --csprof-trim-cold-context=0
6-
; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=-1 --csprof-cold-thres=100 --csprof-trim-cold-context=0
7-
; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-KEEP-COLD
6+
; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --csprof-cold-thres=100 --csprof-trim-cold-context=0
7+
; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-KEEP-COLD
88

99
; Test --csprof-merge-cold-context=0
10-
; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=-1 --csprof-cold-thres=10 --csprof-merge-cold-context=0
11-
; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-UNMERGED
10+
; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t3 --compress-recursion=-1 --csprof-cold-thres=10 --csprof-merge-cold-context=0
11+
; RUN: FileCheck %s --input-file %t3 --check-prefix=CHECK-UNMERGED
1212

1313
; CHECK: [fa]:14:4
1414
; CHECK-NEXT: 1: 4
15+
; CHECK-NEXT: 2: 18446744073709551615
1516
; CHECK-NEXT: 3: 4
1617
; CHECK-NEXT: 4: 2
1718
; CHECK-NEXT: 5: 1
@@ -37,6 +38,7 @@
3738
; CHECK-KEEP-COLD-NEXT: !Attributes: 0
3839
; CHECK-KEEP-COLD-NEXT:[fa]:14:4
3940
; CHECK-KEEP-COLD-NEXT: 1: 4
41+
; CHECK-KEEP-COLD-NEXT: 2: 18446744073709551615
4042
; CHECK-KEEP-COLD-NEXT: 3: 4
4143
; CHECK-KEEP-COLD-NEXT: 4: 2
4244
; CHECK-KEEP-COLD-NEXT: 5: 1

llvm/tools/llvm-profgen/ProfileGenerator.cpp

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -510,22 +510,19 @@ void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes(
510510
// Extract the top frame probes by looking up each address among the range in
511511
// the Address2ProbeMap
512512
extractProbesFromRange(RangeCounter, ProbeCounter, Binary);
513+
std::unordered_map<PseudoProbeInlineTree *, FunctionSamples *> FrameSamples;
513514
for (auto PI : ProbeCounter) {
514515
const PseudoProbe *Probe = PI.first;
515516
uint64_t Count = PI.second;
517+
// Ignore dangling probes since they will be reported later if needed.
518+
if (Probe->isDangling())
519+
continue;
516520
FunctionSamples &FunctionProfile =
517521
getFunctionProfileForLeafProbe(ContextStrStack, Probe, Binary);
518-
519-
// Use InvalidProbeCount(UINT64_MAX) to mark sample count for a dangling
520-
// probe. Dangling probes are the probes associated to an empty block. With
521-
// this place holder, sample count on dangling probe will not be trusted by
522-
// the compiler and it will rely on the counts inference algorithm to get
523-
// the probe a reasonable count.
524-
if (Probe->isDangling()) {
525-
FunctionProfile.addBodySamplesForProbe(
526-
Probe->Index, FunctionSamples::InvalidProbeCount);
527-
continue;
528-
}
522+
// Record the current frame and FunctionProfile whenever samples are
523+
// collected for non-danglie probes. This is for reporting all of the
524+
// dangling probes of the frame later.
525+
FrameSamples[Probe->getInlineTreeNode()] = &FunctionProfile;
529526
FunctionProfile.addBodySamplesForProbe(Probe->Index, Count);
530527
FunctionProfile.addTotalSamples(Count);
531528
if (Probe->isEntry()) {
@@ -554,6 +551,22 @@ void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes(
554551
FunctionProfile.getContext().getNameWithoutContext(), Count);
555552
}
556553
}
554+
555+
// Report dangling probes for frames that have real samples collected.
556+
// Dangling probes are the probes associated to an empty block. With this
557+
// place holder, sample count on a dangling probe will not be trusted by the
558+
// compiler and we will rely on the counts inference algorithm to get the
559+
// probe a reasonable count. Use InvalidProbeCount to mark sample count for
560+
// a dangling probe.
561+
for (auto &I : FrameSamples) {
562+
auto *FunctionProfile = I.second;
563+
for (auto *Probe : I.first->getProbes()) {
564+
if (Probe->isDangling()) {
565+
FunctionProfile->addBodySamplesForProbe(
566+
Probe->Index, FunctionSamples::InvalidProbeCount);
567+
}
568+
}
569+
}
557570
}
558571
}
559572

llvm/tools/llvm-profgen/PseudoProbe.cpp

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,6 @@ void PseudoProbeDecoder::buildAddress2ProbeMap(const uint8_t *Start,
198198
// A list of NUM_INLINED_FUNCTIONS entries describing each of the
199199
// inlined callees. Each record contains:
200200
// INLINE SITE
201-
// GUID of the inlinee (uint64)
202201
// Index of the callsite probe (ULEB128)
203202
// FUNCTION BODY
204203
// A FUNCTION BODY entry describing the inlined function.
@@ -214,8 +213,11 @@ void PseudoProbeDecoder::buildAddress2ProbeMap(const uint8_t *Start,
214213
uint32_t Index = 0;
215214
// A DFS-based decoding
216215
while (Data < End) {
217-
// Read inline site for inlinees
218-
if (Root != Cur) {
216+
if (Root == Cur) {
217+
// Use a sequential id for top level inliner.
218+
Index = Root->getChildren().size();
219+
} else {
220+
// Read inline site for inlinees
219221
Index = readUnsignedNumber<uint32_t>();
220222
}
221223
// Switch/add to a new tree node(inlinee)
@@ -243,10 +245,10 @@ void PseudoProbeDecoder::buildAddress2ProbeMap(const uint8_t *Start,
243245
Addr = readUnencodedNumber<int64_t>();
244246
}
245247
// Populate Address2ProbesMap
246-
std::vector<PseudoProbe> &ProbeVec = Address2ProbesMap[Addr];
247-
ProbeVec.emplace_back(Addr, Cur->GUID, Index, PseudoProbeType(Kind), Attr,
248-
Cur);
249-
Cur->addProbes(&ProbeVec.back());
248+
auto &Probes = Address2ProbesMap[Addr];
249+
Probes.emplace_back(Addr, Cur->GUID, Index, PseudoProbeType(Kind), Attr,
250+
Cur);
251+
Cur->addProbes(&Probes.back());
250252
LastAddr = Addr;
251253
}
252254

@@ -298,7 +300,7 @@ PseudoProbeDecoder::getCallProbeForAddr(uint64_t Address) const {
298300
auto It = Address2ProbesMap.find(Address);
299301
if (It == Address2ProbesMap.end())
300302
return nullptr;
301-
const std::vector<PseudoProbe> &Probes = It->second;
303+
const auto &Probes = It->second;
302304

303305
const PseudoProbe *CallProbe = nullptr;
304306
for (const auto &Probe : Probes) {

llvm/tools/llvm-profgen/PseudoProbe.h

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,10 @@ class PseudoProbeInlineTree {
4545
return std::get<0>(Site) ^ std::get<1>(Site);
4646
}
4747
};
48-
std::unordered_map<InlineSite, std::unique_ptr<PseudoProbeInlineTree>,
49-
InlineSiteHash>
50-
Children;
48+
using InlinedProbeTreeMap =
49+
std::unordered_map<InlineSite, std::unique_ptr<PseudoProbeInlineTree>,
50+
InlineSiteHash>;
51+
InlinedProbeTreeMap Children;
5152

5253
public:
5354
// Inlinee function GUID
@@ -71,6 +72,8 @@ class PseudoProbeInlineTree {
7172
return Ret.first->second.get();
7273
}
7374

75+
InlinedProbeTreeMap &getChildren() { return Children; }
76+
std::vector<PseudoProbe *> &getProbes() { return ProbeVector; }
7477
void addProbes(PseudoProbe *Probe) { ProbeVector.push_back(Probe); }
7578
// Return false if it's a dummy inline site
7679
bool hasInlineSite() const { return std::get<0>(ISite) != 0; }
@@ -91,7 +94,7 @@ struct PseudoProbeFuncDesc {
9194
// GUID to PseudoProbeFuncDesc map
9295
using GUIDProbeFunctionMap = std::unordered_map<uint64_t, PseudoProbeFuncDesc>;
9396
// Address to pseudo probes map.
94-
using AddressProbesMap = std::unordered_map<uint64_t, std::vector<PseudoProbe>>;
97+
using AddressProbesMap = std::unordered_map<uint64_t, std::list<PseudoProbe>>;
9598

9699
/*
97100
A pseudo probe has the format like below:
@@ -135,6 +138,8 @@ struct PseudoProbe {
135138
bool isDirectCall() const { return Type == PseudoProbeType::DirectCall; }
136139
bool isCall() const { return isIndirectCall() || isDirectCall(); }
137140

141+
PseudoProbeInlineTree *getInlineTreeNode() const { return InlineTree; }
142+
138143
// Get the inlined context by traversing current inline tree backwards,
139144
// each tree node has its InlineSite which is taken as the context.
140145
// \p ContextStack is populated in root to leaf order

0 commit comments

Comments
 (0)