Skip to content

Commit b1a45c6

Browse files
committed
[llvm-profgen] Ignore branch count against outline function
Some transformations, like hot-cold split or coro split, can outline part of a function's ranges into a separate function. Since the sample loader runs at an early stage of the backend, before any split happens, the compiler can't recognize those outlined functions, so in llvm-profgen we should attribute their samples to the original function. This is already done for the body range samples, since we use the symbols from DWARF, which are created before the split. But for branch samples, the call from the master function to its outlined function is not actually a call to the original function, so we shouldn't add head/callsite samples for it. So instead of the DWARF symbols, we use the symbols from the symbol table and ignore functions with special suffixes (like `.cold`, `.resume`) when accumulating the callsite/head samples. Reviewed By: hoy, wenlei Differential Revision: https://reviews.llvm.org/D110864
1 parent 6727832 commit b1a45c6

File tree

8 files changed

+179
-9
lines changed

8 files changed

+179
-9
lines changed
Binary file not shown.
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
401d4a 0x401d4a/0x402b60/P/-/-/14 0x401d04/0x401d43/M/-/-/1 0x401ceb/0x401cf0/P/-/-/1 0x401f74/0x401ce3/P/-/-/1 0x401f6f/0x401f74/P/-/-/1 0x401aff/0x401f60/P/-/-/10 0x402df6/0x401a50/P/-/-/2 0x402c63/0x402de0/P/-/-/1 0x402c51/0x402c5c/P/-/-/1 0x402dba/0x402c4f/P/-/-/3 0x402c4a/0x402da0/P/-/-/2 0x401999/0x402c30/P/-/-/1 0x4019d5/0x401995/P/-/-/1 0x4019c2/0x4019c7/P/-/-/1 0x402cbc/0x4019bf/P/-/-/2 0x402e3e/0x402cb5/P/-/-/2 0x402cb0/0x402e20/P/-/-/2 0x4019ba/0x402ca0/P/-/-/1 0x4019ab/0x4019b6/P/-/-/3 0x4019a1/0x4019a6/P/-/-/1 0x402c95/0x40199e/P/-/-/5 0x402c79/0x402c88/P/-/-/3 0x402e1d/0x402c74/P/-/-/14 0x402c6f/0x402e00/P/-/-/1 0x402dfd/0x402c68/P/-/-/1 0x401f5f/0x402df8/P/-/-/1 0x401f4f/0x401f54/P/-/-/1 0x401d17/0x401f4f/P/-/-/1 0x401d0a/0x401d0f/P/-/-/3 0x401cde/0x401cf0/P/-/-/1 0x402b2d/0x401cd6/P/-/-/2 0x401cd1/0x402b20/P/-/-/9
2+
401c7b 0x401c7b/0x401c2d/P/-/-/9 0x401c34/0x401c3f/P/-/-/1 0x401c7b/0x401c2d/P/-/-/9 0x401c34/0x401c3f/P/-/-/1 0x401c7b/0x401c2d/P/-/-/9 0x401c34/0x401c3f/P/-/-/1 0x401c7b/0x401c2d/P/-/-/15 0x401c34/0x401c3f/P/-/-/1 0x401c7b/0x401c2d/P/-/-/9 0x401c34/0x401c3f/P/-/-/1 0x401c7b/0x401c2d/P/-/-/15 0x401c34/0x401c3f/P/-/-/1 0x401c7b/0x401c2d/P/-/-/13 0x401c34/0x401c3f/P/-/-/1 0x401c7b/0x401c2d/P/-/-/9 0x401c34/0x401c3f/P/-/-/1 0x401c7b/0x401c2d/P/-/-/9 0x401c34/0x401c3f/P/-/-/1 0x401c7b/0x401c2d/P/-/-/13 0x401c34/0x401c3f/P/-/-/1 0x401c7b/0x401c2d/P/-/-/10 0x401c34/0x401c3f/P/-/-/1 0x401c7b/0x401c2d/P/-/-/9 0x401c34/0x401c3f/P/-/-/1 0x401c7b/0x401c2d/P/-/-/9 0x401c34/0x401c3f/P/-/-/1 0x401c7b/0x401c2d/P/-/-/11 0x401c34/0x401c3f/P/-/-/1 0x401c7b/0x401c2d/P/-/-/9 0x401c34/0x401c3f/P/-/-/1 0x401c7b/0x401c2d/P/-/-/13 0x401c34/0x401c3f/P/-/-/1
Binary file not shown.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
4004f0
2+
5541f689495641d7
3+
0x40051f/0x4004f0/P/-/-/3 0x40051f/0x4004f0/P/-/-/4 0x40051f/0x4004f0/P/-/-/3 0x40051f/0x4004f0/P/-/-/4 0x40051f/0x4004f0/P/-/-/3 0x40051f/0x4004f0/P/-/-/4 0x40051f/0x4004f0/P/-/-/3 0x40051f/0x4004f0/P/-/-/3 0x40051f/0x4004f0/P/-/-/3 0x40051f/0x4004f0/P/-/-/1 0x400633/0x400516/P/-/-/1 0x4004da/0x400631/P/-/-/2 0x40062c/0x4004c0/P/-/-/1 0x40050d/0x40062a/P/-/-/3 0x40051f/0x4004f0/P/-/-/5 0x40051f/0x4004f0/P/-/-/4 0x40051f/0x4004f0/P/-/-/4 0x40051f/0x4004f0/P/-/-/4 0x40051f/0x4004f0/P/-/-/4 0x40051f/0x4004f0/P/-/-/3 0x40051f/0x4004f0/P/-/-/4 0x40051f/0x4004f0/P/-/-/4 0x40051f/0x4004f0/P/-/-/4 0x40051f/0x4004f0/P/-/-/3 0x40051f/0x4004f0/P/-/-/4 0x40051f/0x4004f0/P/-/-/3 0x40051f/0x4004f0/P/-/-/4 0x40051f/0x4004f0/P/-/-/3 0x40051f/0x4004f0/P/-/-/3 0x40051f/0x4004f0/P/-/-/3 0x40051f/0x4004f0/P/-/-/4 0x40051f/0x4004f0/P/-/-/5
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/coroutine.perfscript --binary=%S/Inputs/coroutine.perfbin --output=%t
2+
; RUN: FileCheck %s --input-file %t --check-prefix=CHECK
3+
4+
; Check that the head sample count for ticker is 0.
5+
; CHECK: _Z6tickeri:353:0
6+
; CHECK-NOT: _Z6tickeri.resume
7+
8+
9+
/*
10+
* Inputs/coroutine.perfbin is generated by compiling the following source code:
11+
* clang++ coroutine.cpp -std=c++2a -g2 -o coroutine
12+
*/
13+
14+
#include <cstdint>
15+
#include <cstdlib>
16+
#include <ctime>
17+
#include <experimental/coroutine>
18+
#include <iostream>
19+
20+
struct task {
21+
struct promise_type {
22+
task get_return_object() { return {}; }
23+
std::experimental::suspend_never initial_suspend() { return {}; }
24+
std::experimental::suspend_never final_suspend() noexcept { return {}; }
25+
void return_void() {}
26+
void unhandled_exception() {}
27+
};
28+
};
29+
30+
template <typename T>
31+
struct generator {
32+
struct promise_type;
33+
using handle = std::experimental::coroutine_handle<promise_type>;
34+
struct promise_type {
35+
int current_value;
36+
static auto get_return_object_on_allocation_failure() { return generator{nullptr}; }
37+
auto get_return_object() { return generator{handle::from_promise(*this)}; }
38+
auto initial_suspend() { return std::experimental::suspend_always{}; }
39+
auto final_suspend() { return std::experimental::suspend_always{}; }
40+
void unhandled_exception() { std::terminate(); }
41+
void return_void() {}
42+
auto yield_value(int value) {
43+
current_value = value;
44+
return std::experimental::suspend_always{};
45+
}
46+
};
47+
bool move_next() { return coro ? (coro.resume(), !coro.done()) : false; }
48+
int current_value() { return coro.promise().current_value; }
49+
generator(generator const &) = delete;
50+
generator(generator &&rhs) : coro(rhs.coro) { rhs.coro = nullptr; }
51+
~generator() {
52+
if (coro)
53+
coro.destroy();
54+
}
55+
56+
private:
57+
generator(handle h) : coro(h) {}
58+
handle coro;
59+
};
60+
61+
generator<int> ticker(int count) {
62+
for (int i = 0; i < count; ++i) {
63+
srand(time(NULL));
64+
uint32_t a = rand() % 10 + 1;
65+
uint32_t b = rand() % 10 + 1;
66+
uint64_t c = 0;
67+
for (int i = 0; i < 1500; ++i) {
68+
c = ((uint64_t)a) + b;
69+
a = b;
70+
b = c % 2147483648ULL;
71+
}
72+
co_yield a;
73+
}
74+
}
75+
76+
int main() {
77+
auto g = ticker(500000);
78+
uint64_t ans = 0;
79+
while (g.move_next()) {
80+
ans += g.current_value();
81+
}
82+
std::cout << ans << "\n";
83+
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/func-split.perfscript --binary=%S/Inputs/func-split.perfbin --output=%t
2+
; RUN: FileCheck %s --input-file %t --check-prefix=CHECK
3+
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/func-split.perfscript --binary=%S/Inputs/func-split.perfbin --output=%t --ignore-stack-samples
4+
; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-STRIP-CTX
5+
6+
;CHECK: [foo]:408:0
7+
;CHECK: 2.1: 27
8+
;CHECK: 3: 27
9+
;CHECK: 3.1: 2 bar:2
10+
;CHECK: 3.2: 26
11+
;CHECK: [foo:3.1 @ bar]:8:0
12+
;CHECK: 1: 1
13+
;CHECK: 5: 1
14+
;CHECK: [bar]:0:1
15+
16+
;CHECK-NOT: foo.cold
17+
18+
;CHECK-STRIP-CTX: foo:408:0
19+
;CHECK-STRIP-CTX: 0: 0
20+
;CHECK-STRIP-CTX: 2.1: 27
21+
;CHECK-STRIP-CTX: 3: 27
22+
;CHECK-STRIP-CTX: 3.1: 1 bar:1
23+
;CHECK-STRIP-CTX: 3.2: 26
24+
;CHECK-STRIP-CTX: 4: 0
25+
;CHECK-STRIP-CTX: bar:8:1
26+
;CHECK-STRIP-CTX: 1: 1
27+
;CHECK-STRIP-CTX: 5: 1
28+
29+
;CHECK-STRIP-CTX-NOT: foo.cold
30+
31+
32+
; clang -g -O3 -fdebug-info-for-profiling func-split.c -mllvm -mfs-count-threshold=0
33+
; -fprofile-sample-use=profile.txt -fno-inline -mllvm --enable-split-machine-functions=1
34+
35+
#include <stdio.h>
36+
37+
int bar(int x, int y) {
38+
if (x % 3) {
39+
return x - y;
40+
}
41+
return x + y;
42+
}
43+
44+
void foo() {
45+
int s, i = 0;
46+
while (i++ < 4000 * 4000)
47+
if (i % 91 == 0) s = bar(i, s); else s += 30;
48+
printf("sum is %d\n", s);
49+
}
50+
51+
int main() {
52+
foo();
53+
return 0;
54+
}
55+
56+
; profile.txt:
57+
58+
foo:106269:0
59+
2.1: 2268
60+
2.2: 2217
61+
3: 2268
62+
3.1: 1 bar:1
63+
3.2: 2192
64+
bar:1032:1
65+
0: 24
66+
1: 24
67+
2: 16
68+
4: 8
69+
5: 24

llvm/tools/llvm-profgen/ProfileGenerator.cpp

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,24 @@ void ProfileGenerator::populateBodySamplesForAllFunctions(
397397
}
398398
}
399399

400+
static bool isOutlinedFunction(StringRef CalleeName) {
401+
// Check whether it's from hot-cold func split or coro split.
402+
return CalleeName.find(".resume") != StringRef::npos ||
403+
CalleeName.find(".cold") != StringRef::npos;
404+
}
405+
406+
StringRef ProfileGeneratorBase::getCalleeNameForOffset(uint64_t TargetOffset) {
407+
// Get the callee name by branch target if it's a call branch.
408+
StringRef CalleeName = FunctionSamples::getCanonicalFnName(
409+
Binary->getFuncFromStartOffset(TargetOffset));
410+
411+
// We won't accumulate sample counts against outlined functions.
412+
if (CalleeName.size() == 0 || isOutlinedFunction(CalleeName))
413+
return StringRef();
414+
415+
return CalleeName;
416+
}
417+
400418
void ProfileGenerator::populateBoundarySamplesForAllFunctions(
401419
const BranchSample &BranchCounters) {
402420
for (auto Entry : BranchCounters) {
@@ -405,9 +423,7 @@ void ProfileGenerator::populateBoundarySamplesForAllFunctions(
405423
uint64_t Count = Entry.second;
406424
assert(Count != 0 && "Unexpected zero weight branch");
407425

408-
// Get the callee name by branch target if it's a call branch.
409-
StringRef CalleeName = FunctionSamples::getCanonicalFnName(
410-
Binary->getFuncFromStartOffset(TargetOffset));
426+
StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
411427
if (CalleeName.size() == 0)
412428
continue;
413429
// Record called target sample and its count.
@@ -551,9 +567,7 @@ void CSProfileGenerator::populateBoundarySamplesForFunction(
551567
uint64_t Count = Entry.second;
552568
assert(Count != 0 && "Unexpected zero weight branch");
553569

554-
// Get the callee name by branch target if it's a call branch
555-
StringRef CalleeName = FunctionSamples::getCanonicalFnName(
556-
Binary->getFuncFromStartOffset(TargetOffset));
570+
StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
557571
if (CalleeName.size() == 0)
558572
continue;
559573

@@ -804,8 +818,7 @@ void CSProfileGenerator::populateBoundarySamplesWithProbes(
804818
getFunctionProfileForLeafProbe(ContextStack, CallProbe);
805819
FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count);
806820
FunctionProfile.addTotalSamples(Count);
807-
StringRef CalleeName = FunctionSamples::getCanonicalFnName(
808-
Binary->getFuncFromStartOffset(TargetOffset));
821+
StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
809822
if (CalleeName.size() == 0)
810823
continue;
811824
FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName,

llvm/tools/llvm-profgen/ProfileGenerator.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ class ProfileGeneratorBase {
7474
void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile,
7575
const SampleContextFrame &LeafLoc,
7676
uint64_t Count);
77-
77+
StringRef getCalleeNameForOffset(uint64_t TargetOffset);
7878
// Used by SampleProfileWriter
7979
SampleProfileMap ProfileMap;
8080

0 commit comments

Comments
 (0)