Skip to content

Commit 8a0406d

Browse files
committed
[llvm-profgen] Filter out invalid LBR ranges.
The profiler can sometimes give us a LBR trace that implicates bogus code ranges. For example, 0xc5acb56/0xc66c6c0 0xc628195/0xf31fbb0 0xc611261/0xc628130 0xc5c1a21/0xc6111c0 0x1f7edfd3/0xc5c3a50 0xc5c154f/0x1f7edec0 0xe8eed07/0xc5c11e0 , note that the first two pairs are supposed to form a linear execution range, in this case, it is [0xf31fbb0, 0xc5acb56] , which doesn't make sense. Such bogus ranges should be ruled out to avoid generating a bad profile. I'm fixing this for both CS and non-CS cases. Reviewed By: wenlei Differential Revision: https://reviews.llvm.org/D123271
1 parent 208f93c commit 8a0406d

File tree

4 files changed

+146
-23
lines changed

4 files changed

+146
-23
lines changed
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
PERF_RECORD_MMAP2 1243676/1243676: [0x201000(0x1000) @ 0 00:1d 224517108 1044165]: r-xp /home/noinline-cs-pseudoprobe.perfbin
2+
3+
20179e
4+
2017f9
5+
7f83e84e7793
6+
5541f689495641d7
7+
0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017d8/0x2017e3/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0
8+
9+
10+
// The consecutive pairs 0x2017bf/0x201760 and 0x2017d8/0x2017e3 form an invalid execution range [0x2017e3, 0x2017bf], should be ignored to avoid bogus instruction ranges.
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
; In the perfscript input, the consecutive branch pairs 0x2017bf/0x201760 and 0x2017d8/0x2017e3 form an invalid execution range [0x2017e3, 0x2017bf].
2+
; We are testing only the invalid range is dropped to avoid bogus instruction ranges. All other ranges and all branch samples should be kept.
3+
;
4+
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/invalid-range.perfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t1 --skip-symbolization --ignore-stack-samples --use-offset=0
5+
; RUN: FileCheck %s --input-file %t1 --check-prefix=NOCS
6+
7+
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/invalid-range.perfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t2 --skip-symbolization --use-offset=0
8+
; RUN: FileCheck %s --input-file %t2 --check-prefix=CS
9+
10+
11+
; NOCS: 4
12+
; NOCS-NEXT: 201760-20177f:2
13+
; NOCS-NEXT: 20179e-2017bf:1
14+
; NOCS-NEXT: 2017c4-2017cf:1
15+
; NOCS-NEXT: 2017c4-2017d8:1
16+
; NOCS-NEXT: 4
17+
; NOCS-NEXT: 20177f->2017c4:2
18+
; NOCS-NEXT: 2017bf->201760:2
19+
; NOCS-NEXT: 2017cf->20179e:2
20+
; NOCS-NEXT: 2017d8->2017e3:1
21+
22+
23+
; CS: []
24+
; CS-NEXT: 3
25+
; CS-NEXT: 201760-20177f:1
26+
; CS-NEXT: 20179e-2017bf:1
27+
; CS-NEXT: 2017c4-2017d8:1
28+
; CS-NEXT: 4
29+
; CS-NEXT: 20177f->2017c4:1
30+
; CS-NEXT: 2017bf->201760:1
31+
; CS-NEXT: 2017cf->20179e:1
32+
; CS-NEXT: 2017d8->2017e3:1
33+
; CS-NEXT: [0x7f4]
34+
; CS-NEXT: 1
35+
; CS-NEXT: 2017c4-2017cf:1
36+
; CS-NEXT: 2
37+
; CS-NEXT: 2017bf->201760:1
38+
; CS-NEXT: 2017cf->20179e:1
39+
; CS-NEXT: [0x7f4 @ 0x7bf]
40+
; CS-NEXT: 1
41+
; CS-NEXT: 201760-20177f:1
42+
; CS-NEXT: 1
43+
; CS-NEXT: 20177f->2017c4:1
44+
45+
; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling
46+
; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls
47+
; -g test.c -o a.out
48+
49+
#include <stdio.h>
50+
51+
int bar(int x, int y) {
52+
if (x % 3) {
53+
return x - y;
54+
}
55+
return x + y;
56+
}
57+
58+
void foo() {
59+
int s, i = 0;
60+
while (i++ < 4000 * 4000)
61+
if (i % 91) s = bar(i, s); else s += 30;
62+
printf("sum is %d\n", s);
63+
}
64+
65+
int main() {
66+
foo();
67+
return 0;
68+
}

llvm/tools/llvm-profgen/PerfReader.cpp

Lines changed: 54 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,11 @@ void VirtualUnwinder::unwindLinear(UnwindState &State, uint64_t Repeat) {
100100
InstructionPointer &IP = State.InstPtr;
101101
uint64_t Target = State.getCurrentLBRTarget();
102102
uint64_t End = IP.Address;
103+
if (Target > End) {
104+
// Skip unwinding the rest of LBR trace when a bogus range is seen.
105+
State.setInvalid();
106+
return;
107+
}
103108
if (Binary->usePseudoProbes()) {
104109
// We don't need to top frame probe since it should be extracted
105110
// from the range.
@@ -303,19 +308,28 @@ bool VirtualUnwinder::unwind(const PerfSample *Sample, uint64_t Repeat) {
303308
// extra frame when processing the return paired with this call.
304309
unwindCall(State);
305310
} else if (isReturnState(State)) {
306-
// Unwind returns - check whether the IP is indeed at a return instruction
311+
// Unwind returns - check whether the IP is indeed at a return
312+
// instruction
307313
unwindReturn(State);
308-
} else {
314+
} else if (isValidState(State)) {
309315
// Unwind branches
310-
// For regular intra function branches, we only need to record branch with
311-
// context. For an artificial branch cross function boundaries, we got an
312-
// issue with returning to external code. Take the two LBR enties for
313-
// example: [foo:8(RETURN), ext:1] [ext:3(CALL), bar:1] After perf reader,
314-
// we only get[foo:8(RETURN), bar:1], unwinder will be confused like foo
315-
// return to bar. Here we detect and treat this case as BRANCH instead of
316-
// RETURN which only update the source address.
316+
// For regular intra function branches, we only need to record branch
317+
// with context. For an artificial branch cross function boundaries, we
318+
// got an issue with returning to external code. Take the two LBR enties
319+
// for example: [foo:8(RETURN), ext:1] [ext:3(CALL), bar:1] After perf
320+
// reader, we only get[foo:8(RETURN), bar:1], unwinder will be confused
321+
// like foo return to bar. Here we detect and treat this case as BRANCH
322+
// instead of RETURN which only update the source address.
317323
unwindBranch(State);
324+
} else {
325+
// Skip unwinding the rest of LBR trace. Reset the stack and update the
326+
// state so that the rest of the trace can still be processed as if they
327+
// do not have stack samples.
328+
State.clearCallStack();
329+
State.InstPtr.update(State.getCurrentLBRSource());
330+
State.pushFrame(State.InstPtr.Address);
318331
}
332+
319333
State.advanceLBR();
320334
// Record `branch` with calling context after unwinding.
321335
recordBranchCount(Branch, State, Repeat);
@@ -720,7 +734,9 @@ void HybridPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) {
720734
// ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries
721735
//
722736
std::shared_ptr<PerfSample> Sample = std::make_shared<PerfSample>();
723-
737+
#ifndef NDEBUG
738+
Sample->Linenum = TraceIt.getLineNumber();
739+
#endif
724740
// Parsing call stack and populate into PerfSample.CallStack
725741
if (!extractCallstack(TraceIt, Sample->CallStack)) {
726742
// Skip the next LBR line matched current call stack
@@ -915,8 +931,10 @@ void PerfScriptReader::computeCounterFromLBR(const PerfSample *Sample,
915931
// If this not the first LBR, update the range count between TO of current
916932
// LBR and FROM of next LBR.
917933
uint64_t StartOffset = TargetOffset;
918-
if (EndOffeset != 0)
919-
Counter.recordRangeCount(StartOffset, EndOffeset, Repeat);
934+
if (EndOffeset != 0) {
935+
if (StartOffset <= EndOffeset)
936+
Counter.recordRangeCount(StartOffset, EndOffeset, Repeat);
937+
}
920938
EndOffeset = SourceOffset;
921939
}
922940
}
@@ -1161,41 +1179,55 @@ void PerfScriptReader::warnInvalidRange() {
11611179
const char *RangeCrossFuncMsg =
11621180
"Fall through range should not cross function boundaries, likely due to "
11631181
"profile and binary mismatch.";
1182+
const char *BogusRangeMsg = "Range start is after range end.";
11641183

1184+
uint64_t TotalRangeNum = 0;
11651185
uint64_t InstNotBoundary = 0;
11661186
uint64_t UnmatchedRange = 0;
11671187
uint64_t RangeCrossFunc = 0;
1188+
uint64_t BogusRange = 0;
11681189

11691190
for (auto &I : Ranges) {
11701191
uint64_t StartOffset = I.first.first;
11711192
uint64_t EndOffset = I.first.second;
1193+
TotalRangeNum += I.second;
11721194

11731195
if (!Binary->offsetIsCode(StartOffset) ||
11741196
!Binary->offsetIsTransfer(EndOffset)) {
1175-
InstNotBoundary++;
1197+
InstNotBoundary += I.second;
11761198
WarnInvalidRange(StartOffset, EndOffset, EndNotBoundaryMsg);
11771199
}
11781200

11791201
auto *FRange = Binary->findFuncRangeForOffset(StartOffset);
11801202
if (!FRange) {
1181-
UnmatchedRange++;
1203+
UnmatchedRange += I.second;
11821204
WarnInvalidRange(StartOffset, EndOffset, DanglingRangeMsg);
11831205
continue;
11841206
}
11851207

11861208
if (EndOffset >= FRange->EndOffset) {
1187-
RangeCrossFunc++;
1209+
RangeCrossFunc += I.second;
11881210
WarnInvalidRange(StartOffset, EndOffset, RangeCrossFuncMsg);
11891211
}
1212+
1213+
if (StartOffset > EndOffset) {
1214+
BogusRange += I.second;
1215+
WarnInvalidRange(StartOffset, EndOffset, BogusRangeMsg);
1216+
}
11901217
}
11911218

1192-
uint64_t TotalRangeNum = Ranges.size();
1193-
emitWarningSummary(InstNotBoundary, TotalRangeNum,
1194-
"of profiled ranges are not on instruction boundary.");
1195-
emitWarningSummary(UnmatchedRange, TotalRangeNum,
1196-
"of profiled ranges do not belong to any functions.");
1197-
emitWarningSummary(RangeCrossFunc, TotalRangeNum,
1198-
"of profiled ranges do cross function boundaries.");
1219+
emitWarningSummary(
1220+
InstNotBoundary, TotalRangeNum,
1221+
"of samples are from ranges that are not on instruction boundary.");
1222+
emitWarningSummary(
1223+
UnmatchedRange, TotalRangeNum,
1224+
"of samples are from ranges that do not belong to any functions.");
1225+
emitWarningSummary(
1226+
RangeCrossFunc, TotalRangeNum,
1227+
"of samples are from ranges that do cross function boundaries.");
1228+
emitWarningSummary(
1229+
BogusRange, TotalRangeNum,
1230+
"of samples are from ranges that have range start after range end.");
11991231
}
12001232

12011233
void PerfScriptReader::parsePerfTraces() {

llvm/tools/llvm-profgen/PerfReader.h

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,10 @@ struct PerfSample {
197197
}
198198

199199
#ifndef NDEBUG
200+
uint64_t Linenum = 0;
201+
200202
void print() const {
203+
dbgs() << "Line " << Linenum << "\n";
201204
dbgs() << "LBR stack\n";
202205
printLBRStack(LBRStack);
203206
dbgs() << "Call stack\n";
@@ -255,6 +258,9 @@ struct UnwindState {
255258
const SmallVector<LBREntry, 16> &LBRStack;
256259
// Used to iterate the address range
257260
InstructionPointer InstPtr;
261+
// Indicate whether unwinding is currently in a bad state which requires to
262+
// skip all subsequent unwinding.
263+
bool Invalid = false;
258264
UnwindState(const PerfSample *Sample, const ProfiledBinary *Binary)
259265
: Binary(Binary), LBRStack(Sample->LBRStack),
260266
InstPtr(Binary, Sample->CallStack.front()) {
@@ -284,6 +290,7 @@ struct UnwindState {
284290
"IP should align with context leaf");
285291
}
286292

293+
void setInvalid() { Invalid = true; }
287294
bool hasNextLBR() const { return LBRIndex < LBRStack.size(); }
288295
uint64_t getCurrentLBRSource() const { return LBRStack[LBRIndex].Source; }
289296
uint64_t getCurrentLBRTarget() const { return LBRStack[LBRIndex].Target; }
@@ -476,10 +483,14 @@ class VirtualUnwinder {
476483
bool isCallState(UnwindState &State) const {
477484
// The tail call frame is always missing here in stack sample, we will
478485
// use a specific tail call tracker to infer it.
479-
return Binary->addressIsCall(State.getCurrentLBRSource());
486+
return isValidState(State) &&
487+
Binary->addressIsCall(State.getCurrentLBRSource());
480488
}
481489

482490
bool isReturnState(UnwindState &State) const {
491+
if (!isValidState(State))
492+
return false;
493+
483494
// Simply check addressIsReturn, as ret is always reliable, both for
484495
// regular call and tail call.
485496
if (!Binary->addressIsReturn(State.getCurrentLBRSource()))
@@ -497,6 +508,8 @@ class VirtualUnwinder {
497508
return (CallAddr != 0);
498509
}
499510

511+
bool isValidState(UnwindState &State) const { return !State.Invalid; }
512+
500513
void unwindCall(UnwindState &State);
501514
void unwindLinear(UnwindState &State, uint64_t Repeat);
502515
void unwindReturn(UnwindState &State);

0 commit comments

Comments
 (0)