Skip to content

Commit 5bf191a

Browse files
committed
[llvm-profgen] Fix index out of bounds error while using ip.advance
Previously we assumed there were some non-executing sections at the bottom of the text section, so that we would never hit the array's bound. But on a BOLTed binary, it turned out that the .bolt section is at the bottom of the text section and can be profiled, which then crashes llvm-profgen. This change tries to fix it. Reviewed By: hoy, wenlei Differential Revision: https://reviews.llvm.org/D113238
1 parent 845561e commit 5bf191a

File tree

5 files changed

+85
-30
lines changed

5 files changed

+85
-30
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
3
2+
0-0:1
3+
f-fff0:1
4+
ffff-ffff:1
5+
0

llvm/test/tools/llvm-profgen/inline-noprobe.test

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
; RUN: echo -e "0\n0" > %t
1010
; RUN: llvm-profgen --format=text --unsymbolized-profile=%t --binary=%S/Inputs/inline-noprobe.perfbin --output=%t1 --fill-zero-for-all-funcs
1111
; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK-ALL-ZERO
12+
; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/out-of-bounds.raw.prof --binary=%S/Inputs/inline-noprobe.perfbin --output=%t1
13+
; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK-OB
1214

1315
CHECK: main:188:0
1416
CHECK: 0: 0
@@ -58,6 +60,33 @@ CHECK-RAW-PROFILE-NEXT: 2
5860
CHECK-RAW-PROFILE-NEXT: 677->650:21
5961
CHECK-RAW-PROFILE-NEXT: 691->669:43
6062

63+
;CHECK-OB: foo:8:0
64+
;CHECK-OB: 0: 1
65+
;CHECK-OB: 2.1: 1
66+
;CHECK-OB: 3: 1
67+
;CHECK-OB: 3.2: 1
68+
;CHECK-OB: 4: 1
69+
;CHECK-OB: 3.1: bar:1
70+
;CHECK-OB: 1: 1
71+
;CHECK-OB: 3.2: bar:2
72+
;CHECK-OB: 1: 1
73+
;CHECK-OB: 7: 1
74+
;CHECK-OB: main:8:0
75+
;CHECK-OB: 0: 1
76+
;CHECK-OB: 2: 1
77+
;CHECK-OB: 1: foo:6
78+
;CHECK-OB: 2.1: 1
79+
;CHECK-OB: 3: 1
80+
;CHECK-OB: 3.2: 1
81+
;CHECK-OB: 4: 1
82+
;CHECK-OB: 3.1: bar:1
83+
;CHECK-OB: 1: 1
84+
;CHECK-OB: 3.2: bar:1
85+
;CHECK-OB: 1: 1
86+
;CHECK-OB: bar:2:0
87+
;CHECK-OB: 1: 1
88+
;CHECK-OB: 5: 1
89+
6190
; original code:
6291
; clang -O3 -g -fdebug-info-for-profiling test.c -o a.out
6392
#include <stdio.h>

llvm/tools/llvm-profgen/ProfileGenerator.cpp

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,7 @@ ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) {
355355
if (FillZeroForAllFuncs) {
356356
for (auto &FuncI : Binary->getAllBinaryFunctions()) {
357357
for (auto &R : FuncI.second.Ranges) {
358-
Ranges[{R.first, R.second}] += 0;
358+
Ranges[{R.first, R.second - 1}] += 0;
359359
}
360360
}
361361
} else {
@@ -385,7 +385,10 @@ void ProfileGenerator::populateBodySamplesForAllFunctions(
385385
// Disjoint ranges may have range in the middle of two instr,
386386
// e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
387387
// can be Addr1+1 to Addr2-1. We should ignore such range.
388-
while (IP.Address <= RangeEnd) {
388+
if (IP.Address > RangeEnd)
389+
continue;
390+
391+
do {
389392
uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
390393
const SampleContextFrameVector &FrameVec =
391394
Binary->getFrameLocationStack(Offset);
@@ -394,9 +397,7 @@ void ProfileGenerator::populateBodySamplesForAllFunctions(
394397
updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(),
395398
Count);
396399
}
397-
// Move to next IP within the range.
398-
IP.advance();
399-
}
400+
} while (IP.advance() && IP.Address <= RangeEnd);
400401
}
401402
}
402403

@@ -538,17 +539,17 @@ void CSProfileGenerator::populateBodySamplesForFunction(
538539
// Disjoint ranges may have range in the middle of two instr,
539540
// e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
540541
// can be Addr1+1 to Addr2-1. We should ignore such range.
541-
while (IP.Address <= RangeEnd) {
542+
if (IP.Address > RangeEnd)
543+
continue;
544+
545+
do {
542546
uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
543547
auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset);
544548
if (LeafLoc.hasValue()) {
545549
// Recording body sample for this specific context
546550
updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
547551
}
548-
549-
// Move to next IP within the range
550-
IP.advance();
551-
}
552+
} while (IP.advance() && IP.Address <= RangeEnd);
552553
}
553554
}
554555

@@ -714,14 +715,13 @@ void CSProfileGenerator::extractProbesFromRange(const RangeSample &RangeCounter,
714715
continue;
715716

716717
InstructionPointer IP(Binary, RangeBegin, true);
717-
718718
// Disjoint ranges may have range in the middle of two instr,
719719
// e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
720720
// can be Addr1+1 to Addr2-1. We should ignore such range.
721721
if (IP.Address > RangeEnd)
722722
continue;
723723

724-
while (IP.Address <= RangeEnd) {
724+
do {
725725
const AddressProbesMap &Address2ProbesMap =
726726
Binary->getAddress2ProbesMap();
727727
auto It = Address2ProbesMap.find(IP.Address);
@@ -732,9 +732,7 @@ void CSProfileGenerator::extractProbesFromRange(const RangeSample &RangeCounter,
732732
ProbeCounter[&Probe] += Count;
733733
}
734734
}
735-
736-
IP.advance();
737-
}
735+
} while (IP.advance() && IP.Address <= RangeEnd);
738736
}
739737
}
740738

llvm/tools/llvm-profgen/ProfiledBinary.cpp

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -657,22 +657,27 @@ SampleContextFrameVector ProfiledBinary::symbolize(const InstructionPointer &IP,
657657

658658
void ProfiledBinary::computeInlinedContextSizeForRange(uint64_t StartOffset,
659659
uint64_t EndOffset) {
660-
uint32_t Index = getIndexForOffset(StartOffset);
661-
if (CodeAddrOffsets[Index] != StartOffset)
660+
uint64_t RangeBegin = offsetToVirtualAddr(StartOffset);
661+
uint64_t RangeEnd = offsetToVirtualAddr(EndOffset);
662+
InstructionPointer IP(this, RangeBegin, true);
663+
664+
if (IP.Address != RangeBegin)
662665
WithColor::warning() << "Invalid start instruction at "
663-
<< format("%8" PRIx64, StartOffset) << "\n";
666+
<< format("%8" PRIx64, RangeBegin) << "\n";
667+
668+
if (IP.Address >= RangeEnd)
669+
return;
664670

665-
uint64_t Offset = CodeAddrOffsets[Index];
666-
while (Offset < EndOffset) {
671+
do {
672+
uint64_t Offset = virtualAddrToOffset(IP.Address);
667673
const SampleContextFrameVector &SymbolizedCallStack =
668674
getFrameLocationStack(Offset, UsePseudoProbes);
669675
uint64_t Size = Offset2InstSizeMap[Offset];
670676

671677
// Record instruction size for the corresponding context
672678
FuncSizeTracker.addInstructionForContext(SymbolizedCallStack, Size);
673679

674-
Offset = CodeAddrOffsets[++Index];
675-
}
680+
} while (IP.advance() && IP.Address < RangeEnd);
676681
}
677682

678683
InstructionPointer::InstructionPointer(const ProfiledBinary *Binary,
@@ -682,18 +687,31 @@ InstructionPointer::InstructionPointer(const ProfiledBinary *Binary,
682687
if (RoundToNext) {
683688
// we might get address which is not the code
684689
// it should round to the next valid address
685-
this->Address = Binary->getAddressforIndex(Index);
690+
if (Index >= Binary->getCodeOffsetsSize())
691+
this->Address = UINT64_MAX;
692+
else
693+
this->Address = Binary->getAddressforIndex(Index);
686694
}
687695
}
688696

689-
void InstructionPointer::advance() {
697+
bool InstructionPointer::advance() {
690698
Index++;
699+
if (Index >= Binary->getCodeOffsetsSize()) {
700+
Address = UINT64_MAX;
701+
return false;
702+
}
691703
Address = Binary->getAddressforIndex(Index);
704+
return true;
692705
}
693706

694-
void InstructionPointer::backward() {
707+
bool InstructionPointer::backward() {
708+
if (Index == 0) {
709+
Address = 0;
710+
return false;
711+
}
695712
Index--;
696713
Address = Binary->getAddressforIndex(Index);
714+
return true;
697715
}
698716

699717
void InstructionPointer::update(uint64_t Addr) {

llvm/tools/llvm-profgen/ProfiledBinary.h

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -64,23 +64,24 @@ struct InstructionPointer {
6464
uint64_t Index = 0;
6565
InstructionPointer(const ProfiledBinary *Binary, uint64_t Address,
6666
bool RoundToNext = false);
67-
void advance();
68-
void backward();
67+
bool advance();
68+
bool backward();
6969
void update(uint64_t Addr);
7070
};
7171

7272
using RangesTy = std::vector<std::pair<uint64_t, uint64_t>>;
7373

7474
struct BinaryFunction {
7575
StringRef FuncName;
76+
// End of range is an exclusive bound.
7677
RangesTy Ranges;
7778
};
7879

7980
// Info about function range. A function can be split into multiple
8081
// non-continuous ranges, each range corresponds to one FuncRange.
8182
struct FuncRange {
8283
uint64_t StartOffset;
83-
// EndOffset is a exclusive bound.
84+
// EndOffset is an exclusive bound.
8485
uint64_t EndOffset;
8586
// Function the range belongs to
8687
BinaryFunction *Func;
@@ -105,7 +106,8 @@ struct PrologEpilogTracker {
105106
for (auto I : FuncStartOffsetMap) {
106107
PrologEpilogSet.insert(I.first);
107108
InstructionPointer IP(Binary, I.first);
108-
IP.advance();
109+
if (!IP.advance())
110+
break;
109111
PrologEpilogSet.insert(IP.Offset);
110112
}
111113
}
@@ -115,7 +117,8 @@ struct PrologEpilogTracker {
115117
for (auto Addr : RetAddrs) {
116118
PrologEpilogSet.insert(Addr);
117119
InstructionPointer IP(Binary, Addr);
118-
IP.backward();
120+
if (!IP.backward())
121+
break;
119122
PrologEpilogSet.insert(IP.Offset);
120123
}
121124
}
@@ -336,6 +339,8 @@ class ProfiledBinary {
336339
return offsetToVirtualAddr(CodeAddrOffsets[Index]);
337340
}
338341

342+
size_t getCodeOffsetsSize() const { return CodeAddrOffsets.size(); }
343+
339344
bool usePseudoProbes() const { return UsePseudoProbes; }
340345
// Get the index in CodeAddrOffsets for the address
341346
// As we might get an address which is not the code

0 commit comments

Comments
 (0)