Skip to content

[BOLT] Support profile density with basic samples #137644

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions bolt/include/bolt/Profile/DataReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,9 @@ struct FuncSampleData {
/// Get the number of samples recorded in [Start, End)
uint64_t getSamples(uint64_t Start, uint64_t End) const;

/// Returns the total number of samples recorded in this function, i.e. the
/// sum of the hit counts over all recorded sample entries.
uint64_t getSamples() const;

/// Aggregation helper
DenseMap<uint64_t, size_t> Index;

Expand Down
23 changes: 15 additions & 8 deletions bolt/lib/Profile/DataAggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -565,15 +565,14 @@ void DataAggregator::processProfile(BinaryContext &BC) {
processMemEvents();

// Mark all functions with registered events as having a valid profile.
const auto Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
: BinaryFunction::PF_LBR;
for (auto &BFI : BC.getBinaryFunctions()) {
BinaryFunction &BF = BFI.second;
FuncBranchData *FBD = getBranchData(BF);
if (FBD || getFuncSampleData(BF.getNames())) {
BF.markProfiled(Flags);
if (FBD)
BF.RawBranchCount = FBD->getNumExecutedBranches();
if (FuncBranchData *FBD = getBranchData(BF)) {
BF.markProfiled(BinaryFunction::PF_LBR);
BF.RawBranchCount = FBD->getNumExecutedBranches();
} else if (FuncSampleData *FSD = getFuncSampleData(BF.getNames())) {
BF.markProfiled(BinaryFunction::PF_SAMPLE);
BF.RawBranchCount = FSD->getSamples();
}
}

Expand Down Expand Up @@ -630,10 +629,18 @@ StringRef DataAggregator::getLocationName(const BinaryFunction &Func,

bool DataAggregator::doSample(BinaryFunction &OrigFunc, uint64_t Address,
uint64_t Count) {
// To record executed bytes, use basic block size as is regardless of BAT.
uint64_t BlockSize = 0;
if (BinaryBasicBlock *BB = OrigFunc.getBasicBlockContainingOffset(
Address - OrigFunc.getAddress()))
BlockSize = BB->getOriginalSize();

BinaryFunction *ParentFunc = getBATParentFunction(OrigFunc);
BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc;
if (ParentFunc || (BAT && !BAT->isBATFunction(OrigFunc.getAddress())))
if (ParentFunc || (BAT && !BAT->isBATFunction(Func.getAddress())))
NumColdSamples += Count;
// Attach executed bytes to parent function in case of cold fragment.
Func.SampleCountInBytes += Count * BlockSize;

auto I = NamesToSamples.find(Func.getOneName());
if (I == NamesToSamples.end()) {
Expand Down
7 changes: 7 additions & 0 deletions bolt/lib/Profile/DataReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,13 @@ uint64_t FuncSampleData::getSamples(uint64_t Start, uint64_t End) const {
return Result;
}

/// Accumulates the hit count of every sample entry stored in Data and returns
/// the grand total for this function.
uint64_t FuncSampleData::getSamples() const {
  uint64_t TotalHits = 0;
  for (const SampleInfo &Sample : Data) {
    TotalHits += Sample.Hits;
  }
  return TotalHits;
}

void FuncSampleData::bumpCount(uint64_t Offset, uint64_t Count) {
auto Iter = Index.find(Offset);
if (Iter == Index.end()) {
Expand Down
1 change: 1 addition & 0 deletions bolt/test/perf2bolt/perf_test.test
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ RUN: perf2bolt %t -p=%t2 -o %t3 -nl -ignore-build-id 2>&1 | FileCheck %s

CHECK-NOT: PERF2BOLT-ERROR
CHECK-NOT: !! WARNING !! This high mismatch ratio indicates the input binary is probably not the same binary used during profiling collection.
CHECK: BOLT-INFO: Functions with density >= {{.*}} account for 99.00% total sample counts.

RUN: %clang %S/Inputs/perf_test.c -no-pie -fuse-ld=lld -o %t4
RUN: perf record -Fmax -e cycles:u -o %t5 -- %t4
Expand Down
Loading