Skip to content

[BOLT][BAT] Fix handling of split functions #87569

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Apr 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 9 additions & 10 deletions bolt/include/bolt/Profile/DataAggregator.h
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,10 @@ class DataAggregator : public DataReader {
/// Aggregation statistics
uint64_t NumInvalidTraces{0};
uint64_t NumLongRangeTraces{0};
/// Specifies how many samples were recorded in cold areas if we are dealing
/// with profiling data collected in a bolted binary. For LBRs, incremented
/// for the source of the branch to avoid counting cold activity twice (one
/// for source and another for destination).
uint64_t NumColdSamples{0};

/// Looks into system PATH for Linux Perf and sets up the aggregator to use it
Expand All @@ -245,14 +249,12 @@ class DataAggregator : public DataReader {
/// disassembled BinaryFunctions
BinaryFunction *getBinaryFunctionContainingAddress(uint64_t Address) const;

/// Perform BAT translation for a given \p Func and return the parent
/// BinaryFunction or nullptr.
BinaryFunction *getBATParentFunction(const BinaryFunction &Func) const;

/// Retrieve the location name to be used for samples recorded in \p Func.
/// If doing BAT translation, link cold parts to the hot part names (used by
/// the original binary). \p Count specifies how many samples were recorded
/// at that location, so we can tally total activity in cold areas if we are
/// dealing with profiling data collected in a bolted binary. For LBRs,
/// \p Count should only be used for the source of the branch to avoid
/// counting cold activity twice (one for source and another for destination).
StringRef getLocationName(BinaryFunction &Func, uint64_t Count);
StringRef getLocationName(const BinaryFunction &Func) const;

/// Semantic actions - parser hooks to interpret parsed perf samples
/// Register a sample (non-LBR mode), i.e. a new hit at \p Address
Expand Down Expand Up @@ -467,9 +469,6 @@ class DataAggregator : public DataReader {
std::error_code writeBATYAML(BinaryContext &BC,
StringRef OutputFilename) const;

/// Fixup profile collected on BOLTed binary, namely handle split functions.
void fixupBATProfile(BinaryContext &BC);

/// Filter out binaries based on PID
void filterBinaryMMapInfo();

Expand Down
114 changes: 49 additions & 65 deletions bolt/lib/Profile/DataAggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -604,8 +604,6 @@ Error DataAggregator::readProfile(BinaryContext &BC) {
// BAT YAML is handled by DataAggregator since normal YAML output requires
// CFG which is not available in BAT mode.
if (usesBAT()) {
// Postprocess split function profile for BAT
fixupBATProfile(BC);
if (opts::ProfileFormat == opts::ProfileFormatKind::PF_YAML)
if (std::error_code EC = writeBATYAML(BC, opts::OutputFilename))
report_error("cannot create output data file", EC);
Expand Down Expand Up @@ -664,18 +662,19 @@ DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const {
/*UseMaxSize=*/true);
}

StringRef DataAggregator::getLocationName(BinaryFunction &Func,
uint64_t Count) {
/// Resolve the parent function of \p Func through the BAT table.
///
/// Returns nullptr when no BAT table is attached or when BAT reports a zero
/// parent address for \p Func. Otherwise returns whatever
/// getBinaryFunctionContainingAddress() finds at the parent address, which
/// may itself be nullptr.
BinaryFunction *
DataAggregator::getBATParentFunction(const BinaryFunction &Func) const {
  // Nothing to translate without a BAT table.
  if (!BAT)
    return nullptr;

  // A zero parent address is treated as "no parent".
  const uint64_t HotAddr = BAT->fetchParentAddress(Func.getAddress());
  if (!HotAddr)
    return nullptr;

  return getBinaryFunctionContainingAddress(HotAddr);
}

StringRef DataAggregator::getLocationName(const BinaryFunction &Func) const {
if (!BAT)
return Func.getOneName();

const BinaryFunction *OrigFunc = &Func;
if (const uint64_t HotAddr = BAT->fetchParentAddress(Func.getAddress())) {
NumColdSamples += Count;
BinaryFunction *HotFunc = getBinaryFunctionContainingAddress(HotAddr);
if (HotFunc)
OrigFunc = HotFunc;
}
// If it is a local function, prefer the name containing the file name where
// the local function was declared
for (StringRef AlternativeName : OrigFunc->getNames()) {
Expand All @@ -690,12 +689,17 @@ StringRef DataAggregator::getLocationName(BinaryFunction &Func,
return OrigFunc->getOneName();
}

bool DataAggregator::doSample(BinaryFunction &Func, uint64_t Address,
bool DataAggregator::doSample(BinaryFunction &OrigFunc, uint64_t Address,
uint64_t Count) {
BinaryFunction *ParentFunc = getBATParentFunction(OrigFunc);
BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc;
if (ParentFunc)
NumColdSamples += Count;

auto I = NamesToSamples.find(Func.getOneName());
if (I == NamesToSamples.end()) {
bool Success;
StringRef LocName = getLocationName(Func, Count);
StringRef LocName = getLocationName(Func);
std::tie(I, Success) = NamesToSamples.insert(
std::make_pair(Func.getOneName(),
FuncSampleData(LocName, FuncSampleData::ContainerTy())));
Expand All @@ -715,22 +719,12 @@ bool DataAggregator::doIntraBranch(BinaryFunction &Func, uint64_t From,
FuncBranchData *AggrData = getBranchData(Func);
if (!AggrData) {
AggrData = &NamesToBranches[Func.getOneName()];
AggrData->Name = getLocationName(Func, Count);
AggrData->Name = getLocationName(Func);
setBranchData(Func, AggrData);
}

From -= Func.getAddress();
To -= Func.getAddress();
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: "
<< formatv("{0} @ {1:x} -> {0} @ {2:x}\n", Func, From, To));
if (BAT) {
From = BAT->translate(Func.getAddress(), From, /*IsBranchSrc=*/true);
To = BAT->translate(Func.getAddress(), To, /*IsBranchSrc=*/false);
LLVM_DEBUG(
dbgs() << "BOLT-DEBUG: BAT translation on bumpBranchCount: "
<< formatv("{0} @ {1:x} -> {0} @ {2:x}\n", Func, From, To));
}

AggrData->bumpBranchCount(From, To, Count, Mispreds);
return true;
}
Expand All @@ -744,30 +738,24 @@ bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
StringRef SrcFunc;
StringRef DstFunc;
if (FromFunc) {
SrcFunc = getLocationName(*FromFunc, Count);
SrcFunc = getLocationName(*FromFunc);
FromAggrData = getBranchData(*FromFunc);
if (!FromAggrData) {
FromAggrData = &NamesToBranches[FromFunc->getOneName()];
FromAggrData->Name = SrcFunc;
setBranchData(*FromFunc, FromAggrData);
}
From -= FromFunc->getAddress();
if (BAT)
From = BAT->translate(FromFunc->getAddress(), From, /*IsBranchSrc=*/true);

recordExit(*FromFunc, From, Mispreds, Count);
}
if (ToFunc) {
DstFunc = getLocationName(*ToFunc, 0);
DstFunc = getLocationName(*ToFunc);
ToAggrData = getBranchData(*ToFunc);
if (!ToAggrData) {
ToAggrData = &NamesToBranches[ToFunc->getOneName()];
ToAggrData->Name = DstFunc;
setBranchData(*ToFunc, ToAggrData);
}
To -= ToFunc->getAddress();
if (BAT)
To = BAT->translate(ToFunc->getAddress(), To, /*IsBranchSrc=*/false);

recordEntry(*ToFunc, To, Mispreds, Count);
}
Expand All @@ -783,15 +771,32 @@ bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,

bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
uint64_t Mispreds) {
BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From);
BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To);
auto handleAddress = [&](uint64_t &Addr, bool IsFrom) -> BinaryFunction * {
if (BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr)) {
Addr -= Func->getAddress();

if (BAT)
Addr = BAT->translate(Func->getAddress(), Addr, IsFrom);

if (BinaryFunction *ParentFunc = getBATParentFunction(*Func)) {
Func = ParentFunc;
if (IsFrom)
NumColdSamples += Count;
}

return Func;
}
return nullptr;
};

BinaryFunction *FromFunc = handleAddress(From, /*IsFrom=*/true);
BinaryFunction *ToFunc = handleAddress(To, /*IsFrom=*/false);
if (!FromFunc && !ToFunc)
return false;

// Treat recursive control transfers as inter-branches.
if (FromFunc == ToFunc && (To != ToFunc->getAddress())) {
recordBranch(*FromFunc, From - FromFunc->getAddress(),
To - FromFunc->getAddress(), Count, Mispreds);
if (FromFunc == ToFunc && To != 0) {
recordBranch(*FromFunc, From, To, Count, Mispreds);
return doIntraBranch(*FromFunc, From, To, Count, Mispreds);
}

Expand Down Expand Up @@ -842,9 +847,14 @@ bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
<< FromFunc->getPrintName() << ":"
<< Twine::utohexstr(First.To) << " to "
<< Twine::utohexstr(Second.From) << ".\n");
for (const std::pair<uint64_t, uint64_t> &Pair : *FTs)
doIntraBranch(*FromFunc, Pair.first + FromFunc->getAddress(),
Pair.second + FromFunc->getAddress(), Count, false);
BinaryFunction *ParentFunc = getBATParentFunction(*FromFunc);
for (auto [From, To] : *FTs) {
if (BAT) {
From = BAT->translate(FromFunc->getAddress(), From, /*IsBranchSrc=*/true);
To = BAT->translate(FromFunc->getAddress(), To, /*IsBranchSrc=*/false);
}
doIntraBranch(ParentFunc ? *ParentFunc : *FromFunc, From, To, Count, false);
}

return true;
}
Expand Down Expand Up @@ -2273,29 +2283,6 @@ DataAggregator::writeAggregatedFile(StringRef OutputFilename) const {
return std::error_code();
}

/// Post-process the aggregated branch profile for BAT: for every entry in
/// NamesToBranches that is a BAT function located in the main code section,
/// move inter-function branch records whose target name equals the function's
/// own name from InterIndex into IntraIndex.
///
/// \p BC is used to look up the BinaryData for each function name and to
/// obtain the main code section name.
///
/// BAT is dereferenced unconditionally here, so it must be set before this
/// is called.
void DataAggregator::fixupBATProfile(BinaryContext &BC) {
for (auto &[FuncName, Branches] : NamesToBranches) {
// Every name recorded in NamesToBranches is expected to resolve to a
// BinaryData object; this is only enforced by the assert below.
BinaryData *BD = BC.getBinaryDataByName(FuncName);
assert(BD);
uint64_t FuncAddress = BD->getAddress();
// Skip functions that BAT does not know about.
if (!BAT->isBATFunction(FuncAddress))
continue;
// Filter out cold fragments: only entries whose section is the main code
// section are processed.
if (!BD->getSectionName().equals(BC.getMainCodeSectionName()))
continue;
// Convert inter-branches between hot and cold fragments into
// intra-branches.
for (auto &[OffsetFrom, CallToMap] : Branches.InterIndex) {
// NOTE(review): the erase below presumably removes from the same map that
// CallToMap refers to while it is being iterated. Whether that is safe
// depends on the container type of InterIndex, which is not visible here
// -- confirm.
for (auto &[CallToLoc, CallToIdx] : CallToMap) {
// Only branches that target this same function name are converted.
if (CallToLoc.Name != FuncName)
continue;
// Re-register the record under the intra-function index, keyed by source
// and destination offsets, then drop it from the inter-function index.
Branches.IntraIndex[OffsetFrom][CallToLoc.Offset] = CallToIdx;
Branches.InterIndex[OffsetFrom].erase(CallToLoc);
}
}
}
}

std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
StringRef OutputFilename) const {
std::error_code EC;
Expand Down Expand Up @@ -2345,9 +2332,6 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
uint64_t FuncAddress = BD->getAddress();
if (!BAT->isBATFunction(FuncAddress))
continue;
// Filter out cold fragments
if (!BD->getSectionName().equals(BC.getMainCodeSectionName()))
continue;
BinaryFunction *BF = BC.getBinaryFunctionAtAddress(FuncAddress);
assert(BF);
YamlBF.Name = FuncName.str();
Expand Down
16 changes: 8 additions & 8 deletions bolt/test/X86/yaml-secondary-entry-discriminator.s
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# This reproduces a bug with BOLT setting incorrect discriminator for
# secondary entry points in YAML profile.
## This reproduces a bug with BOLT setting incorrect discriminator for
## secondary entry points in YAML profile.

# REQUIRES: system-linux
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
Expand All @@ -24,7 +24,7 @@
# CHECK-NEXT: hash: {{.*}}
# CHECK-NEXT: calls: [ { off: 0x0, fid: 1, disc: 1, cnt: 1, mis: 1 } ]

# Make sure that the profile is attached correctly
## Make sure that the profile is attached correctly
# RUN: llvm-bolt %t.exe -o %t.out --data %t.yaml --print-profile \
# RUN: --print-only=main | FileCheck %s --check-prefix=CHECK-CFG

Expand All @@ -33,8 +33,8 @@
# CHECK-CFG: callq *%rax # Offset: [[#]] # CallProfile: 1 (1 misses) :
# CHECK-CFG-NEXT: { secondary_entry: 1 (1 misses) }

# YAML BAT test of calling BAT secondary entry from non-BAT function
# Now force-split func and skip main (making it call secondary entries)
## YAML BAT test of calling BAT secondary entry from non-BAT function
## Now force-split func and skip main (making it call secondary entries)
# RUN: llvm-bolt %t.exe -o %t.bat --data %t.fdata --funcs=func \
# RUN: --split-functions --split-strategy=all --split-all-cold --enable-bat

Expand All @@ -45,13 +45,13 @@ func:
.cfi_startproc
pushq %rbp
movq %rsp, %rbp
# Placeholder code to make splitting profitable
## Placeholder code to make splitting profitable
.rept 5
testq %rax, %rax
.endr
.globl secondary_entry
secondary_entry:
# Placeholder code to make splitting profitable
## Placeholder code to make splitting profitable
.rept 5
testq %rax, %rax
.endr
Expand Down Expand Up @@ -81,7 +81,7 @@ Lindcall:
addq $16, %rsp
popq %rbp
retq
# For relocations against .text
## For relocations against .text
call exit
.cfi_endproc
.size main, .-main