Skip to content

[BOLT] Expose external entry count for functions #141674

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions bolt/include/bolt/Core/BinaryFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,10 @@ class BinaryFunction {
/// The profile data for the number of times the function was executed.
/// Starts out as COUNT_NO_PROFILE, the value used to signal that there is no
/// profile info for the function.
uint64_t ExecutionCount{COUNT_NO_PROFILE};

/// Profile data for the number of times this function was entered from
/// external code (DSO, JIT, etc).
/// Starts out as 0; unlike ExecutionCount, it is not initialized to
/// COUNT_NO_PROFILE.
uint64_t ExternEntryCount{0};

/// Profile match ratio, stored as a fraction (it is scaled by 100 when
/// reported as a percentage). Starts out as 0.
float ProfileMatchRatio{0.0f};

Expand Down Expand Up @@ -1877,6 +1881,10 @@ class BinaryFunction {
return *this;
}

/// Record how many times the function was entered from external code
/// (DSO, JIT, etc). \p Count replaces any previously stored value.
void setExternEntryCount(uint64_t Count) {
  ExternEntryCount = Count;
}

/// Adjust execution count for the function by a given \p Count. The value
/// \p Count will be subtracted from the current function count.
///
Expand Down Expand Up @@ -1904,6 +1912,10 @@ class BinaryFunction {
/// Return COUNT_NO_PROFILE if there's no profile info.
uint64_t getExecutionCount() const { return ExecutionCount; }

/// Return the number of times the function was entered from external code
/// (DSO, JIT, etc), as recorded in the profile.
uint64_t getExternEntryCount() const {
  return ExternEntryCount;
}

/// Return the raw count of branch executions that the profile attributes
/// to this function.
uint64_t getRawSampleCount() const {
  return RawSampleCount;
}
Expand Down
3 changes: 3 additions & 0 deletions bolt/include/bolt/Profile/DataReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,9 @@ struct FuncBranchData {
/// Total execution count for the function.
int64_t ExecutionCount{0};

/// Total entry count from external code for the function.
uint64_t ExternEntryCount{0};

/// Indicate if the data was used.
bool Used{false};

Expand Down
2 changes: 2 additions & 0 deletions bolt/include/bolt/Profile/ProfileYAMLMapping.h
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ struct BinaryFunctionProfile {
uint32_t Id{0};
llvm::yaml::Hex64 Hash{0};
uint64_t ExecCount{0};
uint64_t ExternEntryCount{0};
std::vector<BinaryBasicBlockProfile> Blocks;
std::vector<InlineTreeNode> InlineTree;
bool Used{false};
Expand All @@ -218,6 +219,7 @@ template <> struct MappingTraits<bolt::BinaryFunctionProfile> {
YamlIO.mapRequired("fid", BFP.Id);
YamlIO.mapRequired("hash", BFP.Hash);
YamlIO.mapRequired("exec", BFP.ExecCount);
YamlIO.mapOptional("extern", BFP.ExternEntryCount, 0);
YamlIO.mapRequired("nblocks", BFP.NumBasicBlocks);
YamlIO.mapOptional("blocks", BFP.Blocks,
std::vector<bolt::BinaryBasicBlockProfile>());
Expand Down
2 changes: 2 additions & 0 deletions bolt/lib/Core/BinaryFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,8 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) {
OS << "\n Sample Count: " << RawSampleCount;
OS << "\n Profile Acc : " << format("%.1f%%", ProfileMatchRatio * 100.0f);
}
if (ExternEntryCount)
OS << "\n Extern Entry Count: " << ExternEntryCount;

if (opts::PrintDynoStats && !getLayout().block_empty()) {
OS << '\n';
Expand Down
3 changes: 3 additions & 0 deletions bolt/lib/Passes/ProfileQualityStats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,9 @@ void computeFlowMappings(const BinaryContext &BC, FlowInfo &TotalFlowMap) {
std::vector<uint64_t> &MaxCountMap = TotalMaxCountMaps[FunctionNum];
std::vector<uint64_t> &MinCountMap = TotalMinCountMaps[FunctionNum];

// Record external entry count into CallGraphIncomingFlows
CallGraphIncomingFlows[FunctionNum] += Function->getExternEntryCount();

// Update MaxCountMap, MinCountMap, and CallGraphIncomingFlows
auto recordCall = [&](const BinaryBasicBlock *SourceBB,
const MCSymbol *DestSymbol, uint64_t Count,
Expand Down
1 change: 1 addition & 0 deletions bolt/lib/Profile/DataAggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2289,6 +2289,7 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
YamlBF.Id = BF->getFunctionNumber();
YamlBF.Hash = BAT->getBFHash(FuncAddress);
YamlBF.ExecCount = BF->getKnownExecutionCount();
YamlBF.ExternEntryCount = BF->getExternEntryCount();
YamlBF.NumBasicBlocks = BAT->getNumBasicBlocks(FuncAddress);
const BoltAddressTranslation::BBHashMapTy &BlockMap =
BAT->getBBHashMap(FuncAddress);
Expand Down
6 changes: 6 additions & 0 deletions bolt/lib/Profile/DataReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ void FuncBranchData::appendFrom(const FuncBranchData &FBD, uint64_t Offset) {
}
llvm::stable_sort(Data);
ExecutionCount += FBD.ExecutionCount;
ExternEntryCount += FBD.ExternEntryCount;
for (auto I = FBD.EntryData.begin(), E = FBD.EntryData.end(); I != E; ++I) {
assert(I->To.Name == FBD.Name);
auto NewElmt = EntryData.insert(EntryData.end(), *I);
Expand Down Expand Up @@ -269,6 +270,7 @@ Error DataReader::preprocessProfile(BinaryContext &BC) {
if (FuncBranchData *FuncData = getBranchDataForNames(Function.getNames())) {
setBranchData(Function, FuncData);
Function.ExecutionCount = FuncData->ExecutionCount;
Function.ExternEntryCount = FuncData->ExternEntryCount;
FuncData->Used = true;
}
}
Expand Down Expand Up @@ -419,6 +421,7 @@ void DataReader::matchProfileData(BinaryFunction &BF) {
if (fetchProfileForOtherEntryPoints(BF)) {
BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD);
BF.ExecutionCount = FBD->ExecutionCount;
BF.ExternEntryCount = FBD->ExternEntryCount;
BF.RawSampleCount = FBD->getNumExecutedBranches();
}
return;
Expand Down Expand Up @@ -449,6 +452,7 @@ void DataReader::matchProfileData(BinaryFunction &BF) {
setBranchData(BF, NewBranchData);
NewBranchData->Used = true;
BF.ExecutionCount = NewBranchData->ExecutionCount;
BF.ExternEntryCount = NewBranchData->ExternEntryCount;
BF.ProfileMatchRatio = 1.0f;
break;
}
Expand Down Expand Up @@ -1190,6 +1194,8 @@ std::error_code DataReader::parse() {
if (BI.To.IsSymbol && BI.To.Offset == 0) {
I = GetOrCreateFuncEntry(BI.To.Name);
I->second.ExecutionCount += BI.Branches;
if (!BI.From.IsSymbol)
I->second.ExternEntryCount += BI.Branches;
}
}

Expand Down
1 change: 1 addition & 0 deletions bolt/lib/Profile/YAMLProfileReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ bool YAMLProfileReader::parseFunctionProfile(
uint64_t FunctionExecutionCount = 0;

BF.setExecutionCount(YamlBF.ExecCount);
BF.setExternEntryCount(YamlBF.ExternEntryCount);

uint64_t FuncRawBranchCount = 0;
for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks)
Expand Down
1 change: 1 addition & 0 deletions bolt/lib/Profile/YAMLProfileWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
YamlBF.Hash = BF.getHash();
YamlBF.NumBasicBlocks = BF.size();
YamlBF.ExecCount = BF.getKnownExecutionCount();
YamlBF.ExternEntryCount = BF.getExternEntryCount();
DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId;
if (PseudoProbeDecoder && BF.getGUID()) {
std::tie(YamlBF.InlineTree, InlineTreeNodeId) =
Expand Down
2 changes: 2 additions & 0 deletions bolt/test/X86/shrinkwrapping.test
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ REQUIRES: shell

RUN: %clangxx %cxxflags -no-pie %S/Inputs/exc4sw.S -o %t.exe -Wl,-q
RUN: llvm-bolt %t.exe -o %t --relocs --frame-opt=all \
RUN: --print-only=main --print-cfg \
RUN: --data=%p/Inputs/exc4sw.fdata --reorder-blocks=cache 2>&1 | \
RUN: FileCheck %s --check-prefix=CHECK-BOLT

Expand All @@ -19,6 +20,7 @@ RUN: llvm-objdump --dwarf=frames %t | grep -A20 -e \
RUN: `llvm-nm --numeric-sort %t | grep main | tail -n 1 | cut -f1 -d' ' | \
RUN: tail -c9` 2>&1 | FileCheck %s --check-prefix=CHECK-OUTPUT

CHECK-BOLT: Extern Entry Count: 100
CHECK-BOLT: Shrink wrapping moved 2 spills inserting load/stores and 0 spills inserting push/pops

CHECK-INPUT: DW_CFA_advance_loc: 2
Expand Down
Loading