Skip to content

[BOLT] Cover all call sites in writeBATYAML #87743

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions bolt/include/bolt/Profile/BoltAddressTranslation.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,11 +119,6 @@ class BoltAddressTranslation {
/// True if a given \p Address is a function with translation table entry.
bool isBATFunction(uint64_t Address) const { return Maps.count(Address); }

/// Returns branch offsets grouped by containing basic block in a given
/// function.
std::unordered_map<uint32_t, std::vector<uint32_t>>
getBFBranches(uint64_t FuncOutputAddress) const;

/// For a given \p Symbol in the output binary and known \p InputOffset
/// return a corresponding pair of parent BinaryFunction and secondary entry
/// point in it.
Expand Down Expand Up @@ -193,7 +188,7 @@ class BoltAddressTranslation {
EntryTy(unsigned Index, size_t Hash) : Index(Index), Hash(Hash) {}
};

std::unordered_map<uint32_t, EntryTy> Map;
std::map<uint32_t, EntryTy> Map;
const EntryTy &getEntry(uint32_t BBInputOffset) const {
auto It = Map.find(BBInputOffset);
assert(It != Map.end());
Expand All @@ -218,6 +213,10 @@ class BoltAddressTranslation {
}

size_t getNumBasicBlocks() const { return Map.size(); }

auto begin() const { return Map.begin(); }
auto end() const { return Map.end(); }
auto upper_bound(uint32_t Offset) const { return Map.upper_bound(Offset); }
};

/// Map function output address to its hash and basic blocks hash map.
Expand Down
20 changes: 0 additions & 20 deletions bolt/lib/Profile/BoltAddressTranslation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -582,26 +582,6 @@ void BoltAddressTranslation::saveMetadata(BinaryContext &BC) {
}
}

std::unordered_map<uint32_t, std::vector<uint32_t>>
BoltAddressTranslation::getBFBranches(uint64_t OutputAddress) const {
std::unordered_map<uint32_t, std::vector<uint32_t>> Branches;
auto FuncIt = Maps.find(OutputAddress);
assert(FuncIt != Maps.end());
std::vector<uint32_t> InputOffsets;
for (const auto &KV : FuncIt->second)
InputOffsets.emplace_back(KV.second);
// Sort with LSB BRANCHENTRY bit.
llvm::sort(InputOffsets);
uint32_t BBOffset{0};
for (uint32_t InOffset : InputOffsets) {
if (InOffset & BRANCHENTRY)
Branches[BBOffset].push_back(InOffset >> 1);
else
BBOffset = InOffset >> 1;
}
return Branches;
}

unsigned
BoltAddressTranslation::getSecondaryEntryPointId(uint64_t Address,
uint32_t Offset) const {
Expand Down
113 changes: 47 additions & 66 deletions bolt/lib/Profile/DataAggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2341,87 +2341,68 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
YamlBF.NumBasicBlocks = BAT->getNumBasicBlocks(FuncAddress);
const BoltAddressTranslation::BBHashMapTy &BlockMap =
BAT->getBBHashMap(FuncAddress);
YamlBF.Blocks.resize(YamlBF.NumBasicBlocks);

auto addSuccProfile = [&](yaml::bolt::BinaryBasicBlockProfile &YamlBB,
uint64_t SuccOffset, unsigned SuccDataIdx) {
for (auto &&[Idx, YamlBB] : llvm::enumerate(YamlBF.Blocks))
YamlBB.Index = Idx;

for (auto BI = BlockMap.begin(), BE = BlockMap.end(); BI != BE; ++BI)
YamlBF.Blocks[BI->second.getBBIndex()].Hash = BI->second.getBBHash();

auto getSuccessorInfo = [&](uint32_t SuccOffset, unsigned SuccDataIdx) {
const llvm::bolt::BranchInfo &BI = Branches.Data.at(SuccDataIdx);
yaml::bolt::SuccessorInfo SI;
SI.Index = BlockMap.getBBIndex(SuccOffset);
SI.Count = BI.Branches;
SI.Mispreds = BI.Mispreds;
YamlBB.Successors.emplace_back(SI);
return SI;
};

std::unordered_map<uint32_t, std::vector<uint32_t>> BFBranches =
BAT->getBFBranches(FuncAddress);

auto addCallsProfile = [&](yaml::bolt::BinaryBasicBlockProfile &YamlBB,
uint64_t Offset) {
// Iterate over BRANCHENTRY records in the current block
for (uint32_t BranchOffset : BFBranches[Offset]) {
if (!Branches.InterIndex.contains(BranchOffset))
continue;
for (const auto &[CallToLoc, CallToIdx] :
Branches.InterIndex.at(BranchOffset)) {
const llvm::bolt::BranchInfo &BI = Branches.Data.at(CallToIdx);
yaml::bolt::CallSiteInfo YamlCSI;
YamlCSI.DestId = 0; // designated for unknown functions
YamlCSI.EntryDiscriminator = 0;
YamlCSI.Count = BI.Branches;
YamlCSI.Mispreds = BI.Mispreds;
YamlCSI.Offset = BranchOffset - Offset;
BinaryData *CallTargetBD = BC.getBinaryDataByName(CallToLoc.Name);
if (!CallTargetBD) {
YamlBB.CallSites.emplace_back(YamlCSI);
continue;
}
uint64_t CallTargetAddress = CallTargetBD->getAddress();
BinaryFunction *CallTargetBF =
BC.getBinaryFunctionAtAddress(CallTargetAddress);
if (!CallTargetBF) {
YamlBB.CallSites.emplace_back(YamlCSI);
continue;
}
// Calls between hot and cold fragments must be handled in
// fixupBATProfile.
assert(CallTargetBF != BF && "invalid CallTargetBF");
YamlCSI.DestId = CallTargetBF->getFunctionNumber();
if (CallToLoc.Offset) {
if (BAT->isBATFunction(CallTargetAddress)) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Unsupported secondary "
"entry point in BAT function "
<< CallToLoc.Name << '\n');
} else if (const BinaryBasicBlock *CallTargetBB =
CallTargetBF->getBasicBlockAtOffset(
CallToLoc.Offset)) {
// Only record true call information, ignoring returns (normally
// won't have a target basic block) and jumps to the landing
// pads (not an entry point).
if (CallTargetBB->isEntryPoint()) {
YamlCSI.EntryDiscriminator =
CallTargetBF->getEntryIDForSymbol(
CallTargetBB->getLabel());
}
}
}
YamlBB.CallSites.emplace_back(YamlCSI);
}
}
auto getCallSiteInfo = [&](Location CallToLoc, unsigned CallToIdx,
uint32_t Offset) {
const llvm::bolt::BranchInfo &BI = Branches.Data.at(CallToIdx);
yaml::bolt::CallSiteInfo CSI;
CSI.DestId = 0; // designated for unknown functions
CSI.EntryDiscriminator = 0;
CSI.Count = BI.Branches;
CSI.Mispreds = BI.Mispreds;
CSI.Offset = Offset;
if (BinaryData *BD = BC.getBinaryDataByName(CallToLoc.Name))
YAMLProfileWriter::setCSIDestination(BC, CSI, BD->getSymbol(), BAT,
CallToLoc.Offset);
return CSI;
};

for (const auto &[FromOffset, SuccKV] : Branches.IntraIndex) {
yaml::bolt::BinaryBasicBlockProfile YamlBB;
if (!BlockMap.isInputBlock(FromOffset))
continue;
YamlBB.Index = BlockMap.getBBIndex(FromOffset);
YamlBB.Hash = BlockMap.getBBHash(FromOffset);
const unsigned Index = BlockMap.getBBIndex(FromOffset);
yaml::bolt::BinaryBasicBlockProfile &YamlBB = YamlBF.Blocks[Index];
for (const auto &[SuccOffset, SuccDataIdx] : SuccKV)
addSuccProfile(YamlBB, SuccOffset, SuccDataIdx);
addCallsProfile(YamlBB, FromOffset);
if (YamlBB.ExecCount || !YamlBB.Successors.empty() ||
!YamlBB.CallSites.empty())
YamlBF.Blocks.emplace_back(YamlBB);
if (BlockMap.isInputBlock(SuccOffset))
YamlBB.Successors.emplace_back(
getSuccessorInfo(SuccOffset, SuccDataIdx));
}
for (const auto &[FromOffset, CallTo] : Branches.InterIndex) {
auto BlockIt = BlockMap.upper_bound(FromOffset);
--BlockIt;
const unsigned BlockOffset = BlockIt->first;
const unsigned BlockIndex = BlockIt->second.getBBIndex();
yaml::bolt::BinaryBasicBlockProfile &YamlBB = YamlBF.Blocks[BlockIndex];
const uint32_t Offset = FromOffset - BlockOffset;
for (const auto &[CallToLoc, CallToIdx] : CallTo)
YamlBB.CallSites.emplace_back(
getCallSiteInfo(CallToLoc, CallToIdx, Offset));
llvm::sort(YamlBB.CallSites, [](yaml::bolt::CallSiteInfo &A,
yaml::bolt::CallSiteInfo &B) {
return A.Offset < B.Offset;
});
}
// Drop blocks without a hash, won't be useful for stale matching.
llvm::erase_if(YamlBF.Blocks,
[](const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
return YamlBB.Hash == (yaml::Hex64)0;
});
BP.Functions.emplace_back(YamlBF);
}
}
Expand Down
58 changes: 58 additions & 0 deletions bolt/test/X86/yaml-secondary-entry-discriminator.s
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,62 @@
# RUN: llvm-bolt %t.exe -o %t.bat --data %t.fdata --funcs=func \
# RUN: --split-functions --split-strategy=all --split-all-cold --enable-bat

## Prepare pre-aggregated profile using %t.bat
# RUN: link_fdata %s %t.bat %t.preagg PREAGG
## Strip labels used for pre-aggregated profile
# RUN: llvm-strip -NLcall -NLindcall %t.bat

## Convert pre-aggregated profile using BAT
# RUN: perf2bolt %t.bat -p %t.preagg --pa -o %t.bat.fdata -w %t.bat.yaml

## Convert BAT fdata into YAML
# RUN: llvm-bolt %t.exe -data %t.bat.fdata -w %t.bat.fdata-yaml -o /dev/null

## Check fdata YAML - make sure that a direct call has discriminator field
# RUN: FileCheck %s --input-file %t.bat.fdata-yaml -check-prefix CHECK-BAT-YAML

## Check BAT YAML - make sure that a direct call has discriminator field
# RUN: FileCheck %s --input-file %t.bat.yaml --check-prefix CHECK-BAT-YAML

## YAML BAT test of calling BAT secondary entry from BAT function
# RUN: llvm-bolt %t.exe -o %t.bat2 --data %t.fdata --funcs=main,func \
# RUN: --split-functions --split-strategy=all --split-all-cold --enable-bat

## Prepare pre-aggregated profile using %t.bat
# RUN: link_fdata %s %t.bat2 %t.preagg2 PREAGG2

## Strip labels used for pre-aggregated profile
# RUN: llvm-strip -NLcall -NLindcall %t.bat2

## Convert pre-aggregated profile using BAT
# RUN: perf2bolt %t.bat2 -p %t.preagg2 --pa -o %t.bat2.fdata -w %t.bat2.yaml

## Convert BAT fdata into YAML
# RUN: llvm-bolt %t.exe -data %t.bat2.fdata -w %t.bat2.fdata-yaml -o /dev/null

## Check fdata YAML - make sure that a direct call has discriminator field
# RUN: FileCheck %s --input-file %t.bat2.fdata-yaml -check-prefix CHECK-BAT-YAML

## Check BAT YAML - make sure that a direct call has discriminator field
# RUN: FileCheck %s --input-file %t.bat2.yaml --check-prefix CHECK-BAT-YAML

# CHECK-BAT-YAML: - name: main
# CHECK-BAT-YAML-NEXT: fid: [[#]]
# CHECK-BAT-YAML-NEXT: hash: 0xADF270D550151185
# CHECK-BAT-YAML-NEXT: exec: 0
# CHECK-BAT-YAML-NEXT: nblocks: 4
# CHECK-BAT-YAML-NEXT: blocks:
# CHECK-BAT-YAML: - bid: 1
# CHECK-BAT-YAML-NEXT: insns: [[#]]
# CHECK-BAT-YAML-NEXT: hash: 0x36A303CBA4360018
# CHECK-BAT-YAML-NEXT: calls: [ { off: 0x0, fid: [[#]], disc: 1, cnt: 1

.globl func
.type func, @function
func:
# FDATA: 0 [unknown] 0 1 func 0 1 0
# PREAGG: B X:0 #func# 1 1
# PREAGG2: B X:0 #func# 1 1
.cfi_startproc
pushq %rbp
movq %rsp, %rbp
Expand Down Expand Up @@ -71,12 +123,18 @@ main:
movl $0, -4(%rbp)
testq %rax, %rax
jne Lindcall
.globl Lcall
Lcall:
call secondary_entry
# FDATA: 1 main #Lcall# 1 secondary_entry 0 1 1
# PREAGG: B #Lcall# #secondary_entry# 1 1
# PREAGG2: B #main.cold.0# #func.cold.0# 1 1
.globl Lindcall
Lindcall:
callq *%rax
# FDATA: 1 main #Lindcall# 1 secondary_entry 0 1 1
# PREAGG: B #Lindcall# #secondary_entry# 1 1
# PREAGG2: B #main.cold.1# #func.cold.0# 1 1
xorl %eax, %eax
addq $16, %rsp
popq %rbp
Expand Down