Skip to content

Commit 3997f0e

Browse files
authored
[BOLT] Cover all call sites in writeBATYAML
Call site information setting was conditioned on branch information presence for a given block. However, it's possible to have sampled profile lacking one or the other for a given basic block. Iterate over branch profiles and call profiles independently to cover all recorded profile data. Depends on #87569 Test Plan: Updated bolt/test/X86/yaml-secondary-entry-discriminator.s Reviewers: ayermolo, dcci, maksfb, rafaelauler Reviewed By: maksfb Pull Request: #87743
1 parent 8840992 commit 3997f0e

File tree

4 files changed

+110
-92
lines changed

4 files changed

+110
-92
lines changed

bolt/include/bolt/Profile/BoltAddressTranslation.h

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -119,11 +119,6 @@ class BoltAddressTranslation {
119119
/// True if a given \p Address is a function with translation table entry.
120120
bool isBATFunction(uint64_t Address) const { return Maps.count(Address); }
121121

122-
/// Returns branch offsets grouped by containing basic block in a given
123-
/// function.
124-
std::unordered_map<uint32_t, std::vector<uint32_t>>
125-
getBFBranches(uint64_t FuncOutputAddress) const;
126-
127122
/// For a given \p Symbol in the output binary and known \p InputOffset
128123
/// return a corresponding pair of parent BinaryFunction and secondary entry
129124
/// point in it.
@@ -193,7 +188,7 @@ class BoltAddressTranslation {
193188
EntryTy(unsigned Index, size_t Hash) : Index(Index), Hash(Hash) {}
194189
};
195190

196-
std::unordered_map<uint32_t, EntryTy> Map;
191+
std::map<uint32_t, EntryTy> Map;
197192
const EntryTy &getEntry(uint32_t BBInputOffset) const {
198193
auto It = Map.find(BBInputOffset);
199194
assert(It != Map.end());
@@ -218,6 +213,10 @@ class BoltAddressTranslation {
218213
}
219214

220215
size_t getNumBasicBlocks() const { return Map.size(); }
216+
217+
auto begin() const { return Map.begin(); }
218+
auto end() const { return Map.end(); }
219+
auto upper_bound(uint32_t Offset) const { return Map.upper_bound(Offset); }
221220
};
222221

223222
/// Map function output address to its hash and basic blocks hash map.

bolt/lib/Profile/BoltAddressTranslation.cpp

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -582,26 +582,6 @@ void BoltAddressTranslation::saveMetadata(BinaryContext &BC) {
582582
}
583583
}
584584

585-
std::unordered_map<uint32_t, std::vector<uint32_t>>
586-
BoltAddressTranslation::getBFBranches(uint64_t OutputAddress) const {
587-
std::unordered_map<uint32_t, std::vector<uint32_t>> Branches;
588-
auto FuncIt = Maps.find(OutputAddress);
589-
assert(FuncIt != Maps.end());
590-
std::vector<uint32_t> InputOffsets;
591-
for (const auto &KV : FuncIt->second)
592-
InputOffsets.emplace_back(KV.second);
593-
// Sort with LSB BRANCHENTRY bit.
594-
llvm::sort(InputOffsets);
595-
uint32_t BBOffset{0};
596-
for (uint32_t InOffset : InputOffsets) {
597-
if (InOffset & BRANCHENTRY)
598-
Branches[BBOffset].push_back(InOffset >> 1);
599-
else
600-
BBOffset = InOffset >> 1;
601-
}
602-
return Branches;
603-
}
604-
605585
unsigned
606586
BoltAddressTranslation::getSecondaryEntryPointId(uint64_t Address,
607587
uint32_t Offset) const {

bolt/lib/Profile/DataAggregator.cpp

Lines changed: 47 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -2341,87 +2341,68 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
23412341
YamlBF.NumBasicBlocks = BAT->getNumBasicBlocks(FuncAddress);
23422342
const BoltAddressTranslation::BBHashMapTy &BlockMap =
23432343
BAT->getBBHashMap(FuncAddress);
2344+
YamlBF.Blocks.resize(YamlBF.NumBasicBlocks);
23442345

2345-
auto addSuccProfile = [&](yaml::bolt::BinaryBasicBlockProfile &YamlBB,
2346-
uint64_t SuccOffset, unsigned SuccDataIdx) {
2346+
for (auto &&[Idx, YamlBB] : llvm::enumerate(YamlBF.Blocks))
2347+
YamlBB.Index = Idx;
2348+
2349+
for (auto BI = BlockMap.begin(), BE = BlockMap.end(); BI != BE; ++BI)
2350+
YamlBF.Blocks[BI->second.getBBIndex()].Hash = BI->second.getBBHash();
2351+
2352+
auto getSuccessorInfo = [&](uint32_t SuccOffset, unsigned SuccDataIdx) {
23472353
const llvm::bolt::BranchInfo &BI = Branches.Data.at(SuccDataIdx);
23482354
yaml::bolt::SuccessorInfo SI;
23492355
SI.Index = BlockMap.getBBIndex(SuccOffset);
23502356
SI.Count = BI.Branches;
23512357
SI.Mispreds = BI.Mispreds;
2352-
YamlBB.Successors.emplace_back(SI);
2358+
return SI;
23532359
};
23542360

2355-
std::unordered_map<uint32_t, std::vector<uint32_t>> BFBranches =
2356-
BAT->getBFBranches(FuncAddress);
2357-
2358-
auto addCallsProfile = [&](yaml::bolt::BinaryBasicBlockProfile &YamlBB,
2359-
uint64_t Offset) {
2360-
// Iterate over BRANCHENTRY records in the current block
2361-
for (uint32_t BranchOffset : BFBranches[Offset]) {
2362-
if (!Branches.InterIndex.contains(BranchOffset))
2363-
continue;
2364-
for (const auto &[CallToLoc, CallToIdx] :
2365-
Branches.InterIndex.at(BranchOffset)) {
2366-
const llvm::bolt::BranchInfo &BI = Branches.Data.at(CallToIdx);
2367-
yaml::bolt::CallSiteInfo YamlCSI;
2368-
YamlCSI.DestId = 0; // designated for unknown functions
2369-
YamlCSI.EntryDiscriminator = 0;
2370-
YamlCSI.Count = BI.Branches;
2371-
YamlCSI.Mispreds = BI.Mispreds;
2372-
YamlCSI.Offset = BranchOffset - Offset;
2373-
BinaryData *CallTargetBD = BC.getBinaryDataByName(CallToLoc.Name);
2374-
if (!CallTargetBD) {
2375-
YamlBB.CallSites.emplace_back(YamlCSI);
2376-
continue;
2377-
}
2378-
uint64_t CallTargetAddress = CallTargetBD->getAddress();
2379-
BinaryFunction *CallTargetBF =
2380-
BC.getBinaryFunctionAtAddress(CallTargetAddress);
2381-
if (!CallTargetBF) {
2382-
YamlBB.CallSites.emplace_back(YamlCSI);
2383-
continue;
2384-
}
2385-
// Calls between hot and cold fragments must be handled in
2386-
// fixupBATProfile.
2387-
assert(CallTargetBF != BF && "invalid CallTargetBF");
2388-
YamlCSI.DestId = CallTargetBF->getFunctionNumber();
2389-
if (CallToLoc.Offset) {
2390-
if (BAT->isBATFunction(CallTargetAddress)) {
2391-
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Unsupported secondary "
2392-
"entry point in BAT function "
2393-
<< CallToLoc.Name << '\n');
2394-
} else if (const BinaryBasicBlock *CallTargetBB =
2395-
CallTargetBF->getBasicBlockAtOffset(
2396-
CallToLoc.Offset)) {
2397-
// Only record true call information, ignoring returns (normally
2398-
// won't have a target basic block) and jumps to the landing
2399-
// pads (not an entry point).
2400-
if (CallTargetBB->isEntryPoint()) {
2401-
YamlCSI.EntryDiscriminator =
2402-
CallTargetBF->getEntryIDForSymbol(
2403-
CallTargetBB->getLabel());
2404-
}
2405-
}
2406-
}
2407-
YamlBB.CallSites.emplace_back(YamlCSI);
2408-
}
2409-
}
2361+
auto getCallSiteInfo = [&](Location CallToLoc, unsigned CallToIdx,
2362+
uint32_t Offset) {
2363+
const llvm::bolt::BranchInfo &BI = Branches.Data.at(CallToIdx);
2364+
yaml::bolt::CallSiteInfo CSI;
2365+
CSI.DestId = 0; // designated for unknown functions
2366+
CSI.EntryDiscriminator = 0;
2367+
CSI.Count = BI.Branches;
2368+
CSI.Mispreds = BI.Mispreds;
2369+
CSI.Offset = Offset;
2370+
if (BinaryData *BD = BC.getBinaryDataByName(CallToLoc.Name))
2371+
YAMLProfileWriter::setCSIDestination(BC, CSI, BD->getSymbol(), BAT,
2372+
CallToLoc.Offset);
2373+
return CSI;
24102374
};
24112375

24122376
for (const auto &[FromOffset, SuccKV] : Branches.IntraIndex) {
2413-
yaml::bolt::BinaryBasicBlockProfile YamlBB;
24142377
if (!BlockMap.isInputBlock(FromOffset))
24152378
continue;
2416-
YamlBB.Index = BlockMap.getBBIndex(FromOffset);
2417-
YamlBB.Hash = BlockMap.getBBHash(FromOffset);
2379+
const unsigned Index = BlockMap.getBBIndex(FromOffset);
2380+
yaml::bolt::BinaryBasicBlockProfile &YamlBB = YamlBF.Blocks[Index];
24182381
for (const auto &[SuccOffset, SuccDataIdx] : SuccKV)
2419-
addSuccProfile(YamlBB, SuccOffset, SuccDataIdx);
2420-
addCallsProfile(YamlBB, FromOffset);
2421-
if (YamlBB.ExecCount || !YamlBB.Successors.empty() ||
2422-
!YamlBB.CallSites.empty())
2423-
YamlBF.Blocks.emplace_back(YamlBB);
2382+
if (BlockMap.isInputBlock(SuccOffset))
2383+
YamlBB.Successors.emplace_back(
2384+
getSuccessorInfo(SuccOffset, SuccDataIdx));
2385+
}
2386+
for (const auto &[FromOffset, CallTo] : Branches.InterIndex) {
2387+
auto BlockIt = BlockMap.upper_bound(FromOffset);
2388+
--BlockIt;
2389+
const unsigned BlockOffset = BlockIt->first;
2390+
const unsigned BlockIndex = BlockIt->second.getBBIndex();
2391+
yaml::bolt::BinaryBasicBlockProfile &YamlBB = YamlBF.Blocks[BlockIndex];
2392+
const uint32_t Offset = FromOffset - BlockOffset;
2393+
for (const auto &[CallToLoc, CallToIdx] : CallTo)
2394+
YamlBB.CallSites.emplace_back(
2395+
getCallSiteInfo(CallToLoc, CallToIdx, Offset));
2396+
llvm::sort(YamlBB.CallSites, [](yaml::bolt::CallSiteInfo &A,
2397+
yaml::bolt::CallSiteInfo &B) {
2398+
return A.Offset < B.Offset;
2399+
});
24242400
}
2401+
// Drop blocks without a hash, won't be useful for stale matching.
2402+
llvm::erase_if(YamlBF.Blocks,
2403+
[](const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
2404+
return YamlBB.Hash == (yaml::Hex64)0;
2405+
});
24252406
BP.Functions.emplace_back(YamlBF);
24262407
}
24272408
}

bolt/test/X86/yaml-secondary-entry-discriminator.s

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,62 @@
3838
# RUN: llvm-bolt %t.exe -o %t.bat --data %t.fdata --funcs=func \
3939
# RUN: --split-functions --split-strategy=all --split-all-cold --enable-bat
4040

41+
## Prepare pre-aggregated profile using %t.bat
42+
# RUN: link_fdata %s %t.bat %t.preagg PREAGG
43+
## Strip labels used for pre-aggregated profile
44+
# RUN: llvm-strip -NLcall -NLindcall %t.bat
45+
46+
## Convert pre-aggregated profile using BAT
47+
# RUN: perf2bolt %t.bat -p %t.preagg --pa -o %t.bat.fdata -w %t.bat.yaml
48+
49+
## Convert BAT fdata into YAML
50+
# RUN: llvm-bolt %t.exe -data %t.bat.fdata -w %t.bat.fdata-yaml -o /dev/null
51+
52+
## Check fdata YAML - make sure that a direct call has discriminator field
53+
# RUN: FileCheck %s --input-file %t.bat.fdata-yaml -check-prefix CHECK-BAT-YAML
54+
55+
## Check BAT YAML - make sure that a direct call has discriminator field
56+
# RUN: FileCheck %s --input-file %t.bat.yaml --check-prefix CHECK-BAT-YAML
57+
58+
## YAML BAT test of calling BAT secondary entry from BAT function
59+
# RUN: llvm-bolt %t.exe -o %t.bat2 --data %t.fdata --funcs=main,func \
60+
# RUN: --split-functions --split-strategy=all --split-all-cold --enable-bat
61+
62+
## Prepare pre-aggregated profile using %t.bat
63+
# RUN: link_fdata %s %t.bat2 %t.preagg2 PREAGG2
64+
65+
## Strip labels used for pre-aggregated profile
66+
# RUN: llvm-strip -NLcall -NLindcall %t.bat2
67+
68+
## Convert pre-aggregated profile using BAT
69+
# RUN: perf2bolt %t.bat2 -p %t.preagg2 --pa -o %t.bat2.fdata -w %t.bat2.yaml
70+
71+
## Convert BAT fdata into YAML
72+
# RUN: llvm-bolt %t.exe -data %t.bat2.fdata -w %t.bat2.fdata-yaml -o /dev/null
73+
74+
## Check fdata YAML - make sure that a direct call has discriminator field
75+
# RUN: FileCheck %s --input-file %t.bat2.fdata-yaml -check-prefix CHECK-BAT-YAML
76+
77+
## Check BAT YAML - make sure that a direct call has discriminator field
78+
# RUN: FileCheck %s --input-file %t.bat2.yaml --check-prefix CHECK-BAT-YAML
79+
80+
# CHECK-BAT-YAML: - name: main
81+
# CHECK-BAT-YAML-NEXT: fid: [[#]]
82+
# CHECK-BAT-YAML-NEXT: hash: 0xADF270D550151185
83+
# CHECK-BAT-YAML-NEXT: exec: 0
84+
# CHECK-BAT-YAML-NEXT: nblocks: 4
85+
# CHECK-BAT-YAML-NEXT: blocks:
86+
# CHECK-BAT-YAML: - bid: 1
87+
# CHECK-BAT-YAML-NEXT: insns: [[#]]
88+
# CHECK-BAT-YAML-NEXT: hash: 0x36A303CBA4360018
89+
# CHECK-BAT-YAML-NEXT: calls: [ { off: 0x0, fid: [[#]], disc: 1, cnt: 1
90+
4191
.globl func
4292
.type func, @function
4393
func:
4494
# FDATA: 0 [unknown] 0 1 func 0 1 0
95+
# PREAGG: B X:0 #func# 1 1
96+
# PREAGG2: B X:0 #func# 1 1
4597
.cfi_startproc
4698
pushq %rbp
4799
movq %rsp, %rbp
@@ -71,12 +123,18 @@ main:
71123
movl $0, -4(%rbp)
72124
testq %rax, %rax
73125
jne Lindcall
126+
.globl Lcall
74127
Lcall:
75128
call secondary_entry
76129
# FDATA: 1 main #Lcall# 1 secondary_entry 0 1 1
130+
# PREAGG: B #Lcall# #secondary_entry# 1 1
131+
# PREAGG2: B #main.cold.0# #func.cold.0# 1 1
132+
.globl Lindcall
77133
Lindcall:
78134
callq *%rax
79135
# FDATA: 1 main #Lindcall# 1 secondary_entry 0 1 1
136+
# PREAGG: B #Lindcall# #secondary_entry# 1 1
137+
# PREAGG2: B #main.cold.1# #func.cold.0# 1 1
80138
xorl %eax, %eax
81139
addq $16, %rsp
82140
popq %rbp

0 commit comments

Comments
 (0)