Skip to content

Commit 79d695f

Browse files
authored
[BOLT][NFCI] Speed up BAT::writeMaps
For a large binary with a 38 MB BAT section containing ~170k maps, this reduces writeMaps time from 70s to 1s. The inefficiency came from calling std::distance on std::map iterators, which do not provide random access, so each call is linear in the number of maps. Use a sorted vector of hot function addresses for lookups instead. Test Plan: NFC Reviewers: maksfb, rafaelauler, dcci, ayermolo Reviewed By: maksfb Pull Request: #112061
1 parent dd326b1 commit 79d695f

File tree

2 files changed

+16
-17
lines changed

2 files changed

+16
-17
lines changed

bolt/include/bolt/Profile/BoltAddressTranslation.h

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -141,15 +141,13 @@ class BoltAddressTranslation {
141141
uint64_t FuncOutputAddress) const;
142142

143143
/// Write the serialized address translation table for a function.
144-
template <bool Cold>
145-
void writeMaps(std::map<uint64_t, MapTy> &Maps, uint64_t &PrevAddress,
146-
raw_ostream &OS);
144+
template <bool Cold> void writeMaps(uint64_t &PrevAddress, raw_ostream &OS);
147145

148146
/// Read the serialized address translation table for a function.
149147
/// Return a parse error if failed.
150148
template <bool Cold>
151-
void parseMaps(std::vector<uint64_t> &HotFuncs, uint64_t &PrevAddress,
152-
DataExtractor &DE, uint64_t &Offset, Error &Err);
149+
void parseMaps(uint64_t &PrevAddress, DataExtractor &DE, uint64_t &Offset,
150+
Error &Err);
153151

154152
/// Returns the bitmask with set bits corresponding to indices of BRANCHENTRY
155153
/// entries in function address translation map.
@@ -161,6 +159,9 @@ class BoltAddressTranslation {
161159

162160
std::map<uint64_t, MapTy> Maps;
163161

162+
/// Ordered vector with addresses of hot functions.
163+
std::vector<uint64_t> HotFuncs;
164+
164165
/// Map a function to its basic blocks count
165166
std::unordered_map<uint64_t, size_t> NumBasicBlocksMap;
166167

bolt/lib/Profile/BoltAddressTranslation.cpp

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -143,8 +143,8 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
143143

144144
// Output addresses are delta-encoded
145145
uint64_t PrevAddress = 0;
146-
writeMaps</*Cold=*/false>(Maps, PrevAddress, OS);
147-
writeMaps</*Cold=*/true>(Maps, PrevAddress, OS);
146+
writeMaps</*Cold=*/false>(PrevAddress, OS);
147+
writeMaps</*Cold=*/true>(PrevAddress, OS);
148148

149149
BC.outs() << "BOLT-INFO: Wrote " << Maps.size() << " BAT maps\n";
150150
BC.outs() << "BOLT-INFO: Wrote " << FuncHashes.getNumFunctions()
@@ -182,8 +182,7 @@ size_t BoltAddressTranslation::getNumEqualOffsets(const MapTy &Map,
182182
}
183183

184184
template <bool Cold>
185-
void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
186-
uint64_t &PrevAddress, raw_ostream &OS) {
185+
void BoltAddressTranslation::writeMaps(uint64_t &PrevAddress, raw_ostream &OS) {
187186
const uint32_t NumFuncs =
188187
llvm::count_if(llvm::make_first_range(Maps), [&](const uint64_t Address) {
189188
return Cold == ColdPartSource.count(Address);
@@ -213,16 +212,17 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
213212
: 0;
214213
uint32_t Skew = 0;
215214
if (Cold) {
216-
auto HotEntryIt = Maps.find(ColdPartSource[Address]);
217-
assert(HotEntryIt != Maps.end());
218-
size_t HotIndex = std::distance(Maps.begin(), HotEntryIt);
215+
auto HotEntryIt = llvm::lower_bound(HotFuncs, ColdPartSource[Address]);
216+
assert(HotEntryIt != HotFuncs.end());
217+
size_t HotIndex = std::distance(HotFuncs.begin(), HotEntryIt);
219218
encodeULEB128(HotIndex - PrevIndex, OS);
220219
PrevIndex = HotIndex;
221220
// Skew of all input offsets for cold fragments is simply the first input
222221
// offset.
223222
Skew = Map.begin()->second >> 1;
224223
encodeULEB128(Skew, OS);
225224
} else {
225+
HotFuncs.push_back(Address);
226226
// Function hash
227227
size_t BFHash = getBFHash(HotInputAddress);
228228
LLVM_DEBUG(dbgs() << "Hash: " << formatv("{0:x}\n", BFHash));
@@ -311,17 +311,15 @@ std::error_code BoltAddressTranslation::parse(raw_ostream &OS, StringRef Buf) {
311311
return make_error_code(llvm::errc::io_error);
312312

313313
Error Err(Error::success());
314-
std::vector<uint64_t> HotFuncs;
315314
uint64_t PrevAddress = 0;
316-
parseMaps</*Cold=*/false>(HotFuncs, PrevAddress, DE, Offset, Err);
317-
parseMaps</*Cold=*/true>(HotFuncs, PrevAddress, DE, Offset, Err);
315+
parseMaps</*Cold=*/false>(PrevAddress, DE, Offset, Err);
316+
parseMaps</*Cold=*/true>(PrevAddress, DE, Offset, Err);
318317
OS << "BOLT-INFO: Parsed " << Maps.size() << " BAT entries\n";
319318
return errorToErrorCode(std::move(Err));
320319
}
321320

322321
template <bool Cold>
323-
void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
324-
uint64_t &PrevAddress, DataExtractor &DE,
322+
void BoltAddressTranslation::parseMaps(uint64_t &PrevAddress, DataExtractor &DE,
325323
uint64_t &Offset, Error &Err) {
326324
const uint32_t NumFunctions = DE.getULEB128(&Offset, &Err);
327325
LLVM_DEBUG(dbgs() << "Parsing " << NumFunctions << (Cold ? " cold" : "")

0 commit comments

Comments
 (0)