Skip to content

[BOLT][NFC] Refactor BAT metadata data structures #86353

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 101 additions & 16 deletions bolt/include/bolt/Profile/BoltAddressTranslation.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,17 +115,6 @@ class BoltAddressTranslation {
/// Save function and basic block hashes used for metadata dump.
void saveMetadata(BinaryContext &BC);

/// Returns BB hash by function output address (after BOLT) and basic block
/// input offset.
size_t getBBHash(uint64_t FuncOutputAddress, uint32_t BBInputOffset) const;

/// Returns BF hash by function output address (after BOLT).
size_t getBFHash(uint64_t OutputAddress) const;

/// Returns BB index by function output address (after BOLT) and basic block
/// input offset.
unsigned getBBIndex(uint64_t FuncOutputAddress, uint32_t BBInputOffset) const;

/// Reports whether \p Address is a function with a translation table entry,
/// i.e. whether it is present as a key in `Maps`.
bool isBATFunction(uint64_t Address) const {
  return Maps.find(Address) != Maps.end();
}

Expand All @@ -135,7 +124,7 @@ class BoltAddressTranslation {
/// emitted for the start of the BB. More entries may be emitted to cover
/// the location of calls or any instruction that may change control flow.
void writeEntriesForBB(MapTy &Map, const BinaryBasicBlock &BB,
uint64_t FuncAddress);
uint64_t FuncInputAddress, uint64_t FuncOutputAddress);

/// Write the serialized address translation table for a function.
template <bool Cold>
Expand All @@ -158,10 +147,6 @@ class BoltAddressTranslation {

std::map<uint64_t, MapTy> Maps;

/// Map basic block input offset to a basic block index and hash pair.
using BBHashMap = std::unordered_map<uint32_t, std::pair<unsigned, size_t>>;
std::unordered_map<uint64_t, std::pair<size_t, BBHashMap>> FuncHashes;

/// Map a function to its basic blocks count
std::unordered_map<uint64_t, size_t> NumBasicBlocksMap;

Expand All @@ -174,6 +159,106 @@ class BoltAddressTranslation {
/// Identifies the address of a control-flow changing instruction in a
/// translation map entry
const static uint32_t BRANCHENTRY = 0x1;

public:
/// Map basic block input offset to a basic block index and hash pair.
///
/// Entries are keyed by the basic block's input offset. `getBBIndex` and
/// `getBBHash` require that the offset was registered through `addEntry`;
/// the lookup is guarded only by an assertion, so callers that cannot
/// guarantee presence should test with `isInputBlock` first.
class BBHashMapTy {
  /// Index and hash recorded for a single basic block.
  class EntryTy {
    unsigned Index;
    size_t Hash;

  public:
    EntryTy(unsigned Index, size_t Hash) : Index(Index), Hash(Hash) {}
    unsigned getBBIndex() const { return Index; }
    size_t getBBHash() const { return Hash; }
  };

  std::unordered_map<uint32_t, EntryTy> Map;

  /// Returns the entry stored for \p BBInputOffset, which must be present.
  const EntryTy &getEntry(uint32_t BBInputOffset) const {
    auto It = Map.find(BBInputOffset);
    assert(It != Map.end() && "No entry for basic block input offset");
    return It->second;
  }

public:
  /// True if an entry was recorded for \p InputOffset.
  bool isInputBlock(uint32_t InputOffset) const {
    return Map.find(InputOffset) != Map.end();
  }

  /// Returns the basic block index recorded for \p BBInputOffset.
  unsigned getBBIndex(uint32_t BBInputOffset) const {
    return getEntry(BBInputOffset).getBBIndex();
  }

  /// Returns the basic block hash recorded for \p BBInputOffset.
  size_t getBBHash(uint32_t BBInputOffset) const {
    return getEntry(BBInputOffset).getBBHash();
  }

  /// Records \p BBIndex and \p BBHash for the block at \p BBInputOffset.
  /// If the offset already has an entry, the existing entry is kept
  /// (`emplace` does not overwrite).
  void addEntry(uint32_t BBInputOffset, unsigned BBIndex, size_t BBHash) {
    Map.emplace(BBInputOffset, EntryTy(BBIndex, BBHash));
  }

  /// Number of basic blocks with a recorded entry.
  size_t getNumBasicBlocks() const { return Map.size(); }
};

/// Map function output address to its hash and basic blocks hash map.
///
/// `getBFHash` and `getBBHashMap` require that the address was registered
/// through `addEntry`; the lookup is guarded only by an assertion.
class FuncHashesTy {
  /// Hash of a single function together with its per-block hash map.
  class EntryTy {
    size_t Hash;
    BBHashMapTy BBHashMap;

  public:
    // Explicit: a bare hash value should never silently convert to an entry.
    explicit EntryTy(size_t Hash) : Hash(Hash) {}
    size_t getBFHash() const { return Hash; }
    const BBHashMapTy &getBBHashMap() const { return BBHashMap; }
  };

  std::unordered_map<uint64_t, EntryTy> Map;

  /// Returns the entry stored for \p FuncOutputAddress, which must be present.
  const EntryTy &getEntry(uint64_t FuncOutputAddress) const {
    auto It = Map.find(FuncOutputAddress);
    assert(It != Map.end() && "No entry for function address");
    return It->second;
  }

public:
  /// Returns the function hash recorded for \p FuncOutputAddress.
  size_t getBFHash(uint64_t FuncOutputAddress) const {
    return getEntry(FuncOutputAddress).getBFHash();
  }

  /// Returns the basic block hash map recorded for \p FuncOutputAddress.
  const BBHashMapTy &getBBHashMap(uint64_t FuncOutputAddress) const {
    return getEntry(FuncOutputAddress).getBBHashMap();
  }

  /// Registers \p BFHash for \p FuncOutputAddress with an empty block map.
  /// If the address already has an entry, the existing entry is kept
  /// (`emplace` does not overwrite).
  void addEntry(uint64_t FuncOutputAddress, size_t BFHash) {
    Map.emplace(FuncOutputAddress, EntryTy(BFHash));
  }

  /// Number of functions with a recorded entry.
  size_t getNumFunctions() const { return Map.size(); }

  /// Total number of basic block entries across all recorded functions.
  size_t getNumBasicBlocks() const {
    size_t NumBasicBlocks{0};
    for (const auto &FuncEntry : Map)
      NumBasicBlocks += FuncEntry.second.getBBHashMap().getNumBasicBlocks();
    return NumBasicBlocks;
  }
};

/// Looks up the BF hash recorded for the function identified by
/// \p FuncOutputAddress (its output address, after BOLT).
size_t getBFHash(uint64_t FuncOutputAddress) const {
  const size_t BFHash = FuncHashes.getBFHash(FuncOutputAddress);
  return BFHash;
}

/// Looks up the basic block hash map recorded for the function identified by
/// \p FuncOutputAddress (its output address, after BOLT). Read-only access.
const BBHashMapTy &getBBHashMap(uint64_t FuncOutputAddress) const {
  const BBHashMapTy &BlockHashes = FuncHashes.getBBHashMap(FuncOutputAddress);
  return BlockHashes;
}

/// Mutable access to the basic block hash map for \p FuncOutputAddress.
/// Forwards to the const overload and strips constness from the result;
/// this overload is only callable on a non-const object.
BBHashMapTy &getBBHashMap(uint64_t FuncOutputAddress) {
  const BoltAddressTranslation &ConstThis = *this;
  return const_cast<BBHashMapTy &>(ConstThis.getBBHashMap(FuncOutputAddress));
}

private:
FuncHashesTy FuncHashes;
};
} // namespace bolt

Expand Down
85 changes: 36 additions & 49 deletions bolt/lib/Profile/BoltAddressTranslation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,10 @@ const char *BoltAddressTranslation::SECTION_NAME = ".note.bolt_bat";

void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
const BinaryBasicBlock &BB,
uint64_t FuncAddress) {
uint64_t HotFuncAddress = ColdPartSource.count(FuncAddress)
? ColdPartSource[FuncAddress]
: FuncAddress;
uint64_t FuncInputAddress,
uint64_t FuncOutputAddress) {
const uint64_t BBOutputOffset =
BB.getOutputAddressRange().first - FuncAddress;
BB.getOutputAddressRange().first - FuncOutputAddress;
const uint32_t BBInputOffset = BB.getInputOffset();

// Every output BB must track back to an input BB for profile collection
Expand All @@ -42,11 +40,14 @@ void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
LLVM_DEBUG(dbgs() << "BB " << BB.getName() << "\n");
LLVM_DEBUG(dbgs() << " Key: " << Twine::utohexstr(BBOutputOffset)
<< " Val: " << Twine::utohexstr(BBInputOffset) << "\n");
LLVM_DEBUG(dbgs() << formatv(" Hash: {0:x}\n",
getBBHash(HotFuncAddress, BBInputOffset)));
(void)HotFuncAddress;
LLVM_DEBUG(dbgs() << formatv(" Index: {0}\n",
getBBIndex(HotFuncAddress, BBInputOffset)));
// NB: in `writeEntriesForBB` we use the input address because hashes are
// saved early in `saveMetadata` before output addresses are assigned.
const BBHashMapTy &BBHashMap = getBBHashMap(FuncInputAddress);
(void)BBHashMap;
LLVM_DEBUG(
dbgs() << formatv(" Hash: {0:x}\n", BBHashMap.getBBHash(BBInputOffset)));
LLVM_DEBUG(
dbgs() << formatv(" Index: {0}\n", BBHashMap.getBBIndex(BBInputOffset)));
// In case of conflicts (same Key mapping to different Vals), the last
// update takes precedence. Of course it is not ideal to have conflicts and
// those happen when we have an empty BB that either contained only
Expand All @@ -63,7 +64,7 @@ void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
const auto InputAddress = BB.getFunction()->getAddress() + InputOffset;
const auto OutputAddress = IOAddressMap.lookup(InputAddress);
assert(OutputAddress && "Unknown instruction address");
const auto OutputOffset = *OutputAddress - FuncAddress;
const auto OutputOffset = *OutputAddress - FuncOutputAddress;

// Is this the first instruction in the BB? No need to duplicate the entry.
if (OutputOffset == BBOutputOffset)
Expand Down Expand Up @@ -99,7 +100,7 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
MapTy Map;
for (const BinaryBasicBlock *const BB :
Function.getLayout().getMainFragment())
writeEntriesForBB(Map, *BB, Function.getOutputAddress());
writeEntriesForBB(Map, *BB, InputAddress, OutputAddress);
Maps.emplace(Function.getOutputAddress(), std::move(Map));
ReverseMap.emplace(OutputAddress, InputAddress);

Expand All @@ -113,7 +114,7 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
ColdPartSource.emplace(FF.getAddress(), Function.getOutputAddress());
Map.clear();
for (const BinaryBasicBlock *const BB : FF)
writeEntriesForBB(Map, *BB, FF.getAddress());
writeEntriesForBB(Map, *BB, InputAddress, FF.getAddress());

Maps.emplace(FF.getAddress(), std::move(Map));
}
Expand All @@ -125,11 +126,9 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
writeMaps</*Cold=*/true>(Maps, PrevAddress, OS);

BC.outs() << "BOLT-INFO: Wrote " << Maps.size() << " BAT maps\n";
const uint64_t NumBBHashes = std::accumulate(
FuncHashes.begin(), FuncHashes.end(), 0ull,
[](size_t Acc, const auto &B) { return Acc + B.second.second.size(); });
BC.outs() << "BOLT-INFO: Wrote " << FuncHashes.size() << " function and "
<< NumBBHashes << " basic block hashes\n";
BC.outs() << "BOLT-INFO: Wrote " << FuncHashes.getNumFunctions()
<< " function and " << FuncHashes.getNumBasicBlocks()
<< " basic block hashes\n";
}

APInt BoltAddressTranslation::calculateBranchEntriesBitMask(MapTy &Map,
Expand Down Expand Up @@ -176,11 +175,10 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
// Only process cold fragments in cold mode, and vice versa.
if (Cold != ColdPartSource.count(Address))
continue;
// NB: here we use the input address because hashes are saved early (in
// `saveMetadata`) before output addresses are assigned.
// NB: in `writeMaps` we use the input address because hashes are saved
// early in `saveMetadata` before output addresses are assigned.
const uint64_t HotInputAddress =
ReverseMap[Cold ? ColdPartSource[Address] : Address];
std::pair<size_t, BBHashMap> &FuncHashPair = FuncHashes[HotInputAddress];
MapTy &Map = MapEntry.second;
const uint32_t NumEntries = Map.size();
LLVM_DEBUG(dbgs() << "Writing " << NumEntries << " entries for 0x"
Expand All @@ -194,10 +192,11 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
PrevIndex = HotIndex;
} else {
// Function hash
LLVM_DEBUG(dbgs() << "Hash: " << formatv("{0:x}\n", FuncHashPair.first));
OS.write(reinterpret_cast<char *>(&FuncHashPair.first), 8);
size_t BFHash = getBFHash(HotInputAddress);
LLVM_DEBUG(dbgs() << "Hash: " << formatv("{0:x}\n", BFHash));
OS.write(reinterpret_cast<char *>(&BFHash), 8);
// Number of basic blocks
size_t NumBasicBlocks = FuncHashPair.second.size();
size_t NumBasicBlocks = getBBHashMap(HotInputAddress).getNumBasicBlocks();
LLVM_DEBUG(dbgs() << "Basic blocks: " << NumBasicBlocks << '\n');
encodeULEB128(NumBasicBlocks, OS);
}
Expand All @@ -221,6 +220,7 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
});
}
}
const BBHashMapTy &BBHashMap = getBBHashMap(HotInputAddress);
size_t Index = 0;
uint64_t InOffset = 0;
size_t PrevBBIndex = 0;
Expand All @@ -233,9 +233,9 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
encodeSLEB128(KeyVal.second - InOffset, OS);
InOffset = KeyVal.second; // Keeping InOffset as if BRANCHENTRY is encoded
if ((InOffset & BRANCHENTRY) == 0) {
unsigned BBIndex;
size_t BBHash;
std::tie(BBIndex, BBHash) = FuncHashPair.second[InOffset >> 1];
const bool IsBlock = BBHashMap.isInputBlock(InOffset >> 1);
unsigned BBIndex = IsBlock ? BBHashMap.getBBIndex(InOffset >> 1) : 0;
size_t BBHash = IsBlock ? BBHashMap.getBBHash(InOffset >> 1) : 0;
OS.write(reinterpret_cast<char *>(&BBHash), 8);
// Basic block index in the input binary
encodeULEB128(BBIndex - PrevBBIndex, OS);
Expand Down Expand Up @@ -295,7 +295,7 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
HotFuncs.push_back(Address);
// Function hash
const size_t FuncHash = DE.getU64(&Offset, &Err);
FuncHashes[Address].first = FuncHash;
FuncHashes.addEntry(Address, FuncHash);
LLVM_DEBUG(dbgs() << formatv("{0:x}: hash {1:x}\n", Address, FuncHash));
// Number of basic blocks
const size_t NumBasicBlocks = DE.getULEB128(&Offset, &Err);
Expand Down Expand Up @@ -355,8 +355,7 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
BBIndexDelta = DE.getULEB128(&Offset, &Err);
BBIndex += BBIndexDelta;
// Map basic block hash to hot fragment by input offset
FuncHashes[HotAddress].second.emplace(InputOffset >> 1,
std::pair(BBIndex, BBHash));
getBBHashMap(HotAddress).addEntry(InputOffset >> 1, BBIndex, BBHash);
}
LLVM_DEBUG({
dbgs() << formatv(
Expand Down Expand Up @@ -385,6 +384,8 @@ void BoltAddressTranslation::dump(raw_ostream &OS) {
OS << formatv(", hash: {0:x}", getBFHash(Address));
OS << "\n";
OS << "BB mappings:\n";
const BBHashMapTy &BBHashMap =
getBBHashMap(HotAddress ? HotAddress : Address);
for (const auto &Entry : MapEntry.second) {
const bool IsBranch = Entry.second & BRANCHENTRY;
const uint32_t Val = Entry.second >> 1; // dropping BRANCHENTRY bit
Expand All @@ -393,8 +394,7 @@ void BoltAddressTranslation::dump(raw_ostream &OS) {
if (IsBranch)
OS << " (branch)";
else
OS << formatv(" hash: {0:x}",
getBBHash(HotAddress ? HotAddress : Address, Val));
OS << formatv(" hash: {0:x}", BBHashMap.getBBHash(Val));
OS << "\n";
}
OS << "\n";
Expand Down Expand Up @@ -515,27 +515,14 @@ void BoltAddressTranslation::saveMetadata(BinaryContext &BC) {
if (BF.isIgnored() || (!BC.HasRelocations && !BF.isSimple()))
continue;
// Prepare function and block hashes
FuncHashes[BF.getAddress()].first = BF.computeHash();
FuncHashes.addEntry(BF.getAddress(), BF.computeHash());
BF.computeBlockHashes();
BBHashMapTy &BBHashMap = getBBHashMap(BF.getAddress());
// Set BF/BB metadata
for (const BinaryBasicBlock &BB : BF)
FuncHashes[BF.getAddress()].second.emplace(
BB.getInputOffset(), std::pair(BB.getIndex(), BB.getHash()));
BBHashMap.addEntry(BB.getInputOffset(), BB.getIndex(), BB.getHash());
}
}

/// Returns the basic block index for the block at \p BBInputOffset within the
/// function keyed by \p FuncOutputAddress. Both lookups go through `.at()`,
/// so neither key is default-inserted.
// NOTE(review): the chained `.at(...).second.at(...).first` presumes
// `FuncHashes` is the nested unordered_map-of-pairs layout; confirm against
// the declaration this version of the file is built with.
unsigned BoltAddressTranslation::getBBIndex(uint64_t FuncOutputAddress,
uint32_t BBInputOffset) const {
return FuncHashes.at(FuncOutputAddress).second.at(BBInputOffset).first;
}

/// Returns the basic block hash for the block at \p BBInputOffset within the
/// function keyed by \p FuncOutputAddress. Both lookups go through `.at()`,
/// so neither key is default-inserted.
// NOTE(review): the chained `.at(...).second.at(...).second` presumes
// `FuncHashes` is the nested unordered_map-of-pairs layout; confirm against
// the declaration this version of the file is built with.
size_t BoltAddressTranslation::getBBHash(uint64_t FuncOutputAddress,
uint32_t BBInputOffset) const {
return FuncHashes.at(FuncOutputAddress).second.at(BBInputOffset).second;
}

/// Returns the function hash stored for \p OutputAddress, looked up with
/// `.at()` so a missing key is not default-inserted.
// NOTE(review): `.at(...).first` presumes `FuncHashes` maps an address to a
// (hash, block map) pair; confirm against the declaration this version of the
// file is built with.
size_t BoltAddressTranslation::getBFHash(uint64_t OutputAddress) const {
return FuncHashes.at(OutputAddress).first;
}

} // namespace bolt
} // namespace llvm