Skip to content

[BOLT] Use heuristic for matching split local functions #90424

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions bolt/docs/BAT.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,9 @@ Hot indices are delta encoded, implicitly starting at zero.
| `FuncHash` | 8b | Function hash for input function | Hot |
| `NumBlocks` | ULEB128 | Number of basic blocks in the original function | Hot |
| `NumSecEntryPoints` | ULEB128 | Number of secondary entry points in the original function | Hot |
| `ColdInputSkew` | ULEB128 | Skew to apply to all input offsets | Cold |
| `NumEntries` | ULEB128 | Number of address translation entries for a function | Both |
| `EqualElems` | ULEB128 | Number of equal offsets in the beginning of a function | Both |
| `BranchEntries` | Bitmask, `alignTo(EqualElems, 8)` bits | If `EqualElems` is non-zero, bitmask denoting entries with `BRANCHENTRY` bit | Both |
| `EqualElems` | ULEB128 | Number of equal offsets in the beginning of a function | Hot |
| `BranchEntries` | Bitmask, `alignTo(EqualElems, 8)` bits | If `EqualElems` is non-zero, bitmask denoting entries with `BRANCHENTRY` bit | Hot |

Function header is followed by *Address Translation Table* with `NumEntries`
total entries, and *Secondary Entry Points* table with `NumSecEntryPoints`
Expand All @@ -100,8 +99,8 @@ entry is encoded. Input offsets implicitly start at zero.
| `BBHash` | Optional, 8b | Basic block hash in input binary | BB |
| `BBIdx` | Optional, Delta, ULEB128 | Basic block index in input binary | BB |

The table omits the first `EqualElems` input offsets where the input offset
equals output offset.
For hot fragments, the table omits the first `EqualElems` input offsets
where the input offset equals output offset.

`BRANCHENTRY` bit denotes whether a given offset pair is a control flow source
(branch or call instruction). If not set, it signifies a control flow target
Expand Down
6 changes: 3 additions & 3 deletions bolt/include/bolt/Profile/BoltAddressTranslation.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,9 +149,9 @@ class BoltAddressTranslation {
/// entries in function address translation map.
APInt calculateBranchEntriesBitMask(MapTy &Map, size_t EqualElems);

/// Calculate the number of equal offsets (output = input - skew) in the
/// beginning of the function.
size_t getNumEqualOffsets(const MapTy &Map, uint32_t Skew) const;
/// Calculate the number of equal offsets (output = input) in the beginning
/// of the function.
size_t getNumEqualOffsets(const MapTy &Map) const;

std::map<uint64_t, MapTy> Maps;

Expand Down
3 changes: 3 additions & 0 deletions bolt/include/bolt/Rewrite/RewriteInstance.h
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,9 @@ class RewriteInstance {
/// Store all non-zero symbols in this map for a quick address lookup.
std::map<uint64_t, llvm::object::SymbolRef> FileSymRefs;

/// FILE symbols used for disambiguating split function parents.
std::vector<ELFSymbolRef> FileSymbols;

std::unique_ptr<DWARFRewriter> DebugInfoRewriter;

std::unique_ptr<BoltAddressTranslation> BAT;
Expand Down
17 changes: 15 additions & 2 deletions bolt/include/bolt/Utils/NameResolver.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,23 @@ class NameResolver {
static constexpr char Sep = '/';

public:
/// Return unique version of the \p Name in the form "Name<Sep><Number>".
/// Return the number of uniquified versions of a given \p Name.
uint64_t getUniquifiedNameCount(StringRef Name) const {
if (Counters.contains(Name))
return Counters.at(Name);
return 0;
}

/// Return unique version of the \p Name in the form "Name<Sep><ID>".
std::string getUniqueName(StringRef Name, const uint64_t ID) const {
return (Name + Twine(Sep) + Twine(ID)).str();
}

/// Register new version of \p Name and return unique version in the form
/// "Name<Sep><Number>".
std::string uniquify(StringRef Name) {
const uint64_t ID = ++Counters[Name];
return (Name + Twine(Sep) + Twine(ID)).str();
return getUniqueName(Name, ID);
}

/// For uniquified \p Name, return the original form (that may no longer be
Expand Down
86 changes: 42 additions & 44 deletions bolt/lib/Profile/BoltAddressTranslation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,13 +153,12 @@ APInt BoltAddressTranslation::calculateBranchEntriesBitMask(MapTy &Map,
return BitMask;
}

size_t BoltAddressTranslation::getNumEqualOffsets(const MapTy &Map,
uint32_t Skew) const {
size_t BoltAddressTranslation::getNumEqualOffsets(const MapTy &Map) const {
size_t EqualOffsets = 0;
for (const std::pair<const uint32_t, uint32_t> &KeyVal : Map) {
const uint32_t OutputOffset = KeyVal.first;
const uint32_t InputOffset = KeyVal.second >> 1;
if (OutputOffset == InputOffset - Skew)
if (OutputOffset == InputOffset)
++EqualOffsets;
else
break;
Expand Down Expand Up @@ -197,17 +196,12 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
SecondaryEntryPointsMap.count(Address)
? SecondaryEntryPointsMap[Address].size()
: 0;
uint32_t Skew = 0;
if (Cold) {
auto HotEntryIt = Maps.find(ColdPartSource[Address]);
assert(HotEntryIt != Maps.end());
size_t HotIndex = std::distance(Maps.begin(), HotEntryIt);
encodeULEB128(HotIndex - PrevIndex, OS);
PrevIndex = HotIndex;
// Skew of all input offsets for cold fragments is simply the first input
// offset.
Skew = Map.begin()->second >> 1;
encodeULEB128(Skew, OS);
} else {
// Function hash
size_t BFHash = getBFHash(HotInputAddress);
Expand All @@ -223,21 +217,24 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
<< '\n');
}
encodeULEB128(NumEntries, OS);
// Encode the number of equal offsets (output = input - skew) in the
// beginning of the function. Only encode one offset in these cases.
const size_t EqualElems = getNumEqualOffsets(Map, Skew);
encodeULEB128(EqualElems, OS);
if (EqualElems) {
const size_t BranchEntriesBytes = alignTo(EqualElems, 8) / 8;
APInt BranchEntries = calculateBranchEntriesBitMask(Map, EqualElems);
OS.write(reinterpret_cast<const char *>(BranchEntries.getRawData()),
BranchEntriesBytes);
LLVM_DEBUG({
dbgs() << "BranchEntries: ";
SmallString<8> BitMaskStr;
BranchEntries.toString(BitMaskStr, 2, false);
dbgs() << BitMaskStr << '\n';
});
// For hot fragments only: encode the number of equal offsets
// (output = input) in the beginning of the function. Only encode one offset
// in these cases.
const size_t EqualElems = Cold ? 0 : getNumEqualOffsets(Map);
if (!Cold) {
encodeULEB128(EqualElems, OS);
if (EqualElems) {
const size_t BranchEntriesBytes = alignTo(EqualElems, 8) / 8;
APInt BranchEntries = calculateBranchEntriesBitMask(Map, EqualElems);
OS.write(reinterpret_cast<const char *>(BranchEntries.getRawData()),
BranchEntriesBytes);
LLVM_DEBUG({
dbgs() << "BranchEntries: ";
SmallString<8> BitMaskStr;
BranchEntries.toString(BitMaskStr, 2, false);
dbgs() << BitMaskStr << '\n';
});
}
}
const BBHashMapTy &BBHashMap = getBBHashMap(HotInputAddress);
size_t Index = 0;
Expand Down Expand Up @@ -318,12 +315,10 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
uint64_t HotAddress = Cold ? 0 : Address;
PrevAddress = Address;
uint32_t SecondaryEntryPoints = 0;
uint64_t ColdInputSkew = 0;
if (Cold) {
HotIndex += DE.getULEB128(&Offset, &Err);
HotAddress = HotFuncs[HotIndex];
ColdPartSource.emplace(Address, HotAddress);
ColdInputSkew = DE.getULEB128(&Offset, &Err);
} else {
HotFuncs.push_back(Address);
// Function hash
Expand All @@ -344,25 +339,28 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
getULEB128Size(SecondaryEntryPoints)));
}
const uint32_t NumEntries = DE.getULEB128(&Offset, &Err);
// Equal offsets.
const size_t EqualElems = DE.getULEB128(&Offset, &Err);
// Equal offsets, hot fragments only.
size_t EqualElems = 0;
APInt BEBitMask;
LLVM_DEBUG(dbgs() << formatv("Equal offsets: {0}, {1} bytes\n", EqualElems,
getULEB128Size(EqualElems)));
if (EqualElems) {
const size_t BranchEntriesBytes = alignTo(EqualElems, 8) / 8;
BEBitMask = APInt(alignTo(EqualElems, 8), 0);
LoadIntFromMemory(
BEBitMask,
reinterpret_cast<const uint8_t *>(
DE.getBytes(&Offset, BranchEntriesBytes, &Err).data()),
BranchEntriesBytes);
LLVM_DEBUG({
dbgs() << "BEBitMask: ";
SmallString<8> BitMaskStr;
BEBitMask.toString(BitMaskStr, 2, false);
dbgs() << BitMaskStr << ", " << BranchEntriesBytes << " bytes\n";
});
if (!Cold) {
EqualElems = DE.getULEB128(&Offset, &Err);
LLVM_DEBUG(dbgs() << formatv("Equal offsets: {0}, {1} bytes\n",
EqualElems, getULEB128Size(EqualElems)));
if (EqualElems) {
const size_t BranchEntriesBytes = alignTo(EqualElems, 8) / 8;
BEBitMask = APInt(alignTo(EqualElems, 8), 0);
LoadIntFromMemory(
BEBitMask,
reinterpret_cast<const uint8_t *>(
DE.getBytes(&Offset, BranchEntriesBytes, &Err).data()),
BranchEntriesBytes);
LLVM_DEBUG({
dbgs() << "BEBitMask: ";
SmallString<8> BitMaskStr;
BEBitMask.toString(BitMaskStr, 2, false);
dbgs() << BitMaskStr << ", " << BranchEntriesBytes << " bytes\n";
});
}
}
MapTy Map;

Expand All @@ -377,7 +375,7 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
PrevAddress = OutputAddress;
int64_t InputDelta = 0;
if (J < EqualElems) {
InputOffset = ((OutputOffset + ColdInputSkew) << 1) | BEBitMask[J];
InputOffset = (OutputOffset << 1) | BEBitMask[J];
} else {
InputDelta = DE.getSLEB128(&Offset, &Err);
InputOffset += InputDelta;
Expand Down
145 changes: 118 additions & 27 deletions bolt/lib/Rewrite/RewriteInstance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -840,6 +840,7 @@ void RewriteInstance::discoverFileObjects() {
continue;

if (cantFail(Symbol.getType()) == SymbolRef::ST_File) {
FileSymbols.emplace_back(Symbol);
StringRef Name =
cantFail(std::move(NameOrError), "cannot get symbol name for file");
// Ignore Clang LTO artificial FILE symbol as it is not always generated,
Expand Down Expand Up @@ -1340,6 +1341,7 @@ void RewriteInstance::discoverFileObjects() {
}

registerFragments();
FileSymbols.clear();
}

Error RewriteInstance::discoverRtFiniAddress() {
Expand Down Expand Up @@ -1417,50 +1419,139 @@ void RewriteInstance::registerFragments() {
if (!BC->HasSplitFunctions)
return;

// Process fragments with ambiguous parents separately as they are typically a
// vanishing minority of cases and require expensive symbol table lookups.
std::vector<std::pair<StringRef, BinaryFunction *>> AmbiguousFragments;
for (auto &BFI : BC->getBinaryFunctions()) {
BinaryFunction &Function = BFI.second;
if (!Function.isFragment())
continue;
unsigned ParentsFound = 0;
for (StringRef Name : Function.getNames()) {
StringRef BaseName, Suffix;
std::tie(BaseName, Suffix) = Name.split('/');
StringRef BaseName = NR.restore(Name);
const bool IsGlobal = BaseName == Name;
const size_t ColdSuffixPos = BaseName.find(".cold");
if (ColdSuffixPos == StringRef::npos)
continue;
// For cold function with local (foo.cold/1) symbol, prefer a parent with
// local symbol as well (foo/1) over global symbol (foo).
std::string ParentName = BaseName.substr(0, ColdSuffixPos).str();
StringRef ParentName = BaseName.substr(0, ColdSuffixPos);
const BinaryData *BD = BC->getBinaryDataByName(ParentName);
if (Suffix != "") {
ParentName.append(Twine("/", Suffix).str());
const BinaryData *BDLocal = BC->getBinaryDataByName(ParentName);
if (BDLocal || !BD)
BD = BDLocal;
}
if (!BD) {
if (opts::Verbosity >= 1)
BC->outs() << "BOLT-INFO: parent function not found for " << Name
<< "\n";
const uint64_t NumPossibleLocalParents =
NR.getUniquifiedNameCount(ParentName);
// The most common case: single local parent fragment.
if (!BD && NumPossibleLocalParents == 1) {
BD = BC->getBinaryDataByName(NR.getUniqueName(ParentName, 1));
} else if (BD && (!NumPossibleLocalParents || IsGlobal)) {
// Global parent and either no local candidates (second most common), or
// the fragment is global as well (uncommon).
} else {
// Any other case: need to disambiguate using FILE symbols.
AmbiguousFragments.emplace_back(ParentName, &Function);
continue;
}
const uint64_t Address = BD->getAddress();
BinaryFunction *BF = BC->getBinaryFunctionAtAddress(Address);
if (!BF) {
if (opts::Verbosity >= 1)
BC->outs() << formatv(
"BOLT-INFO: parent function not found at {0:x}\n", Address);
continue;
if (BD) {
BinaryFunction *BF = BC->getFunctionForSymbol(BD->getSymbol());
if (BF) {
BC->registerFragment(Function, *BF);
continue;
}
}
BC->registerFragment(Function, *BF);
++ParentsFound;
}
if (!ParentsFound) {
BC->errs() << "BOLT-ERROR: parent function not found for " << Function
<< '\n';
exit(1);
}
}

if (AmbiguousFragments.empty())
return;

if (!BC->hasSymbolsWithFileName()) {
BC->errs() << "BOLT-ERROR: input file has split functions but does not "
"have FILE symbols. If the binary was stripped, preserve "
"FILE symbols with --keep-file-symbols strip option";
exit(1);
}

// The first global symbol is identified by the symbol table sh_info value.
// Used as local symbol search stopping point.
auto *ELF64LEFile = cast<ELF64LEObjectFile>(InputFile);
const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();
auto *SymTab = llvm::find_if(cantFail(Obj.sections()), [](const auto &Sec) {
return Sec.sh_type == ELF::SHT_SYMTAB;
});
assert(SymTab);
if (!SymTab->sh_info) {
BC->errs() << "BOLT-ERROR: malformed SYMTAB sh_info\n";
exit(1);
}
ELFSymbolRef FirstGlobal = ELF64LEFile->toSymbolRef(SymTab, SymTab->sh_info);

for (auto &[ParentName, BF] : AmbiguousFragments) {
const uint64_t Address = BF->getAddress();

// Get fragment's own symbol
const auto SymIt = FileSymRefs.find(Address);
if (SymIt == FileSymRefs.end()) {
BC->errs()
<< "BOLT-ERROR: symbol lookup failed for function at address 0x"
<< Twine::utohexstr(Address) << '\n';
exit(1);
}

// Find containing FILE symbol
ELFSymbolRef Symbol = SymIt->second;
auto FSI = llvm::upper_bound(FileSymbols, Symbol);
if (FSI == FileSymbols.begin()) {
BC->errs() << "BOLT-ERROR: owning FILE symbol not found for symbol "
<< cantFail(Symbol.getName()) << '\n';
exit(1);
}

ELFSymbolRef StopSymbol = FirstGlobal;
if (FSI != FileSymbols.end())
StopSymbol = *FSI;

uint64_t ParentAddress{0};

// BOLT split fragment symbols are emitted just before the main function
// symbol.
for (ELFSymbolRef NextSymbol = Symbol; NextSymbol < StopSymbol;
NextSymbol.moveNext()) {
Expected<StringRef> NameOrError = NextSymbol.getName();
if (!NameOrError)
break;
StringRef Name = *NameOrError;
if (Name == ParentName) {
ParentAddress = cantFail(NextSymbol.getValue());
goto registerParent;
}
if (Name.starts_with(ParentName))
// With multi-way splitting, there are multiple fragments with different
// suffixes. Parent follows the last fragment.
continue;
break;
}

// Iterate over local file symbols and check symbol names to match parent.
for (ELFSymbolRef Symbol(FSI[-1]); Symbol < StopSymbol; Symbol.moveNext()) {
if (cantFail(Symbol.getName()) == ParentName) {
ParentAddress = cantFail(Symbol.getAddress());
break;
}
}

registerParent:
// No local parent is found, use global parent function.
if (!ParentAddress)
if (BinaryData *ParentBD = BC->getBinaryDataByName(ParentName))
ParentAddress = ParentBD->getAddress();

if (BinaryFunction *ParentBF =
BC->getBinaryFunctionAtAddress(ParentAddress)) {
BC->registerFragment(*BF, *ParentBF);
continue;
}
BC->errs() << "BOLT-ERROR: parent function not found for " << *BF << '\n';
exit(1);
}
}

void RewriteInstance::createPLTBinaryFunction(uint64_t TargetAddress,
Expand Down
Loading