Skip to content

Commit 23c8d38

Browse files
committed
[BOLT] Calculate input to output address map using BOLTLinker
BOLT uses MCAsmLayout to calculate the output values of basic blocks. This means output values are calculated based on a pre-linking state and any changes to symbol values during linking will cause incorrect values to be used.

This issue was first addressed in D154604 by adding all basic block symbols to the symbol table for the linker to resolve them. However, the runtime overhead of handling this huge symbol table turned out to be prohibitively large.

This patch solves the issue in a different way. First, a temporary section containing [input address, output symbol] pairs is emitted to the intermediary object file. The linker will resolve all these references so we end up with a section of [input address, output address] pairs. This section is then parsed and used to:
- Replace BinaryBasicBlock::OffsetTranslationTable
- Replace BinaryFunction::InputOffsetToAddressMap
- Update BinaryBasicBlock::OutputAddressRange

Note that the reason this is more performant than the previous attempt is that these symbol references do not cause entries to be added to the symbol table. Instead, section-relative references are used for the relocations.

Reviewed By: maksfb

Differential Revision: https://reviews.llvm.org/D155604
1 parent b09c575 commit 23c8d38

13 files changed

+183
-70
lines changed

bolt/include/bolt/Core/AddressMap.h

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
//===- bolt/Core/AddressMap.h - Input-output address map --------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// Helper class to create a mapping from input to output addresses needed for
10+
// updating debugging symbols and BAT. We emit an MCSection containing
11+
// <Input address, Output MCSymbol> pairs to the object file and JITLink will
12+
// transform this into <Input address, Output address> pairs. The linker output
13+
// can then be parsed and used to establish the mapping.
14+
//
15+
//===----------------------------------------------------------------------===//
16+
//
17+
#ifndef BOLT_CORE_ADDRESS_MAP_H
18+
#define BOLT_CORE_ADDRESS_MAP_H
19+
20+
#include "llvm/ADT/StringRef.h"
21+
22+
#include <optional>
23+
#include <unordered_map>
24+
25+
namespace llvm {
26+
27+
class MCStreamer;
28+
29+
namespace bolt {
30+
31+
class BinaryContext;
32+
33+
class AddressMap {
34+
using MapTy = std::unordered_multimap<uint64_t, uint64_t>;
35+
MapTy Map;
36+
37+
public:
38+
static const char *const SectionName;
39+
40+
static void emit(MCStreamer &Streamer, BinaryContext &BC);
41+
static AddressMap parse(StringRef Buffer, const BinaryContext &BC);
42+
43+
std::optional<uint64_t> lookup(uint64_t InputAddress) const {
44+
auto It = Map.find(InputAddress);
45+
if (It != Map.end())
46+
return It->second;
47+
return std::nullopt;
48+
}
49+
50+
std::pair<MapTy::const_iterator, MapTy::const_iterator>
51+
lookupAll(uint64_t InputAddress) const {
52+
return Map.equal_range(InputAddress);
53+
}
54+
};
55+
56+
} // namespace bolt
57+
} // namespace llvm
58+
59+
#endif

bolt/include/bolt/Core/BinaryBasicBlock.h

Lines changed: 1 addition & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -100,16 +100,6 @@ class BinaryBasicBlock {
100100
using LocSymsTy = std::vector<std::pair<uint32_t, const MCSymbol *>>;
101101
std::unique_ptr<LocSymsTy> LocSyms;
102102

103-
/// After output/codegen, map output offsets of instructions in this basic
104-
/// block to instruction offsets in the original function. Note that the
105-
/// output basic block could be different from the input basic block.
106-
/// We only map instruction of interest, such as calls and markers.
107-
///
108-
/// We store the offset array in a basic block to facilitate BAT tables
109-
/// generation. Otherwise, the mapping could be done at function level.
110-
using OffsetTranslationTableTy = std::vector<std::pair<uint32_t, uint32_t>>;
111-
std::unique_ptr<OffsetTranslationTableTy> OffsetTranslationTable;
112-
113103
/// Alignment requirements for the block.
114104
uint32_t Alignment{1};
115105

@@ -828,8 +818,7 @@ class BinaryBasicBlock {
828818
return OutputAddressRange;
829819
}
830820

831-
/// Update addresses of special instructions inside this basic block.
832-
void updateOutputValues(const MCAsmLayout &Layout);
821+
bool hasLocSyms() const { return LocSyms != nullptr; }
833822

834823
/// Return mapping of input offsets to symbols in the output.
835824
LocSymsTy &getLocSyms() {
@@ -841,19 +830,6 @@ class BinaryBasicBlock {
841830
return const_cast<BinaryBasicBlock *>(this)->getLocSyms();
842831
}
843832

844-
/// Return offset translation table for the basic block.
845-
OffsetTranslationTableTy &getOffsetTranslationTable() {
846-
return OffsetTranslationTable
847-
? *OffsetTranslationTable
848-
: *(OffsetTranslationTable =
849-
std::make_unique<OffsetTranslationTableTy>());
850-
}
851-
852-
/// Return offset translation table for the basic block.
853-
const OffsetTranslationTableTy &getOffsetTranslationTable() const {
854-
return const_cast<BinaryBasicBlock *>(this)->getOffsetTranslationTable();
855-
}
856-
857833
/// Return size of the basic block in the output binary.
858834
uint64_t getOutputSize() const {
859835
return OutputAddressRange.second - OutputAddressRange.first;

bolt/include/bolt/Core/BinaryContext.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#ifndef BOLT_CORE_BINARY_CONTEXT_H
1414
#define BOLT_CORE_BINARY_CONTEXT_H
1515

16+
#include "bolt/Core/AddressMap.h"
1617
#include "bolt/Core/BinaryData.h"
1718
#include "bolt/Core/BinarySection.h"
1819
#include "bolt/Core/DebugData.h"
@@ -221,6 +222,9 @@ class BinaryContext {
221222
bool ContainsDwarf5{false};
222223
bool ContainsDwarfLegacy{false};
223224

225+
/// Mapping from input to output addresses.
226+
std::optional<AddressMap> IOAddressMap;
227+
224228
/// Preprocess DWO debug information.
225229
void preprocessDWODebugInfo();
226230

@@ -1343,6 +1347,12 @@ class BinaryContext {
13431347
/* DWARFMustBeAtTheEnd */ false));
13441348
return Streamer;
13451349
}
1350+
1351+
void setIOAddressMap(AddressMap Map) { IOAddressMap = std::move(Map); }
1352+
const AddressMap &getIOAddressMap() const {
1353+
assert(IOAddressMap && "Address map not set yet");
1354+
return *IOAddressMap;
1355+
}
13461356
};
13471357

13481358
template <typename T, typename = std::enable_if_t<sizeof(T) == 1>>

bolt/include/bolt/Core/BinaryFunction.h

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -577,9 +577,6 @@ class BinaryFunction {
577577
/// Count the number of functions created.
578578
static uint64_t Count;
579579

580-
/// Map offsets of special instructions to addresses in the output.
581-
InputOffsetToAddressMapTy InputOffsetToAddressMap;
582-
583580
/// Register alternative function name.
584581
void addAlternativeName(std::string NewName) {
585582
Aliases.push_back(std::move(NewName));
@@ -1226,13 +1223,6 @@ class BinaryFunction {
12261223
/// Update output values of the function based on the final \p Layout.
12271224
void updateOutputValues(const MCAsmLayout &Layout);
12281225

1229-
/// Return mapping of input to output addresses. Most users should call
1230-
/// translateInputToOutputAddress() for address translation.
1231-
InputOffsetToAddressMapTy &getInputOffsetToAddressMap() {
1232-
assert(isEmitted() && "cannot use address mapping before code emission");
1233-
return InputOffsetToAddressMap;
1234-
}
1235-
12361226
/// Register relocation type \p RelType at a given \p Address in the function
12371227
/// against \p Symbol.
12381228
/// Assert if the \p Address is not inside this function.
@@ -2180,6 +2170,11 @@ class BinaryFunction {
21802170
/// its code emission.
21812171
bool requiresAddressTranslation() const;
21822172

2173+
/// Return true if the linker needs to generate an address map for this
2174+
/// function. Used for keeping track of the mapping from input to output
2175+
/// addresses of basic blocks.
2176+
bool requiresAddressMap() const;
2177+
21832178
/// Adjust branch instructions to match the CFG.
21842179
///
21852180
/// As it comes to internal branches, the CFG represents "the ultimate source

bolt/include/bolt/Core/BinarySection.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,8 @@ class BinarySection {
9797
mutable bool IsReordered{false}; // Have the contents been reordered?
9898
bool IsAnonymous{false}; // True if the name should not be included
9999
// in the output file.
100+
bool IsLinkOnly{false}; // True if the section should not be included
101+
// in the output file.
100102

101103
uint64_t hash(const BinaryData &BD,
102104
std::map<const BinaryData *, uint64_t> &Cache) const;
@@ -452,6 +454,8 @@ class BinarySection {
452454
void setIndex(uint32_t I) { Index = I; }
453455
void setOutputName(const Twine &Name) { OutputName = Name.str(); }
454456
void setAnonymous(bool Flag) { IsAnonymous = Flag; }
457+
bool isLinkOnly() const { return IsLinkOnly; }
458+
void setLinkOnly() { IsLinkOnly = true; }
455459

456460
/// Emit the section as data, possibly with relocations.
457461
/// Use name \p SectionName for the section during the emission.

bolt/lib/Core/AddressMap.cpp

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
#include "bolt/Core/AddressMap.h"
2+
#include "bolt/Core/BinaryContext.h"
3+
#include "bolt/Core/BinaryFunction.h"
4+
#include "llvm/MC/MCStreamer.h"
5+
#include "llvm/Support/DataExtractor.h"
6+
7+
namespace llvm {
8+
namespace bolt {
9+
10+
const char *const AddressMap::SectionName = ".bolt.address_map";
11+
12+
/// Emit one address map entry through \p Streamer: \p InputAddress as an
/// 8-byte integer, followed by an 8-byte value of \p OutputLabel.
///
/// NOTE: the field width is fixed at 8 bytes here, whereas
/// AddressMap::parse() derives the width from the code pointer size, so the
/// two only agree when that size is 8.
static void emitLabel(MCStreamer &Streamer, uint64_t InputAddress,
                      const MCSymbol *OutputLabel) {
  // Both halves of an entry use the same width.
  constexpr unsigned FieldSize = 8;
  Streamer.emitIntValue(InputAddress, FieldSize);
  Streamer.emitSymbolValue(OutputLabel, FieldSize);
}
17+
18+
/// Emit the address map section. For each function that reports
/// requiresAddressMap(), every basic block with a defined label contributes
/// one entry for the block itself, followed by one entry per location symbol
/// recorded in that block.
void AddressMap::emit(MCStreamer &Streamer, BinaryContext &BC) {
  Streamer.switchSection(BC.getDataSection(SectionName));

  for (const auto &[FuncAddress, Func] : BC.getBinaryFunctions()) {
    if (!Func.requiresAddressMap())
      continue;

    for (const auto &Block : Func) {
      // Blocks whose label is not defined are skipped entirely, including
      // their location symbols.
      if (!Block.getLabel()->isDefined())
        continue;

      // Entry for the start of the block.
      emitLabel(Streamer, FuncAddress + Block.getInputAddressRange().first,
                Block.getLabel());

      // Entries for the location symbols inside the block. Offsets are added
      // to the function address to form the input address.
      if (Block.hasLocSyms())
        for (auto [Offset, Symbol] : Block.getLocSyms())
          emitLabel(Streamer, FuncAddress + Offset, Symbol);
    }
  }
}
40+
41+
/// Build an AddressMap from the raw contents of the address map section.
///
/// \p Buffer is read as a sequence of <input address, output address> pairs.
/// Each field is as wide as the code pointer size reported by \p BC's
/// AsmInfo, and the byte order is also taken from AsmInfo.
///
/// \returns the parsed map. A section whose size is not a multiple of the
/// entry size, or that cannot be read completely, triggers an assertion; in
/// builds without assertions only the entries read in full are returned.
AddressMap AddressMap::parse(StringRef Buffer, const BinaryContext &BC) {
  const auto PointerSize = BC.AsmInfo->getCodePointerSize();
  const auto EntrySize = 2 * PointerSize;
  assert(Buffer.size() % EntrySize == 0 && "Unexpected address map size");

  DataExtractor DE(Buffer, BC.AsmInfo->isLittleEndian(), PointerSize);
  DataExtractor::Cursor Cursor(0);

  AddressMap Parsed;
  Parsed.Map.reserve(Buffer.size() / EntrySize);

  while (Cursor && !DE.eof(Cursor)) {
    const auto Input = DE.getAddress(Cursor);
    const auto Output = DE.getAddress(Cursor);

    // If either read failed, the cursor is in an error state and the values
    // above are not valid. Do not record a bogus entry for a truncated pair.
    if (!Cursor)
      break;

    Parsed.Map.insert({Input, Output});
  }

  assert(Cursor && "Error reading address map section");
  return Parsed;
}
61+
62+
} // namespace bolt
63+
} // namespace llvm

bolt/lib/Core/BinaryBasicBlock.cpp

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -613,27 +613,5 @@ BinaryBasicBlock *BinaryBasicBlock::splitAt(iterator II) {
613613
return NewBlock;
614614
}
615615

616-
void BinaryBasicBlock::updateOutputValues(const MCAsmLayout &Layout) {
617-
if (!LocSyms)
618-
return;
619-
620-
const uint64_t BBAddress = getOutputAddressRange().first;
621-
const uint64_t BBOffset = Layout.getSymbolOffset(*getLabel());
622-
for (const auto &LocSymKV : *LocSyms) {
623-
const uint32_t InputFunctionOffset = LocSymKV.first;
624-
const uint32_t OutputOffset = static_cast<uint32_t>(
625-
Layout.getSymbolOffset(*LocSymKV.second) - BBOffset);
626-
getOffsetTranslationTable().emplace_back(
627-
std::make_pair(OutputOffset, InputFunctionOffset));
628-
629-
// Update reverse (relative to BAT) address lookup table for function.
630-
if (getFunction()->requiresAddressTranslation()) {
631-
getFunction()->getInputOffsetToAddressMap().emplace(
632-
std::make_pair(InputFunctionOffset, OutputOffset + BBAddress));
633-
}
634-
}
635-
LocSyms.reset(nullptr);
636-
}
637-
638616
} // namespace bolt
639617
} // namespace llvm

bolt/lib/Core/BinaryEmitter.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,10 @@ void BinaryEmitter::emitAll(StringRef OrgSecPrefix) {
214214
}
215215

216216
emitDataSections(OrgSecPrefix);
217+
218+
// TODO Enable for Mach-O once BinaryContext::getDataSection supports it.
219+
if (BC.isELF())
220+
AddressMap::emit(Streamer, BC);
217221
}
218222

219223
void BinaryEmitter::emitFunctions() {

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2855,6 +2855,14 @@ bool BinaryFunction::requiresAddressTranslation() const {
28552855
return opts::EnableBAT || hasSDTMarker() || hasPseudoProbe();
28562856
}
28572857

2858+
bool BinaryFunction::requiresAddressMap() const {
2859+
if (isInjected())
2860+
return false;
2861+
2862+
return opts::UpdateDebugSections || isMultiEntry() ||
2863+
requiresAddressTranslation();
2864+
}
2865+
28582866
uint64_t BinaryFunction::getInstructionCount() const {
28592867
uint64_t Count = 0;
28602868
for (const BinaryBasicBlock &BB : blocks())
@@ -4120,15 +4128,13 @@ void BinaryFunction::updateOutputValues(const MCAsmLayout &Layout) {
41204128
assert(FragmentBaseAddress == getOutputAddress());
41214129
}
41224130

4123-
const uint64_t BBOffset = Layout.getSymbolOffset(*BB->getLabel());
4124-
const uint64_t BBAddress = FragmentBaseAddress + BBOffset;
4131+
const uint64_t BBAddress =
4132+
*BC.getIOAddressMap().lookup(BB->getInputOffset() + getAddress());
41254133
BB->setOutputStartAddress(BBAddress);
41264134

41274135
if (PrevBB)
41284136
PrevBB->setOutputEndAddress(BBAddress);
41294137
PrevBB = BB;
4130-
4131-
BB->updateOutputValues(Layout);
41324138
}
41334139

41344140
PrevBB->setOutputEndAddress(PrevBB->isSplit()
@@ -4181,9 +4187,8 @@ uint64_t BinaryFunction::translateInputToOutputAddress(uint64_t Address) const {
41814187

41824188
// Check if the address is associated with an instruction that is tracked
41834189
// by address translation.
4184-
auto KV = InputOffsetToAddressMap.find(Address - getAddress());
4185-
if (KV != InputOffsetToAddressMap.end())
4186-
return KV->second;
4190+
if (auto OutputAddress = BC.getIOAddressMap().lookup(Address))
4191+
return *OutputAddress;
41874192

41884193
// FIXME: #18950828 - we rely on relative offsets inside basic blocks to stay
41894194
// intact. Instead we can use pseudo instructions and/or annotations.

bolt/lib/Core/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ set(LLVM_LINK_COMPONENTS
1111
)
1212

1313
add_llvm_library(LLVMBOLTCore
14+
AddressMap.cpp
1415
BinaryBasicBlock.cpp
1516
BinaryContext.cpp
1617
BinaryData.cpp

bolt/lib/Profile/BoltAddressTranslation.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,14 @@ void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
4646
// allowing it to overwrite the previously inserted key in the map.
4747
Map[BBOutputOffset] = BBInputOffset;
4848

49-
for (const auto &IOPair : BB.getOffsetTranslationTable()) {
50-
const uint64_t OutputOffset = IOPair.first + BBOutputOffset;
51-
const uint32_t InputOffset = IOPair.second;
49+
const auto &IOAddressMap =
50+
BB.getFunction()->getBinaryContext().getIOAddressMap();
51+
52+
for (const auto &[InputOffset, Sym] : BB.getLocSyms()) {
53+
const auto InputAddress = BB.getFunction()->getAddress() + InputOffset;
54+
const auto OutputAddress = IOAddressMap.lookup(InputAddress);
55+
assert(OutputAddress && "Unknown instruction address");
56+
const auto OutputOffset = *OutputAddress - FuncAddress;
5257

5358
// Is this the first instruction in the BB? No need to duplicate the entry.
5459
if (OutputOffset == BBOutputOffset)

bolt/lib/Rewrite/PseudoProbeRewriter.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -183,9 +183,7 @@ void PseudoProbeRewriter::updatePseudoProbes() {
183183
// A call probe may be duplicated due to ICP
184184
// Go through the output addresses in the IO address map to collect all related
185185
// probes
186-
const InputOffsetToAddressMapTy &Offset2Addr =
187-
F->getInputOffsetToAddressMap();
188-
auto CallOutputAddresses = Offset2Addr.equal_range(Offset);
186+
auto CallOutputAddresses = BC.getIOAddressMap().lookupAll(AP.first);
189187
auto CallOutputAddress = CallOutputAddresses.first;
190188
if (CallOutputAddress == CallOutputAddresses.second) {
191189
Probe->setAddress(INT64_MAX);

0 commit comments

Comments
 (0)