Skip to content

Commit dcba077

Browse files
authored
[BOLT] Embed cold mapping info into function entry in BAT (#76903)
Reduces BAT section size: - large binary: to 12283500 bytes (0.32x original size), - medium binary: to 1616020 bytes (0.27x original size), - small binary: to 404 bytes (0.28x original size). Test Plan: Updated bolt/test/X86/bolt-address-translation.test
1 parent 93efa2b commit dcba077

File tree

4 files changed

+74
-58
lines changed

4 files changed

+74
-58
lines changed

bolt/docs/BAT.md

Lines changed: 11 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@ contain the metadata for input functions.
2121
# Internals
2222
## Section contents
2323
The section is organized as follows:
24-
- Functions table
24+
- Hot functions table
2525
- Address translation tables
26-
- Fragment linkage table
26+
- Cold functions table
2727

2828
## Construction and parsing
2929
BAT section is created from `BoltAddressTranslation` class which captures
@@ -43,21 +43,25 @@ and [BoltAddressTranslation.cpp](/bolt/lib/Profile/BoltAddressTranslation.cpp).
4343
### Layout
4444
The general layout is as follows:
4545
```
46-
Functions table header
46+
Hot functions table header
4747
|------------------|
4848
| Function entry |
4949
| |--------------| |
5050
| | OutOff InOff | |
5151
| |--------------| |
5252
~~~~~~~~~~~~~~~~~~~~
5353
54-
Fragment linkage header
54+
Cold functions table header
5555
|------------------|
56-
| ColdAddr HotAddr |
56+
| Function entry |
57+
| |--------------| |
58+
| | OutOff InOff | |
59+
| |--------------| |
5760
~~~~~~~~~~~~~~~~~~~~
5861
```
5962

6063
### Functions table
64+
Hot and cold functions tables share the same encoding, except for the differences marked below.
6165
Header:
6266
| Entry | Encoding | Description |
6367
| ------ | ----- | ----------- |
@@ -66,9 +70,11 @@ Header:
6670
The header is followed by Functions table with `NumFuncs` entries.
6771
Output binary addresses are delta encoded, meaning that only the difference with
6872
the previous output address is stored. Addresses implicitly start at zero.
73+
Hot indices are delta encoded, implicitly starting at zero.
6974
| Entry | Encoding | Description |
7075
| ------ | ------| ----------- |
7176
| `Address` | Delta, ULEB128 | Function address in the output binary |
77+
| `HotIndex` | Delta, ULEB128 | Cold functions only: index of corresponding hot function in hot functions table |
7278
| `NumEntries` | ULEB128 | Number of address translation entries for a function |
7379

7480
Function header is followed by `NumEntries` pairs of offsets for current
@@ -85,17 +91,3 @@ entry is encoded. Offsets implicitly start at zero.
8591
`BRANCHENTRY` bit denotes whether a given offset pair is a control flow source
8692
(branch or call instruction). If not set, it signifies a control flow target
8793
(basic block offset).
88-
89-
### Fragment linkage table
90-
Following Functions table, fragment linkage table is encoded to link split
91-
cold fragments with main (hot) fragment.
92-
Header:
93-
| Entry | Encoding | Description |
94-
| ------ | ------------ | ----------- |
95-
| `NumColdEntries` | ULEB128 | Number of split functions in the functions table |
96-
97-
`NumColdEntries` pairs of addresses follow:
98-
| Entry | Encoding | Description |
99-
| ------ | ------| ----------- |
100-
| `ColdAddress` | ULEB128 | Cold fragment address in output binary |
101-
| `HotAddress` | ULEB128 | Hot fragment address in output binary |

bolt/include/bolt/Profile/BoltAddressTranslation.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
#include "llvm/ADT/SmallVector.h"
1313
#include "llvm/ADT/StringRef.h"
14+
#include "llvm/Support/DataExtractor.h"
1415
#include <cstdint>
1516
#include <map>
1617
#include <optional>
@@ -118,6 +119,16 @@ class BoltAddressTranslation {
118119
void writeEntriesForBB(MapTy &Map, const BinaryBasicBlock &BB,
119120
uint64_t FuncAddress);
120121

122+
/// Write the serialized address translation table for a function.
123+
template <bool Cold>
124+
void writeMaps(std::map<uint64_t, MapTy> &Maps, raw_ostream &OS);
125+
126+
/// Read the serialized address translation table for a function.
127+
/// On failure, the parse error is reported through \p Err.
128+
template <bool Cold>
129+
void parseMaps(std::vector<uint64_t> &HotFuncs, DataExtractor &DE,
130+
uint64_t &Offset, Error &Err);
131+
121132
std::map<uint64_t, MapTy> Maps;
122133

123134
/// Links outlined cold blocks to their original function

bolt/lib/Profile/BoltAddressTranslation.cpp

Lines changed: 51 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
#include "bolt/Profile/BoltAddressTranslation.h"
1010
#include "bolt/Core/BinaryFunction.h"
11-
#include "llvm/Support/DataExtractor.h"
1211
#include "llvm/Support/Errc.h"
1312
#include "llvm/Support/Error.h"
1413
#include "llvm/Support/LEB128.h"
@@ -103,18 +102,42 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
103102
}
104103
}
105104

106-
const uint32_t NumFuncs = Maps.size();
105+
writeMaps</*Cold=*/false>(Maps, OS);
106+
writeMaps</*Cold=*/true>(Maps, OS);
107+
108+
outs() << "BOLT-INFO: Wrote " << Maps.size() << " BAT maps\n";
109+
}
110+
111+
template <bool Cold>
112+
void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
113+
raw_ostream &OS) {
114+
const uint32_t NumFuncs =
115+
llvm::count_if(llvm::make_first_range(Maps), [&](const uint64_t Address) {
116+
return Cold == ColdPartSource.count(Address);
117+
});
107118
encodeULEB128(NumFuncs, OS);
108-
LLVM_DEBUG(dbgs() << "Writing " << NumFuncs << " functions for BAT.\n");
119+
LLVM_DEBUG(dbgs() << "Writing " << NumFuncs << (Cold ? " cold" : "")
120+
<< " functions for BAT.\n");
121+
size_t PrevIndex = 0;
122+
// Output addresses are delta-encoded
109123
uint64_t PrevAddress = 0;
110124
for (auto &MapEntry : Maps) {
111125
const uint64_t Address = MapEntry.first;
126+
// Only process cold fragments in cold mode, and vice versa.
127+
if (Cold != ColdPartSource.count(Address))
128+
continue;
112129
MapTy &Map = MapEntry.second;
113130
const uint32_t NumEntries = Map.size();
114131
LLVM_DEBUG(dbgs() << "Writing " << NumEntries << " entries for 0x"
115132
<< Twine::utohexstr(Address) << ".\n");
116133
encodeULEB128(Address - PrevAddress, OS);
117134
PrevAddress = Address;
135+
if (Cold) {
136+
size_t HotIndex =
137+
std::distance(ColdPartSource.begin(), ColdPartSource.find(Address));
138+
encodeULEB128(HotIndex - PrevIndex, OS);
139+
PrevIndex = HotIndex;
140+
}
118141
encodeULEB128(NumEntries, OS);
119142
uint64_t InOffset = 0, OutOffset = 0;
120143
// Output and Input addresses are delta-encoded
@@ -124,20 +147,6 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
124147
std::tie(OutOffset, InOffset) = KeyVal;
125148
}
126149
}
127-
const uint32_t NumColdEntries = ColdPartSource.size();
128-
LLVM_DEBUG(dbgs() << "Writing " << NumColdEntries
129-
<< " cold part mappings.\n");
130-
encodeULEB128(NumColdEntries, OS);
131-
for (std::pair<const uint64_t, uint64_t> &ColdEntry : ColdPartSource) {
132-
encodeULEB128(ColdEntry.first, OS);
133-
encodeULEB128(ColdEntry.second, OS);
134-
LLVM_DEBUG(dbgs() << " " << Twine::utohexstr(ColdEntry.first) << " -> "
135-
<< Twine::utohexstr(ColdEntry.second) << "\n");
136-
}
137-
138-
outs() << "BOLT-INFO: Wrote " << Maps.size() << " BAT maps\n";
139-
outs() << "BOLT-INFO: Wrote " << NumColdEntries
140-
<< " BAT cold-to-hot entries\n";
141150
}
142151

143152
std::error_code BoltAddressTranslation::parse(StringRef Buf) {
@@ -160,12 +169,31 @@ std::error_code BoltAddressTranslation::parse(StringRef Buf) {
160169
return make_error_code(llvm::errc::io_error);
161170

162171
Error Err(Error::success());
172+
std::vector<uint64_t> HotFuncs;
173+
parseMaps</*Cold=*/false>(HotFuncs, DE, Offset, Err);
174+
parseMaps</*Cold=*/true>(HotFuncs, DE, Offset, Err);
175+
outs() << "BOLT-INFO: Parsed " << Maps.size() << " BAT entries\n";
176+
return errorToErrorCode(std::move(Err));
177+
}
178+
179+
template <bool Cold>
180+
void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
181+
DataExtractor &DE, uint64_t &Offset,
182+
Error &Err) {
163183
const uint32_t NumFunctions = DE.getULEB128(&Offset, &Err);
164-
LLVM_DEBUG(dbgs() << "Parsing " << NumFunctions << " functions\n");
184+
LLVM_DEBUG(dbgs() << "Parsing " << NumFunctions << (Cold ? " cold" : "")
185+
<< " functions\n");
186+
size_t HotIndex = 0;
165187
uint64_t PrevAddress = 0;
166188
for (uint32_t I = 0; I < NumFunctions; ++I) {
167189
const uint64_t Address = PrevAddress + DE.getULEB128(&Offset, &Err);
168190
PrevAddress = Address;
191+
if (Cold) {
192+
HotIndex += DE.getULEB128(&Offset, &Err);
193+
ColdPartSource.emplace(Address, HotFuncs[HotIndex]);
194+
} else {
195+
HotFuncs.push_back(Address);
196+
}
169197
const uint32_t NumEntries = DE.getULEB128(&Offset, &Err);
170198
MapTy Map;
171199

@@ -178,28 +206,14 @@ std::error_code BoltAddressTranslation::parse(StringRef Buf) {
178206
OutputOffset += OutputDelta;
179207
InputOffset += InputDelta;
180208
Map.insert(std::pair<uint32_t, uint32_t>(OutputOffset, InputOffset));
181-
LLVM_DEBUG(dbgs() << Twine::utohexstr(OutputOffset) << " -> "
182-
<< Twine::utohexstr(InputOffset) << " (" << OutputDelta
183-
<< ", " << InputDelta << ")\n");
209+
LLVM_DEBUG(dbgs() << formatv("{0:x} -> {1:x} ({2}/{3}b -> {4}/{5}b)\n",
210+
OutputOffset, InputOffset, OutputDelta,
211+
encodeULEB128(OutputDelta, nulls()),
212+
InputDelta,
213+
encodeSLEB128(InputDelta, nulls())));
184214
}
185215
Maps.insert(std::pair<uint64_t, MapTy>(Address, Map));
186216
}
187-
188-
const uint32_t NumColdEntries = DE.getULEB128(&Offset, &Err);
189-
LLVM_DEBUG(dbgs() << "Parsing " << NumColdEntries << " cold part mappings\n");
190-
for (uint32_t I = 0; I < NumColdEntries; ++I) {
191-
const uint32_t ColdAddress = DE.getULEB128(&Offset, &Err);
192-
const uint32_t HotAddress = DE.getULEB128(&Offset, &Err);
193-
ColdPartSource.insert(
194-
std::pair<uint64_t, uint64_t>(ColdAddress, HotAddress));
195-
LLVM_DEBUG(dbgs() << Twine::utohexstr(ColdAddress) << " -> "
196-
<< Twine::utohexstr(HotAddress) << "\n");
197-
}
198-
outs() << "BOLT-INFO: Parsed " << Maps.size() << " BAT entries\n";
199-
outs() << "BOLT-INFO: Parsed " << NumColdEntries
200-
<< " BAT cold-to-hot entries\n";
201-
202-
return errorToErrorCode(std::move(Err));
203217
}
204218

205219
void BoltAddressTranslation::dump(raw_ostream &OS) {

bolt/test/X86/bolt-address-translation.test

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,7 @@
3636
#
3737
# CHECK: BOLT: 3 out of 7 functions were overwritten.
3838
# CHECK: BOLT-INFO: Wrote 6 BAT maps
39-
# CHECK: BOLT-INFO: Wrote 3 BAT cold-to-hot entries
40-
# CHECK: BOLT-INFO: BAT section size (bytes): 428
39+
# CHECK: BOLT-INFO: BAT section size (bytes): 404
4140
#
4241
# usqrt mappings (hot part). We match against any key (left side containing
4342
# the bolted binary offsets) because BOLT may change where it puts instructions

0 commit comments

Comments
 (0)