Skip to content

Commit 565f40d

Browse files
committed
[BOLT] Encode BAT using ULEB128 (#76899)
Reduces BAT section size (bytes):
- large binary: 38676872 -> 23262524 (0.60x)
- medium binary (trunk clang): 5938004 -> 3213504 (0.54x)
- small binary (X86/bolt-address-translation.test): 1436 -> 680 (0.47x)

Test Plan: Updated bolt/test/X86/bolt-address-translation.test
1 parent a7cf0a1 commit 565f40d

File tree

4 files changed

+40
-50
lines changed

4 files changed

+40
-50
lines changed

bolt/docs/BAT.md

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -59,24 +59,24 @@ Fragment linkage header
5959

6060
### Functions table
6161
Header:
62-
| Entry | Width | Description |
62+
| Entry | Encoding | Description |
6363
| ------ | ----- | ----------- |
64-
| `NumFuncs` | 4B | Number of functions in the functions table |
64+
| `NumFuncs` | ULEB128 | Number of functions in the functions table |
6565

6666
The header is followed by Functions table with `NumFuncs` entries.
67-
| Entry | Width | Description |
67+
| Entry | Encoding | Description |
6868
| ------ | ------| ----------- |
69-
| `Address` | 8B | Function address in the output binary |
70-
| `NumEntries` | 4B | Number of address translation entries for a function |
69+
| `Address` | ULEB128 | Function address in the output binary |
70+
| `NumEntries` | ULEB128 | Number of address translation entries for a function |
7171

7272
Function header is followed by `NumEntries` pairs of offsets for current
7373
function.
7474

7575
### Address translation table
76-
| Entry | Width | Description |
76+
| Entry | Encoding | Description |
7777
| ------ | ------| ----------- |
78-
| `OutputAddr` | 4B | Function offset in output binary |
79-
| `InputAddr` | 4B | Function offset in input binary with `BRANCHENTRY` top bit |
78+
| `OutputAddr` | ULEB128 | Function offset in output binary |
79+
| `InputAddr` | ULEB128 | Function offset in input binary, shifted left by one bit, with the `BRANCHENTRY` flag in the least significant bit |
8080

8181
`BRANCHENTRY` bit denotes whether a given offset pair is a control flow source
8282
(branch or call instruction). If not set, it signifies a control flow target
@@ -86,12 +86,12 @@ function.
8686
Following Functions table, fragment linkage table is encoded to link split
8787
cold fragments with main (hot) fragment.
8888
Header:
89-
| Entry | Width | Description |
89+
| Entry | Encoding | Description |
9090
| ------ | ------------ | ----------- |
91-
| `NumColdEntries` | 4B | Number of split functions in the functions table |
91+
| `NumColdEntries` | ULEB128 | Number of split functions in the functions table |
9292

9393
`NumColdEntries` pairs of addresses follow:
94-
| Entry | Width | Description |
94+
| Entry | Encoding | Description |
9595
| ------ | ------| ----------- |
96-
| `ColdAddress` | 8B | Cold fragment address in output binary |
97-
| `HotAddress` | 8B | Hot fragment address in output binary |
96+
| `ColdAddress` | ULEB128 | Cold fragment address in output binary |
97+
| `HotAddress` | ULEB128 | Hot fragment address in output binary |

bolt/include/bolt/Profile/BoltAddressTranslation.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ class BoltAddressTranslation {
125125

126126
/// Identifies the address of a control-flow changing instruction in a
127127
/// translation map entry
128-
const static uint32_t BRANCHENTRY = 0x80000000;
128+
const static uint32_t BRANCHENTRY = 0x1;
129129
};
130130
} // namespace bolt
131131

bolt/lib/Profile/BoltAddressTranslation.cpp

Lines changed: 25 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
#include "bolt/Core/BinaryFunction.h"
1111
#include "llvm/Support/DataExtractor.h"
1212
#include "llvm/Support/Errc.h"
13+
#include "llvm/Support/Error.h"
14+
#include "llvm/Support/LEB128.h"
1315

1416
#define DEBUG_TYPE "bolt-bat"
1517

@@ -44,7 +46,7 @@ void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
4446
// and this deleted block will both share the same output address (the same
4547
// key), and we need to map back. We choose here to privilege the successor by
4648
// allowing it to overwrite the previously inserted key in the map.
47-
Map[BBOutputOffset] = BBInputOffset;
49+
Map[BBOutputOffset] = BBInputOffset << 1;
4850

4951
const auto &IOAddressMap =
5052
BB.getFunction()->getBinaryContext().getIOAddressMap();
@@ -61,8 +63,8 @@ void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
6163

6264
LLVM_DEBUG(dbgs() << " Key: " << Twine::utohexstr(OutputOffset) << " Val: "
6365
<< Twine::utohexstr(InputOffset) << " (branch)\n");
64-
Map.insert(
65-
std::pair<uint32_t, uint32_t>(OutputOffset, InputOffset | BRANCHENTRY));
66+
Map.insert(std::pair<uint32_t, uint32_t>(OutputOffset,
67+
(InputOffset << 1) | BRANCHENTRY));
6668
}
6769
}
6870

@@ -102,28 +104,28 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
102104
}
103105

104106
const uint32_t NumFuncs = Maps.size();
105-
OS.write(reinterpret_cast<const char *>(&NumFuncs), 4);
107+
encodeULEB128(NumFuncs, OS);
106108
LLVM_DEBUG(dbgs() << "Writing " << NumFuncs << " functions for BAT.\n");
107109
for (auto &MapEntry : Maps) {
108110
const uint64_t Address = MapEntry.first;
109111
MapTy &Map = MapEntry.second;
110112
const uint32_t NumEntries = Map.size();
111113
LLVM_DEBUG(dbgs() << "Writing " << NumEntries << " entries for 0x"
112114
<< Twine::utohexstr(Address) << ".\n");
113-
OS.write(reinterpret_cast<const char *>(&Address), 8);
114-
OS.write(reinterpret_cast<const char *>(&NumEntries), 4);
115+
encodeULEB128(Address, OS);
116+
encodeULEB128(NumEntries, OS);
115117
for (std::pair<const uint32_t, uint32_t> &KeyVal : Map) {
116-
OS.write(reinterpret_cast<const char *>(&KeyVal.first), 4);
117-
OS.write(reinterpret_cast<const char *>(&KeyVal.second), 4);
118+
encodeULEB128(KeyVal.first, OS);
119+
encodeULEB128(KeyVal.second, OS);
118120
}
119121
}
120122
const uint32_t NumColdEntries = ColdPartSource.size();
121123
LLVM_DEBUG(dbgs() << "Writing " << NumColdEntries
122124
<< " cold part mappings.\n");
123-
OS.write(reinterpret_cast<const char *>(&NumColdEntries), 4);
125+
encodeULEB128(NumColdEntries, OS);
124126
for (std::pair<const uint64_t, uint64_t> &ColdEntry : ColdPartSource) {
125-
OS.write(reinterpret_cast<const char *>(&ColdEntry.first), 8);
126-
OS.write(reinterpret_cast<const char *>(&ColdEntry.second), 8);
127+
encodeULEB128(ColdEntry.first, OS);
128+
encodeULEB128(ColdEntry.second, OS);
127129
LLVM_DEBUG(dbgs() << " " << Twine::utohexstr(ColdEntry.first) << " -> "
128130
<< Twine::utohexstr(ColdEntry.second) << "\n");
129131
}
@@ -152,43 +154,31 @@ std::error_code BoltAddressTranslation::parse(StringRef Buf) {
152154
if (Name.substr(0, 4) != "BOLT")
153155
return make_error_code(llvm::errc::io_error);
154156

155-
if (Buf.size() - Offset < 4)
156-
return make_error_code(llvm::errc::io_error);
157-
158-
const uint32_t NumFunctions = DE.getU32(&Offset);
157+
Error Err(Error::success());
158+
const uint32_t NumFunctions = DE.getULEB128(&Offset, &Err);
159159
LLVM_DEBUG(dbgs() << "Parsing " << NumFunctions << " functions\n");
160160
for (uint32_t I = 0; I < NumFunctions; ++I) {
161-
if (Buf.size() - Offset < 12)
162-
return make_error_code(llvm::errc::io_error);
163-
164-
const uint64_t Address = DE.getU64(&Offset);
165-
const uint32_t NumEntries = DE.getU32(&Offset);
161+
const uint64_t Address = DE.getULEB128(&Offset, &Err);
162+
const uint32_t NumEntries = DE.getULEB128(&Offset, &Err);
166163
MapTy Map;
167164

168165
LLVM_DEBUG(dbgs() << "Parsing " << NumEntries << " entries for 0x"
169166
<< Twine::utohexstr(Address) << "\n");
170-
if (Buf.size() - Offset < 8 * NumEntries)
171-
return make_error_code(llvm::errc::io_error);
172167
for (uint32_t J = 0; J < NumEntries; ++J) {
173-
const uint32_t OutputAddr = DE.getU32(&Offset);
174-
const uint32_t InputAddr = DE.getU32(&Offset);
168+
const uint32_t OutputAddr = DE.getULEB128(&Offset, &Err);
169+
const uint32_t InputAddr = DE.getULEB128(&Offset, &Err);
175170
Map.insert(std::pair<uint32_t, uint32_t>(OutputAddr, InputAddr));
176171
LLVM_DEBUG(dbgs() << Twine::utohexstr(OutputAddr) << " -> "
177172
<< Twine::utohexstr(InputAddr) << "\n");
178173
}
179174
Maps.insert(std::pair<uint64_t, MapTy>(Address, Map));
180175
}
181176

182-
if (Buf.size() - Offset < 4)
183-
return make_error_code(llvm::errc::io_error);
184-
185-
const uint32_t NumColdEntries = DE.getU32(&Offset);
177+
const uint32_t NumColdEntries = DE.getULEB128(&Offset, &Err);
186178
LLVM_DEBUG(dbgs() << "Parsing " << NumColdEntries << " cold part mappings\n");
187179
for (uint32_t I = 0; I < NumColdEntries; ++I) {
188-
if (Buf.size() - Offset < 16)
189-
return make_error_code(llvm::errc::io_error);
190-
const uint32_t ColdAddress = DE.getU64(&Offset);
191-
const uint32_t HotAddress = DE.getU64(&Offset);
180+
const uint32_t ColdAddress = DE.getULEB128(&Offset, &Err);
181+
const uint32_t HotAddress = DE.getULEB128(&Offset, &Err);
192182
ColdPartSource.insert(
193183
std::pair<uint64_t, uint64_t>(ColdAddress, HotAddress));
194184
LLVM_DEBUG(dbgs() << Twine::utohexstr(ColdAddress) << " -> "
@@ -198,7 +188,7 @@ std::error_code BoltAddressTranslation::parse(StringRef Buf) {
198188
outs() << "BOLT-INFO: Parsed " << NumColdEntries
199189
<< " BAT cold-to-hot entries\n";
200190

201-
return std::error_code();
191+
return errorToErrorCode(std::move(Err));
202192
}
203193

204194
void BoltAddressTranslation::dump(raw_ostream &OS) {
@@ -209,7 +199,7 @@ void BoltAddressTranslation::dump(raw_ostream &OS) {
209199
OS << "BB mappings:\n";
210200
for (const auto &Entry : MapEntry.second) {
211201
const bool IsBranch = Entry.second & BRANCHENTRY;
212-
const uint32_t Val = Entry.second & ~BRANCHENTRY;
202+
const uint32_t Val = Entry.second >> 1; // dropping BRANCHENTRY bit
213203
OS << "0x" << Twine::utohexstr(Entry.first) << " -> "
214204
<< "0x" << Twine::utohexstr(Val);
215205
if (IsBranch)
@@ -244,7 +234,7 @@ uint64_t BoltAddressTranslation::translate(uint64_t FuncAddress,
244234

245235
--KeyVal;
246236

247-
const uint32_t Val = KeyVal->second & ~BRANCHENTRY;
237+
const uint32_t Val = KeyVal->second >> 1; // dropping BRANCHENTRY bit
248238
// Branch source addresses are translated to the first instruction of the
249239
// source BB to avoid accounting for modifications BOLT may have made in the
250240
// BB regarding deletion/addition of instructions.

bolt/test/X86/bolt-address-translation.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
# CHECK: BOLT: 3 out of 7 functions were overwritten.
3838
# CHECK: BOLT-INFO: Wrote 6 BAT maps
3939
# CHECK: BOLT-INFO: Wrote 3 BAT cold-to-hot entries
40-
# CHECK: BOLT-INFO: BAT section size (bytes): 1436
40+
# CHECK: BOLT-INFO: BAT section size (bytes): 680
4141
#
4242
# usqrt mappings (hot part). We match against any key (left side containing
4343
# the bolted binary offsets) because BOLT may change where it puts instructions

0 commit comments

Comments
 (0)