Skip to content

Commit df7d2b2

Browse files
authored
[BOLT] Deduplicate equal offsets in BAT (#76905)
Encode BRANCHENTRY bits as bitmask for deduplicated entries. Reduces BAT section size: - large binary: to 11834216 bytes (0.31x original), - medium binary: to 1565584 bytes (0.26x original), - small binary: to 336 bytes (0.23x original). Test Plan: Updated bolt/test/X86/bolt-address-translation.test
1 parent 5bd87e6 commit df7d2b2

File tree

4 files changed

+103
-11
lines changed

4 files changed

+103
-11
lines changed

bolt/docs/BAT.md

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ Cold functions table header
6161
```
6262

6363
### Functions table
64-
Hot and cold functions tables share the encoding except difference marked below.
64+
Hot and cold functions tables share the encoding except differences marked below.
6565
Header:
6666
| Entry | Encoding | Description |
6767
| ------ | ----- | ----------- |
@@ -80,18 +80,23 @@ Hot indices are delta encoded, implicitly starting at zero.
8080
| `Address` | Continuous, Delta, ULEB128 | Function address in the output binary |
8181
| `HotIndex` | Delta, ULEB128 | Cold functions only: index of corresponding hot function in hot functions table |
8282
| `NumEntries` | ULEB128 | Number of address translation entries for a function |
83+
| `EqualElems` | ULEB128 | Hot functions only: number of equal offsets in the beginning of a function |
84+
| `BranchEntries` | Bitmask, `alignTo(EqualElems, 8)` bits | Hot functions only: if `EqualElems` is non-zero, bitmask denoting entries with `BRANCHENTRY` bit |
8385

84-
Function header is followed by `NumEntries` pairs of offsets for current
85-
function.
86+
Function header is followed by `EqualElems` offsets (hot functions only) and
87+
`NumEntries-EqualElems` (`NumEntries` for cold functions) pairs of offsets for
88+
current function.
8689

8790
### Address translation table
8891
Delta encoding means that only the difference with the previous corresponding
8992
entry is encoded. Input offsets implicitly start at zero.
9093
| Entry | Encoding | Description |
9194
| ------ | ------| ----------- |
9295
| `OutputOffset` | Continuous, Delta, ULEB128 | Function offset in output binary |
93-
| `InputOffset` | Delta, SLEB128 | Function offset in input binary with `BRANCHENTRY` LSB bit |
96+
| `InputOffset` | Optional, Delta, SLEB128 | Function offset in input binary with `BRANCHENTRY` LSB bit |
9497

9598
`BRANCHENTRY` bit denotes whether a given offset pair is a control flow source
9699
(branch or call instruction). If not set, it signifies a control flow target
97100
(basic block offset).
101+
`InputOffset` is omitted for equal offsets in input and output function. In this
102+
case, `BRANCHENTRY` bits are encoded separately in a `BranchEntries` bitmask.

bolt/include/bolt/Profile/BoltAddressTranslation.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,14 @@ class BoltAddressTranslation {
130130
void parseMaps(std::vector<uint64_t> &HotFuncs, uint64_t &PrevAddress,
131131
DataExtractor &DE, uint64_t &Offset, Error &Err);
132132

133+
/// Returns the bitmask with set bits corresponding to indices of BRANCHENTRY
134+
/// entries in function address translation map.
135+
APInt calculateBranchEntriesBitMask(MapTy &Map, size_t EqualElems);
136+
137+
/// Calculate the number of equal offsets (output = input) in the beginning
138+
/// of the function.
139+
size_t getNumEqualOffsets(const MapTy &Map) const;
140+
133141
std::map<uint64_t, MapTy> Maps;
134142

135143
/// Links outlined cold blocks to their original function

bolt/lib/Profile/BoltAddressTranslation.cpp

Lines changed: 85 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#include "bolt/Profile/BoltAddressTranslation.h"
1010
#include "bolt/Core/BinaryFunction.h"
11+
#include "llvm/ADT/APInt.h"
1112
#include "llvm/Support/Errc.h"
1213
#include "llvm/Support/Error.h"
1314
#include "llvm/Support/LEB128.h"
@@ -110,6 +111,34 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
110111
outs() << "BOLT-INFO: Wrote " << Maps.size() << " BAT maps\n";
111112
}
112113

114+
/// Build the BRANCHENTRY bitmask for the leading \p EqualElems entries of
/// \p Map.
///
/// Bit I of the result is set when the I-th entry (in map iteration order) has
/// the BRANCHENTRY flag set in its mapped value. The mask width is
/// \p EqualElems rounded up to a multiple of 8 so that it can be emitted as a
/// whole number of bytes; padding bits stay zero.
APInt BoltAddressTranslation::calculateBranchEntriesBitMask(MapTy &Map,
                                                            size_t EqualElems) {
  APInt BranchMask(alignTo(EqualElems, 8), 0);
  size_t EntryIdx = 0;
  // Visit at most EqualElems entries, stopping early if the map is shorter.
  for (auto It = Map.begin(), End = Map.end();
       It != End && EntryIdx != EqualElems; ++It, ++EntryIdx) {
    // The mapped value is the input offset carrying the BRANCHENTRY flag
    // (the key is the output offset).
    const uint32_t InputOffsetWithFlag = It->second;
    if (InputOffsetWithFlag & BRANCHENTRY)
      BranchMask.setBit(EntryIdx);
  }
  return BranchMask;
}
128+
129+
size_t BoltAddressTranslation::getNumEqualOffsets(const MapTy &Map) const {
130+
size_t EqualOffsets = 0;
131+
for (const std::pair<const uint32_t, uint32_t> &KeyVal : Map) {
132+
const uint32_t OutputOffset = KeyVal.first;
133+
const uint32_t InputOffset = KeyVal.second >> 1;
134+
if (OutputOffset == InputOffset)
135+
++EqualOffsets;
136+
else
137+
break;
138+
}
139+
return EqualOffsets;
140+
}
141+
113142
template <bool Cold>
114143
void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
115144
uint64_t &PrevAddress, raw_ostream &OS) {
@@ -139,14 +168,35 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
139168
PrevIndex = HotIndex;
140169
}
141170
encodeULEB128(NumEntries, OS);
171+
// For hot fragments only: encode the number of equal offsets
172+
// (output = input) in the beginning of the function. Only encode one offset
173+
// in these cases.
174+
const size_t EqualElems = Cold ? 0 : getNumEqualOffsets(Map);
175+
if (!Cold) {
176+
encodeULEB128(EqualElems, OS);
177+
if (EqualElems) {
178+
const size_t BranchEntriesBytes = alignTo(EqualElems, 8) / 8;
179+
APInt BranchEntries = calculateBranchEntriesBitMask(Map, EqualElems);
180+
OS.write(reinterpret_cast<const char *>(BranchEntries.getRawData()),
181+
BranchEntriesBytes);
182+
LLVM_DEBUG({
183+
dbgs() << "BranchEntries: ";
184+
SmallString<8> BitMaskStr;
185+
BranchEntries.toString(BitMaskStr, 2, false);
186+
dbgs() << BitMaskStr << '\n';
187+
});
188+
}
189+
}
190+
size_t Index = 0;
142191
uint64_t InOffset = 0;
143192
// Output and Input addresses are delta-encoded
144193
for (std::pair<const uint32_t, uint32_t> &KeyVal : Map) {
145194
const uint64_t OutputAddress = KeyVal.first + Address;
146195
encodeULEB128(OutputAddress - PrevAddress, OS);
147196
PrevAddress = OutputAddress;
148-
encodeSLEB128(KeyVal.second - InOffset, OS);
149-
InOffset = KeyVal.second;
197+
if (Index++ >= EqualElems)
198+
encodeSLEB128(KeyVal.second - InOffset, OS);
199+
InOffset = KeyVal.second; // Keeping InOffset as if BRANCHENTRY is encoded
150200
}
151201
}
152202
}
@@ -197,6 +247,29 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
197247
HotFuncs.push_back(Address);
198248
}
199249
const uint32_t NumEntries = DE.getULEB128(&Offset, &Err);
250+
// Equal offsets, hot fragments only.
251+
size_t EqualElems = 0;
252+
APInt BEBitMask;
253+
if (!Cold) {
254+
EqualElems = DE.getULEB128(&Offset, &Err);
255+
LLVM_DEBUG(dbgs() << formatv("Equal offsets: {0}, {1} bytes\n",
256+
EqualElems, getULEB128Size(EqualElems)));
257+
if (EqualElems) {
258+
const size_t BranchEntriesBytes = alignTo(EqualElems, 8) / 8;
259+
BEBitMask = APInt(alignTo(EqualElems, 8), 0);
260+
LoadIntFromMemory(
261+
BEBitMask,
262+
reinterpret_cast<const uint8_t *>(
263+
DE.getBytes(&Offset, BranchEntriesBytes, &Err).data()),
264+
BranchEntriesBytes);
265+
LLVM_DEBUG({
266+
dbgs() << "BEBitMask: ";
267+
SmallString<8> BitMaskStr;
268+
BEBitMask.toString(BitMaskStr, 2, false);
269+
dbgs() << BitMaskStr << ", " << BranchEntriesBytes << " bytes\n";
270+
});
271+
}
272+
}
200273
MapTy Map;
201274

202275
LLVM_DEBUG(dbgs() << "Parsing " << NumEntries << " entries for 0x"
@@ -207,14 +280,20 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
207280
const uint64_t OutputAddress = PrevAddress + OutputDelta;
208281
const uint64_t OutputOffset = OutputAddress - Address;
209282
PrevAddress = OutputAddress;
210-
const int64_t InputDelta = DE.getSLEB128(&Offset, &Err);
211-
InputOffset += InputDelta;
283+
int64_t InputDelta = 0;
284+
if (J < EqualElems) {
285+
InputOffset = (OutputOffset << 1) | BEBitMask[J];
286+
} else {
287+
InputDelta = DE.getSLEB128(&Offset, &Err);
288+
InputOffset += InputDelta;
289+
}
212290
Map.insert(std::pair<uint32_t, uint32_t>(OutputOffset, InputOffset));
213291
LLVM_DEBUG(
214292
dbgs() << formatv("{0:x} -> {1:x} ({2}/{3}b -> {4}/{5}b), {6:x}\n",
215293
OutputOffset, InputOffset, OutputDelta,
216-
encodeULEB128(OutputDelta, nulls()), InputDelta,
217-
encodeSLEB128(InputDelta, nulls()), OutputAddress));
294+
getULEB128Size(OutputDelta), InputDelta,
295+
(J < EqualElems) ? 0 : getSLEB128Size(InputDelta),
296+
OutputAddress));
218297
}
219298
Maps.insert(std::pair<uint64_t, MapTy>(Address, Map));
220299
}

bolt/test/X86/bolt-address-translation.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
#
3737
# CHECK: BOLT: 3 out of 7 functions were overwritten.
3838
# CHECK: BOLT-INFO: Wrote 6 BAT maps
39-
# CHECK: BOLT-INFO: BAT section size (bytes): 404
39+
# CHECK: BOLT-INFO: BAT section size (bytes): 336
4040
#
4141
# usqrt mappings (hot part). We match against any key (left side containing
4242
# the bolted binary offsets) because BOLT may change where it puts instructions

0 commit comments

Comments
 (0)