Skip to content

Commit d1d9545

Browse files
authored
[BOLT][BAT] Add entries for deleted basic blocks
Deleted basic blocks are required for correct mapping of branches modified by SCTC. Increases BAT size, bytes: - large binary: 8622496 -> 8703244. - small binary (X86/bolt-address-translation.test): 928 -> 940. Test Plan: updated bb-with-two-tail-calls.s Reviewers: ayermolo, dcci, maksfb, rafaelauler Reviewed By: rafaelauler Pull Request: #91906
1 parent 2918973 commit d1d9545

File tree

7 files changed

+39
-11
lines changed

7 files changed

+39
-11
lines changed

bolt/docs/BAT.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,9 +106,14 @@ equals output offset.
106106
`BRANCHENTRY` bit denotes whether a given offset pair is a control flow source
107107
(branch or call instruction). If not set, it signifies a control flow target
108108
(basic block offset).
109+
109110
`InputAddr` is omitted for equal offsets in input and output function. In this
110111
case, `BRANCHENTRY` bits are encoded separately in a `BranchEntries` bitvector.
111112

113+
Deleted basic blocks are emitted as having `OutputOffset` equal to the size of
114+
the function. They don't affect address translation and only participate in
115+
input basic block mapping.
116+
112117
### Secondary Entry Points table
113118
The table is emitted for hot fragments only. It contains `NumSecEntryPoints`
114119
offsets denoting secondary entry points, delta encoded, implicitly starting at zero.

bolt/include/bolt/Profile/BoltAddressTranslation.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ class BinaryFunction;
7070
class BoltAddressTranslation {
7171
public:
7272
// In-memory representation of the address translation table
73-
using MapTy = std::map<uint32_t, uint32_t>;
73+
using MapTy = std::multimap<uint32_t, uint32_t>;
7474

7575
// List of taken fall-throughs
7676
using FallthroughListTy = SmallVector<std::pair<uint64_t, uint64_t>, 16>;
@@ -218,6 +218,7 @@ class BoltAddressTranslation {
218218
auto begin() const { return Map.begin(); }
219219
auto end() const { return Map.end(); }
220220
auto upper_bound(uint32_t Offset) const { return Map.upper_bound(Offset); }
221+
auto size() const { return Map.size(); }
221222
};
222223

223224
/// Map function output address to its hash and basic blocks hash map.

bolt/lib/Profile/BoltAddressTranslation.cpp

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ void BoltAddressTranslation::writeEntriesForBB(
5454
// and this deleted block will both share the same output address (the same
5555
// key), and we need to map back. We choose here to privilege the successor by
5656
// allowing it to overwrite the previously inserted key in the map.
57-
Map[BBOutputOffset] = BBInputOffset << 1;
57+
Map.emplace(BBOutputOffset, BBInputOffset << 1);
5858

5959
const auto &IOAddressMap =
6060
BB.getFunction()->getBinaryContext().getIOAddressMap();
@@ -71,8 +71,7 @@ void BoltAddressTranslation::writeEntriesForBB(
7171

7272
LLVM_DEBUG(dbgs() << " Key: " << Twine::utohexstr(OutputOffset) << " Val: "
7373
<< Twine::utohexstr(InputOffset) << " (branch)\n");
74-
Map.insert(std::pair<uint32_t, uint32_t>(OutputOffset,
75-
(InputOffset << 1) | BRANCHENTRY));
74+
Map.emplace(OutputOffset, (InputOffset << 1) | BRANCHENTRY);
7675
}
7776
}
7877

@@ -107,6 +106,19 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
107106
for (const BinaryBasicBlock *const BB :
108107
Function.getLayout().getMainFragment())
109108
writeEntriesForBB(Map, *BB, InputAddress, OutputAddress);
109+
// Add entries for deleted blocks. They are still required for correct BB
110+
// mapping of branches modified by SCTC. By convention, they would have the
111+
// end of the function as output address.
112+
const BBHashMapTy &BBHashMap = getBBHashMap(InputAddress);
113+
if (BBHashMap.size() != Function.size()) {
114+
const uint64_t EndOffset = Function.getOutputSize();
115+
std::unordered_set<uint32_t> MappedInputOffsets;
116+
for (const BinaryBasicBlock &BB : Function)
117+
MappedInputOffsets.emplace(BB.getInputOffset());
118+
for (const auto &[InputOffset, _] : BBHashMap)
119+
if (!llvm::is_contained(MappedInputOffsets, InputOffset))
120+
Map.emplace(EndOffset, InputOffset << 1);
121+
}
110122
Maps.emplace(Function.getOutputAddress(), std::move(Map));
111123
ReverseMap.emplace(OutputAddress, InputAddress);
112124

bolt/lib/Profile/DataAggregator.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2349,11 +2349,11 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
23492349
BAT->getBBHashMap(FuncAddress);
23502350
YamlBF.Blocks.resize(YamlBF.NumBasicBlocks);
23512351

2352-
for (auto &&[Idx, YamlBB] : llvm::enumerate(YamlBF.Blocks))
2353-
YamlBB.Index = Idx;
2354-
2355-
for (auto BI = BlockMap.begin(), BE = BlockMap.end(); BI != BE; ++BI)
2356-
YamlBF.Blocks[BI->second.Index].Hash = BI->second.Hash;
2352+
for (auto &&[Entry, YamlBB] : llvm::zip(BlockMap, YamlBF.Blocks)) {
2353+
const auto &Block = Entry.second;
2354+
YamlBB.Hash = Block.Hash;
2355+
YamlBB.Index = Block.Index;
2356+
}
23572357

23582358
// Lookup containing basic block offset and index
23592359
auto getBlock = [&BlockMap](uint32_t Offset) {

bolt/test/X86/bb-with-two-tail-calls.s

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,21 @@
88
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
99
# RUN: llvm-bolt %t.exe -o %t.out --data %t.fdata --lite=0 --dyno-stats \
1010
# RUN: --print-sctc --print-only=_start -enable-bat 2>&1 | FileCheck %s
11+
# RUN: llvm-objdump --syms %t.out > %t.log
12+
# RUN: llvm-bat-dump %t.out --dump-all >> %t.log
13+
# RUN: FileCheck %s --input-file %t.log --check-prefix=CHECK-BAT
14+
1115
# CHECK-NOT: Assertion `BranchInfo.size() == 2 && "could only be called for blocks with 2 successors"' failed.
1216
# Two tail calls in the same basic block after SCTC:
1317
# CHECK: {{.*}}: ja {{.*}} # TAILCALL # Offset: 7 # CTCTakenCount: 4
1418
# CHECK-NEXT: {{.*}}: jmp {{.*}} # TAILCALL # Offset: 13
1519

20+
# Confirm that a deleted basic block is emitted at function end offset (0xe)
21+
# CHECK-BAT: [[#%x,ADDR:]] g .text [[#%x,SIZE:]] _start
22+
# CHECK-BAT: Function Address: 0x[[#%x,ADDR]]
23+
# CHECK-BAT: 0x[[#%x,SIZE]]
24+
# CHECK-BAT: NumBlocks: 5
25+
1626
.globl _start
1727
_start:
1828
je x

bolt/test/X86/bolt-address-translation-yaml.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ RUN: | FileCheck --check-prefix CHECK-BOLT-YAML %s
4141

4242
WRITE-BAT-CHECK: BOLT-INFO: Wrote 5 BAT maps
4343
WRITE-BAT-CHECK: BOLT-INFO: Wrote 4 function and 22 basic block hashes
44-
WRITE-BAT-CHECK: BOLT-INFO: BAT section size (bytes): 384
44+
WRITE-BAT-CHECK: BOLT-INFO: BAT section size (bytes): 404
4545

4646
READ-BAT-CHECK-NOT: BOLT-ERROR: unable to save profile in YAML format for input file processed by BOLT
4747
READ-BAT-CHECK: BOLT-INFO: Parsed 5 BAT entries

bolt/test/X86/bolt-address-translation.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
# CHECK: BOLT: 3 out of 7 functions were overwritten.
3838
# CHECK: BOLT-INFO: Wrote 6 BAT maps
3939
# CHECK: BOLT-INFO: Wrote 3 function and 58 basic block hashes
40-
# CHECK: BOLT-INFO: BAT section size (bytes): 928
40+
# CHECK: BOLT-INFO: BAT section size (bytes): 940
4141
#
4242
# usqrt mappings (hot part). We match against any key (left side containing
4343
# the bolted binary offsets) because BOLT may change where it puts instructions

0 commit comments

Comments
 (0)