Skip to content

Commit 54e18b2

Browse files
committed
[lld-macho] Optimize rebase opcode generation
This commit reduces the size of the emitted rebase sections by generating the REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB and REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB opcodes. With this change, chromium_framework's rebase section is 40% smaller at 197 kB, down from the previous 320 kB. That is 6 kB smaller than what ld64 produces for the same input.

Performance figures from my M1 Mac mini:

x before
+ after
    N           Min           Max        Median           Avg        Stddev
x  10     4.2269349     4.3300061     4.2689675     4.2690016   0.031151669
+  10      4.219331     4.2914009     4.2398136     4.2448277   0.023817308
No difference proven at 95.0% confidence

Differential Revision: https://reviews.llvm.org/D130180
1 parent 08db089 commit 54e18b2

File tree

2 files changed

+161
-74
lines changed

2 files changed

+161
-74
lines changed

lld/MachO/SyntheticSections.cpp

Lines changed: 100 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -164,82 +164,129 @@ RebaseSection::RebaseSection()
164164
: LinkEditSection(segment_names::linkEdit, section_names::rebase) {}
165165

166166
namespace {
167-
struct Rebase {
168-
OutputSegment *segment = nullptr;
169-
uint64_t offset = 0;
170-
uint64_t consecutiveCount = 0;
167+
struct RebaseState {
168+
uint64_t sequenceLength;
169+
uint64_t skipLength;
171170
};
172171
} // namespace
173172

174-
// Rebase opcodes allow us to describe a contiguous sequence of rebase location
175-
// using a single DO_REBASE opcode. To take advantage of it, we delay emitting
176-
// `DO_REBASE` until we have reached the end of a contiguous sequence.
177-
static void encodeDoRebase(Rebase &rebase, raw_svector_ostream &os) {
178-
assert(rebase.consecutiveCount != 0);
179-
if (rebase.consecutiveCount <= REBASE_IMMEDIATE_MASK) {
180-
os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_IMM_TIMES |
181-
rebase.consecutiveCount);
173+
static void emitIncrement(uint64_t incr, raw_svector_ostream &os) {
174+
assert(incr != 0);
175+
176+
if ((incr >> target->p2WordSize) <= REBASE_IMMEDIATE_MASK &&
177+
(incr % target->wordSize) == 0) {
178+
os << static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_IMM_SCALED |
179+
(incr >> target->p2WordSize));
182180
} else {
183-
os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ULEB_TIMES);
184-
encodeULEB128(rebase.consecutiveCount, os);
181+
os << static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_ULEB);
182+
encodeULEB128(incr, os);
185183
}
186-
rebase.consecutiveCount = 0;
187184
}
188185

189-
static void encodeRebase(const OutputSection *osec, uint64_t outSecOff,
190-
Rebase &lastRebase, raw_svector_ostream &os) {
191-
OutputSegment *seg = osec->parent;
192-
uint64_t offset = osec->getSegmentOffset() + outSecOff;
193-
if (lastRebase.segment != seg || lastRebase.offset != offset) {
194-
if (lastRebase.consecutiveCount != 0)
195-
encodeDoRebase(lastRebase, os);
196-
197-
if (lastRebase.segment != seg) {
198-
os << static_cast<uint8_t>(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
199-
seg->index);
200-
encodeULEB128(offset, os);
201-
lastRebase.segment = seg;
202-
lastRebase.offset = offset;
186+
static void flushRebase(const RebaseState &state, raw_svector_ostream &os) {
187+
assert(state.sequenceLength > 0);
188+
189+
if (state.skipLength == target->wordSize) {
190+
if (state.sequenceLength <= REBASE_IMMEDIATE_MASK) {
191+
os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_IMM_TIMES |
192+
state.sequenceLength);
203193
} else {
204-
assert(lastRebase.offset != offset);
205-
uint64_t delta = offset - lastRebase.offset;
206-
// For unknown reasons, ld64 checks if the scaled offset is strictly less
207-
// than REBASE_IMMEDIATE_MASK instead of allowing equality. We match this
208-
// behavior as a precaution.
209-
if ((delta % target->wordSize == 0) &&
210-
(delta / target->wordSize < REBASE_IMMEDIATE_MASK)) {
211-
os << static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_IMM_SCALED |
212-
(delta / target->wordSize));
213-
} else {
214-
os << static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_ULEB);
215-
encodeULEB128(delta, os);
216-
}
217-
lastRebase.offset = offset;
194+
os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ULEB_TIMES);
195+
encodeULEB128(state.sequenceLength, os);
196+
}
197+
} else if (state.sequenceLength == 1) {
198+
os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB);
199+
encodeULEB128(state.skipLength - target->wordSize, os);
200+
} else {
201+
os << static_cast<uint8_t>(
202+
REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB);
203+
encodeULEB128(state.sequenceLength, os);
204+
encodeULEB128(state.skipLength - target->wordSize, os);
205+
}
206+
}
207+
208+
// Rebases are communicated to dyld using a bytecode, whose opcodes cause the
209+
// memory location at a specific address to be rebased and/or the address to be
210+
// incremented.
211+
//
212+
// Opcode REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB is the most generic
213+
// one, encoding a series of evenly spaced addresses. This algorithm works by
214+
// splitting up the sorted list of addresses into such chunks. If the locations
215+
// are consecutive or the sequence consists of a single location, flushRebase
216+
// will use a smaller, more specialized encoding.
217+
static void encodeRebases(const OutputSegment *seg,
218+
MutableArrayRef<Location> locations,
219+
raw_svector_ostream &os) {
220+
// dyld operates on segments. Translate section offsets into segment offsets.
221+
for (Location &loc : locations)
222+
loc.offset =
223+
loc.isec->parent->getSegmentOffset() + loc.isec->getOffset(loc.offset);
224+
// The algorithm assumes that locations are unique.
225+
Location *end =
226+
llvm::unique(locations, [](const Location &a, const Location &b) {
227+
return a.offset == b.offset;
228+
});
229+
size_t count = end - locations.begin();
230+
231+
os << static_cast<uint8_t>(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
232+
seg->index);
233+
assert(!locations.empty());
234+
uint64_t offset = locations[0].offset;
235+
encodeULEB128(offset, os);
236+
237+
RebaseState state{1, target->wordSize};
238+
239+
for (size_t i = 1; i < count; ++i) {
240+
offset = locations[i].offset;
241+
242+
uint64_t skip = offset - locations[i - 1].offset;
243+
assert(skip != 0 && "duplicate locations should have been weeded out");
244+
245+
if (skip == state.skipLength) {
246+
++state.sequenceLength;
247+
} else if (state.sequenceLength == 1) {
248+
++state.sequenceLength;
249+
state.skipLength = skip;
250+
} else if (skip < state.skipLength) {
251+
// The address is lower than what the rebase pointer would be if the last
252+
// location were part of the sequence. We start a new sequence from the
253+
// previous location.
254+
--state.sequenceLength;
255+
flushRebase(state, os);
256+
257+
state.sequenceLength = 2;
258+
state.skipLength = skip;
259+
} else {
260+
// The address is at some positive offset from the rebase pointer. We
261+
// start a new sequence which begins with the current location.
262+
flushRebase(state, os);
263+
emitIncrement(skip - state.skipLength, os);
264+
state.sequenceLength = 1;
265+
state.skipLength = target->wordSize;
218266
}
219267
}
220-
++lastRebase.consecutiveCount;
221-
// DO_REBASE causes dyld to both perform the binding and increment the offset
222-
lastRebase.offset += target->wordSize;
268+
flushRebase(state, os);
223269
}
224270

225271
void RebaseSection::finalizeContents() {
226272
if (locations.empty())
227273
return;
228274

229275
raw_svector_ostream os{contents};
230-
Rebase lastRebase;
231-
232276
os << static_cast<uint8_t>(REBASE_OPCODE_SET_TYPE_IMM | REBASE_TYPE_POINTER);
233277

234278
llvm::sort(locations, [](const Location &a, const Location &b) {
235279
return a.isec->getVA(a.offset) < b.isec->getVA(b.offset);
236280
});
237-
for (const Location &loc : locations)
238-
encodeRebase(loc.isec->parent, loc.isec->getOffset(loc.offset), lastRebase,
239-
os);
240-
if (lastRebase.consecutiveCount != 0)
241-
encodeDoRebase(lastRebase, os);
242281

282+
for (size_t i = 0, count = locations.size(); i < count;) {
283+
const OutputSegment *seg = locations[i].isec->parent->parent;
284+
size_t j = i + 1;
285+
while (j < count && locations[j].isec->parent->parent == seg)
286+
++j;
287+
encodeRebases(seg, {locations.data() + i, locations.data() + j}, os);
288+
i = j;
289+
}
243290
os << static_cast<uint8_t>(REBASE_OPCODE_DONE);
244291
}
245292

lld/test/MachO/rebase-opcodes.s

Lines changed: 61 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4,42 +4,82 @@
44
# RUN: %lld -dylib %t.o -o %t.dylib
55
# RUN: obj2yaml %t.dylib | FileCheck %s
66

7-
## Test that:
8-
## 1/ Consecutive rebases are encoded as REBASE_OPCODE_DO_REBASE_IMM_TIMES.
9-
## 2/ Gaps smaller than 15 words are encoded as REBASE_OPCODE_ADD_ADDR_IMM_SCALED.
10-
## 3/ Gaps larger than that become REBASE_OPCODE_ADD_ADDR_ULEB.
11-
## FIXME: The last rebase could be transformed into a REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB.
7+
.text
8+
.globl _foo
9+
_foo:
1210

11+
.data
1312
# CHECK: RebaseOpcodes:
1413
# CHECK-NEXT: Opcode: REBASE_OPCODE_SET_TYPE_IMM
1514
# CHECK-NEXT: Imm: 1
1615
# CHECK-NEXT: Opcode: REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
1716
# CHECK-NEXT: Imm: 1
1817
# CHECK-NEXT: ExtraData: [ 0x0 ]
19-
# CHECK-NEXT: Opcode: REBASE_OPCODE_DO_REBASE_IMM_TIMES
20-
# CHECK-NEXT: Imm: 1
21-
# CHECK-NEXT: Opcode: REBASE_OPCODE_ADD_ADDR_IMM_SCALED
22-
# CHECK-NEXT: Imm: 14
18+
19+
## 1/ Single rebases with a gap after them are encoded as REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB.
20+
.quad _foo
21+
.space 16
22+
# CHECK-NEXT: Opcode: REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB
23+
# CHECK-NEXT: Imm: 0
24+
# CHECK-NEXT: ExtraData: [ 0x10 ]
25+
26+
## 2/ Consecutive rebases are encoded as REBASE_OPCODE_DO_REBASE_IMM_TIMES.
27+
.quad _foo
28+
.quad _foo
29+
.quad _foo
2330
# CHECK-NEXT: Opcode: REBASE_OPCODE_DO_REBASE_IMM_TIMES
2431
# CHECK-NEXT: Imm: 3
25-
# CHECK-NEXT: Opcode: REBASE_OPCODE_ADD_ADDR_ULEB
32+
33+
## 3/ Gaps smaller than 16 words are encoded as REBASE_OPCODE_ADD_ADDR_IMM_SCALED.
34+
.space 120
35+
# CHECK-NEXT: Opcode: REBASE_OPCODE_ADD_ADDR_IMM_SCALED
36+
# CHECK-NEXT: Imm: 15
37+
38+
## 4/ Rebases with equal gaps between them are encoded as REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB.
39+
.quad _foo
40+
.space 16
41+
.quad _foo
42+
.space 16
43+
# CHECK-NEXT: Opcode: REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB
2644
# CHECK-NEXT: Imm: 0
27-
# CHECK-NEXT: ExtraData: [ 0x78 ]
28-
# CHECK-NEXT: Opcode: REBASE_OPCODE_DO_REBASE_IMM_TIMES
29-
# CHECK-NEXT: Imm: 1
30-
# CHECK-NEXT: Opcode: REBASE_OPCODE_DONE
45+
# CHECK-NEXT: ExtraData: [ 0x2, 0x10 ]
46+
47+
## 5/ Rebase does not become a part of DO_REBASE_ULEB_TIMES_SKIPPING_ULEB if the next rebase is closer than the gap.
48+
.quad _foo
49+
.space 8
50+
# CHECK-NEXT: Opcode: REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB
3151
# CHECK-NEXT: Imm: 0
52+
# CHECK-NEXT: ExtraData: [ 0x8 ]
3253

54+
.quad _foo
55+
.quad _foo
56+
# CHECK-NEXT: Opcode: REBASE_OPCODE_DO_REBASE_IMM_TIMES
57+
# CHECK-NEXT: Imm: 2
3358

34-
.text
35-
.globl _foo
36-
_foo:
59+
## 6/ Large gaps are encoded as REBASE_OPCODE_ADD_ADDR_ULEB.
60+
.space 128
61+
# CHECK-NEXT: Opcode: REBASE_OPCODE_ADD_ADDR_ULEB
62+
# CHECK-NEXT: Imm: 0
63+
# CHECK-NEXT: ExtraData: [ 0x80 ]
3764

38-
.data
39-
.quad _foo
40-
.space 112
4165
.quad _foo
66+
.space 8
4267
.quad _foo
68+
.space 8
4369
.quad _foo
44-
.space 120
70+
# CHECK-NEXT: Opcode: REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB
71+
# CHECK-NEXT: Imm: 0
72+
# CHECK-NEXT: ExtraData: [ 0x3, 0x8 ]
73+
74+
75+
## 7/ An add opcode is emitted if the next relocation is farther away than the DO_REBASE_ULEB_TIMES_SKIPPING_ULEB gap.
76+
.space 16
4577
.quad _foo
78+
# CHECK-NEXT: Opcode: REBASE_OPCODE_ADD_ADDR_IMM_SCALED
79+
# CHECK-NEXT: Imm: 1
80+
# CHECK-NEXT: Opcode: REBASE_OPCODE_DO_REBASE_IMM_TIMES
81+
# CHECK-NEXT: Imm: 1
82+
83+
## 8/ The rebase section is terminated by REBASE_OPCODE_DONE.
84+
# CHECK-NEXT: Opcode: REBASE_OPCODE_DONE
85+
# CHECK-NEXT: Imm: 0

0 commit comments

Comments
 (0)