@@ -164,82 +164,129 @@ RebaseSection::RebaseSection()
164
164
: LinkEditSection(segment_names::linkEdit, section_names::rebase) {}
165
165
166
166
namespace {
// Describes one pending run of rebase locations that has not yet been written
// to the opcode stream.
struct RebaseState {
  // Number of locations that belong to the run.
  uint64_t sequenceLength;
  // Distance in bytes between two neighbouring locations of the run. A run
  // of back-to-back pointers has a skip length of exactly one target word.
  uint64_t skipLength;
};
} // namespace
173
172
174
- // Rebase opcodes allow us to describe a contiguous sequence of rebase location
175
- // using a single DO_REBASE opcode. To take advantage of it, we delay emitting
176
- // `DO_REBASE` until we have reached the end of a contiguous sequence.
177
- static void encodeDoRebase (Rebase &rebase, raw_svector_ostream &os) {
178
- assert (rebase.consecutiveCount != 0 );
179
- if (rebase.consecutiveCount <= REBASE_IMMEDIATE_MASK) {
180
- os << static_cast <uint8_t >(REBASE_OPCODE_DO_REBASE_IMM_TIMES |
181
- rebase.consecutiveCount );
173
+ static void emitIncrement (uint64_t incr, raw_svector_ostream &os) {
174
+ assert (incr != 0 );
175
+
176
+ if ((incr >> target->p2WordSize ) <= REBASE_IMMEDIATE_MASK &&
177
+ (incr % target->wordSize ) == 0 ) {
178
+ os << static_cast <uint8_t >(REBASE_OPCODE_ADD_ADDR_IMM_SCALED |
179
+ (incr >> target->p2WordSize ));
182
180
} else {
183
- os << static_cast <uint8_t >(REBASE_OPCODE_DO_REBASE_ULEB_TIMES );
184
- encodeULEB128 (rebase. consecutiveCount , os);
181
+ os << static_cast <uint8_t >(REBASE_OPCODE_ADD_ADDR_ULEB );
182
+ encodeULEB128 (incr , os);
185
183
}
186
- rebase.consecutiveCount = 0 ;
187
184
}
188
185
189
- static void encodeRebase (const OutputSection *osec, uint64_t outSecOff,
190
- Rebase &lastRebase, raw_svector_ostream &os) {
191
- OutputSegment *seg = osec->parent ;
192
- uint64_t offset = osec->getSegmentOffset () + outSecOff;
193
- if (lastRebase.segment != seg || lastRebase.offset != offset) {
194
- if (lastRebase.consecutiveCount != 0 )
195
- encodeDoRebase (lastRebase, os);
196
-
197
- if (lastRebase.segment != seg) {
198
- os << static_cast <uint8_t >(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
199
- seg->index );
200
- encodeULEB128 (offset, os);
201
- lastRebase.segment = seg;
202
- lastRebase.offset = offset;
186
+ static void flushRebase (const RebaseState &state, raw_svector_ostream &os) {
187
+ assert (state.sequenceLength > 0 );
188
+
189
+ if (state.skipLength == target->wordSize ) {
190
+ if (state.sequenceLength <= REBASE_IMMEDIATE_MASK) {
191
+ os << static_cast <uint8_t >(REBASE_OPCODE_DO_REBASE_IMM_TIMES |
192
+ state.sequenceLength );
203
193
} else {
204
- assert (lastRebase.offset != offset);
205
- uint64_t delta = offset - lastRebase.offset ;
206
- // For unknown reasons, ld64 checks if the scaled offset is strictly less
207
- // than REBASE_IMMEDIATE_MASK instead of allowing equality. We match this
208
- // behavior as a precaution.
209
- if ((delta % target->wordSize == 0 ) &&
210
- (delta / target->wordSize < REBASE_IMMEDIATE_MASK)) {
211
- os << static_cast <uint8_t >(REBASE_OPCODE_ADD_ADDR_IMM_SCALED |
212
- (delta / target->wordSize ));
213
- } else {
214
- os << static_cast <uint8_t >(REBASE_OPCODE_ADD_ADDR_ULEB);
215
- encodeULEB128 (delta, os);
216
- }
217
- lastRebase.offset = offset;
194
+ os << static_cast <uint8_t >(REBASE_OPCODE_DO_REBASE_ULEB_TIMES);
195
+ encodeULEB128 (state.sequenceLength , os);
196
+ }
197
+ } else if (state.sequenceLength == 1 ) {
198
+ os << static_cast <uint8_t >(REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB);
199
+ encodeULEB128 (state.skipLength - target->wordSize , os);
200
+ } else {
201
+ os << static_cast <uint8_t >(
202
+ REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB);
203
+ encodeULEB128 (state.sequenceLength , os);
204
+ encodeULEB128 (state.skipLength - target->wordSize , os);
205
+ }
206
+ }
207
+
208
+ // Rebases are communicated to dyld using a bytecode, whose opcodes cause the
209
+ // memory location at a specific address to be rebased and/or the address to be
210
+ // incremented.
211
+ //
212
+ // Opcode REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB is the most generic
213
+ // one, encoding a series of evenly spaced addresses. This algorithm works by
214
+ // splitting up the sorted list of addresses into such chunks. If the locations
215
+ // are consecutive or the sequence consists of a single location, flushRebase
216
+ // will use a smaller, more specialized encoding.
217
+ static void encodeRebases (const OutputSegment *seg,
218
+ MutableArrayRef<Location> locations,
219
+ raw_svector_ostream &os) {
220
+ // dyld operates on segments. Translate section offsets into segment offsets.
221
+ for (Location &loc : locations)
222
+ loc.offset =
223
+ loc.isec ->parent ->getSegmentOffset () + loc.isec ->getOffset (loc.offset );
224
+ // The algorithm assumes that locations are unique.
225
+ Location *end =
226
+ llvm::unique (locations, [](const Location &a, const Location &b) {
227
+ return a.offset == b.offset ;
228
+ });
229
+ size_t count = end - locations.begin ();
230
+
231
+ os << static_cast <uint8_t >(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
232
+ seg->index );
233
+ assert (!locations.empty ());
234
+ uint64_t offset = locations[0 ].offset ;
235
+ encodeULEB128 (offset, os);
236
+
237
+ RebaseState state{1 , target->wordSize };
238
+
239
+ for (size_t i = 1 ; i < count; ++i) {
240
+ offset = locations[i].offset ;
241
+
242
+ uint64_t skip = offset - locations[i - 1 ].offset ;
243
+ assert (skip != 0 && " duplicate locations should have been weeded out" );
244
+
245
+ if (skip == state.skipLength ) {
246
+ ++state.sequenceLength ;
247
+ } else if (state.sequenceLength == 1 ) {
248
+ ++state.sequenceLength ;
249
+ state.skipLength = skip;
250
+ } else if (skip < state.skipLength ) {
251
+ // The address is lower than what the rebase pointer would be if the last
252
+ // location would be part of a sequence. We start a new sequence from the
253
+ // previous location.
254
+ --state.sequenceLength ;
255
+ flushRebase (state, os);
256
+
257
+ state.sequenceLength = 2 ;
258
+ state.skipLength = skip;
259
+ } else {
260
+ // The address is at some positive offset from the rebase pointer. We
261
+ // start a new sequence which begins with the current location.
262
+ flushRebase (state, os);
263
+ emitIncrement (skip - state.skipLength , os);
264
+ state.sequenceLength = 1 ;
265
+ state.skipLength = target->wordSize ;
218
266
}
219
267
}
220
- ++lastRebase.consecutiveCount ;
221
- // DO_REBASE causes dyld to both perform the binding and increment the offset
222
- lastRebase.offset += target->wordSize ;
268
+ flushRebase (state, os);
223
269
}
224
270
225
271
void RebaseSection::finalizeContents () {
226
272
if (locations.empty ())
227
273
return ;
228
274
229
275
raw_svector_ostream os{contents};
230
- Rebase lastRebase;
231
-
232
276
os << static_cast <uint8_t >(REBASE_OPCODE_SET_TYPE_IMM | REBASE_TYPE_POINTER);
233
277
234
278
llvm::sort (locations, [](const Location &a, const Location &b) {
235
279
return a.isec ->getVA (a.offset ) < b.isec ->getVA (b.offset );
236
280
});
237
- for (const Location &loc : locations)
238
- encodeRebase (loc.isec ->parent , loc.isec ->getOffset (loc.offset ), lastRebase,
239
- os);
240
- if (lastRebase.consecutiveCount != 0 )
241
- encodeDoRebase (lastRebase, os);
242
281
282
+ for (size_t i = 0 , count = locations.size (); i < count;) {
283
+ const OutputSegment *seg = locations[i].isec ->parent ->parent ;
284
+ size_t j = i + 1 ;
285
+ while (j < count && locations[j].isec ->parent ->parent == seg)
286
+ ++j;
287
+ encodeRebases (seg, {locations.data () + i, locations.data () + j}, os);
288
+ i = j;
289
+ }
243
290
os << static_cast <uint8_t >(REBASE_OPCODE_DONE);
244
291
}
245
292
0 commit comments