[lld-macho] Optimize rebase opcode generation

BertalanD · BertalanD · commit 54e18b23972a · 2022-07-21T10:00:39.000+02:00
This commit reduces the size of the emitted rebase sections by generating the REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB and REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB opcodes. With this change, chromium_framework's rebase section is a 40% smaller 197 kilobytes, down from the previous 320 kB. That is 6 kB smaller than what ld64 produces for the same input. Performance figures from my M1 Mac mini: x before + after N Min Max Median Avg Stddev x 10 4.2269349 4.3300061 4.2689675 4.2690016 0.031151669 + 10 4.219331 4.2914009 4.2398136 4.2448277 0.023817308 No difference proven at 95.0% confidence Differential Revision: https://reviews.llvm.org/D130180
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
@@ -164,82 +164,129 @@ RebaseSection::RebaseSection()
     : LinkEditSection(segment_names::linkEdit, section_names::rebase) {}
 
 namespace {
-struct Rebase {
-  OutputSegment *segment = nullptr;
-  uint64_t offset = 0;
-  uint64_t consecutiveCount = 0;
+struct RebaseState {
+  uint64_t sequenceLength;
+  uint64_t skipLength;
 };
 } // namespace
 
-// Rebase opcodes allow us to describe a contiguous sequence of rebase location
-// using a single DO_REBASE opcode. To take advantage of it, we delay emitting
-// `DO_REBASE` until we have reached the end of a contiguous sequence.
-static void encodeDoRebase(Rebase &rebase, raw_svector_ostream &os) {
-  assert(rebase.consecutiveCount != 0);
-  if (rebase.consecutiveCount <= REBASE_IMMEDIATE_MASK) {
-    os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_IMM_TIMES |
-                               rebase.consecutiveCount);
+static void emitIncrement(uint64_t incr, raw_svector_ostream &os) {
+  assert(incr != 0);
+
+  if ((incr >> target->p2WordSize) <= REBASE_IMMEDIATE_MASK &&
+      (incr % target->wordSize) == 0) {
+    os << static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_IMM_SCALED |
+                               (incr >> target->p2WordSize));
   } else {
-    os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ULEB_TIMES);
-    encodeULEB128(rebase.consecutiveCount, os);
+    os << static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_ULEB);
+    encodeULEB128(incr, os);
   }
-  rebase.consecutiveCount = 0;
 }
 
-static void encodeRebase(const OutputSection *osec, uint64_t outSecOff,
-                         Rebase &lastRebase, raw_svector_ostream &os) {
-  OutputSegment *seg = osec->parent;
-  uint64_t offset = osec->getSegmentOffset() + outSecOff;
-  if (lastRebase.segment != seg || lastRebase.offset != offset) {
-    if (lastRebase.consecutiveCount != 0)
-      encodeDoRebase(lastRebase, os);
-
-    if (lastRebase.segment != seg) {
-      os << static_cast<uint8_t>(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
-                                 seg->index);
-      encodeULEB128(offset, os);
-      lastRebase.segment = seg;
-      lastRebase.offset = offset;
+static void flushRebase(const RebaseState &state, raw_svector_ostream &os) {
+  assert(state.sequenceLength > 0);
+
+  if (state.skipLength == target->wordSize) {
+    if (state.sequenceLength <= REBASE_IMMEDIATE_MASK) {
+      os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_IMM_TIMES |
+                                 state.sequenceLength);
     } else {
-      assert(lastRebase.offset != offset);
-      uint64_t delta = offset - lastRebase.offset;
-      // For unknown reasons, ld64 checks if the scaled offset is strictly less
-      // than REBASE_IMMEDIATE_MASK instead of allowing equality. We match this
-      // behavior as a precaution.
-      if ((delta % target->wordSize == 0) &&
-          (delta / target->wordSize < REBASE_IMMEDIATE_MASK)) {
-        os << static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_IMM_SCALED |
-                                   (delta / target->wordSize));
-      } else {
-        os << static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_ULEB);
-        encodeULEB128(delta, os);
-      }
-      lastRebase.offset = offset;
+      os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ULEB_TIMES);
+      encodeULEB128(state.sequenceLength, os);
+    }
+  } else if (state.sequenceLength == 1) {
+    os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB);
+    encodeULEB128(state.skipLength - target->wordSize, os);
+  } else {
+    os << static_cast<uint8_t>(
+        REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB);
+    encodeULEB128(state.sequenceLength, os);
+    encodeULEB128(state.skipLength - target->wordSize, os);
+  }
+}
+
+// Rebases are communicated to dyld using a bytecode, whose opcodes cause the
+// memory location at a specific address to be rebased and/or the address to be
+// incremented.
+//
+// Opcode REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB is the most generic
+// one, encoding a series of evenly spaced addresses. This algorithm works by
+// splitting up the sorted list of addresses into such chunks. If the locations
+// are consecutive or the sequence consists of a single location, flushRebase
+// will use a smaller, more specialized encoding.
+static void encodeRebases(const OutputSegment *seg,
+                          MutableArrayRef<Location> locations,
+                          raw_svector_ostream &os) {
+  // dyld operates on segments. Translate section offsets into segment offsets.
+  for (Location &loc : locations)
+    loc.offset =
+        loc.isec->parent->getSegmentOffset() + loc.isec->getOffset(loc.offset);
+  // The algorithm assumes that locations are unique.
+  Location *end =
+      llvm::unique(locations, [](const Location &a, const Location &b) {
+        return a.offset == b.offset;
+      });
+  size_t count = end - locations.begin();
+
+  os << static_cast<uint8_t>(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
+                             seg->index);
+  assert(!locations.empty());
+  uint64_t offset = locations[0].offset;
+  encodeULEB128(offset, os);
+
+  RebaseState state{1, target->wordSize};
+
+  for (size_t i = 1; i < count; ++i) {
+    offset = locations[i].offset;
+
+    uint64_t skip = offset - locations[i - 1].offset;
+    assert(skip != 0 && "duplicate locations should have been weeded out");
+
+    if (skip == state.skipLength) {
+      ++state.sequenceLength;
+    } else if (state.sequenceLength == 1) {
+      ++state.sequenceLength;
+      state.skipLength = skip;
+    } else if (skip < state.skipLength) {
+      // The address is lower than what the rebase pointer would be if the last
+      // location would be part of a sequence. We start a new sequence from the
+      // previous location.
+      --state.sequenceLength;
+      flushRebase(state, os);
+
+      state.sequenceLength = 2;
+      state.skipLength = skip;
+    } else {
+      // The address is at some positive offset from the rebase pointer. We
+      // start a new sequence which begins with the current location.
+      flushRebase(state, os);
+      emitIncrement(skip - state.skipLength, os);
+      state.sequenceLength = 1;
+      state.skipLength = target->wordSize;
     }
   }
-  ++lastRebase.consecutiveCount;
-  // DO_REBASE causes dyld to both perform the binding and increment the offset
-  lastRebase.offset += target->wordSize;
+  flushRebase(state, os);
 }
 
 void RebaseSection::finalizeContents() {
   if (locations.empty())
     return;
 
   raw_svector_ostream os{contents};
-  Rebase lastRebase;
-
   os << static_cast<uint8_t>(REBASE_OPCODE_SET_TYPE_IMM | REBASE_TYPE_POINTER);
 
   llvm::sort(locations, [](const Location &a, const Location &b) {
     return a.isec->getVA(a.offset) < b.isec->getVA(b.offset);
   });
-  for (const Location &loc : locations)
-    encodeRebase(loc.isec->parent, loc.isec->getOffset(loc.offset), lastRebase,
-                 os);
-  if (lastRebase.consecutiveCount != 0)
-    encodeDoRebase(lastRebase, os);
 
+  for (size_t i = 0, count = locations.size(); i < count;) {
+    const OutputSegment *seg = locations[i].isec->parent->parent;
+    size_t j = i + 1;
+    while (j < count && locations[j].isec->parent->parent == seg)
+      ++j;
+    encodeRebases(seg, {locations.data() + i, locations.data() + j}, os);
+    i = j;
+  }
   os << static_cast<uint8_t>(REBASE_OPCODE_DONE);
 }
 
diff --git a/lld/test/MachO/rebase-opcodes.s b/lld/test/MachO/rebase-opcodes.s
@@ -4,42 +4,82 @@
 # RUN: %lld -dylib %t.o -o %t.dylib
 # RUN: obj2yaml %t.dylib | FileCheck %s
 
-## Test that:
-## 1/ Consecutive rebases are encoded as REBASE_OPCODE_DO_REBASE_IMM_TIMES.
-## 2/ Gaps smaller than 15 words are encoded as REBASE_OPCODE_ADD_ADDR_IMM_SCALED.
-## 3/ Gaps larger than that become REBASE_OPCODE_ADD_ADDR_ULEB.
-## FIXME: The last rebase could be transformed into a REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB.
+.text
+.globl _foo
+_foo:
 
+.data
 # CHECK: RebaseOpcodes:
 # CHECK-NEXT: Opcode:          REBASE_OPCODE_SET_TYPE_IMM
 # CHECK-NEXT: Imm:             1
 # CHECK-NEXT: Opcode:          REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
 # CHECK-NEXT: Imm:             1
 # CHECK-NEXT: ExtraData:       [ 0x0 ]
-# CHECK-NEXT: Opcode:          REBASE_OPCODE_DO_REBASE_IMM_TIMES
-# CHECK-NEXT: Imm:             1
-# CHECK-NEXT: Opcode:          REBASE_OPCODE_ADD_ADDR_IMM_SCALED
-# CHECK-NEXT: Imm:             14
+
+## 1/ Single rebases with a gap after them are encoded as REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB.
+.quad _foo
+.space 16
+# CHECK-NEXT: Opcode:          REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB
+# CHECK-NEXT: Imm:             0
+# CHECK-NEXT: ExtraData:       [ 0x10 ]
+
+## 2/ Consecutive rebases are encoded as REBASE_OPCODE_DO_REBASE_IMM_TIMES.
+.quad _foo
+.quad _foo
+.quad _foo
 # CHECK-NEXT: Opcode:          REBASE_OPCODE_DO_REBASE_IMM_TIMES
 # CHECK-NEXT: Imm:             3
-# CHECK-NEXT: Opcode:          REBASE_OPCODE_ADD_ADDR_ULEB
+
+## 3/ Gaps smaller than 16 words are encoded as REBASE_OPCODE_ADD_ADDR_IMM_SCALED.
+.space 120
+# CHECK-NEXT: Opcode:          REBASE_OPCODE_ADD_ADDR_IMM_SCALED
+# CHECK-NEXT: Imm:             15
+
+## 4/ Rebases with equal gaps betwen them are encoded as REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB.
+.quad _foo
+.space 16
+.quad _foo
+.space 16
+# CHECK-NEXT: Opcode:          REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB
 # CHECK-NEXT: Imm:             0
-# CHECK-NEXT: ExtraData:       [ 0x78 ]
-# CHECK-NEXT: Opcode:          REBASE_OPCODE_DO_REBASE_IMM_TIMES
-# CHECK-NEXT: Imm:             1
-# CHECK-NEXT: Opcode:          REBASE_OPCODE_DONE
+# CHECK-NEXT: ExtraData:       [ 0x2, 0x10 ]
+
+## 5/ Rebase does not become a part of DO_REBASE_ULEB_TIMES_SKIPPING_ULEB if the next rebase is closer than the gap.
+.quad _foo
+.space 8
+# CHECK-NEXT: Opcode:          REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB
 # CHECK-NEXT: Imm:             0
+# CHECK-NEXT: ExtraData:       [ 0x8 ]
 
+.quad _foo
+.quad _foo
+# CHECK-NEXT: Opcode:          REBASE_OPCODE_DO_REBASE_IMM_TIMES
+# CHECK-NEXT: Imm:             2
 
-.text
-.globl _foo
-_foo:
+## 6/ Large gaps are encoded as REBASE_OPCODE_ADD_ADDR_ULEB.
+.space 128
+# CHECK-NEXT: Opcode:          REBASE_OPCODE_ADD_ADDR_ULEB
+# CHECK-NEXT: Imm:             0
+# CHECK-NEXT: ExtraData:       [ 0x80 ]
 
-.data
-.quad _foo
-.space 112
 .quad _foo
+.space 8
 .quad _foo
+.space 8
 .quad _foo
-.space 120
+# CHECK-NEXT: Opcode:          REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB
+# CHECK-NEXT: Imm:             0
+# CHECK-NEXT: ExtraData:       [ 0x3, 0x8 ]
+
+
+## 7/ An add opcode is emitted if the next relocation is farther away than the DO_REBASE_ULEB_TIMES_SKIPPING_ULEB gap.
+.space 16
 .quad _foo
+# CHECK-NEXT: Opcode:          REBASE_OPCODE_ADD_ADDR_IMM_SCALED
+# CHECK-NEXT: Imm:             1
+# CHECK-NEXT: Opcode:          REBASE_OPCODE_DO_REBASE_IMM_TIMES
+# CHECK-NEXT: Imm:             1
+
+## 8/ The rebase section is terminated by REBASE_OPCODE_DONE.
+# CHECK-NEXT: Opcode:          REBASE_OPCODE_DONE
+# CHECK-NEXT: Imm:             0