Skip to content

Commit 1e95349

Browse files
committed
Revert "ELF: Add branch-to-branch optimization."
This caused assertion failures in applyBranchToBranchOpt(): llvm/include/llvm/Support/Casting.h:578: decltype(auto) llvm::cast(From*) [with To = lld::elf::InputSection; From = lld::elf::InputSectionBase]: Assertion `isa<To>(Val) && "cast<Ty>() argument of incompatible type!"' failed. See comment on the PR (#138366). This reverts commit 491b82a. This also reverts the follow-up "[lld] Use llvm::partition_point (NFC) (#145209)". This reverts commit 2ac293f.
1 parent 6d8d4cf commit 1e95349

13 files changed

6 additions and 462 deletions

lld/ELF/Arch/AArch64.cpp

Lines changed: 0 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
#include "Symbols.h"
1212
#include "SyntheticSections.h"
1313
#include "Target.h"
14-
#include "TargetImpl.h"
1514
#include "llvm/BinaryFormat/ELF.h"
1615
#include "llvm/Support/Endian.h"
1716

@@ -83,7 +82,6 @@ class AArch64 : public TargetInfo {
8382
uint64_t val) const override;
8483
RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
8584
void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
86-
void applyBranchToBranchOpt() const override;
8785

8886
private:
8987
void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
@@ -976,62 +974,6 @@ void AArch64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
976974
}
977975
}
978976

979-
static std::optional<uint64_t> getControlTransferAddend(InputSection &is,
980-
Relocation &r) {
981-
// Identify a control transfer relocation for the branch-to-branch
982-
// optimization. A "control transfer relocation" means a B or BL
983-
// target but it also includes relative vtable relocations for example.
984-
//
985-
// We require the relocation type to be JUMP26, CALL26 or PLT32. With a
986-
// relocation type of PLT32 the value may be assumed to be used for branching
987-
// directly to the symbol and the addend is only used to produce the relocated
988-
// value (hence the effective addend is always 0). This is because if a PLT is
989-
// needed the addend will be added to the address of the PLT, and it doesn't
990-
// make sense to branch into the middle of a PLT. For example, relative vtable
991-
// relocations use PLT32 and 0 or a positive value as the addend but still are
992-
// used to branch to the symbol.
993-
//
994-
// With JUMP26 or CALL26 the only reasonable interpretation of a non-zero
995-
// addend is that we are branching to symbol+addend so that becomes the
996-
// effective addend.
997-
if (r.type == R_AARCH64_PLT32)
998-
return 0;
999-
if (r.type == R_AARCH64_JUMP26 || r.type == R_AARCH64_CALL26)
1000-
return r.addend;
1001-
return std::nullopt;
1002-
}
1003-
1004-
static std::pair<Relocation *, uint64_t>
1005-
getBranchInfoAtTarget(InputSection &is, uint64_t offset) {
1006-
auto *i = llvm::partition_point(
1007-
is.relocations, [&](Relocation &r) { return r.offset < offset; });
1008-
if (i != is.relocations.end() && i->offset == offset &&
1009-
i->type == R_AARCH64_JUMP26) {
1010-
return {i, i->addend};
1011-
}
1012-
return {nullptr, 0};
1013-
}
1014-
1015-
static void redirectControlTransferRelocations(Relocation &r1,
1016-
const Relocation &r2) {
1017-
r1.expr = r2.expr;
1018-
r1.sym = r2.sym;
1019-
// With PLT32 we must respect the original addend as that affects the value's
1020-
// interpretation. With the other relocation types the original addend is
1021-
// irrelevant because it referred to an offset within the original target
1022-
// section so we overwrite it.
1023-
if (r1.type == R_AARCH64_PLT32)
1024-
r1.addend += r2.addend;
1025-
else
1026-
r1.addend = r2.addend;
1027-
}
1028-
1029-
void AArch64::applyBranchToBranchOpt() const {
1030-
applyBranchToBranchOptImpl(ctx, getControlTransferAddend,
1031-
getBranchInfoAtTarget,
1032-
redirectControlTransferRelocations);
1033-
}
1034-
1035977
// AArch64 may use security features in variant PLT sequences. These are:
1036978
// Pointer Authentication (PAC), introduced in armv8.3-a and Branch Target
1037979
// Indicator (BTI) introduced in armv8.5-a. The additional instructions used

lld/ELF/Arch/TargetImpl.h

Lines changed: 0 additions & 93 deletions
This file was deleted.

lld/ELF/Arch/X86_64.cpp

Lines changed: 0 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
#include "Symbols.h"
1212
#include "SyntheticSections.h"
1313
#include "Target.h"
14-
#include "TargetImpl.h"
1514
#include "llvm/BinaryFormat/ELF.h"
1615
#include "llvm/Support/Endian.h"
1716
#include "llvm/Support/MathExtras.h"
@@ -50,7 +49,6 @@ class X86_64 : public TargetInfo {
5049
bool deleteFallThruJmpInsn(InputSection &is, InputFile *file,
5150
InputSection *nextIS) const override;
5251
bool relaxOnce(int pass) const override;
53-
void applyBranchToBranchOpt() const override;
5452

5553
private:
5654
void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
@@ -1163,72 +1161,6 @@ void X86_64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
11631161
}
11641162
}
11651163

1166-
static std::optional<uint64_t> getControlTransferAddend(InputSection &is,
1167-
Relocation &r) {
1168-
// Identify a control transfer relocation for the branch-to-branch
1169-
// optimization. A "control transfer relocation" usually means a CALL or JMP
1170-
// target but it also includes relative vtable relocations for example.
1171-
//
1172-
// We require the relocation type to be PLT32. With a relocation type of PLT32
1173-
// the value may be assumed to be used for branching directly to the symbol
1174-
// and the addend is only used to produce the relocated value (hence the
1175-
// effective addend is always 0). This is because if a PLT is needed the
1176-
// addend will be added to the address of the PLT, and it doesn't make sense
1177-
// to branch into the middle of a PLT. For example, relative vtable
1178-
// relocations use PLT32 and 0 or a positive value as the addend but still are
1179-
// used to branch to the symbol.
1180-
//
1181-
// STT_SECTION symbols are a special case on x86 because the LLVM assembler
1182-
// uses them for branches to local symbols which are assembled as referring to
1183-
// the section symbol with the addend equal to the symbol value - 4.
1184-
if (r.type == R_X86_64_PLT32) {
1185-
if (r.sym->isSection())
1186-
return r.addend + 4;
1187-
return 0;
1188-
}
1189-
return std::nullopt;
1190-
}
1191-
1192-
static std::pair<Relocation *, uint64_t>
1193-
getBranchInfoAtTarget(InputSection &is, uint64_t offset) {
1194-
auto content = is.contentMaybeDecompress();
1195-
if (content.size() > offset && content[offset] == 0xe9) { // JMP immediate
1196-
auto *i = llvm::partition_point(
1197-
is.relocations, [&](Relocation &r) { return r.offset < offset + 1; });
1198-
// Unlike with getControlTransferAddend() it is valid to accept a PC32
1199-
// relocation here because we know that this is actually a JMP and not some
1200-
// other reference, so the interpretation is that we add 4 to the addend and
1201-
// use that as the effective addend.
1202-
if (i != is.relocations.end() && i->offset == offset + 1 &&
1203-
(i->type == R_X86_64_PC32 || i->type == R_X86_64_PLT32)) {
1204-
return {i, i->addend + 4};
1205-
}
1206-
}
1207-
return {nullptr, 0};
1208-
}
1209-
1210-
static void redirectControlTransferRelocations(Relocation &r1,
1211-
const Relocation &r2) {
1212-
// The isSection() check handles the STT_SECTION case described above.
1213-
// In that case the original addend is irrelevant because it referred to an
1214-
// offset within the original target section so we overwrite it.
1215-
//
1216-
// The +4 is here to compensate for r2.addend which will likely be -4,
1217-
// but may also be addend-4 in case of a PC32 branch to symbol+addend.
1218-
if (r1.sym->isSection())
1219-
r1.addend = r2.addend;
1220-
else
1221-
r1.addend += r2.addend + 4;
1222-
r1.expr = r2.expr;
1223-
r1.sym = r2.sym;
1224-
}
1225-
1226-
void X86_64::applyBranchToBranchOpt() const {
1227-
applyBranchToBranchOptImpl(ctx, getControlTransferAddend,
1228-
getBranchInfoAtTarget,
1229-
redirectControlTransferRelocations);
1230-
}
1231-
12321164
// If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
12331165
// entries containing endbr64 instructions. A PLT entry will be split into two
12341166
// parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).

lld/ELF/Config.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,6 @@ struct Config {
302302
bool bpFunctionOrderForCompression = false;
303303
bool bpDataOrderForCompression = false;
304304
bool bpVerboseSectionOrderer = false;
305-
bool branchToBranch = false;
306305
bool checkSections;
307306
bool checkDynamicRelocs;
308307
std::optional<llvm::DebugCompressionType> compressDebugSections;

lld/ELF/Driver.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1644,8 +1644,6 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) {
16441644
ctx.arg.zWxneeded = hasZOption(args, "wxneeded");
16451645
setUnresolvedSymbolPolicy(ctx, args);
16461646
ctx.arg.power10Stubs = args.getLastArgValue(OPT_power10_stubs_eq) != "no";
1647-
ctx.arg.branchToBranch = args.hasFlag(
1648-
OPT_branch_to_branch, OPT_no_branch_to_branch, ctx.arg.optimize >= 2);
16491647

16501648
if (opt::Arg *arg = args.getLastArg(OPT_eb, OPT_el)) {
16511649
if (arg->getOption().matches(OPT_eb))

lld/ELF/InputSection.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -430,9 +430,8 @@ InputSectionBase *InputSection::getRelocatedSection() const {
430430

431431
template <class ELFT, class RelTy>
432432
void InputSection::copyRelocations(Ctx &ctx, uint8_t *buf) {
433-
bool linkerRelax =
434-
ctx.arg.relax && is_contained({EM_RISCV, EM_LOONGARCH}, ctx.arg.emachine);
435-
if (!ctx.arg.relocatable && (linkerRelax || ctx.arg.branchToBranch)) {
433+
if (ctx.arg.relax && !ctx.arg.relocatable &&
434+
(ctx.arg.emachine == EM_RISCV || ctx.arg.emachine == EM_LOONGARCH)) {
436435
// On LoongArch and RISC-V, relaxation might change relocations: copy
437436
// from internal ones that are updated by relaxation.
438437
InputSectionBase *sec = getRelocatedSection();

lld/ELF/Options.td

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,10 +59,6 @@ def build_id: J<"build-id=">, HelpText<"Generate build ID note">,
5959
MetaVarName<"[fast,md5,sha1,uuid,0x<hexstring>]">;
6060
def : F<"build-id">, Alias<build_id>, AliasArgs<["sha1"]>, HelpText<"Alias for --build-id=sha1">;
6161

62-
defm branch_to_branch: BB<"branch-to-branch",
63-
"Enable branch-to-branch optimization (default at -O2)",
64-
"Disable branch-to-branch optimization (default at -O0 and -O1)">;
65-
6662
defm check_sections: B<"check-sections",
6763
"Check section addresses for overlaps (default)",
6864
"Do not check section addresses for overlaps">;

lld/ELF/Relocations.cpp

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1665,10 +1665,9 @@ void RelocationScanner::scan(Relocs<RelTy> rels) {
16651665
}
16661666

16671667
// Sort relocations by offset for more efficient searching for
1668-
// R_RISCV_PCREL_HI20, R_PPC64_ADDR64 and the branch-to-branch optimization.
1668+
// R_RISCV_PCREL_HI20 and R_PPC64_ADDR64.
16691669
if (ctx.arg.emachine == EM_RISCV ||
1670-
(ctx.arg.emachine == EM_PPC64 && sec->name == ".toc") ||
1671-
ctx.arg.branchToBranch)
1670+
(ctx.arg.emachine == EM_PPC64 && sec->name == ".toc"))
16721671
llvm::stable_sort(sec->relocs(),
16731672
[](const Relocation &lhs, const Relocation &rhs) {
16741673
return lhs.offset < rhs.offset;
@@ -1959,9 +1958,6 @@ void elf::postScanRelocations(Ctx &ctx) {
19591958
for (ELFFileBase *file : ctx.objectFiles)
19601959
for (Symbol *sym : file->getLocalSymbols())
19611960
fn(*sym);
1962-
1963-
if (ctx.arg.branchToBranch)
1964-
ctx.target->applyBranchToBranchOpt();
19651961
}
19661962

19671963
static bool mergeCmp(const InputSection *a, const InputSection *b) {

lld/ELF/Target.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,6 @@ class TargetInfo {
101101

102102
virtual void applyJumpInstrMod(uint8_t *loc, JumpModType type,
103103
JumpModType val) const {}
104-
virtual void applyBranchToBranchOpt() const {}
105104

106105
virtual ~TargetInfo();
107106

lld/docs/ReleaseNotes.rst

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,6 @@ ELF Improvements
6262
on executable sections.
6363
(`#128883 <https://github.com/llvm/llvm-project/pull/128883>`_)
6464

65-
* For AArch64 and X86_64, added ``--branch-to-branch``, which rewrites branches
66-
that point to another branch instruction to instead branch directly to the
67-
target of the second instruction. Enabled by default at ``-O2``.
68-
6965
Breaking changes
7066
----------------
7167
* Executable-only and readable-executable sections are now allowed to be placed

lld/docs/ld.lld.1

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -93,11 +93,6 @@ Bind default visibility defined STB_GLOBAL function symbols locally for
9393
.Fl shared.
9494
.It Fl -be8
9595
Write a Big Endian ELF File using BE8 format(AArch32 only)
96-
.It Fl -branch-to-branch
97-
Enable the branch-to-branch optimizations: a branch whose target is
98-
another branch instruction is rewritten to point to the latter branch
99-
target (AArch64 and X86_64 only). Enabled by default at
100-
.Fl O2 Ns .
10196
.It Fl -build-id Ns = Ns Ar value
10297
Generate a build ID note.
10398
.Ar value
@@ -419,7 +414,7 @@ If not specified,
419414
.Dv a.out
420415
is used as a default.
421416
.It Fl O Ns Ar value
422-
Optimize output file.
417+
Optimize output file size.
423418
.Ar value
424419
may be:
425420
.Pp
@@ -429,7 +424,7 @@ Disable string merging.
429424
.It Cm 1
430425
Enable string merging.
431426
.It Cm 2
432-
Enable string tail merging and branch-to-branch optimization.
427+
Enable string tail merging.
433428
.El
434429
.Pp
435430
.Fl O Ns Cm 1

0 commit comments

Comments
 (0)