Skip to content

Commit 7832769

Browse files
authored
Revert "[lld] Support thumb PLTs" (#93631)
Reverts #86223; the Windows pre-merge is broken.
1 parent bd135c3 commit 7832769

File tree

4 files changed

+53
-262
lines changed

4 files changed

+53
-262
lines changed

lld/ELF/Arch/ARM.cpp

Lines changed: 53 additions & 123 deletions
Original file line number | Diff line number | Diff line change
@@ -231,71 +231,36 @@ static void writePltHeaderLong(uint8_t *buf) {
231231
// The default PLT header requires the .got.plt to be within 128 Mb of the
232232
// .plt in the positive direction.
233233
void ARM::writePltHeader(uint8_t *buf) const {
234-
if (config->armThumbPLTs) {
235-
// The instruction sequence for thumb:
236-
//
237-
// 0: b500 push {lr}
238-
// 2: f8df e008 ldr.w lr, [pc, #0x8] @ 0xe <func+0xe>
239-
// 6: 44fe add lr, pc
240-
// 8: f85e ff08 ldr pc, [lr, #8]!
241-
// e: .word .got.plt - .plt - 16
242-
//
243-
// At 0x8, we want to jump to .got.plt, the -16 accounts for 8 bytes from
244-
// `pc` in the add instruction and 8 bytes for the `lr` adjustment.
245-
//
246-
uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 16;
247-
assert(llvm::isUInt<32>(offset) && "This should always fit into a 32-bit offset");
248-
write16(buf + 0, 0xb500);
249-
// Split into two halves to support endianness correctly.
250-
write16(buf + 2, 0xf8df);
251-
write16(buf + 4, 0xe008);
252-
write16(buf + 6, 0x44fe);
253-
// Split into two halves to support endianness correctly.
254-
write16(buf + 8, 0xf85e);
255-
write16(buf + 10, 0xff08);
256-
write32(buf + 12, offset);
257-
258-
memcpy(buf + 16, trapInstr.data(), 4); // Pad to 32-byte boundary
259-
memcpy(buf + 20, trapInstr.data(), 4);
260-
memcpy(buf + 24, trapInstr.data(), 4);
261-
memcpy(buf + 28, trapInstr.data(), 4);
262-
} else {
263-
// Use a similar sequence to that in writePlt(), the difference is the
264-
// calling conventions mean we use lr instead of ip. The PLT entry is
265-
// responsible for saving lr on the stack, the dynamic loader is responsible
266-
// for reloading it.
267-
const uint32_t pltData[] = {
268-
0xe52de004, // L1: str lr, [sp,#-4]!
269-
0xe28fe600, // add lr, pc, #0x0NN00000 &(.got.plt - L1 - 4)
270-
0xe28eea00, // add lr, lr, #0x000NN000 &(.got.plt - L1 - 4)
271-
0xe5bef000, // ldr pc, [lr, #0x00000NNN] &(.got.plt -L1 - 4)
272-
};
273-
274-
uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 4;
275-
if (!llvm::isUInt<27>(offset)) {
276-
// We cannot encode the Offset, use the long form.
277-
writePltHeaderLong(buf);
278-
return;
279-
}
280-
write32(buf + 0, pltData[0]);
281-
write32(buf + 4, pltData[1] | ((offset >> 20) & 0xff));
282-
write32(buf + 8, pltData[2] | ((offset >> 12) & 0xff));
283-
write32(buf + 12, pltData[3] | (offset & 0xfff));
284-
memcpy(buf + 16, trapInstr.data(), 4); // Pad to 32-byte boundary
285-
memcpy(buf + 20, trapInstr.data(), 4);
286-
memcpy(buf + 24, trapInstr.data(), 4);
287-
memcpy(buf + 28, trapInstr.data(), 4);
234+
// Use a similar sequence to that in writePlt(), the difference is the calling
235+
// conventions mean we use lr instead of ip. The PLT entry is responsible for
236+
// saving lr on the stack, the dynamic loader is responsible for reloading
237+
// it.
238+
const uint32_t pltData[] = {
239+
0xe52de004, // L1: str lr, [sp,#-4]!
240+
0xe28fe600, // add lr, pc, #0x0NN00000 &(.got.plt - L1 - 4)
241+
0xe28eea00, // add lr, lr, #0x000NN000 &(.got.plt - L1 - 4)
242+
0xe5bef000, // ldr pc, [lr, #0x00000NNN] &(.got.plt -L1 - 4)
243+
};
244+
245+
uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 4;
246+
if (!llvm::isUInt<27>(offset)) {
247+
// We cannot encode the Offset, use the long form.
248+
writePltHeaderLong(buf);
249+
return;
288250
}
251+
write32(buf + 0, pltData[0]);
252+
write32(buf + 4, pltData[1] | ((offset >> 20) & 0xff));
253+
write32(buf + 8, pltData[2] | ((offset >> 12) & 0xff));
254+
write32(buf + 12, pltData[3] | (offset & 0xfff));
255+
memcpy(buf + 16, trapInstr.data(), 4); // Pad to 32-byte boundary
256+
memcpy(buf + 20, trapInstr.data(), 4);
257+
memcpy(buf + 24, trapInstr.data(), 4);
258+
memcpy(buf + 28, trapInstr.data(), 4);
289259
}
290260

291261
void ARM::addPltHeaderSymbols(InputSection &isec) const {
292-
if (config->armThumbPLTs) {
293-
addSyntheticLocal("$t", STT_NOTYPE, 0, 0, isec);
294-
addSyntheticLocal("$d", STT_NOTYPE, 12, 0, isec);
295-
} else {
296-
addSyntheticLocal("$a", STT_NOTYPE, 0, 0, isec);
297-
addSyntheticLocal("$d", STT_NOTYPE, 16, 0, isec);
298-
}
262+
addSyntheticLocal("$a", STT_NOTYPE, 0, 0, isec);
263+
addSyntheticLocal("$d", STT_NOTYPE, 16, 0, isec);
299264
}
300265

301266
// Long form PLT entries that do not have any restrictions on the displacement
@@ -314,65 +279,32 @@ static void writePltLong(uint8_t *buf, uint64_t gotPltEntryAddr,
314279
// .plt in the positive direction.
315280
void ARM::writePlt(uint8_t *buf, const Symbol &sym,
316281
uint64_t pltEntryAddr) const {
282+
// The PLT entry is similar to the example given in Appendix A of ELF for
283+
// the Arm Architecture. Instead of using the Group Relocations to find the
284+
// optimal rotation for the 8-bit immediate used in the add instructions we
285+
// hard code the most compact rotations for simplicity. This saves a load
286+
// instruction over the long plt sequences.
287+
const uint32_t pltData[] = {
288+
0xe28fc600, // L1: add ip, pc, #0x0NN00000 Offset(&(.got.plt) - L1 - 8
289+
0xe28cca00, // add ip, ip, #0x000NN000 Offset(&(.got.plt) - L1 - 8
290+
0xe5bcf000, // ldr pc, [ip, #0x00000NNN] Offset(&(.got.plt) - L1 - 8
291+
};
317292

318-
if (!config->armThumbPLTs) {
319-
uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 8;
320-
321-
// The PLT entry is similar to the example given in Appendix A of ELF for
322-
// the Arm Architecture. Instead of using the Group Relocations to find the
323-
// optimal rotation for the 8-bit immediate used in the add instructions we
324-
// hard code the most compact rotations for simplicity. This saves a load
325-
// instruction over the long plt sequences.
326-
const uint32_t pltData[] = {
327-
0xe28fc600, // L1: add ip, pc, #0x0NN00000 Offset(&(.got.plt) - L1 - 8
328-
0xe28cca00, // add ip, ip, #0x000NN000 Offset(&(.got.plt) - L1 - 8
329-
0xe5bcf000, // ldr pc, [ip, #0x00000NNN] Offset(&(.got.plt) - L1 - 8
330-
};
331-
if (!llvm::isUInt<27>(offset)) {
332-
// We cannot encode the Offset, use the long form.
333-
writePltLong(buf, sym.getGotPltVA(), pltEntryAddr);
334-
return;
335-
}
336-
write32(buf + 0, pltData[0] | ((offset >> 20) & 0xff));
337-
write32(buf + 4, pltData[1] | ((offset >> 12) & 0xff));
338-
write32(buf + 8, pltData[2] | (offset & 0xfff));
339-
memcpy(buf + 12, trapInstr.data(), 4); // Pad to 16-byte boundary
340-
} else {
341-
uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 12;
342-
assert(llvm::isUInt<32>(offset) && "This should always fit into a 32-bit offset");
343-
344-
// A PLT entry will be:
345-
//
346-
// movw ip, #<lower 16 bits>
347-
// movt ip, #<upper 16 bits>
348-
// add ip, pc
349-
// L1: ldr.w pc, [ip]
350-
// b L1
351-
//
352-
// where ip = r12 = 0xc
353-
354-
// movw ip, #<lower 16 bits>
355-
write16(buf + 2, 0x0c00); // use `ip`
356-
relocateNoSym(buf, R_ARM_THM_MOVW_ABS_NC, offset);
357-
358-
// movt ip, #<upper 16 bits>
359-
write16(buf + 6, 0x0c00); // use `ip`
360-
relocateNoSym(buf + 4, R_ARM_THM_MOVT_ABS, offset);
361-
362-
write16(buf + 8, 0x44fc); // add ip, pc
363-
write16(buf + 10, 0xf8dc); // ldr.w pc, [ip] (bottom half)
364-
write16(buf + 12, 0xf000); // ldr.w pc, [ip] (upper half)
365-
write16(buf + 14, 0xe7fc); // Branch to previous instruction
293+
uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 8;
294+
if (!llvm::isUInt<27>(offset)) {
295+
// We cannot encode the Offset, use the long form.
296+
writePltLong(buf, sym.getGotPltVA(), pltEntryAddr);
297+
return;
366298
}
299+
write32(buf + 0, pltData[0] | ((offset >> 20) & 0xff));
300+
write32(buf + 4, pltData[1] | ((offset >> 12) & 0xff));
301+
write32(buf + 8, pltData[2] | (offset & 0xfff));
302+
memcpy(buf + 12, trapInstr.data(), 4); // Pad to 16-byte boundary
367303
}
368304

369305
void ARM::addPltSymbols(InputSection &isec, uint64_t off) const {
370-
if (config->armThumbPLTs) {
371-
addSyntheticLocal("$t", STT_NOTYPE, off, 0, isec);
372-
} else {
373-
addSyntheticLocal("$a", STT_NOTYPE, off, 0, isec);
374-
addSyntheticLocal("$d", STT_NOTYPE, off + 12, 0, isec);
375-
}
306+
addSyntheticLocal("$a", STT_NOTYPE, off, 0, isec);
307+
addSyntheticLocal("$d", STT_NOTYPE, off + 12, 0, isec);
376308
}
377309

378310
bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
@@ -393,8 +325,6 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
393325
case R_ARM_JUMP24:
394326
// Source is ARM, all PLT entries are ARM so no interworking required.
395327
// Otherwise we need to interwork if STT_FUNC Symbol has bit 0 set (Thumb).
396-
assert(!config->armThumbPLTs &&
397-
"If the source is ARM, we should not need Thumb PLTs");
398328
if (s.isFunc() && expr == R_PC && (s.getVA() & 1))
399329
return true;
400330
[[fallthrough]];
@@ -405,9 +335,9 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
405335
}
406336
case R_ARM_THM_JUMP19:
407337
case R_ARM_THM_JUMP24:
408-
// Source is Thumb, when all PLT entries are ARM interworking is required.
338+
// Source is Thumb, all PLT entries are ARM so interworking is required.
409339
// Otherwise we need to interwork if STT_FUNC Symbol has bit 0 clear (ARM).
410-
if ((expr == R_PLT_PC && !config->armThumbPLTs) || (s.isFunc() && (s.getVA() & 1) == 0))
340+
if (expr == R_PLT_PC || (s.isFunc() && (s.getVA() & 1) == 0))
411341
return true;
412342
[[fallthrough]];
413343
case R_ARM_THM_CALL: {
@@ -617,6 +547,7 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
617547
// STT_FUNC we choose whether to write a BL or BLX depending on the
618548
// value of bit 0 of Val. With bit 0 == 1 denoting Thumb. If the symbol is
619549
// not of type STT_FUNC then we must preserve the original instruction.
550+
// PLT entries are always ARM state so we know we don't need to interwork.
620551
assert(rel.sym); // R_ARM_CALL is always reached via relocate().
621552
bool bit0Thumb = val & 1;
622553
bool isBlx = (read32(loc) & 0xfe000000) == 0xfa000000;
@@ -675,13 +606,12 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
675606
// PLT entries are always ARM state so we know we need to interwork.
676607
assert(rel.sym); // R_ARM_THM_CALL is always reached via relocate().
677608
bool bit0Thumb = val & 1;
678-
bool useThumb = bit0Thumb || config->armThumbPLTs;
679609
bool isBlx = (read16(loc + 2) & 0x1000) == 0;
680610
// lld 10.0 and before always used bit0Thumb when deciding to write a BLX
681-
// even when type not STT_FUNC.
682-
if (!rel.sym->isFunc() && !rel.sym->isInPlt() && isBlx == useThumb)
611+
// even when type not STT_FUNC. PLT entries generated by LLD are always ARM.
612+
if (!rel.sym->isFunc() && !rel.sym->isInPlt() && isBlx == bit0Thumb)
683613
stateChangeWarning(loc, rel.type, *rel.sym);
684-
if ((rel.sym->isFunc() || rel.sym->isInPlt()) ? !useThumb : isBlx) {
614+
if (rel.sym->isFunc() || rel.sym->isInPlt() ? !bit0Thumb : isBlx) {
685615
// We are writing a BLX. Ensure BLX destination is 4-byte aligned. As
686616
// the BLX instruction may only be two byte aligned. This must be done
687617
// before overflow check.

lld/ELF/Config.h

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -217,7 +217,6 @@ struct Config {
217217
bool allowMultipleDefinition;
218218
bool fatLTOObjects;
219219
bool androidPackDynRelocs = false;
220-
bool armThumbPLTs = false;
221220
bool armHasBlx = false;
222221
bool armHasMovtMovw = false;
223222
bool armJ1J2BranchEncoding = false;

lld/ELF/InputFiles.cpp

Lines changed: 0 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -194,18 +194,6 @@ static void updateSupportedARMFeatures(const ARMAttributeParser &attributes) {
194194
if (arch >= ARMBuildAttrs::CPUArch::v8_M_Base &&
195195
profile == ARMBuildAttrs::MicroControllerProfile)
196196
config->armCMSESupport = true;
197-
198-
// The thumb PLT entries require Thumb2 which can be used on multiple archs.
199-
// For now, let's limit it to ones where ARM isn't available and we know have
200-
// Thumb2.
201-
std::optional<unsigned> armISA =
202-
attributes.getAttributeValue(ARMBuildAttrs::ARM_ISA_use);
203-
std::optional<unsigned> thumb =
204-
attributes.getAttributeValue(ARMBuildAttrs::THUMB_ISA_use);
205-
bool noArmISA = !armISA || *armISA == ARMBuildAttrs::Not_Allowed;
206-
bool hasThumb2 = thumb && *thumb >= ARMBuildAttrs::AllowThumb32;
207-
if (noArmISA && hasThumb2)
208-
config->armThumbPLTs = true;
209197
}
210198

211199
InputFile::InputFile(Kind k, MemoryBufferRef m)

lld/test/ELF/armv8-thumb-plt-reloc.s

Lines changed: 0 additions & 126 deletions
This file was deleted.

0 commit comments

Comments
 (0)