Skip to content

Commit 4f19a83

Browse files
committed
[WIP][lld] Support thumb PLTs for cortex-M
1 parent 7564566 commit 4f19a83

File tree

3 files changed

+169
-49
lines changed

3 files changed

+169
-49
lines changed

lld/ELF/Arch/ARM.cpp

Lines changed: 164 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -231,36 +231,68 @@ static void writePltHeaderLong(uint8_t *buf) {
231231
// The default PLT header requires the .got.plt to be within 128 MiB of the
232232
// .plt in the positive direction.
233233
void ARM::writePltHeader(uint8_t *buf) const {
234-
// Use a similar sequence to that in writePlt(), the difference is the calling
235-
// conventions mean we use lr instead of ip. The PLT entry is responsible for
236-
// saving lr on the stack, the dynamic loader is responsible for reloading
237-
// it.
238-
const uint32_t pltData[] = {
239-
0xe52de004, // L1: str lr, [sp,#-4]!
240-
0xe28fe600, // add lr, pc, #0x0NN00000 &(.got.plt - L1 - 4)
241-
0xe28eea00, // add lr, lr, #0x000NN000 &(.got.plt - L1 - 4)
242-
0xe5bef000, // ldr pc, [lr, #0x00000NNN] &(.got.plt -L1 - 4)
243-
};
234+
if (!config->armAlwaysThumb) {
235+
// Use a similar sequence to that in writePlt(), the difference is the calling
236+
// conventions mean we use lr instead of ip. The PLT entry is responsible for
237+
// saving lr on the stack, the dynamic loader is responsible for reloading
238+
// it.
239+
const uint32_t pltData[] = {
240+
0xe52de004, // L1: str lr, [sp,#-4]!
241+
0xe28fe600, // add lr, pc, #0x0NN00000 &(.got.plt - L1 - 4)
242+
0xe28eea00, // add lr, lr, #0x000NN000 &(.got.plt - L1 - 4)
243+
0xe5bef000, // ldr pc, [lr, #0x00000NNN] &(.got.plt -L1 - 4)
244+
};
245+
246+
uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 4;
247+
if (!llvm::isUInt<27>(offset)) {
248+
// We cannot encode the Offset, use the long form.
249+
writePltHeaderLong(buf);
250+
return;
251+
}
252+
write32(buf + 0, pltData[0]);
253+
write32(buf + 4, pltData[1] | ((offset >> 20) & 0xff));
254+
write32(buf + 8, pltData[2] | ((offset >> 12) & 0xff));
255+
write32(buf + 12, pltData[3] | (offset & 0xfff));
256+
memcpy(buf + 16, trapInstr.data(), 4); // Pad to 32-byte boundary
257+
memcpy(buf + 20, trapInstr.data(), 4);
258+
memcpy(buf + 24, trapInstr.data(), 4);
259+
memcpy(buf + 28, trapInstr.data(), 4);
260+
} else {
261+
uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 16;
244262

245-
uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 4;
246-
if (!llvm::isUInt<27>(offset)) {
247-
// We cannot encode the Offset, use the long form.
248-
writePltHeaderLong(buf);
249-
return;
263+
if (!llvm::isUInt<32>(offset)) {
264+
// We cannot encode the Offset, use the long form.
265+
llvm::errs() << "TODO: Implement long thumb plt header?\n";
266+
__builtin_trap();
267+
}
268+
// 32: b500 push {lr}
269+
// 34: f8df e008 ldr.w lr, [pc, #0x8] @ 0x40 <func+0x40>
270+
// 38: 44fe add lr, pc
271+
// 3a: f85e ff08 ldr pc, [lr, #8]!
272+
write16(buf + 0, 0xb500);
273+
write32(buf + 2, 0xe008f8df);
274+
write16(buf + 6, 0x44fe);
275+
write32(buf + 8, 0xff08f85e);
276+
buf[12] = (offset >> 0) & 0xff;
277+
buf[13] = (offset >> 8) & 0xff;
278+
buf[14] = (offset >> 16) & 0xff;
279+
buf[15] = (offset >> 24) & 0xff;
280+
281+
memcpy(buf + 16, trapInstr.data(), 4); // Pad to 32-byte boundary
282+
memcpy(buf + 20, trapInstr.data(), 4);
283+
memcpy(buf + 24, trapInstr.data(), 4);
284+
memcpy(buf + 28, trapInstr.data(), 4);
250285
}
251-
write32(buf + 0, pltData[0]);
252-
write32(buf + 4, pltData[1] | ((offset >> 20) & 0xff));
253-
write32(buf + 8, pltData[2] | ((offset >> 12) & 0xff));
254-
write32(buf + 12, pltData[3] | (offset & 0xfff));
255-
memcpy(buf + 16, trapInstr.data(), 4); // Pad to 32-byte boundary
256-
memcpy(buf + 20, trapInstr.data(), 4);
257-
memcpy(buf + 24, trapInstr.data(), 4);
258-
memcpy(buf + 28, trapInstr.data(), 4);
259286
}
260287

261288
void ARM::addPltHeaderSymbols(InputSection &isec) const {
262-
addSyntheticLocal("$a", STT_NOTYPE, 0, 0, isec);
263-
addSyntheticLocal("$d", STT_NOTYPE, 16, 0, isec);
289+
if (!config->armAlwaysThumb) {
290+
addSyntheticLocal("$a", STT_NOTYPE, 0, 0, isec);
291+
addSyntheticLocal("$d", STT_NOTYPE, 16, 0, isec);
292+
} else {
293+
addSyntheticLocal("$t", STT_NOTYPE, 0, 0, isec);
294+
addSyntheticLocal("$d", STT_NOTYPE, 12, 0, isec);
295+
}
264296
}
265297

266298
// Long form PLT entries that do not have any restrictions on the displacement
@@ -279,32 +311,114 @@ static void writePltLong(uint8_t *buf, uint64_t gotPltEntryAddr,
279311
// .plt in the positive direction.
280312
void ARM::writePlt(uint8_t *buf, const Symbol &sym,
281313
uint64_t pltEntryAddr) const {
282-
// The PLT entry is similar to the example given in Appendix A of ELF for
283-
// the Arm Architecture. Instead of using the Group Relocations to find the
284-
// optimal rotation for the 8-bit immediate used in the add instructions we
285-
// hard code the most compact rotations for simplicity. This saves a load
286-
// instruction over the long plt sequences.
287-
const uint32_t pltData[] = {
288-
0xe28fc600, // L1: add ip, pc, #0x0NN00000 Offset(&(.got.plt) - L1 - 8
289-
0xe28cca00, // add ip, ip, #0x000NN000 Offset(&(.got.plt) - L1 - 8
290-
0xe5bcf000, // ldr pc, [ip, #0x00000NNN] Offset(&(.got.plt) - L1 - 8
291-
};
292314

293-
uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 8;
294-
if (!llvm::isUInt<27>(offset)) {
295-
// We cannot encode the Offset, use the long form.
296-
writePltLong(buf, sym.getGotPltVA(), pltEntryAddr);
297-
return;
315+
if (!config->armAlwaysThumb) {
316+
uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 8;
317+
//llvm::errs() << "sym: " << sym.getName() << "\n";
318+
//llvm::errs() << "offset: " << (void*)offset << "\n";
319+
320+
// The PLT entry is similar to the example given in Appendix A of ELF for
321+
// the Arm Architecture. Instead of using the Group Relocations to find the
322+
// optimal rotation for the 8-bit immediate used in the add instructions we
323+
// hard code the most compact rotations for simplicity. This saves a load
324+
// instruction over the long plt sequences.
325+
const uint32_t pltData[] = {
326+
0xe28fc600, // L1: add ip, pc, #0x0NN00000 Offset(&(.got.plt) - L1 - 8
327+
0xe28cca00, // add ip, ip, #0x000NN000 Offset(&(.got.plt) - L1 - 8
328+
0xe5bcf000, // ldr pc, [ip, #0x00000NNN] Offset(&(.got.plt) - L1 - 8
329+
};
330+
if (!llvm::isUInt<27>(offset)) {
331+
// We cannot encode the Offset, use the long form.
332+
writePltLong(buf, sym.getGotPltVA(), pltEntryAddr);
333+
return;
334+
}
335+
write32(buf + 0, pltData[0] | ((offset >> 20) & 0xff));
336+
write32(buf + 4, pltData[1] | ((offset >> 12) & 0xff));
337+
write32(buf + 8, pltData[2] | (offset & 0xfff));
338+
memcpy(buf + 12, trapInstr.data(), 4); // Pad to 16-byte boundary
339+
} else {
340+
uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 12;
341+
//llvm::errs() << "sym: " << sym.getName() << "\n";
342+
//llvm::errs() << "offset: " << (void*)offset << "\n";
343+
344+
if (!llvm::isUInt<32>(offset)) {
345+
llvm::errs() << "TODO: Implement long thumb plt?\n";
346+
__builtin_trap();
347+
}
348+
// MOVW: https://developer.arm.com/documentation/ddi0308/d/Thumb-Instructions/Alphabetical-list-of-Thumb-instructions/MOV--immediate-
349+
// MOVT: https://developer.arm.com/documentation/ddi0308/d/Thumb-Instructions/Alphabetical-list-of-Thumb-instructions/MOVT
350+
// Emit
351+
//
352+
// movw ip, #<lower 16 bits>
353+
// movt ip, #<upper 16 bits>
354+
// add ip, pc
355+
// ldr.w pc, [ip]
356+
//
357+
// where ip = r12 = 0xc
358+
//
359+
constexpr uint32_t pltData[] = {
360+
0x0c00f240, // movw ip, <offset lower 16>
361+
0x0c00f2c0, // movt ip, <offset higher 16>
362+
};
363+
// movw encoding:
364+
//
365+
// 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
366+
// 1 1 1 1 0 i 1 0 0 1 0 0 imm4
367+
//
368+
// 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
369+
// 0 imm3 Rd0 imm8
370+
//
371+
// imm16 = imm4:i:imm3:imm8, i = bit 11
372+
//
373+
uint16_t offset_lower = offset & 0xffff;
374+
//llvm::errs() << "offset_lower: " << format_hex(offset_lower, 4) << "\n";
375+
uint32_t movwImm8 = offset_lower & 0xff;
376+
uint32_t movwImm3 = (offset_lower >> 8) & 0x7;
377+
uint32_t movwI = (offset_lower >> 11) & 0x1;
378+
uint32_t movwImm4 = (offset_lower >> 12) & 0xf;
379+
uint32_t movwBits = (movwI << 10) | (movwImm4 << 0) | (movwImm3 << 28) | (movwImm8 << 16);
380+
//uint32_t movwBits = (movwI << 26) | (movwImm4 << 16) | (movwImm3 << 12) | movwImm8;
381+
//llvm::errs() << "movwBits: " << format_hex(movwBits, 4) << "\n";
382+
write32(buf + 0, pltData[0] | movwBits);
383+
384+
// movt encoding:
385+
//
386+
// 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
387+
// 1 1 1 1 0 i 1 0 1 1 0 0 imm4
388+
//
389+
// 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
390+
// 0 imm3 Rd0 imm8
391+
//
392+
// imm16 = imm4:i:imm3:imm8, i = bit 11
393+
//
394+
uint16_t offset_upper = static_cast<uint16_t>(offset >> 16);
395+
//llvm::errs() << "offset_upper: " << format_hex(offset_upper, 4) << "\n";
396+
uint32_t movtImm8 = offset_upper & 0xff;
397+
uint32_t movtImm3 = (offset_upper >> 8) & 0x7;
398+
uint32_t movtI = (offset_upper >> 11) & 0x1;
399+
uint32_t movtImm4 = (offset_upper >> 12) & 0xf;
400+
//uint32_t movtBits = (movtI << 26) | (movtImm4 << 16) | (movtImm3 << 12) | movtImm8;
401+
uint32_t movtBits = (movtI << 10) | (movtImm4 << 0) | (movtImm3 << 28) | (movtImm8 << 16);
402+
//llvm::errs() << "movtBits: " << format_hex(movtBits, 4) << "\n";
403+
write32(buf + 4, pltData[1] | movtBits);
404+
405+
write16(buf + 8, 0x44fc); // add ip, pc
406+
write32(buf + 10, 0xf000f8dc); // ldr.w pc, [ip]
407+
//write32(buf + 10, 0xf8dcf000); // ldr.w pc, [ip]
408+
write16(buf + 14, 0xe7fc); // Branch to the previous instruction.
409+
//memcpy(buf + 14, trapInstr.data(), 2); // Pad to 16-byte boundary
410+
411+
// The PLT entry size for ARM is 16 bytes. The four instructions above take 14 bytes and the trailing branch fills the last 2, so that slot could potentially hold a different 16-bit instruction.
298412
}
299-
write32(buf + 0, pltData[0] | ((offset >> 20) & 0xff));
300-
write32(buf + 4, pltData[1] | ((offset >> 12) & 0xff));
301-
write32(buf + 8, pltData[2] | (offset & 0xfff));
302-
memcpy(buf + 12, trapInstr.data(), 4); // Pad to 16-byte boundary
303413
}
304414

305415
void ARM::addPltSymbols(InputSection &isec, uint64_t off) const {
306-
addSyntheticLocal("$a", STT_NOTYPE, off, 0, isec);
307-
addSyntheticLocal("$d", STT_NOTYPE, off + 12, 0, isec);
416+
if (!config->armAlwaysThumb) {
417+
addSyntheticLocal("$a", STT_NOTYPE, off, 0, isec);
418+
addSyntheticLocal("$d", STT_NOTYPE, off + 12, 0, isec);
419+
} else {
420+
addSyntheticLocal("$t", STT_NOTYPE, off, 0, isec);
421+
}
308422
}
309423

310424
bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
@@ -337,7 +451,7 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
337451
case R_ARM_THM_JUMP24:
338452
// Source is Thumb. PLT entries are ARM unless config->armAlwaysThumb is set, so interworking is required for ARM PLT entries.
339453
// Otherwise we need to interwork if STT_FUNC Symbol has bit 0 clear (ARM).
340-
if (expr == R_PLT_PC || (s.isFunc() && (s.getVA() & 1) == 0))
454+
if ((expr == R_PLT_PC && !config->armAlwaysThumb) || (s.isFunc() && (s.getVA() & 1) == 0))
341455
return true;
342456
[[fallthrough]];
343457
case R_ARM_THM_CALL: {
@@ -606,12 +720,13 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
606720
// PLT entries are always ARM state so we know we need to interwork.
607721
assert(rel.sym); // R_ARM_THM_CALL is always reached via relocate().
608722
bool bit0Thumb = val & 1;
723+
bool useThumb = bit0Thumb || config->armAlwaysThumb;
609724
bool isBlx = (read16(loc + 2) & 0x1000) == 0;
610725
// lld 10.0 and before always used bit0Thumb when deciding to write a BLX
611726
// even when type not STT_FUNC. PLT entries generated by LLD are ARM unless config->armAlwaysThumb is set.
612-
if (!rel.sym->isFunc() && !rel.sym->isInPlt() && isBlx == bit0Thumb)
727+
if (!rel.sym->isFunc() && !rel.sym->isInPlt() && isBlx == useThumb)
613728
stateChangeWarning(loc, rel.type, *rel.sym);
614-
if (rel.sym->isFunc() || rel.sym->isInPlt() ? !bit0Thumb : isBlx) {
729+
if ((rel.sym->isFunc() || rel.sym->isInPlt()) ? !useThumb : isBlx) {
615730
// We are writing a BLX. Ensure BLX destination is 4-byte aligned. As
616731
// the BLX instruction may only be two byte aligned. This must be done
617732
// before overflow check.

lld/ELF/Config.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ struct Config {
212212
bool allowMultipleDefinition;
213213
bool fatLTOObjects;
214214
bool androidPackDynRelocs = false;
215+
bool armAlwaysThumb = false;
215216
bool armHasBlx = false;
216217
bool armHasMovtMovw = false;
217218
bool armJ1J2BranchEncoding = false;

lld/ELF/InputFiles.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,10 @@ static void updateSupportedARMFeatures(const ARMAttributeParser &attributes) {
194194
if (arch >= ARMBuildAttrs::CPUArch::v8_M_Base &&
195195
profile == ARMBuildAttrs::MicroControllerProfile)
196196
config->armCMSESupport = true;
197+
198+
// The Cortex-M processors only support Thumb.
199+
if (profile == ARMBuildAttrs::MicroControllerProfile)
200+
config->armAlwaysThumb = true;
197201
}
198202

199203
InputFile::InputFile(Kind k, MemoryBufferRef m)

0 commit comments

Comments
 (0)