@@ -231,36 +231,68 @@ static void writePltHeaderLong(uint8_t *buf) {
231
231
// The default PLT header requires the .got.plt to be within 128 Mb of the
232
232
// .plt in the positive direction.
233
233
void ARM::writePltHeader (uint8_t *buf) const {
234
- // Use a similar sequence to that in writePlt(), the difference is the calling
235
- // conventions mean we use lr instead of ip. The PLT entry is responsible for
236
- // saving lr on the stack, the dynamic loader is responsible for reloading
237
- // it.
238
- const uint32_t pltData[] = {
239
- 0xe52de004 , // L1: str lr, [sp,#-4]!
240
- 0xe28fe600 , // add lr, pc, #0x0NN00000 &(.got.plt - L1 - 4)
241
- 0xe28eea00 , // add lr, lr, #0x000NN000 &(.got.plt - L1 - 4)
242
- 0xe5bef000 , // ldr pc, [lr, #0x00000NNN] &(.got.plt -L1 - 4)
243
- };
234
+ if (!config->armAlwaysThumb ) {
235
+ // Use a similar sequence to that in writePlt(), the difference is the calling
236
+ // conventions mean we use lr instead of ip. The PLT entry is responsible for
237
+ // saving lr on the stack, the dynamic loader is responsible for reloading
238
+ // it.
239
+ const uint32_t pltData[] = {
240
+ 0xe52de004 , // L1: str lr, [sp,#-4]!
241
+ 0xe28fe600 , // add lr, pc, #0x0NN00000 &(.got.plt - L1 - 4)
242
+ 0xe28eea00 , // add lr, lr, #0x000NN000 &(.got.plt - L1 - 4)
243
+ 0xe5bef000 , // ldr pc, [lr, #0x00000NNN] &(.got.plt -L1 - 4)
244
+ };
245
+
246
+ uint64_t offset = in.gotPlt ->getVA () - in.plt ->getVA () - 4 ;
247
+ if (!llvm::isUInt<27 >(offset)) {
248
+ // We cannot encode the Offset, use the long form.
249
+ writePltHeaderLong (buf);
250
+ return ;
251
+ }
252
+ write32 (buf + 0 , pltData[0 ]);
253
+ write32 (buf + 4 , pltData[1 ] | ((offset >> 20 ) & 0xff ));
254
+ write32 (buf + 8 , pltData[2 ] | ((offset >> 12 ) & 0xff ));
255
+ write32 (buf + 12 , pltData[3 ] | (offset & 0xfff ));
256
+ memcpy (buf + 16 , trapInstr.data (), 4 ); // Pad to 32-byte boundary
257
+ memcpy (buf + 20 , trapInstr.data (), 4 );
258
+ memcpy (buf + 24 , trapInstr.data (), 4 );
259
+ memcpy (buf + 28 , trapInstr.data (), 4 );
260
+ } else {
261
+ uint64_t offset = in.gotPlt ->getVA () - in.plt ->getVA () - 16 ;
244
262
245
- uint64_t offset = in.gotPlt ->getVA () - in.plt ->getVA () - 4 ;
246
- if (!llvm::isUInt<27 >(offset)) {
247
- // We cannot encode the Offset, use the long form.
248
- writePltHeaderLong (buf);
249
- return ;
263
+ if (!llvm::isUInt<32 >(offset)) {
264
+ // We cannot encode the Offset, use the long form.
265
+ llvm::errs () << " TODO: Implement long thumb plt header?\n " ;
266
+ __builtin_trap ();
267
+ }
268
+ // 32: b500 push {lr}
269
+ // 34: f8df e008 ldr.w lr, [pc, #0x8] @ 0x40 <func+0x40>
270
+ // 38: 44fe add lr, pc
271
+ // 3a: f85e ff08 ldr pc, [lr, #8]!
272
+ write16 (buf + 0 , 0xb500 );
273
+ write32 (buf + 2 , 0xe008f8df );
274
+ write16 (buf + 6 , 0x44fe );
275
+ write32 (buf + 8 , 0xff08f85e );
276
+ buf[12 ] = (offset >> 0 ) & 0xff ;
277
+ buf[13 ] = (offset >> 8 ) & 0xff ;
278
+ buf[14 ] = (offset >> 16 ) & 0xff ;
279
+ buf[15 ] = (offset >> 24 ) & 0xff ;
280
+
281
+ memcpy (buf + 16 , trapInstr.data (), 4 ); // Pad to 32-byte boundary
282
+ memcpy (buf + 20 , trapInstr.data (), 4 );
283
+ memcpy (buf + 24 , trapInstr.data (), 4 );
284
+ memcpy (buf + 28 , trapInstr.data (), 4 );
250
285
}
251
- write32 (buf + 0 , pltData[0 ]);
252
- write32 (buf + 4 , pltData[1 ] | ((offset >> 20 ) & 0xff ));
253
- write32 (buf + 8 , pltData[2 ] | ((offset >> 12 ) & 0xff ));
254
- write32 (buf + 12 , pltData[3 ] | (offset & 0xfff ));
255
- memcpy (buf + 16 , trapInstr.data (), 4 ); // Pad to 32-byte boundary
256
- memcpy (buf + 20 , trapInstr.data (), 4 );
257
- memcpy (buf + 24 , trapInstr.data (), 4 );
258
- memcpy (buf + 28 , trapInstr.data (), 4 );
259
286
}
260
287
261
288
void ARM::addPltHeaderSymbols(InputSection &isec) const {
  // Adds the mapping symbols that describe the PLT header written by
  // writePltHeader(): a code marker at offset 0 ($t for the Thumb header,
  // $a for the ARM header) and a data marker where the non-code bytes start
  // (offset 12 for the Thumb header, offset 16 for the ARM header).
  // Mapping symbol names must be spelled exactly "$a" / "$t" / "$d".
  const bool thumb = config->armAlwaysThumb;
  addSyntheticLocal(thumb ? "$t" : "$a", STT_NOTYPE, 0, 0, isec);
  addSyntheticLocal("$d", STT_NOTYPE, thumb ? 12 : 16, 0, isec);
}
265
297
266
298
// Long form PLT entries that do not have any restrictions on the displacement
@@ -279,32 +311,114 @@ static void writePltLong(uint8_t *buf, uint64_t gotPltEntryAddr,
279
311
// .plt in the positive direction.
280
312
void ARM::writePlt (uint8_t *buf, const Symbol &sym,
281
313
uint64_t pltEntryAddr) const {
282
- // The PLT entry is similar to the example given in Appendix A of ELF for
283
- // the Arm Architecture. Instead of using the Group Relocations to find the
284
- // optimal rotation for the 8-bit immediate used in the add instructions we
285
- // hard code the most compact rotations for simplicity. This saves a load
286
- // instruction over the long plt sequences.
287
- const uint32_t pltData[] = {
288
- 0xe28fc600 , // L1: add ip, pc, #0x0NN00000 Offset(&(.got.plt) - L1 - 8
289
- 0xe28cca00 , // add ip, ip, #0x000NN000 Offset(&(.got.plt) - L1 - 8
290
- 0xe5bcf000 , // ldr pc, [ip, #0x00000NNN] Offset(&(.got.plt) - L1 - 8
291
- };
292
314
293
- uint64_t offset = sym.getGotPltVA () - pltEntryAddr - 8 ;
294
- if (!llvm::isUInt<27 >(offset)) {
295
- // We cannot encode the Offset, use the long form.
296
- writePltLong (buf, sym.getGotPltVA (), pltEntryAddr);
297
- return ;
315
+ if (!config->armAlwaysThumb ) {
316
+ uint64_t offset = sym.getGotPltVA () - pltEntryAddr - 8 ;
317
+ // llvm::errs() << "sym: " << sym.getName() << "\n";
318
+ // llvm::errs() << "offset: " << (void*)offset << "\n";
319
+
320
+ // The PLT entry is similar to the example given in Appendix A of ELF for
321
+ // the Arm Architecture. Instead of using the Group Relocations to find the
322
+ // optimal rotation for the 8-bit immediate used in the add instructions we
323
+ // hard code the most compact rotations for simplicity. This saves a load
324
+ // instruction over the long plt sequences.
325
+ const uint32_t pltData[] = {
326
+ 0xe28fc600 , // L1: add ip, pc, #0x0NN00000 Offset(&(.got.plt) - L1 - 8
327
+ 0xe28cca00 , // add ip, ip, #0x000NN000 Offset(&(.got.plt) - L1 - 8
328
+ 0xe5bcf000 , // ldr pc, [ip, #0x00000NNN] Offset(&(.got.plt) - L1 - 8
329
+ };
330
+ if (!llvm::isUInt<27 >(offset)) {
331
+ // We cannot encode the Offset, use the long form.
332
+ writePltLong (buf, sym.getGotPltVA (), pltEntryAddr);
333
+ return ;
334
+ }
335
+ write32 (buf + 0 , pltData[0 ] | ((offset >> 20 ) & 0xff ));
336
+ write32 (buf + 4 , pltData[1 ] | ((offset >> 12 ) & 0xff ));
337
+ write32 (buf + 8 , pltData[2 ] | (offset & 0xfff ));
338
+ memcpy (buf + 12 , trapInstr.data (), 4 ); // Pad to 16-byte boundary
339
+ } else {
340
+ uint64_t offset = sym.getGotPltVA () - pltEntryAddr - 12 ;
341
+ // llvm::errs() << "sym: " << sym.getName() << "\n";
342
+ // llvm::errs() << "offset: " << (void*)offset << "\n";
343
+
344
+ if (!llvm::isUInt<32 >(offset)) {
345
+ llvm::errs () << " TODO: Implement long thumb plt?\n " ;
346
+ __builtin_trap ();
347
+ }
348
+ // MOVW: https://developer.arm.com/documentation/ddi0308/d/Thumb-Instructions/Alphabetical-list-of-Thumb-instructions/MOV--immediate-
349
+ // MOVT: https://developer.arm.com/documentation/ddi0308/d/Thumb-Instructions/Alphabetical-list-of-Thumb-instructions/MOVT
350
+ // Emit
351
+ //
352
+ // movw ip, #<lower 16 bits>
353
+ // movt ip, #<upper 16 bits>
354
+ // add ip, pc
355
+ // ldr.w pc, [ip]
356
+ //
357
+ // where ip = r12 = 0xc
358
+ //
359
+ constexpr uint32_t pltData[] = {
360
+ 0x0c00f240 , // movw ip, <offset lower 16>
361
+ 0x0c00f2c0 , // movt ip, <offset higher 16>
362
+ };
363
+ // movw encoding:
364
+ //
365
+ // 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
366
+ // 1 1 1 1 0 i 1 0 0 1 0 0 imm4
367
+ //
368
+ // 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
369
+ // 0 imm3 Rd0 imm8
370
+ //
371
+ // imm16 = imm4:i:imm3:imm8, i = bit 11
372
+ //
373
+ uint16_t offset_lower = offset & 0xffff ;
374
+ // llvm::errs() << "offset_lower: " << format_hex(offset_lower, 4) << "\n";
375
+ uint32_t movwImm8 = offset_lower & 0xff ;
376
+ uint32_t movwImm3 = (offset_lower >> 8 ) & 0x7 ;
377
+ uint32_t movwI = (offset_lower >> 11 ) & 0x1 ;
378
+ uint32_t movwImm4 = (offset_lower >> 12 ) & 0xf ;
379
+ uint32_t movwBits = (movwI << 10 ) | (movwImm4 << 0 ) | (movwImm3 << 28 ) | (movwImm8 << 16 );
380
+ // uint32_t movwBits = (movwI << 26) | (movwImm4 << 16) | (movwImm3 << 12) | movwImm8;
381
+ // llvm::errs() << "movwBits: " << format_hex(movwBits, 4) << "\n";
382
+ write32 (buf + 0 , pltData[0 ] | movwBits);
383
+
384
+ // movt encoding:
385
+ //
386
+ // 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
387
+ // 1 1 1 1 0 i 1 0 1 1 0 0 imm4
388
+ //
389
+ // 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
390
+ // 0 imm3 Rd0 imm8
391
+ //
392
+ // imm16 = imm4:i:imm3:imm8, i = bit 11
393
+ //
394
+ uint16_t offset_upper = static_cast <uint16_t >(offset >> 16 );
395
+ // llvm::errs() << "offset_upper: " << format_hex(offset_upper, 4) << "\n";
396
+ uint32_t movtImm8 = offset_upper & 0xff ;
397
+ uint32_t movtImm3 = (offset_upper >> 8 ) & 0x7 ;
398
+ uint32_t movtI = (offset_upper >> 11 ) & 0x1 ;
399
+ uint32_t movtImm4 = (offset_upper >> 12 ) & 0xf ;
400
+ // uint32_t movtBits = (movtI << 26) | (movtImm4 << 16) | (movtImm3 << 12) | movtImm8;
401
+ uint32_t movtBits = (movtI << 10 ) | (movtImm4 << 0 ) | (movtImm3 << 28 ) | (movtImm8 << 16 );
402
+ // llvm::errs() << "movtBits: " << format_hex(movtBits, 4) << "\n";
403
+ write32 (buf + 4 , pltData[1 ] | movtBits);
404
+
405
+ write16 (buf + 8 , 0x44fc ); // add ip, pc
406
+ write32 (buf + 10 , 0xf000f8dc ); // ldr.w pc, [ip]
407
+ // write32(buf + 10, 0xf8dcf000); // ldr.w pc, [ip]
408
+ write16 (buf + 14 , 0xe7fc ); // Branch to the previous instruction.
409
+ // memcpy(buf + 14, trapInstr.data(), 2); // Pad to 16-byte boundary
410
+
411
+ // The PLT size for ARM is 16 bytes and the above sequence is 14 bytes so we could potentially fit one more instruction.
298
412
}
299
- write32 (buf + 0 , pltData[0 ] | ((offset >> 20 ) & 0xff ));
300
- write32 (buf + 4 , pltData[1 ] | ((offset >> 12 ) & 0xff ));
301
- write32 (buf + 8 , pltData[2 ] | (offset & 0xfff ));
302
- memcpy (buf + 12 , trapInstr.data (), 4 ); // Pad to 16-byte boundary
303
413
}
304
414
305
415
void ARM::addPltSymbols(InputSection &isec, uint64_t off) const {
  // Adds the mapping symbols for the PLT entry located at offset off in isec.
  // Mapping symbol names must be spelled exactly "$a" / "$t" / "$d".
  if (config->armAlwaysThumb) {
    // The Thumb entry written by writePlt() is all code, so it only needs a
    // Thumb code marker.
    addSyntheticLocal("$t", STT_NOTYPE, off, 0, isec);
    return;
  }
  // ARM entry: 12 bytes of code, then the 4 padding bytes at off + 12 are
  // marked as data.
  addSyntheticLocal("$a", STT_NOTYPE, off, 0, isec);
  addSyntheticLocal("$d", STT_NOTYPE, off + 12, 0, isec);
}
309
423
310
424
bool ARM::needsThunk (RelExpr expr, RelType type, const InputFile *file,
@@ -337,7 +451,7 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
337
451
case R_ARM_THM_JUMP24:
338
452
// Source is Thumb, all PLT entries are ARM so interworking is required.
339
453
// Otherwise we need to interwork if STT_FUNC Symbol has bit 0 clear (ARM).
340
- if (expr == R_PLT_PC || (s.isFunc () && (s.getVA () & 1 ) == 0 ))
454
+ if (( expr == R_PLT_PC && !config-> armAlwaysThumb ) || (s.isFunc () && (s.getVA () & 1 ) == 0 ))
341
455
return true ;
342
456
[[fallthrough]];
343
457
case R_ARM_THM_CALL: {
@@ -606,12 +720,13 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
606
720
// PLT entries are always ARM state so we know we need to interwork.
607
721
assert (rel.sym ); // R_ARM_THM_CALL is always reached via relocate().
608
722
bool bit0Thumb = val & 1 ;
723
+ bool useThumb = bit0Thumb || config->armAlwaysThumb ;
609
724
bool isBlx = (read16 (loc + 2 ) & 0x1000 ) == 0 ;
610
725
// lld 10.0 and before always used bit0Thumb when deciding to write a BLX
611
726
// even when type not STT_FUNC. PLT entries generated by LLD are always ARM.
612
- if (!rel.sym ->isFunc () && !rel.sym ->isInPlt () && isBlx == bit0Thumb )
727
+ if (!rel.sym ->isFunc () && !rel.sym ->isInPlt () && isBlx == useThumb )
613
728
stateChangeWarning (loc, rel.type , *rel.sym );
614
- if (rel.sym ->isFunc () || rel.sym ->isInPlt () ? !bit0Thumb : isBlx) {
729
+ if (( rel.sym ->isFunc () || rel.sym ->isInPlt ()) ? !useThumb : isBlx) {
615
730
// We are writing a BLX. Ensure BLX destination is 4-byte aligned. As
616
731
// the BLX instruction may only be two byte aligned. This must be done
617
732
// before overflow check.
0 commit comments