Skip to content

Commit bfc7636

Browse files
[LLD][PowerPC] Add a pc-rel based long branch thunk
This patch adds a PC-relative long branch thunk for the local call protocol in which neither the caller nor the callee uses the TOC. Reviewed By: sfertile, nemanjai. Differential Revision: https://reviews.llvm.org/D86706
1 parent b9b954b commit bfc7636

File tree

3 files changed

+136
-5
lines changed

3 files changed

+136
-5
lines changed

lld/ELF/Thunks.cpp

Lines changed: 55 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,7 @@ class PPC64PCRelPLTStub final : public Thunk {
326326

327327
// A bl instruction uses a signed 24 bit offset, with an implicit 4 byte
328328
// alignment. This gives a possible 26 bits of 'reach'. If the call offset is
329-
// larger then that we need to emit a long-branch thunk. The target address
329+
// larger than that we need to emit a long-branch thunk. The target address
330330
// of the callee is stored in a table to be accessed TOC-relative. Since the
331331
// call must be local (a non-local call will have a PltCallStub instead) the
332332
// table stores the address of the callee's local entry point. For
@@ -337,6 +337,8 @@ class PPC64LongBranchThunk : public Thunk {
337337
uint32_t size() override { return 16; }
338338
void writeTo(uint8_t *buf) override;
339339
void addSymbols(ThunkSection &isec) override;
340+
bool isCompatibleWith(const InputSection &isec,
341+
const Relocation &rel) const override;
340342

341343
protected:
342344
PPC64LongBranchThunk(Symbol &dest, int64_t addend) : Thunk(dest, addend) {}
@@ -365,6 +367,24 @@ class PPC64PDLongBranchThunk final : public PPC64LongBranchThunk {
365367
}
366368
};
367369

370+
// A bl instruction uses a signed 24 bit offset, with an implicit 4 byte
371+
// alignment. This gives a possible 26 bits of 'reach'. If neither the caller
372+
// nor the callee uses the TOC and the call offset does not fit in 26 bits,
373+
// we need to emit a PC-relative long-branch thunk. The target address of
374+
// the callee is computed with a PC-relative offset.
375+
class PPC64PCRelLongBranchThunk final : public Thunk {
376+
public:
377+
// Forwards dest and addend to the Thunk base and sets the thunk's alignment
// to 16.
// NOTE(review): presumably 16-byte alignment keeps the 8-byte prefixed
// instruction written by writeTo() from straddling a boundary it must not
// cross -- confirm against the Power ISA rules for prefixed instructions.
PPC64PCRelLongBranchThunk(Symbol &dest, int64_t addend)
378+
: Thunk(dest, addend) {
379+
alignment = 16;
380+
}
381+
// Size of the thunk in bytes. writeTo() emits one prefixed instruction at
// offset 0 and two 32-bit instructions at offsets 8 and 12.
uint32_t size() override { return 16; }
382+
// Writes the thunk's instruction sequence into buf.
void writeTo(uint8_t *buf) override;
383+
// Defines the thunk's "__long_branch_pcrel_<callee>" symbol in isec.
void addSymbols(ThunkSection &isec) override;
384+
// Returns true only for R_PPC64_REL24_NOTOC relocations.
bool isCompatibleWith(const InputSection &isec,
385+
const Relocation &rel) const override;
386+
};
387+
368388
} // end anonymous namespace
369389

370390
Defined *Thunk::addSymbol(StringRef name, uint8_t type, uint64_t value,
@@ -937,6 +957,33 @@ void PPC64LongBranchThunk::addSymbols(ThunkSection &isec) {
937957
isec);
938958
}
939959

960+
// Reports whether this TOC-based long-branch thunk may serve the relocation
// rel. Only the relocation type is examined: the result is true for
// R_PPC64_REL24 and R_PPC64_REL14 and false for every other type (in
// particular R_PPC64_REL24_NOTOC). The isec parameter is not consulted.
bool PPC64LongBranchThunk::isCompatibleWith(const InputSection &isec,
961+
const Relocation &rel) const {
962+
return rel.type == R_PPC64_REL24 || rel.type == R_PPC64_REL14;
963+
}
964+
965+
// Emits the 16-byte thunk body into buf:
//   buf + 0  : paddi r12, 0, func@pcrel, 1   (8-byte prefixed instruction)
//   buf + 8  : mtctr r12
//   buf + 12 : bctr
// The link is aborted with a fatal error when the displacement from the thunk
// to the destination does not fit in a signed 34-bit value.
void PPC64PCRelLongBranchThunk::writeTo(uint8_t *buf) {
966+
// Displacement from the thunk's own symbol to the callee.
int64_t offset = destination.getVA() - getThunkTargetSym()->getVA();
967+
// The displacement is split into an 18-bit and a 16-bit field below, so it
// must be representable in 34 signed bits.
if (!isInt<34>(offset))
968+
fatal("offset overflow 34 bits, please compile using the large code model");
// Bits 16..33 of the displacement are placed at bits 32..49 of the 64-bit
// encoding; bits 0..15 of the displacement stay in bits 0..15.
uint64_t paddi = PADDI_R12_NO_DISP | (((offset >> 16) & 0x3ffff) << 32) |
970+
(offset & 0xffff);
971+
972+
writePrefixedInstruction(buf + 0, paddi); // paddi r12, 0, func@pcrel, 1
973+
write32(buf + 8, MTCTR_R12); // mtctr r12
974+
write32(buf + 12, BCTR); // bctr
975+
}
976+
977+
// Defines the symbol that names this thunk. The name is
// "__long_branch_pcrel_" followed by the destination symbol's name; it is
// added with type STT_FUNC and value 0 for the thunk section isec.
void PPC64PCRelLongBranchThunk::addSymbols(ThunkSection &isec) {
978+
// saver.save() is handed the concatenated name before it is passed on.
// NOTE(review): presumably this gives the temporary string stable storage
// -- confirm.
addSymbol(saver.save("__long_branch_pcrel_" + destination.getName()),
979+
STT_FUNC, 0, isec);
980+
}
981+
982+
// Reports whether this PC-relative long-branch thunk may serve the relocation
// rel. The result is true only when the relocation type is
// R_PPC64_REL24_NOTOC; the isec parameter is not consulted.
bool PPC64PCRelLongBranchThunk::isCompatibleWith(const InputSection &isec,
983+
const Relocation &rel) const {
984+
return rel.type == R_PPC64_REL24_NOTOC;
985+
}
986+
940987
Thunk::Thunk(Symbol &d, int64_t a) : destination(d), addend(a), offset(0) {}
941988

942989
Thunk::~Thunk() = default;
@@ -1057,12 +1104,15 @@ static Thunk *addThunkPPC64(RelType type, Symbol &s, int64_t a) {
10571104
: (Thunk *)make<PPC64PltCallStub>(s);
10581105

10591106
// This check looks at the st_other bits of the callee. If the value is 1
1060-
// then the callee clobbers the TOC and we need an R2 save stub.
1061-
if ((s.stOther >> 5) == 1)
1107+
// then the callee clobbers the TOC and we need an R2 save stub when RelType
1108+
// is R_PPC64_REL14 or R_PPC64_REL24.
1109+
if ((type == R_PPC64_REL14 || type == R_PPC64_REL24) && (s.stOther >> 5) == 1)
10621110
return make<PPC64R2SaveStub>(s);
10631111

1064-
if (type == R_PPC64_REL24_NOTOC && (s.stOther >> 5) > 1)
1065-
return make<PPC64R12SetupStub>(s);
1112+
if (type == R_PPC64_REL24_NOTOC)
1113+
return (s.stOther >> 5) > 1
1114+
? (Thunk *)make<PPC64R12SetupStub>(s)
1115+
: (Thunk *)make<PPC64PCRelLongBranchThunk>(s, a);
10661116

10671117
if (config->picThunk)
10681118
return make<PPC64PILongBranchThunk>(s, a);
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# REQUIRES: ppc
2+
# RUN: echo 'SECTIONS { \
3+
# RUN: .text_low 0x2000: { *(.text_low) } \
4+
# RUN: .text_high 0x800002000 : { *(.text_high) } \
5+
# RUN: }' > %t.script
6+
7+
# RUN: llvm-mc -filetype=obj -triple=ppc64le %s -o %t.o
8+
# RUN: not ld.lld -T %t.script %t.o -o %t 2>&1 >/dev/null | FileCheck %s
9+
# RUN: llvm-mc -filetype=obj -triple=ppc64le -defsym HIDDEN=1 %s -o %t.o
10+
# RUN: not ld.lld -shared -T %t.script %t.o -o %t.so 2>&1 >/dev/null | FileCheck %s
11+
12+
# RUN: llvm-mc -filetype=obj -triple=ppc64 %s -o %t.o
13+
# RUN: not ld.lld -T %t.script %t.o -o %t 2>&1 >/dev/null | FileCheck %s
14+
# RUN: llvm-mc -filetype=obj -triple=ppc64 -defsym HIDDEN=1 %s -o %t.o
15+
# RUN: not ld.lld -shared -T %t.script %t.o -o %t.so 2>&1 >/dev/null | FileCheck %s
16+
17+
# CHECK: error: offset overflow 34 bits, please compile using the large code model
18+
19+
## Caller: the linker script places .text_low at 0x2000.
.section .text_low, "ax", %progbits
20+
.globl _start
21+
_start:
22+
## NOTOC call to a callee that the linker script places about 2^35 bytes away,
## which is outside the signed 34-bit range the thunk can encode.
bl high@notoc
23+
blr
24+
25+
## Callee: the linker script places .text_high at 0x800002000.
.section .text_high, "ax", %progbits
26+
## When HIDDEN is defined (the -shared runs), the callee is given hidden
## visibility.
.ifdef HIDDEN
27+
.hidden high
28+
.endif
29+
.globl high
30+
high:
31+
blr
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# REQUIRES: ppc
2+
# RUN: echo 'SECTIONS { \
3+
# RUN: .text_low 0x2000: { *(.text_low) } \
4+
# RUN: .text_high 0x2002000 : { *(.text_high) } \
5+
# RUN: }' > %t.script
6+
7+
# RUN: llvm-mc -filetype=obj -triple=ppc64le %s -o %t.o
8+
# RUN: ld.lld -T %t.script %t.o -o %t
9+
# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t | FileCheck %s
10+
11+
# RUN: llvm-mc -filetype=obj -triple=ppc64le -defsym HIDDEN=1 %s -o %t.o
12+
# RUN: ld.lld -shared -T %t.script %t.o -o %t.so
13+
# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t.so | FileCheck %s
14+
15+
# RUN: llvm-mc -filetype=obj -triple=ppc64 %s -o %t.o
16+
# RUN: ld.lld -T %t.script %t.o -o %t
17+
# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t | FileCheck %s
18+
19+
# RUN: llvm-mc -filetype=obj -triple=ppc64 -defsym HIDDEN=1 %s -o %t.o
20+
# RUN: ld.lld -shared -T %t.script %t.o -o %t.so
21+
# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t.so | FileCheck %s
22+
23+
# CHECK-LABEL: <_start>:
24+
# CHECK-NEXT: 2000: bl 0x2010
25+
# CHECK-NEXT: blr
26+
# CHECK-NEXT: trap
27+
# CHECK-NEXT: trap
28+
29+
## Callee address - program counter = 0x2002000 - 0x2010 = 33554416
30+
# CHECK-LABEL: <__long_branch_pcrel_high>:
31+
# CHECK-NEXT: 2010: paddi 12, 0, 33554416, 1
32+
# CHECK-NEXT: mtctr 12
33+
# CHECK-NEXT: bctr
34+
35+
# CHECK-LABEL: <high>:
36+
# CHECK-NEXT: 2002000: blr
37+
38+
## Caller: the linker script places .text_low at 0x2000.
.section .text_low, "ax", %progbits
39+
.globl _start
40+
_start:
41+
## NOTOC call to a callee 0x2000000 bytes away, just beyond the signed 26-bit
## reach of bl; the expected output routes it through the thunk at 0x2010.
bl high@notoc
42+
blr
43+
44+
## Callee: the linker script places .text_high at 0x2002000.
.section .text_high, "ax", %progbits
45+
## When HIDDEN is defined (the -shared runs), the callee is given hidden
## visibility.
.ifdef HIDDEN
46+
.hidden high
47+
.endif
48+
.globl high
49+
high:
50+
blr

0 commit comments

Comments
 (0)