Skip to content

Commit 55929cd

Browse files
[JITLink][AArch32] Implement Armv5 ldr-pc stubs and use them for all pre-v7 targets (#79082)
This stub type loads an absolute address directly into the PC register. It's the simplest and most compatible way to implement a branch indirection across the entire address space (and probably the slowest as well). It's the ideal fallback for all targets for which we did not (yet) implement a more performant solution.
1 parent 5176df5 commit 55929cd

File tree

12 files changed

+439
-227
lines changed

12 files changed

+439
-227
lines changed

llvm/include/llvm/ExecutionEngine/JITLink/aarch32.h

Lines changed: 44 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -134,33 +134,28 @@ const char *getEdgeKindName(Edge::Kind K);
134134
/// Stubs are often called "veneers" in the official docs and online.
135135
///
136136
enum class StubsFlavor {
137-
Unsupported = 0,
137+
Undefined = 0,
138+
pre_v7,
138139
v7,
139140
};
140141

141142
/// JITLink sub-arch configuration for Arm CPU models
142143
struct ArmConfig {
143144
bool J1J2BranchEncoding = false;
144-
StubsFlavor Stubs = StubsFlavor::Unsupported;
145+
StubsFlavor Stubs = StubsFlavor::Undefined;
145146
// In the long term, we might want a linker switch like --target1-rel
146147
bool Target1Rel = false;
147148
};
148149

149150
/// Obtain the sub-arch configuration for a given Arm CPU model.
150151
inline ArmConfig getArmConfigForCPUArch(ARMBuildAttrs::CPUArch CPUArch) {
151152
ArmConfig ArmCfg;
152-
switch (CPUArch) {
153-
case ARMBuildAttrs::v7:
154-
case ARMBuildAttrs::v8_A:
153+
if (CPUArch == ARMBuildAttrs::v7 || CPUArch >= ARMBuildAttrs::v7E_M) {
155154
ArmCfg.J1J2BranchEncoding = true;
156155
ArmCfg.Stubs = StubsFlavor::v7;
157-
break;
158-
default:
159-
DEBUG_WITH_TYPE("jitlink", {
160-
dbgs() << " Warning: ARM config not defined for CPU architecture "
161-
<< getCPUArchName(CPUArch) << " (" << CPUArch << ")\n";
162-
});
163-
break;
156+
} else {
157+
ArmCfg.J1J2BranchEncoding = false;
158+
ArmCfg.Stubs = StubsFlavor::pre_v7;
164159
}
165160
return ArmCfg;
166161
}
@@ -344,6 +339,43 @@ class GOTBuilder : public TableManager<GOTBuilder> {
344339
Section *GOTSection = nullptr;
345340
};
346341

342+
/// Stubs builder emits non-position-independent Arm stubs for pre-v7 CPUs.
343+
/// These architectures have no MovT/MovW instructions and don't support Thumb2.
344+
/// BL is the only Thumb instruction that can generate stubs and they can always
345+
/// be transformed into BLX.
346+
class StubsManager_prev7 {
347+
public:
348+
StubsManager_prev7() = default;
349+
350+
/// Name of the object file section that will contain all our stubs.
351+
static StringRef getSectionName() {
352+
return "__llvm_jitlink_aarch32_STUBS_prev7";
353+
}
354+
355+
/// Implements link-graph traversal via visitExistingEdges()
356+
bool visitEdge(LinkGraph &G, Block *B, Edge &E);
357+
358+
private:
359+
// Each stub uses a single block that can have 2 entrypoints, one for Arm and
360+
// one for Thumb
361+
struct StubMapEntry {
362+
Block *B = nullptr;
363+
Symbol *ArmEntry = nullptr;
364+
Symbol *ThumbEntry = nullptr;
365+
};
366+
367+
std::pair<StubMapEntry *, bool> getStubMapSlot(StringRef Name) {
368+
auto &&[Stubs, NewStub] = StubMap.try_emplace(Name);
369+
return std::make_pair(&Stubs->second, NewStub);
370+
}
371+
372+
Symbol *getOrCreateSlotEntrypoint(LinkGraph &G, StubMapEntry &Slot,
373+
bool Thumb);
374+
375+
DenseMap<StringRef, StubMapEntry> StubMap;
376+
Section *StubsSection = nullptr;
377+
};
378+
347379
/// Stubs builder for v7 emits non-position-independent Arm and Thumb stubs.
348380
class StubsManager_v7 {
349381
public:

llvm/lib/ExecutionEngine/JITLink/ELF_aarch32.cpp

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -259,21 +259,8 @@ createLinkGraphFromELFObject_aarch32(MemoryBufferRef ObjectBuffer) {
259259
// Resolve our internal configuration for the target. If at some point the
260260
// CPUArch alone becomes too imprecise, we can find more details in the
261261
// Tag_CPU_arch_profile.
262-
aarch32::ArmConfig ArmCfg;
263-
using namespace ARMBuildAttrs;
264-
auto Arch = static_cast<CPUArch>(ARM::getArchAttr(AK));
265-
switch (Arch) {
266-
case v7:
267-
case v8_A:
268-
ArmCfg = aarch32::getArmConfigForCPUArch(Arch);
269-
assert(ArmCfg.Stubs != aarch32::StubsFlavor::Unsupported &&
270-
"Provide a config for each supported CPU");
271-
break;
272-
default:
273-
return make_error<JITLinkError>(
274-
"Failed to build ELF link graph: Unsupported CPU arch " +
275-
StringRef(aarch32::getCPUArchName(Arch)));
276-
}
262+
auto Arch = static_cast<ARMBuildAttrs::CPUArch>(ARM::getArchAttr(AK));
263+
aarch32::ArmConfig ArmCfg = aarch32::getArmConfigForCPUArch(Arch);
277264

278265
// Populate the link-graph.
279266
switch (TT.getArch()) {
@@ -318,11 +305,15 @@ void link_ELF_aarch32(std::unique_ptr<LinkGraph> G,
318305
PassCfg.PrePrunePasses.push_back(markAllSymbolsLive);
319306

320307
switch (ArmCfg.Stubs) {
308+
case aarch32::StubsFlavor::pre_v7:
309+
PassCfg.PostPrunePasses.push_back(
310+
buildTables_ELF_aarch32<aarch32::StubsManager_prev7>);
311+
break;
321312
case aarch32::StubsFlavor::v7:
322313
PassCfg.PostPrunePasses.push_back(
323314
buildTables_ELF_aarch32<aarch32::StubsManager_v7>);
324315
break;
325-
case aarch32::StubsFlavor::Unsupported:
316+
case aarch32::StubsFlavor::Undefined:
326317
llvm_unreachable("Check before building graph");
327318
}
328319
}

llvm/lib/ExecutionEngine/JITLink/aarch32.cpp

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -739,6 +739,13 @@ bool GOTBuilder::visitEdge(LinkGraph &G, Block *B, Edge &E) {
739739
return true;
740740
}
741741

742+
const uint8_t ArmThumbv5LdrPc[] = {
743+
0x78, 0x47, // bx pc
744+
0xfd, 0xe7, // b #-6 ; Arm recommended sequence to follow bx pc
745+
0x04, 0xf0, 0x1f, 0xe5, // ldr pc, [pc,#-4] ; L1
746+
0x00, 0x00, 0x00, 0x00, // L1: .word S
747+
};
748+
742749
const uint8_t Armv7ABS[] = {
743750
0x00, 0xc0, 0x00, 0xe3, // movw r12, #0x0000 ; lower 16-bit
744751
0x00, 0xc0, 0x40, 0xe3, // movt r12, #0x0000 ; upper 16-bit
@@ -759,6 +766,12 @@ static Block &allocStub(LinkGraph &G, Section &S, const uint8_t (&Code)[Size]) {
759766
return G.createContentBlock(S, Template, orc::ExecutorAddr(), Alignment, 0);
760767
}
761768

769+
static Block &createStubPrev7(LinkGraph &G, Section &S, Symbol &Target) {
770+
Block &B = allocStub(G, S, ArmThumbv5LdrPc);
771+
B.addEdge(Data_Pointer32, 8, Target, 0);
772+
return B;
773+
}
774+
762775
static Block &createStubThumbv7(LinkGraph &G, Section &S, Symbol &Target) {
763776
Block &B = allocStub(G, S, Thumbv7ABS);
764777
B.addEdge(Thumb_MovwAbsNC, 0, Target, 0);
@@ -816,6 +829,60 @@ static bool needsStub(const Edge &E) {
816829
return false;
817830
}
818831

832+
// The ArmThumbv5LdrPc stub has 2 entrypoints: Thumb at offset 0 is taken only
833+
// for Thumb B instructions. Thumb BL is rewritten to BLX and takes the Arm
834+
// entrypoint at offset 4. Arm branches always use that one.
835+
Symbol *StubsManager_prev7::getOrCreateSlotEntrypoint(LinkGraph &G,
836+
StubMapEntry &Slot,
837+
bool Thumb) {
838+
constexpr orc::ExecutorAddrDiff ThumbEntrypointOffset = 0;
839+
constexpr orc::ExecutorAddrDiff ArmEntrypointOffset = 4;
840+
if (Thumb && !Slot.ThumbEntry) {
841+
Slot.ThumbEntry =
842+
&G.addAnonymousSymbol(*Slot.B, ThumbEntrypointOffset, 4, true, false);
843+
Slot.ThumbEntry->setTargetFlags(ThumbSymbol);
844+
}
845+
if (!Thumb && !Slot.ArmEntry)
846+
Slot.ArmEntry =
847+
&G.addAnonymousSymbol(*Slot.B, ArmEntrypointOffset, 8, true, false);
848+
return Thumb ? Slot.ThumbEntry : Slot.ArmEntry;
849+
}
850+
851+
bool StubsManager_prev7::visitEdge(LinkGraph &G, Block *B, Edge &E) {
852+
if (!needsStub(E))
853+
return false;
854+
855+
Symbol &Target = E.getTarget();
856+
assert(Target.hasName() && "Edge cannot point to anonymous target");
857+
auto [Slot, NewStub] = getStubMapSlot(Target.getName());
858+
859+
if (NewStub) {
860+
if (!StubsSection)
861+
StubsSection = &G.createSection(getSectionName(),
862+
orc::MemProt::Read | orc::MemProt::Exec);
863+
LLVM_DEBUG({
864+
dbgs() << " Created stub entry for " << Target.getName() << " in "
865+
<< StubsSection->getName() << "\n";
866+
});
867+
Slot->B = &createStubPrev7(G, *StubsSection, Target);
868+
}
869+
870+
// The ArmThumbv5LdrPc stub has 2 entrypoints: Thumb at offset 0 is taken only
871+
// for Thumb B instructions. Thumb BL is rewritten to BLX and takes the Arm
872+
// entrypoint at offset 4. Arm branches always use that one.
873+
bool UseThumb = E.getKind() == Thumb_Jump24;
874+
Symbol *StubEntrypoint = getOrCreateSlotEntrypoint(G, *Slot, UseThumb);
875+
876+
LLVM_DEBUG({
877+
dbgs() << " Using " << (UseThumb ? "Thumb" : "Arm") << " entrypoint "
878+
<< *StubEntrypoint << " in "
879+
<< StubEntrypoint->getBlock().getSection().getName() << "\n";
880+
});
881+
882+
E.setTarget(*StubEntrypoint);
883+
return true;
884+
}
885+
819886
bool StubsManager_v7::visitEdge(LinkGraph &G, Block *B, Edge &E) {
820887
if (!needsStub(E))
821888
return false;

llvm/test/ExecutionEngine/JITLink/AArch32/ELF_relocations_arm.s

Lines changed: 18 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,22 @@
1-
# RUN: llvm-mc -triple=armv7-linux-gnueabi -arm-add-build-attributes -filetype=obj -o %t.o %s
2-
# RUN: llvm-objdump -r %t.o | FileCheck --check-prefix=CHECK-TYPE %s
3-
# RUN: llvm-objdump --disassemble %t.o | FileCheck --check-prefix=CHECK-INSTR %s
1+
# Test pre-v7 Arm features
2+
#
3+
# RUN: llvm-mc -triple=armv4t-linux-gnueabi -arm-add-build-attributes -filetype=obj -o %t_armv4t.o %s
4+
# RUN: llvm-objdump -r %t_armv4t.o | FileCheck --check-prefix=CHECK-TYPE %s
5+
# RUN: llvm-objdump --disassemble %t_armv4t.o | FileCheck --check-prefix=CHECK-INSTR %s
46
# RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 -slab-allocate 10Kb \
5-
# RUN: -slab-page-size 4096 -show-entry-es -check %s %t.o
7+
# RUN: -slab-page-size 4096 -check %s %t_armv4t.o
8+
#
9+
# RUN: llvm-mc -triple=armv7-linux-gnueabi -arm-add-build-attributes -filetype=obj -o %t_armv7.o %s
10+
# RUN: llvm-objdump -r %t_armv7.o | FileCheck --check-prefix=CHECK-TYPE %s
11+
# RUN: llvm-objdump --disassemble %t_armv7.o | FileCheck --check-prefix=CHECK-INSTR %s
12+
# RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 -slab-allocate 10Kb \
13+
# RUN: -slab-page-size 4096 -check %s %t_armv7.o
14+
#
15+
# RUN: llvm-mc -triple=armv9-linux-gnueabi -arm-add-build-attributes -filetype=obj -o %t_armv9.o %s
16+
# RUN: llvm-objdump -r %t_armv9.o | FileCheck --check-prefix=CHECK-TYPE %s
17+
# RUN: llvm-objdump --disassemble %t_armv9.o | FileCheck --check-prefix=CHECK-INSTR %s
18+
# RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 -slab-allocate 10Kb \
19+
# RUN: -slab-page-size 4096 -check %s %t_armv9.o
620

721

822
.text
@@ -63,38 +77,6 @@ jump24_target:
6377
bx lr
6478
.size jump24_target, .-jump24_target
6579

66-
67-
# CHECK-TYPE: {{[0-9a-f]+}} R_ARM_MOVW_ABS_NC data_symbol
68-
# CHECK-INSTR: 0000001c <movw>:
69-
# CHECK-INSTR: 1c: e3000000 movw r0, #0x0
70-
# jitlink-check: decode_operand(movw, 1) = (data_symbol&0x0000ffff)
71-
.globl movw
72-
.type movw,%function
73-
.p2align 2
74-
movw:
75-
movw r0, :lower16:data_symbol
76-
.size movw, .-movw
77-
78-
# CHECK-TYPE: {{[0-9a-f]+}} R_ARM_MOVT_ABS data_symbol
79-
# CHECK-INSTR: 00000020 <movt>:
80-
# CHECK-INSTR: 20: e3400000 movt r0, #0x0
81-
# We decode the operand with index 2, because movt generates one leading implicit
82-
# predicate operand that we have to skip in order to decode the data_symbol operand
83-
# jitlink-check: decode_operand(movt, 2) = (data_symbol&0xffff0000>>16)
84-
.globl movt
85-
.type movt,%function
86-
.p2align 2
87-
movt:
88-
movt r0, :upper16:data_symbol
89-
.size movt, .-movt
90-
91-
.data
92-
.global data_symbol
93-
data_symbol:
94-
.long 1073741822
95-
96-
.text
97-
9880
# Empty main function for jitlink to be happy
9981
.globl main
10082
.type main,%function
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# Test v7 Arm features
2+
#
3+
# RUN: llvm-mc -triple=armv7-linux-gnueabi -arm-add-build-attributes -filetype=obj -o %t_armv7.o %s
4+
# RUN: llvm-objdump -r %t_armv7.o | FileCheck --check-prefix=CHECK-TYPE %s
5+
# RUN: llvm-objdump --disassemble %t_armv7.o | FileCheck --check-prefix=CHECK-INSTR %s
6+
# RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 -slab-allocate 10Kb \
7+
# RUN: -slab-page-size 4096 -abs data_symbol=0x00001234 -check %s %t_armv7.o
8+
#
9+
# RUN: llvm-mc -triple=armv9-linux-gnueabi -arm-add-build-attributes -filetype=obj -o %t_armv9.o %s
10+
# RUN: llvm-objdump -r %t_armv9.o | FileCheck --check-prefix=CHECK-TYPE %s
11+
# RUN: llvm-objdump --disassemble %t_armv9.o | FileCheck --check-prefix=CHECK-INSTR %s
12+
# RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 -slab-allocate 10Kb \
13+
# RUN: -slab-page-size 4096 -abs data_symbol=0x00001234 -check %s %t_armv9.o
14+
15+
16+
.text
17+
.syntax unified
18+
19+
# CHECK-TYPE: {{[0-9a-f]+}} R_ARM_MOVW_ABS_NC data_symbol
20+
# CHECK-INSTR: <movw>:
21+
# CHECK-INSTR: e3000000 movw r0, #0x0
22+
# jitlink-check: decode_operand(movw, 1) = data_symbol[15:0]
23+
.globl movw
24+
.type movw,%function
25+
.p2align 2
26+
movw:
27+
movw r0, :lower16:data_symbol
28+
.size movw, .-movw
29+
30+
# CHECK-TYPE: {{[0-9a-f]+}} R_ARM_MOVT_ABS data_symbol
31+
# CHECK-INSTR: <movt>:
32+
# CHECK-INSTR: e3400000 movt r0, #0x0
33+
# We decode the operand with index 2, because movt generates one leading implicit
34+
# predicate operand that we have to skip in order to decode the data_symbol operand
35+
# jitlink-check: decode_operand(movt, 2) = data_symbol[31:16]
36+
.globl movt
37+
.type movt,%function
38+
.p2align 2
39+
movt:
40+
movt r0, :upper16:data_symbol
41+
.size movt, .-movt
42+
43+
# Empty main function for jitlink to be happy
44+
.globl main
45+
.type main,%function
46+
.p2align 2
47+
main:
48+
bx lr
49+
.size main, .-main

llvm/test/ExecutionEngine/JITLink/AArch32/ELF_relocations_data.s

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
1-
# RUN: rm -rf %t && mkdir -p %t/armv7 && mkdir -p %t/thumbv7
1+
# RUN: rm -rf %t && mkdir -p %t/armv6 && mkdir -p %t/armv7 && mkdir -p %t/thumbv7
2+
# RUN: llvm-mc -triple=armv6-none-linux-gnueabi -arm-add-build-attributes -filetype=obj -o %t/armv6/out.o %s
3+
# RUN: llvm-objdump -r %t/armv6/out.o | FileCheck --check-prefix=CHECK-TYPE %s
4+
# RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 -slab-allocate 10Kb -slab-page-size 4096 \
5+
# RUN: -abs target=0x76bbe88f -check %s %t/armv6/out.o
6+
27
# RUN: llvm-mc -triple=armv7-none-linux-gnueabi -arm-add-build-attributes -filetype=obj -o %t/armv7/out.o %s
38
# RUN: llvm-objdump -r %t/armv7/out.o | FileCheck --check-prefix=CHECK-TYPE %s
49
# RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 -slab-allocate 10Kb -slab-page-size 4096 \

0 commit comments

Comments
 (0)