Skip to content

[JITLink][AArch32] Implement Armv5 ldr-pc stubs and use them for all pre-v7 targets #79082

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jan 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion clang/tools/clang-repl/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ if(CLANG_PLUGIN_SUPPORT)
export_executable_symbols_for_plugins(clang-repl)
endif()

string(TOUPPER ${CMAKE_SYSTEM_PROCESSOR} system_processor)
string(TOUPPER "${CMAKE_SYSTEM_PROCESSOR}" system_processor)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Unrelated change that is on mainline already)

if(system_processor MATCHES "ARM")
set(FLAG_LONG_PLT "-Wl,--long-plt")
llvm_check_linker_flag(CXX ${FLAG_LONG_PLT} LINKER_HAS_FLAG_LONG_PLT)
Expand Down
56 changes: 44 additions & 12 deletions llvm/include/llvm/ExecutionEngine/JITLink/aarch32.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,33 +131,28 @@ const char *getEdgeKindName(Edge::Kind K);
/// Stubs are often called "veneers" in the official docs and online.
///
enum class StubsFlavor {
Unsupported = 0,
Undefined = 0,
pre_v7,
v7,
};

/// JITLink sub-arch configuration for Arm CPU models
struct ArmConfig {
bool J1J2BranchEncoding = false;
StubsFlavor Stubs = StubsFlavor::Unsupported;
StubsFlavor Stubs = StubsFlavor::Undefined;
// In the long term, we might want a linker switch like --target1-rel
bool Target1Rel = false;
};

/// Obtain the sub-arch configuration for a given Arm CPU model.
inline ArmConfig getArmConfigForCPUArch(ARMBuildAttrs::CPUArch CPUArch) {
ArmConfig ArmCfg;
switch (CPUArch) {
case ARMBuildAttrs::v7:
case ARMBuildAttrs::v8_A:
if (CPUArch == ARMBuildAttrs::v7 || CPUArch >= ARMBuildAttrs::v7E_M) {
ArmCfg.J1J2BranchEncoding = true;
ArmCfg.Stubs = StubsFlavor::v7;
break;
default:
DEBUG_WITH_TYPE("jitlink", {
dbgs() << " Warning: ARM config not defined for CPU architecture "
<< getCPUArchName(CPUArch) << " (" << CPUArch << ")\n";
});
break;
} else {
ArmCfg.J1J2BranchEncoding = false;
ArmCfg.Stubs = StubsFlavor::pre_v7;
}
return ArmCfg;
}
Expand Down Expand Up @@ -341,6 +336,43 @@ class GOTBuilder : public TableManager<GOTBuilder> {
Section *GOTSection = nullptr;
};

/// Stubs builder emits non-position-independent Arm stubs for pre-v7 CPUs.
/// These architectures have no MovT/MovW instructions and don't support Thumb2.
/// BL is the only Thumb instruction that can generate stubs and they can always
/// be transformed into BLX.
class StubsManager_prev7 {
public:
StubsManager_prev7() = default;

/// Name of the object file section that will contain all our stubs.
static StringRef getSectionName() {
return "__llvm_jitlink_aarch32_STUBS_prev7";
}

/// Implements link-graph traversal via visitExistingEdges()
bool visitEdge(LinkGraph &G, Block *B, Edge &E);

private:
// Each stub uses a single block that can have 2 entryponts, one for Arm and
// one for Thumb
struct StubMapEntry {
Block *B = nullptr;
Symbol *ArmEntry = nullptr;
Symbol *ThumbEntry = nullptr;
};

std::pair<StubMapEntry *, bool> getStubMapSlot(StringRef Name) {
auto &&[Stubs, NewStub] = StubMap.try_emplace(Name);
return std::make_pair(&Stubs->second, NewStub);
}

Symbol *getOrCreateSlotEntrypoint(LinkGraph &G, StubMapEntry &Slot,
bool Thumb);

DenseMap<StringRef, StubMapEntry> StubMap;
Section *StubsSection = nullptr;
};

/// Stubs builder for v7 emits non-position-independent Arm and Thumb stubs.
class StubsManager_v7 {
public:
Expand Down
23 changes: 7 additions & 16 deletions llvm/lib/ExecutionEngine/JITLink/ELF_aarch32.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -265,21 +265,8 @@ createLinkGraphFromELFObject_aarch32(MemoryBufferRef ObjectBuffer) {
// Resolve our internal configuration for the target. If at some point the
// CPUArch alone becomes too unprecise, we can find more details in the
// Tag_CPU_arch_profile.
aarch32::ArmConfig ArmCfg;
using namespace ARMBuildAttrs;
auto Arch = static_cast<CPUArch>(ARM::getArchAttr(AK));
switch (Arch) {
case v7:
case v8_A:
ArmCfg = aarch32::getArmConfigForCPUArch(Arch);
assert(ArmCfg.Stubs != aarch32::StubsFlavor::Unsupported &&
"Provide a config for each supported CPU");
break;
default:
return make_error<JITLinkError>(
"Failed to build ELF link graph: Unsupported CPU arch " +
StringRef(aarch32::getCPUArchName(Arch)));
}
auto Arch = static_cast<ARMBuildAttrs::CPUArch>(ARM::getArchAttr(AK));
aarch32::ArmConfig ArmCfg = aarch32::getArmConfigForCPUArch(Arch);

// Populate the link-graph.
switch (TT.getArch()) {
Expand Down Expand Up @@ -324,11 +311,15 @@ void link_ELF_aarch32(std::unique_ptr<LinkGraph> G,
PassCfg.PrePrunePasses.push_back(markAllSymbolsLive);

switch (ArmCfg.Stubs) {
case aarch32::StubsFlavor::pre_v7:
PassCfg.PostPrunePasses.push_back(
buildTables_ELF_aarch32<aarch32::StubsManager_prev7>);
break;
case aarch32::StubsFlavor::v7:
PassCfg.PostPrunePasses.push_back(
buildTables_ELF_aarch32<aarch32::StubsManager_v7>);
break;
case aarch32::StubsFlavor::Unsupported:
case aarch32::StubsFlavor::Undefined:
llvm_unreachable("Check before building graph");
}
}
Expand Down
67 changes: 67 additions & 0 deletions llvm/lib/ExecutionEngine/JITLink/aarch32.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -725,6 +725,13 @@ bool GOTBuilder::visitEdge(LinkGraph &G, Block *B, Edge &E) {
return true;
}

// Stub template for pre-v7 targets, usable from both Arm and Thumb code.
// Layout of the 12-byte block:
//   offset 0: Thumb prologue (Thumb entrypoint)
//   offset 4: Arm ldr-pc instruction (Arm entrypoint)
//   offset 8: 32-bit literal word, zero in the template; createStubPrev7()
//             attaches a Data_Pointer32 edge to the target at this offset
const uint8_t ArmThumbv5LdrPc[] = {
0x78, 0x47, // bx pc
0xfd, 0xe7, // b #-6 ; Arm recommended sequence to follow bx pc
0x04, 0xf0, 0x1f, 0xe5, // ldr pc, [pc,#-4] ; L1
0x00, 0x00, 0x00, 0x00, // L1: .word S
};
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Extended the v5LdrPc stub with the Thumb prologue. Thumb B instructions will branch to offset 0 of this block. Thumb BL instructions are rewritten to BLX and branch to offset 4. The same applies to Arm branches.


const uint8_t Armv7ABS[] = {
0x00, 0xc0, 0x00, 0xe3, // movw r12, #0x0000 ; lower 16-bit
0x00, 0xc0, 0x40, 0xe3, // movt r12, #0x0000 ; upper 16-bit
Expand All @@ -745,6 +752,12 @@ static Block &allocStub(LinkGraph &G, Section &S, const uint8_t (&Code)[Size]) {
return G.createContentBlock(S, Template, orc::ExecutorAddr(), Alignment, 0);
}

/// Allocate a new ArmThumbv5LdrPc stub block in section \p S and attach a
/// Data_Pointer32 edge to \p Target at the stub's literal word.
static Block &createStubPrev7(LinkGraph &G, Section &S, Symbol &Target) {
  // Byte offset of the `.word S` literal inside the ArmThumbv5LdrPc template.
  constexpr unsigned LiteralWordOffset = 8;

  Block &Stub = allocStub(G, S, ArmThumbv5LdrPc);
  Stub.addEdge(Data_Pointer32, LiteralWordOffset, Target, 0);
  return Stub;
}

static Block &createStubThumbv7(LinkGraph &G, Section &S, Symbol &Target) {
Block &B = allocStub(G, S, Thumbv7ABS);
B.addEdge(Thumb_MovwAbsNC, 0, Target, 0);
Expand Down Expand Up @@ -802,6 +815,60 @@ static bool needsStub(const Edge &E) {
return false;
}

// An ArmThumbv5LdrPc stub offers 2 entrypoints. The Thumb one at offset 0 is
// only taken by Thumb B instructions. Thumb BL gets rewritten to BLX and,
// like every Arm branch, enters through the Arm entrypoint at offset 4.
//
// The requested entrypoint symbol is created on first use and cached in the
// slot, so later requests for the same flavor return the same symbol.
Symbol *StubsManager_prev7::getOrCreateSlotEntrypoint(LinkGraph &G,
                                                      StubMapEntry &Slot,
                                                      bool Thumb) {
  constexpr orc::ExecutorAddrDiff OffsetOfThumbEntry = 0;
  constexpr orc::ExecutorAddrDiff OffsetOfArmEntry = 4;

  if (Thumb) {
    if (!Slot.ThumbEntry) {
      Symbol &Entry =
          G.addAnonymousSymbol(*Slot.B, OffsetOfThumbEntry, 4, true, false);
      // Tag the symbol so that it is treated as a Thumb target.
      Entry.setTargetFlags(ThumbSymbol);
      Slot.ThumbEntry = &Entry;
    }
    return Slot.ThumbEntry;
  }

  if (!Slot.ArmEntry) {
    Symbol &Entry =
        G.addAnonymousSymbol(*Slot.B, OffsetOfArmEntry, 8, true, false);
    Slot.ArmEntry = &Entry;
  }
  return Slot.ArmEntry;
}

/// Redirect edge \p E to a pre-v7 stub if needsStub() asks for one.
///
/// One stub block is kept per target name; it is created (together with the
/// stubs section, if necessary) the first time a target is seen. Returns false
/// if the edge was left untouched and true if it now points to a stub
/// entrypoint.
bool StubsManager_prev7::visitEdge(LinkGraph &G, Block *B, Edge &E) {
  if (!needsStub(E))
    return false;

  Symbol &Callee = E.getTarget();
  assert(Callee.hasName() && "Edge cannot point to anonymous target");

  auto [Entry, Inserted] = getStubMapSlot(Callee.getName());
  if (Inserted) {
    // First stub request for this target: make sure the section exists, then
    // materialize the stub block.
    if (StubsSection == nullptr)
      StubsSection = &G.createSection(getSectionName(),
                                      orc::MemProt::Read | orc::MemProt::Exec);
    LLVM_DEBUG({
      dbgs() << " Created stub entry for " << Callee.getName() << " in "
             << StubsSection->getName() << "\n";
    });
    Entry->B = &createStubPrev7(G, *StubsSection, Callee);
  }

  // Only Thumb B instructions (Thumb_Jump24) go through the Thumb entrypoint
  // at offset 0. Thumb BL is rewritten to BLX and shares the Arm entrypoint at
  // offset 4 with all Arm branches.
  const bool ViaThumb = (E.getKind() == Thumb_Jump24);
  Symbol *NewTarget = getOrCreateSlotEntrypoint(G, *Entry, ViaThumb);

  LLVM_DEBUG({
    dbgs() << " Using " << (ViaThumb ? "Thumb" : "Arm") << " entrypoint "
           << *NewTarget << " in "
           << NewTarget->getBlock().getSection().getName() << "\n";
  });

  E.setTarget(*NewTarget);
  return true;
}

bool StubsManager_v7::visitEdge(LinkGraph &G, Block *B, Edge &E) {
if (!needsStub(E))
return false;
Expand Down
54 changes: 18 additions & 36 deletions llvm/test/ExecutionEngine/JITLink/AArch32/ELF_relocations_arm.s
Original file line number Diff line number Diff line change
@@ -1,8 +1,22 @@
# RUN: llvm-mc -triple=armv7-linux-gnueabi -arm-add-build-attributes -filetype=obj -o %t.o %s
# RUN: llvm-objdump -r %t.o | FileCheck --check-prefix=CHECK-TYPE %s
# RUN: llvm-objdump --disassemble %t.o | FileCheck --check-prefix=CHECK-INSTR %s
# Test pre-v7 Arm features
#
# RUN: llvm-mc -triple=armv4t-linux-gnueabi -arm-add-build-attributes -filetype=obj -o %t_armv4t.o %s
# RUN: llvm-objdump -r %t_armv4t.o | FileCheck --check-prefix=CHECK-TYPE %s
# RUN: llvm-objdump --disassemble %t_armv4t.o | FileCheck --check-prefix=CHECK-INSTR %s
# RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 -slab-allocate 10Kb \
# RUN: -slab-page-size 4096 -show-entry-es -check %s %t.o
# RUN: -slab-page-size 4096 -check %s %t_armv4t.o
#
# RUN: llvm-mc -triple=armv7-linux-gnueabi -arm-add-build-attributes -filetype=obj -o %t_armv7.o %s
# RUN: llvm-objdump -r %t_armv7.o | FileCheck --check-prefix=CHECK-TYPE %s
# RUN: llvm-objdump --disassemble %t_armv7.o | FileCheck --check-prefix=CHECK-INSTR %s
# RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 -slab-allocate 10Kb \
# RUN: -slab-page-size 4096 -check %s %t_armv7.o
#
# RUN: llvm-mc -triple=armv9-linux-gnueabi -arm-add-build-attributes -filetype=obj -o %t_armv9.o %s
# RUN: llvm-objdump -r %t_armv9.o | FileCheck --check-prefix=CHECK-TYPE %s
# RUN: llvm-objdump --disassemble %t_armv9.o | FileCheck --check-prefix=CHECK-INSTR %s
# RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 -slab-allocate 10Kb \
# RUN: -slab-page-size 4096 -check %s %t_armv9.o


.text
Expand Down Expand Up @@ -63,38 +77,6 @@ jump24_target:
bx lr
.size jump24_target, .-jump24_target


# CHECK-TYPE: {{[0-9a-f]+}} R_ARM_MOVW_ABS_NC data_symbol
# CHECK-INSTR: 0000001c <movw>:
# CHECK-INSTR: 1c: e3000000 movw r0, #0x0
# jitlink-check: decode_operand(movw, 1) = (data_symbol&0x0000ffff)
.globl movw
.type movw,%function
.p2align 2
movw:
movw r0, :lower16:data_symbol
.size movw, .-movw

# CHECK-TYPE: {{[0-9a-f]+}} R_ARM_MOVT_ABS data_symbol
# CHECK-INSTR: 00000020 <movt>:
# CHECK-INSTR: 20: e3400000 movt r0, #0x0
# We decode the operand with index 2, because movt generates one leading implicit
# predicate operand that we have to skip in order to decode the data_symbol operand
# jitlink-check: decode_operand(movt, 2) = (data_symbol&0xffff0000>>16)
.globl movt
.type movt,%function
.p2align 2
movt:
movt r0, :upper16:data_symbol
.size movt, .-movt

.data
.global data_symbol
data_symbol:
.long 1073741822

.text

# Empty main function for jitlink to be happy
.globl main
.type main,%function
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Test v7 Arm features
#
# RUN: llvm-mc -triple=armv7-linux-gnueabi -arm-add-build-attributes -filetype=obj -o %t_armv7.o %s
# RUN: llvm-objdump -r %t_armv7.o | FileCheck --check-prefix=CHECK-TYPE %s
# RUN: llvm-objdump --disassemble %t_armv7.o | FileCheck --check-prefix=CHECK-INSTR %s
# RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 -slab-allocate 10Kb \
# RUN: -slab-page-size 4096 -abs data_symbol=0x00001234 -check %s %t_armv7.o
#
# RUN: llvm-mc -triple=armv9-linux-gnueabi -arm-add-build-attributes -filetype=obj -o %t_armv9.o %s
# RUN: llvm-objdump -r %t_armv9.o | FileCheck --check-prefix=CHECK-TYPE %s
# RUN: llvm-objdump --disassemble %t_armv9.o | FileCheck --check-prefix=CHECK-INSTR %s
# RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 -slab-allocate 10Kb \
# RUN: -slab-page-size 4096 -abs data_symbol=0x00001234 -check %s %t_armv9.o


.text
.syntax unified

# CHECK-TYPE: {{[0-9a-f]+}} R_ARM_MOVW_ABS_NC data_symbol
# CHECK-INSTR: <movw>:
# CHECK-INSTR: e3000000 movw r0, #0x0
# jitlink-check: decode_operand(movw, 1) = data_symbol[15:0]
.globl movw
.type movw,%function
.p2align 2
movw:
movw r0, :lower16:data_symbol
.size movw, .-movw

# CHECK-TYPE: {{[0-9a-f]+}} R_ARM_MOVT_ABS data_symbol
# CHECK-INSTR: <movt>:
# CHECK-INSTR: e3400000 movt r0, #0x0
# We decode the operand with index 2, because movt generates one leading implicit
# predicate operand that we have to skip in order to decode the data_symbol operand
# jitlink-check: decode_operand(movt, 2) = data_symbol[31:16]
.globl movt
.type movt,%function
.p2align 2
movt:
movt r0, :upper16:data_symbol
.size movt, .-movt

# Empty main function for jitlink to be happy
.globl main
.type main,%function
.p2align 2
main:
bx lr
.size main, .-main
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
# RUN: rm -rf %t && mkdir -p %t/armv7 && mkdir -p %t/thumbv7
# RUN: rm -rf %t && mkdir -p %t/armv6 && mkdir -p %t/armv7 && mkdir -p %t/thumbv7
# RUN: llvm-mc -triple=armv6-none-linux-gnueabi -arm-add-build-attributes -filetype=obj -o %t/armv6/out.o %s
# RUN: llvm-objdump -r %t/armv6/out.o | FileCheck --check-prefix=CHECK-TYPE %s
# RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 -slab-allocate 10Kb -slab-page-size 4096 \
# RUN: -abs target=0x76bbe88f -check %s %t/armv6/out.o

# RUN: llvm-mc -triple=armv7-none-linux-gnueabi -arm-add-build-attributes -filetype=obj -o %t/armv7/out.o %s
# RUN: llvm-objdump -r %t/armv7/out.o | FileCheck --check-prefix=CHECK-TYPE %s
# RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 -slab-allocate 10Kb -slab-page-size 4096 \
Expand Down
Loading