Skip to content

Commit f146763

Browse files
committed
Revert "Revert "[lld][Arm] Big Endian - Byte invariant support.""
This reverts commit d885138. Reason: Applied the fix for the Asan buildbot failures.
1 parent 81ec494 commit f146763

32 files changed

+435
-68
lines changed

lld/ELF/Arch/ARM.cpp

Lines changed: 124 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9+
#include "InputFiles.h"
910
#include "Symbols.h"
1011
#include "SyntheticSections.h"
1112
#include "Target.h"
@@ -44,8 +45,11 @@ class ARM final : public TargetInfo {
4445
void relocate(uint8_t *loc, const Relocation &rel,
4546
uint64_t val) const override;
4647
};
48+
// Kind of content a mapping symbol introduces. The enumerator values are also
// used as the instruction width in bytes when converting code to little
// endian: the value is cast to an integer and used as the loop step, with
// Arm (4) rewritten a 32-bit word at a time and Thumb (2) a 16-bit halfword
// at a time. Data regions are never converted.
enum class CodeState { Data = 0, Thumb = 2, Arm = 4 };
4749
} // namespace
4850

51+
// Mapping symbols ($a, $t, $d) recorded per executable input section. Filled
// by addArmInputSectionMappingSymbols and addArmSyntheticSectionMappingSymbol,
// ordered by sortArmMappingSymbols, and read by convertArmInstructionstoBE8.
static DenseMap<InputSection *, SmallVector<const Defined *, 0>> sectionMap{};
52+
4953
ARM::ARM() {
5054
copyRel = R_ARM_COPY;
5155
relativeRel = R_ARM_RELATIVE;
@@ -68,16 +72,24 @@ uint32_t ARM::calcEFlags() const {
6872
// The ABIFloatType is used by loaders to detect the floating point calling
6973
// convention.
7074
uint32_t abiFloatType = 0;
75+
76+
// Set the EF_ARM_BE8 flag in the ELF header, if ELF file is big-endian
77+
// with BE-8 code.
78+
uint32_t armBE8 = 0;
79+
7180
if (config->armVFPArgs == ARMVFPArgKind::Base ||
7281
config->armVFPArgs == ARMVFPArgKind::Default)
7382
abiFloatType = EF_ARM_ABI_FLOAT_SOFT;
7483
else if (config->armVFPArgs == ARMVFPArgKind::VFP)
7584
abiFloatType = EF_ARM_ABI_FLOAT_HARD;
7685

86+
if (!config->isLE && config->armBe8)
87+
armBE8 = EF_ARM_BE8;
88+
7789
// We don't currently use any features incompatible with EF_ARM_EABI_VER5,
7890
// but we don't have any firm guarantees of conformance. Linux AArch64
7991
// kernels (as of 2016) require an EABI version to be set.
80-
return EF_ARM_EABI_VER5 | abiFloatType;
92+
return EF_ARM_EABI_VER5 | abiFloatType | armBE8;
8193
}
8294

8395
RelExpr ARM::getRelExpr(RelType type, const Symbol &s,
@@ -910,6 +922,117 @@ int64_t ARM::getImplicitAddend(const uint8_t *buf, RelType type) const {
910922
}
911923
}
912924

925+
static bool isArmMapSymbol(const Symbol *b) {
926+
return b->getName() == "$a" || b->getName().startswith("$a.");
927+
}
928+
929+
static bool isThumbMapSymbol(const Symbol *s) {
930+
return s->getName() == "$t" || s->getName().startswith("$t.");
931+
}
932+
933+
static bool isDataMapSymbol(const Symbol *b) {
934+
return b->getName() == "$d" || b->getName().startswith("$d.");
935+
}
936+
937+
void elf::sortArmMappingSymbols() {
938+
// For each input section make sure the mapping symbols are sorted in
939+
// ascending order.
940+
for (auto &kv : sectionMap) {
941+
SmallVector<const Defined *, 0> &mapSyms = kv.second;
942+
llvm::stable_sort(mapSyms, [](const Defined *a, const Defined *b) {
943+
return a->value < b->value;
944+
});
945+
}
946+
}
947+
948+
void elf::addArmInputSectionMappingSymbols() {
949+
// Collect mapping symbols for every executable input sections.
950+
// The linker generated mapping symbols for all the synthetic
951+
// sections are adding into the sectionmap through the function
952+
// addArmSyntheitcSectionMappingSymbol.
953+
for (ELFFileBase *file : ctx.objectFiles) {
954+
for (Symbol *sym : file->getLocalSymbols()) {
955+
auto *def = dyn_cast<Defined>(sym);
956+
if (!def)
957+
continue;
958+
if (!isArmMapSymbol(def) && !isDataMapSymbol(def) &&
959+
!isThumbMapSymbol(def))
960+
continue;
961+
if (auto *sec = cast_if_present<InputSection>(def->section))
962+
if (sec->flags & SHF_EXECINSTR)
963+
sectionMap[sec].push_back(def);
964+
}
965+
}
966+
}
967+
968+
// Synthetic sections are not backed by an ELF file where we can access the
969+
// symbol table, instead mapping symbols added to synthetic sections are stored
970+
// in the synthetic symbol table. Due to the presence of strip (--strip-all),
971+
// we can not rely on the synthetic symbol table retaining the mapping symbols.
972+
// Instead we record the mapping symbols locally.
973+
void elf::addArmSyntheticSectionMappingSymbol(Defined *sym) {
974+
if (!isArmMapSymbol(sym) && !isDataMapSymbol(sym) && !isThumbMapSymbol(sym))
975+
return;
976+
if (auto *sec = cast_if_present<InputSection>(sym->section))
977+
if (sec->flags & SHF_EXECINSTR)
978+
sectionMap[sec].push_back(sym);
979+
}
980+
981+
static void toLittleEndianInstructions(uint8_t *buf, uint64_t start,
982+
uint64_t end, uint64_t width) {
983+
CodeState curState = static_cast<CodeState>(width);
984+
if (curState == CodeState::Arm)
985+
for (uint64_t i = start; i < end; i += width)
986+
write32le(buf + i, read32(buf + i));
987+
988+
if (curState == CodeState::Thumb)
989+
for (uint64_t i = start; i < end; i += width)
990+
write16le(buf + i, read16(buf + i));
991+
}
992+
993+
// Arm BE8 big endian format requires instructions to be little endian, with
994+
// the initial contents big-endian. Convert the big-endian instructions to
995+
// little endian leaving literal data untouched. We use mapping symbols to
996+
// identify half open intervals of Arm code [$a, non $a) and Thumb code
997+
// [$t, non $t) and convert these to little endian a word or half word at a
998+
// time respectively.
999+
void elf::convertArmInstructionstoBE8(InputSection *sec, uint8_t *buf) {
1000+
if (!sectionMap.contains(sec))
1001+
return;
1002+
1003+
SmallVector<const Defined *, 0> &mapSyms = sectionMap[sec];
1004+
1005+
if (mapSyms.empty())
1006+
return;
1007+
1008+
CodeState curState = CodeState::Data;
1009+
uint64_t start = 0, width = 0, size = sec->getSize();
1010+
for (auto &msym : mapSyms) {
1011+
CodeState newState = CodeState::Data;
1012+
if (isThumbMapSymbol(msym))
1013+
newState = CodeState::Thumb;
1014+
else if (isArmMapSymbol(msym))
1015+
newState = CodeState::Arm;
1016+
1017+
if (newState == curState)
1018+
continue;
1019+
1020+
if (curState != CodeState::Data) {
1021+
width = static_cast<uint64_t>(curState);
1022+
toLittleEndianInstructions(buf, start, msym->value, width);
1023+
}
1024+
start = msym->value;
1025+
curState = newState;
1026+
}
1027+
1028+
// Passed last mapping symbol, may need to reverse
1029+
// up to end of section.
1030+
if (curState != CodeState::Data) {
1031+
width = static_cast<uint64_t>(curState);
1032+
toLittleEndianInstructions(buf, start, size, width);
1033+
}
1034+
}
1035+
9131036
TargetInfo *elf::getARMTargetInfo() {
9141037
static ARM target;
9151038
return &target;

lld/ELF/Config.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ struct Config {
201201
bool armHasMovtMovw = false;
202202
bool armJ1J2BranchEncoding = false;
203203
bool asNeeded = false;
204+
bool armBe8 = false;
204205
BsymbolicKind bsymbolic = BsymbolicKind::None;
205206
bool callGraphProfileSort;
206207
bool checkSections;

lld/ELF/Driver.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -351,9 +351,12 @@ static void checkOptions() {
351351
if (config->fixCortexA8 && config->emachine != EM_ARM)
352352
error("--fix-cortex-a8 is only supported on ARM targets");
353353

354+
if (config->armBe8 && config->emachine != EM_ARM)
355+
error("--be8 is only supported on ARM targets");
356+
354357
if (config->fixCortexA8 && !config->isLE)
355358
error("--fix-cortex-a8 is not supported on big endian targets");
356-
359+
357360
if (config->tocOptimize && config->emachine != EM_PPC64)
358361
error("--toc-optimize is only supported on PowerPC64 targets");
359362

@@ -1115,6 +1118,7 @@ static void readConfigs(opt::InputArgList &args) {
11151118
OPT_no_android_memtag_stack, false);
11161119
config->androidMemtagMode = getMemtagMode(args);
11171120
config->auxiliaryList = args::getStrings(args, OPT_auxiliary);
1121+
config->armBe8 = args.hasArg(OPT_be8);
11181122
if (opt::Arg *arg =
11191123
args.getLastArg(OPT_Bno_symbolic, OPT_Bsymbolic_non_weak_functions,
11201124
OPT_Bsymbolic_functions, OPT_Bsymbolic)) {

lld/ELF/Options.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ multiclass B<string name, string help1, string help2> {
3636

3737
defm auxiliary: Eq<"auxiliary", "Set DT_AUXILIARY field to the specified name">;
3838

39+
def be8: F<"be8">, HelpText<"write a Big Endian ELF file using BE8 format (AArch32 only)">;
40+
3941
def Bno_symbolic: F<"Bno-symbolic">, HelpText<"Don't bind default visibility defined symbols locally for -shared (default)">;
4042

4143
def Bsymbolic: F<"Bsymbolic">, HelpText<"Bind default visibility defined symbols locally for -shared">;

lld/ELF/OutputSections.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,12 @@ void OutputSection::writeTo(uint8_t *buf, parallel::TaskGroup &tg) {
496496
else
497497
isec->writeTo<ELFT>(buf + isec->outSecOff);
498498

499+
// When in Arm BE8 mode, the linker has to convert the big-endian
500+
// instructions to little-endian, leaving the data big-endian.
501+
if (config->emachine == EM_ARM && !config->isLE && config->armBe8 &&
502+
(flags & SHF_EXECINSTR))
503+
convertArmInstructionstoBE8(isec, buf + isec->outSecOff);
504+
499505
// Fill gaps between sections.
500506
if (nonZeroFiller) {
501507
uint8_t *start = buf + isec->outSecOff + isec->getSize();

lld/ELF/SyntheticSections.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,13 @@ Defined *elf::addSyntheticLocal(StringRef name, uint8_t type, uint64_t value,
271271
value, size, &section);
272272
if (in.symTab)
273273
in.symTab->addSymbol(s);
274+
275+
if (config->emachine == EM_ARM && !config->isLE && config->armBe8 &&
276+
(section.flags & SHF_EXECINSTR))
277+
// Adding Linker generated mapping symbols to the arm specific mapping
278+
// symbols list.
279+
addArmSyntheticSectionMappingSymbol(s);
280+
274281
return s;
275282
}
276283

lld/ELF/Target.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,10 @@ uint64_t getPPC64TocBase();
223223
uint64_t getAArch64Page(uint64_t expr);
224224
void riscvFinalizeRelax(int passes);
225225
void mergeRISCVAttributesSections();
226+
void addArmInputSectionMappingSymbols();
227+
void addArmSyntheticSectionMappingSymbol(Defined *);
228+
void sortArmMappingSymbols();
229+
void convertArmInstructionstoBE8(InputSection *sec, uint8_t *buf);
226230

227231
LLVM_LIBRARY_VISIBILITY extern const TargetInfo *target;
228232
TargetInfo *getTarget();

lld/ELF/Thunks.cpp

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -744,7 +744,8 @@ void ThumbV6MABSLongThunk::addSymbols(ThunkSection &isec) {
744744
addSymbol(saver().save("__Thumbv6MABSLongThunk_" + destination.getName()),
745745
STT_FUNC, 1, isec);
746746
addSymbol("$t", STT_NOTYPE, 0, isec);
747-
addSymbol("$d", STT_NOTYPE, 8, isec);
747+
if (!getMayUseShortThunk())
748+
addSymbol("$d", STT_NOTYPE, 8, isec);
748749
}
749750

750751
void ThumbV6MPILongThunk::writeLong(uint8_t *buf) {
@@ -767,7 +768,8 @@ void ThumbV6MPILongThunk::addSymbols(ThunkSection &isec) {
767768
addSymbol(saver().save("__Thumbv6MPILongThunk_" + destination.getName()),
768769
STT_FUNC, 1, isec);
769770
addSymbol("$t", STT_NOTYPE, 0, isec);
770-
addSymbol("$d", STT_NOTYPE, 12, isec);
771+
if (!getMayUseShortThunk())
772+
addSymbol("$d", STT_NOTYPE, 12, isec);
771773
}
772774

773775
void ARMV5LongLdrPcThunk::writeLong(uint8_t *buf) {
@@ -780,7 +782,8 @@ void ARMV5LongLdrPcThunk::addSymbols(ThunkSection &isec) {
780782
addSymbol(saver().save("__ARMv5LongLdrPcThunk_" + destination.getName()),
781783
STT_FUNC, 0, isec);
782784
addSymbol("$a", STT_NOTYPE, 0, isec);
783-
addSymbol("$d", STT_NOTYPE, 4, isec);
785+
if (!getMayUseShortThunk())
786+
addSymbol("$d", STT_NOTYPE, 4, isec);
784787
}
785788

786789
void ARMV4ABSLongBXThunk::writeLong(uint8_t *buf) {
@@ -794,7 +797,8 @@ void ARMV4ABSLongBXThunk::addSymbols(ThunkSection &isec) {
794797
addSymbol(saver().save("__ARMv4ABSLongBXThunk_" + destination.getName()),
795798
STT_FUNC, 0, isec);
796799
addSymbol("$a", STT_NOTYPE, 0, isec);
797-
addSymbol("$d", STT_NOTYPE, 8, isec);
800+
if (!getMayUseShortThunk())
801+
addSymbol("$d", STT_NOTYPE, 8, isec);
798802
}
799803

800804
void ThumbV4ABSLongBXThunk::writeLong(uint8_t *buf) {
@@ -810,7 +814,8 @@ void ThumbV4ABSLongBXThunk::addSymbols(ThunkSection &isec) {
810814
STT_FUNC, 1, isec);
811815
addSymbol("$t", STT_NOTYPE, 0, isec);
812816
addSymbol("$a", STT_NOTYPE, 4, isec);
813-
addSymbol("$d", STT_NOTYPE, 8, isec);
817+
if (!getMayUseShortThunk())
818+
addSymbol("$d", STT_NOTYPE, 8, isec);
814819
}
815820

816821
void ThumbV4ABSLongThunk::writeLong(uint8_t *buf) {
@@ -827,7 +832,8 @@ void ThumbV4ABSLongThunk::addSymbols(ThunkSection &isec) {
827832
STT_FUNC, 1, isec);
828833
addSymbol("$t", STT_NOTYPE, 0, isec);
829834
addSymbol("$a", STT_NOTYPE, 4, isec);
830-
addSymbol("$d", STT_NOTYPE, 12, isec);
835+
if (!getMayUseShortThunk())
836+
addSymbol("$d", STT_NOTYPE, 12, isec);
831837
}
832838

833839
void ARMV4PILongBXThunk::writeLong(uint8_t *buf) {
@@ -844,7 +850,8 @@ void ARMV4PILongBXThunk::addSymbols(ThunkSection &isec) {
844850
addSymbol(saver().save("__ARMv4PILongBXThunk_" + destination.getName()),
845851
STT_FUNC, 0, isec);
846852
addSymbol("$a", STT_NOTYPE, 0, isec);
847-
addSymbol("$d", STT_NOTYPE, 12, isec);
853+
if (!getMayUseShortThunk())
854+
addSymbol("$d", STT_NOTYPE, 12, isec);
848855
}
849856

850857
void ARMV4PILongThunk::writeLong(uint8_t *buf) {
@@ -860,7 +867,8 @@ void ARMV4PILongThunk::addSymbols(ThunkSection &isec) {
860867
addSymbol(saver().save("__ARMv4PILongThunk_" + destination.getName()),
861868
STT_FUNC, 0, isec);
862869
addSymbol("$a", STT_NOTYPE, 0, isec);
863-
addSymbol("$d", STT_NOTYPE, 8, isec);
870+
if (!getMayUseShortThunk())
871+
addSymbol("$d", STT_NOTYPE, 8, isec);
864872
}
865873

866874
void ThumbV4PILongBXThunk::writeLong(uint8_t *buf) {
@@ -879,7 +887,8 @@ void ThumbV4PILongBXThunk::addSymbols(ThunkSection &isec) {
879887
STT_FUNC, 1, isec);
880888
addSymbol("$t", STT_NOTYPE, 0, isec);
881889
addSymbol("$a", STT_NOTYPE, 4, isec);
882-
addSymbol("$d", STT_NOTYPE, 12, isec);
890+
if (!getMayUseShortThunk())
891+
addSymbol("$d", STT_NOTYPE, 12, isec);
883892
}
884893

885894
void ThumbV4PILongThunk::writeLong(uint8_t *buf) {
@@ -899,7 +908,8 @@ void ThumbV4PILongThunk::addSymbols(ThunkSection &isec) {
899908
STT_FUNC, 1, isec);
900909
addSymbol("$t", STT_NOTYPE, 0, isec);
901910
addSymbol("$a", STT_NOTYPE, 4, isec);
902-
addSymbol("$d", STT_NOTYPE, 16, isec);
911+
if (!getMayUseShortThunk())
912+
addSymbol("$d", STT_NOTYPE, 16, isec);
903913
}
904914

905915
// Use the long jump which covers a range up to 8MiB.

lld/ELF/Writer.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2149,6 +2149,11 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() {
21492149
sec->finalize();
21502150

21512151
script->checkMemoryRegions();
2152+
2153+
if (config->emachine == EM_ARM && !config->isLE && config->armBe8) {
2154+
addArmInputSectionMappingSymbols();
2155+
sortArmMappingSymbols();
2156+
}
21522157
}
21532158

21542159
// Ensure data sections are not mixed with executable sections when

lld/docs/ld.lld.1

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,8 @@ Bind default visibility defined function symbols locally for
8888
.It Fl Bsymbolic-non-weak-functions
8989
Bind default visibility defined STB_GLOBAL function symbols locally for
9090
.Fl shared.
91+
.It Fl --be8
92+
Write a Big Endian ELF file using BE8 format (AArch32 only).
9193
.It Fl -build-id Ns = Ns Ar value
9294
Generate a build ID note.
9395
.Ar value

lld/test/ELF/arm-bl-v6.s

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,12 @@
1313
// RUN: llvm-objdump --no-print-imm-hex -d --triple=armv6eb-none-linux-gnueabi --start-address=0x22100c --stop-address=0x221014 %t2 | FileCheck --check-prefix=CHECK-ARM2-EB %s
1414
// RUN: llvm-objdump --no-print-imm-hex -d --triple=thumbv6eb-none-linux-gnueabi %t2 --start-address=0x622000 --stop-address=0x622002 | FileCheck --check-prefix=CHECK-THUMB2 %s
1515

16+
// RUN: ld.lld --be8 %t -o %t2
17+
// RUN: llvm-objdump --no-print-imm-hex -d --triple=armv6eb-none-linux-gnueabi --start-address=0x21000 --stop-address=0x21008 %t2 | FileCheck --check-prefix=CHECK-ARM1 %s
18+
// RUN: llvm-objdump --no-print-imm-hex -d --triple=thumbv6eb-none-linux-gnueabi %t2 --start-address=0x21008 --stop-address=0x2100c | FileCheck --check-prefix=CHECK-THUMB1 %s
19+
// RUN: llvm-objdump --no-print-imm-hex -d --triple=armv6eb-none-linux-gnueabi --start-address=0x22100c --stop-address=0x221014 %t2 | FileCheck --check-prefix=CHECK-ARM2-EB %s
20+
// RUN: llvm-objdump --no-print-imm-hex -d --triple=thumbv6eb-none-linux-gnueabi %t2 --start-address=0x622000 --stop-address=0x622002 | FileCheck --check-prefix=CHECK-THUMB2 %s
21+
1622
/// On Arm v6 the range of a Thumb BL instruction is only 4 megabytes as the
1723
/// extended range encoding is not supported. The following example has a Thumb
1824
/// BL that is out of range on ARM v6 and requires a range extension thunk.

lld/test/ELF/arm-data-relocs.s

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99
// RUN: ld.lld %t.be.o %t256.be.o -o %t.be
1010
// RUN: llvm-objdump -s %t.be | FileCheck %s --check-prefixes=CHECK,BE
1111

12+
// RUN: ld.lld --be8 %t.be.o %t256.be.o -o %t.be8
13+
// RUN: llvm-objdump -s %t.be8 | FileCheck %s --check-prefixes=CHECK,BE
14+
1215
.globl _start
1316
_start:
1417
.section .R_ARM_ABS, "ax","progbits"

lld/test/ELF/arm-exidx-emit-relocs.s

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@
99
// RUN: llvm-objdump -s --triple=armv7aeb-none-linux-gnueabi %t2 | FileCheck -check-prefix=CHECK-EB %s
1010
// RUN: llvm-readelf --relocs %t2 | FileCheck -check-prefix=CHECK-RELOCS %s
1111

12+
// RUN: ld.lld --be8 --emit-relocs %t -o %t2
13+
// RUN: llvm-objdump -s --triple=armv7aeb-none-linux-gnueabi %t2 | FileCheck -check-prefix=CHECK-EB %s
14+
// RUN: llvm-readelf --relocs %t2 | FileCheck -check-prefix=CHECK-RELOCS %s
15+
1216
/// LLD does not support --emit-relocs for .ARM.exidx sections as the relocations
1317
/// from synthetic table entries won't be represented. Given the known use cases
1418
/// of --emit-relocs, relocating kernels, and binary analysis, the former doesn't

0 commit comments

Comments
 (0)