Skip to content

Commit 1d5ff30

Browse files
committed
merge main into amd-stg-open
Change-Id: I1a53984be35180f651ba64f92986ec87895fb1ad
2 parents 38e127d + a72cc95 commit 1d5ff30

File tree

969 files changed

+18742
-4696
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

969 files changed

+18742
-4696
lines changed

bolt/include/bolt/Core/BinaryContext.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,13 @@ inline raw_ostream &operator<<(raw_ostream &OS, const SegmentInfo &SegInfo) {
8282
return OS;
8383
}
8484

85+
/// Kind of AArch64-specific marker symbol used to delimit code and data
/// inside .text.
enum class MarkerSymType : char {
  NONE = 0, ///< Not a marker symbol.
  CODE = 1, ///< Marker delimiting code.
  DATA = 2, ///< Marker delimiting data.
};
91+
8592
enum class MemoryContentsType : char {
8693
UNKNOWN = 0, /// Unknown contents.
8794
POSSIBLE_JUMP_TABLE, /// Possibly a non-PIC jump table.
@@ -662,6 +669,11 @@ class BinaryContext {
662669
TheTriple->getArch() == llvm::Triple::x86_64;
663670
}
664671

672+
// AArch64-specific functions to check if symbol is used to delimit
673+
// code/data in .text. Code is marked by $x, data by $d.
674+
MarkerSymType getMarkerType(const SymbolRef &Symbol) const;
675+
bool isMarker(const SymbolRef &Symbol) const;
676+
665677
/// Iterate over all BinaryData.
666678
iterator_range<binary_data_const_iterator> getBinaryData() const {
667679
return make_range(BinaryDataMap.begin(), BinaryDataMap.end());

bolt/include/bolt/Core/BinaryFunction.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1948,11 +1948,6 @@ class BinaryFunction {
19481948
return ColdLSDASymbol;
19491949
}
19501950

1951-
/// True if the symbol is a mapping symbol used in AArch64 to delimit
1952-
/// data inside code section.
1953-
bool isDataMarker(const SymbolRef &Symbol, uint64_t SymbolSize) const;
1954-
bool isCodeMarker(const SymbolRef &Symbol, uint64_t SymbolSize) const;
1955-
19561951
void setOutputDataAddress(uint64_t Address) { OutputDataOffset = Address; }
19571952

19581953
uint64_t getOutputDataAddress() const { return OutputDataOffset; }

bolt/lib/Core/BinaryContext.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1639,6 +1639,35 @@ void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
16391639
}
16401640
}
16411641

1642+
/// Classify \p Symbol as an AArch64 mapping symbol.
///
/// For AArch64, the ABI defines mapping symbols so that data can be identified
/// inside the code section (see IHI0056B). "$x" (or "$x.<suffix>") identifies
/// a symbol starting code or the end of a data chunk inside code, while "$d"
/// (or "$d.<suffix>") identifies the start of data.
///
/// \returns MarkerSymType::CODE or MarkerSymType::DATA for a zero-sized symbol
/// of type ST_Unknown carrying one of those names when the binary is AArch64;
/// MarkerSymType::NONE otherwise, including when the symbol name or type
/// cannot be read.
MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const {
  if (!isAArch64() || ELFSymbolRef(Symbol).getSize())
    return MarkerSymType::NONE;

  // Every Expected<> has to be checked before it is destroyed, and a failure
  // value has to have its error consumed. Resolve each query on its own so no
  // Expected<> is left unchecked or unhandled on an early return.
  Expected<StringRef> NameOrError = Symbol.getName();
  if (!NameOrError) {
    consumeError(NameOrError.takeError());
    return MarkerSymType::NONE;
  }

  Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType();
  if (!TypeOrError) {
    consumeError(TypeOrError.takeError());
    return MarkerSymType::NONE;
  }

  if (*TypeOrError != SymbolRef::ST_Unknown)
    return MarkerSymType::NONE;

  const StringRef Name = *NameOrError;
  if (Name == "$x" || Name.startswith("$x."))
    return MarkerSymType::CODE;

  if (Name == "$d" || Name.startswith("$d."))
    return MarkerSymType::DATA;

  return MarkerSymType::NONE;
}
1666+
1667+
bool BinaryContext::isMarker(const SymbolRef &Symbol) const {
1668+
return getMarkerType(Symbol) != MarkerSymType::NONE;
1669+
}
1670+
16421671
void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
16431672
uint64_t Offset,
16441673
const BinaryFunction *Function,

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 1 addition & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -3926,33 +3926,6 @@ void BinaryFunction::deleteConservativeEdges() {
39263926
}
39273927
}
39283928

3929-
bool BinaryFunction::isDataMarker(const SymbolRef &Symbol,
3930-
uint64_t SymbolSize) const {
3931-
// For aarch64, the ABI defines mapping symbols so we identify data in the
3932-
// code section (see IHI0056B). $d identifies a symbol starting data contents.
3933-
if (BC.isAArch64() && Symbol.getType() &&
3934-
cantFail(Symbol.getType()) == SymbolRef::ST_Unknown && SymbolSize == 0 &&
3935-
Symbol.getName() &&
3936-
(cantFail(Symbol.getName()) == "$d" ||
3937-
cantFail(Symbol.getName()).startswith("$d.")))
3938-
return true;
3939-
return false;
3940-
}
3941-
3942-
bool BinaryFunction::isCodeMarker(const SymbolRef &Symbol,
3943-
uint64_t SymbolSize) const {
3944-
// For aarch64, the ABI defines mapping symbols so we identify data in the
3945-
// code section (see IHI0056B). $x identifies a symbol starting code or the
3946-
// end of a data chunk inside code.
3947-
if (BC.isAArch64() && Symbol.getType() &&
3948-
cantFail(Symbol.getType()) == SymbolRef::ST_Unknown && SymbolSize == 0 &&
3949-
Symbol.getName() &&
3950-
(cantFail(Symbol.getName()) == "$x" ||
3951-
cantFail(Symbol.getName()).startswith("$x.")))
3952-
return true;
3953-
return false;
3954-
}
3955-
39563929
bool BinaryFunction::isSymbolValidInScope(const SymbolRef &Symbol,
39573930
uint64_t SymbolSize) const {
39583931
// If this symbol is in a different section from the one where the
@@ -3963,7 +3936,7 @@ bool BinaryFunction::isSymbolValidInScope(const SymbolRef &Symbol,
39633936

39643937
// Some symbols are tolerated inside function bodies, others are not.
39653938
// The real function boundaries may not be known at this point.
3966-
if (isDataMarker(Symbol, SymbolSize) || isCodeMarker(Symbol, SymbolSize))
3939+
if (BC.isMarker(Symbol))
39673940
return true;
39683941

39693942
// It's okay to have a zero-sized symbol in the middle of non-zero-sized

bolt/lib/Rewrite/RewriteInstance.cpp

Lines changed: 78 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -880,47 +880,88 @@ void RewriteInstance::discoverFileObjects() {
880880
std::vector<SymbolRef> SortedFileSymbols;
881881
std::copy_if(InputFile->symbol_begin(), InputFile->symbol_end(),
882882
std::back_inserter(SortedFileSymbols), isSymbolInMemory);
883+
auto CompareSymbols = [this](const SymbolRef &A, const SymbolRef &B) {
884+
// Marker symbols have the highest precedence, while
885+
// SECTIONs have the lowest.
886+
auto AddressA = cantFail(A.getAddress());
887+
auto AddressB = cantFail(B.getAddress());
888+
if (AddressA != AddressB)
889+
return AddressA < AddressB;
890+
891+
bool AMarker = BC->isMarker(A);
892+
bool BMarker = BC->isMarker(B);
893+
if (AMarker || BMarker) {
894+
return AMarker && !BMarker;
895+
}
883896

884-
std::stable_sort(
885-
SortedFileSymbols.begin(), SortedFileSymbols.end(),
886-
[](const SymbolRef &A, const SymbolRef &B) {
887-
// FUNC symbols have the highest precedence, while SECTIONs
888-
// have the lowest.
889-
uint64_t AddressA = cantFail(A.getAddress());
890-
uint64_t AddressB = cantFail(B.getAddress());
891-
if (AddressA != AddressB)
892-
return AddressA < AddressB;
893-
894-
SymbolRef::Type AType = cantFail(A.getType());
895-
SymbolRef::Type BType = cantFail(B.getType());
896-
if (AType == SymbolRef::ST_Function && BType != SymbolRef::ST_Function)
897-
return true;
898-
if (BType == SymbolRef::ST_Debug && AType != SymbolRef::ST_Debug)
899-
return true;
897+
auto AType = cantFail(A.getType());
898+
auto BType = cantFail(B.getType());
899+
if (AType == SymbolRef::ST_Function && BType != SymbolRef::ST_Function)
900+
return true;
901+
if (BType == SymbolRef::ST_Debug && AType != SymbolRef::ST_Debug)
902+
return true;
900903

901-
return false;
902-
});
904+
return false;
905+
};
906+
907+
std::stable_sort(SortedFileSymbols.begin(), SortedFileSymbols.end(),
908+
CompareSymbols);
909+
910+
auto LastSymbol = SortedFileSymbols.end() - 1;
903911

904912
// For aarch64, the ABI defines mapping symbols so we identify data in the
905913
// code section (see IHI0056B). $d identifies data contents.
906-
auto LastSymbol = SortedFileSymbols.end() - 1;
914+
// Compilers usually merge multiple data objects in a single $d-$x interval,
915+
// but we need every data object to be marked with $d. Because of that we
916+
// create a vector of MarkerSyms with all locations of data objects.
917+
918+
struct MarkerSym {
919+
uint64_t Address;
920+
MarkerSymType Type;
921+
};
922+
923+
std::vector<MarkerSym> SortedMarkerSymbols;
924+
auto addExtraDataMarkerPerSymbol =
925+
[this](const std::vector<SymbolRef> &SortedFileSymbols,
926+
std::vector<MarkerSym> &SortedMarkerSymbols) {
927+
bool IsData = false;
928+
uint64_t LastAddr = 0;
929+
for (auto Sym = SortedFileSymbols.begin();
930+
Sym < SortedFileSymbols.end(); ++Sym) {
931+
uint64_t Address = cantFail(Sym->getAddress());
932+
if (LastAddr == Address) // don't repeat markers
933+
continue;
934+
935+
MarkerSymType MarkerType = BC->getMarkerType(*Sym);
936+
if (MarkerType != MarkerSymType::NONE) {
937+
SortedMarkerSymbols.push_back(MarkerSym{Address, MarkerType});
938+
LastAddr = Address;
939+
IsData = MarkerType == MarkerSymType::DATA;
940+
continue;
941+
}
942+
943+
if (IsData) {
944+
SortedMarkerSymbols.push_back(
945+
MarkerSym{cantFail(Sym->getAddress()), MarkerSymType::DATA});
946+
LastAddr = Address;
947+
}
948+
}
949+
};
950+
907951
if (BC->isAArch64()) {
952+
addExtraDataMarkerPerSymbol(SortedFileSymbols, SortedMarkerSymbols);
908953
LastSymbol = std::stable_partition(
909954
SortedFileSymbols.begin(), SortedFileSymbols.end(),
910-
[](const SymbolRef &Symbol) {
911-
StringRef Name = cantFail(Symbol.getName());
912-
return !(cantFail(Symbol.getType()) == SymbolRef::ST_Unknown &&
913-
(Name == "$d" || Name.startswith("$d.") || Name == "$x" ||
914-
Name.startswith("$x.")));
915-
});
955+
[this](const SymbolRef &Symbol) { return !BC->isMarker(Symbol); });
916956
--LastSymbol;
917957
}
918958

919959
BinaryFunction *PreviousFunction = nullptr;
920960
unsigned AnonymousId = 0;
921961

922-
const auto MarkersBegin = std::next(LastSymbol);
923-
for (auto ISym = SortedFileSymbols.begin(); ISym != MarkersBegin; ++ISym) {
962+
const auto SortedSymbolsEnd = std::next(LastSymbol);
963+
for (auto ISym = SortedFileSymbols.begin(); ISym != SortedSymbolsEnd;
964+
++ISym) {
924965
const SymbolRef &Symbol = *ISym;
925966
// Keep undefined symbols for pretty printing?
926967
if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Undefined)
@@ -1213,25 +1254,24 @@ void RewriteInstance::discoverFileObjects() {
12131254
adjustFunctionBoundaries();
12141255

12151256
// Annotate functions with code/data markers in AArch64
1216-
for (auto ISym = MarkersBegin; ISym != SortedFileSymbols.end(); ++ISym) {
1217-
const SymbolRef &Symbol = *ISym;
1218-
uint64_t Address =
1219-
cantFail(Symbol.getAddress(), "cannot get symbol address");
1220-
uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize();
1221-
BinaryFunction *BF =
1222-
BC->getBinaryFunctionContainingAddress(Address, true, true);
1257+
for (auto ISym = SortedMarkerSymbols.begin();
1258+
ISym != SortedMarkerSymbols.end(); ++ISym) {
1259+
1260+
auto *BF =
1261+
BC->getBinaryFunctionContainingAddress(ISym->Address, true, true);
1262+
12231263
if (!BF) {
12241264
// Stray marker
12251265
continue;
12261266
}
1227-
const uint64_t EntryOffset = Address - BF->getAddress();
1228-
if (BF->isCodeMarker(Symbol, SymbolSize)) {
1267+
const auto EntryOffset = ISym->Address - BF->getAddress();
1268+
if (ISym->Type == MarkerSymType::CODE) {
12291269
BF->markCodeAtOffset(EntryOffset);
12301270
continue;
12311271
}
1232-
if (BF->isDataMarker(Symbol, SymbolSize)) {
1272+
if (ISym->Type == MarkerSymType::DATA) {
12331273
BF->markDataAtOffset(EntryOffset);
1234-
BC->AddressToConstantIslandMap[Address] = BF;
1274+
BC->AddressToConstantIslandMap[ISym->Address] = BF;
12351275
continue;
12361276
}
12371277
llvm_unreachable("Unknown marker");
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
--- !ELF
2+
FileHeader:
3+
Class: ELFCLASS64
4+
Data: ELFDATA2LSB
5+
Type: ET_EXEC
6+
Machine: EM_AARCH64
7+
Entry: 0x210134
8+
ProgramHeaders:
9+
- Type: PT_PHDR
10+
Flags: [ PF_R ]
11+
VAddr: 0x200040
12+
Align: 0x8
13+
FileSize: 0x0000e0
14+
MemSize: 0x0000e0
15+
Offset: 0x000040
16+
- Type: PT_LOAD
17+
Flags: [ PF_R ]
18+
VAddr: 0x200000
19+
Align: 0x10000
20+
FileSize: 0x000120
21+
MemSize: 0x000120
22+
Offset: 0x000000
23+
- Type: PT_LOAD
24+
Flags: [ PF_X, PF_R ]
25+
FirstSec: .text
26+
LastSec: .text
27+
VAddr: 0x210120
28+
Align: 0x10000
29+
- Type: PT_GNU_STACK
30+
Flags: [ PF_W, PF_R ]
31+
Align: 0x0
32+
Sections:
33+
- Name: .text
34+
Type: SHT_PROGBITS
35+
Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
36+
Address: 0x210120
37+
AddressAlign: 0x4
38+
Content: 030F0B0700000000030F0B0700000000C0035FD6FFFFFF97000080D2A80B8052010000D4
39+
- Name: .rela.text
40+
Type: SHT_RELA
41+
Flags: [ SHF_INFO_LINK ]
42+
Link: .symtab
43+
AddressAlign: 0x8
44+
Info: .text
45+
Relocations:
46+
- Offset: 0x210134
47+
Symbol: dummy
48+
Type: R_AARCH64_CALL26
49+
- Name: .comment
50+
Type: SHT_PROGBITS
51+
Flags: [ SHF_MERGE, SHF_STRINGS ]
52+
AddressAlign: 0x1
53+
EntSize: 0x1
54+
Content: 4C696E6B65723A204C4C442031352E302E3000
55+
Symbols:
56+
- Name: val
57+
Index: SHN_ABS
58+
Value: 0x70B0F03
59+
- Name: first
60+
Section: .text
61+
Value: 0x210120
62+
Size: 0x8
63+
- Name: '$d.0'
64+
Section: .text
65+
Value: 0x210120
66+
- Name: second
67+
Section: .text
68+
Value: 0x210128
69+
Size: 0x8
70+
- Name: '$x.1'
71+
Section: .text
72+
Value: 0x210130
73+
- Name: .text
74+
Type: STT_SECTION
75+
Section: .text
76+
Value: 0x210120
77+
- Name: .comment
78+
Type: STT_SECTION
79+
Section: .comment
80+
- Name: dummy
81+
Type: STT_FUNC
82+
Section: .text
83+
Binding: STB_GLOBAL
84+
Value: 0x210130
85+
- Name: _start
86+
Type: STT_FUNC
87+
Section: .text
88+
Binding: STB_GLOBAL
89+
Value: 0x210134
90+
...

bolt/test/AArch64/unmarked-data.test

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// This test checks that multiple data objects in .text, of which only the first is marked, get disassembled properly.
2+
3+
// RUN: yaml2obj %S/Inputs/unmarked-data.yaml -o %t.exe
4+
// RUN: llvm-bolt %t.exe -o %t.bolt -lite=0 -use-old-text=0 2>&1 | FileCheck %s
5+
// CHECK-NOT: BOLT-WARNING
6+
// RUN: llvm-objdump -j .text -d --disassemble-symbols=first,second %t.bolt | FileCheck %s -check-prefix=CHECK-SYMBOL
7+
// CHECK-SYMBOL: <first>:
8+
// CHECK-SYMBOL: <second>:
9+
10+
// YAML is based on the following assembly:
11+
12+
.equ val, 0x070b0f03 // we use a constant that is not a valid instruction so that it can't be silently disassembled
13+
.text
14+
15+
first:
16+
.xword val
17+
.size first, .-first
18+
19+
second:
20+
.xword val
21+
.size second, .-second
22+
23+
.globl dummy
24+
.type dummy, %function
25+
dummy: // dummy function to force relocations
26+
ret
27+
28+
.globl _start
29+
.type _start, %function
30+
_start:
31+
bl dummy
32+
mov x0, #0
33+
mov w8, #93
34+
svc #0

0 commit comments

Comments
 (0)