Skip to content

[BOLT] Abort on out-of-section symbols in GOT #100801

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion bolt/include/bolt/Core/BinaryContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -911,7 +911,8 @@ class BinaryContext {
/// of \p Flags.
MCSymbol *registerNameAtAddress(StringRef Name, uint64_t Address,
uint64_t Size, uint16_t Alignment,
unsigned Flags = 0);
unsigned Flags = 0,
BinarySection *Section = NULL);

/// Return BinaryData registered at a given \p Address or nullptr if no
/// global symbol was registered at the location.
Expand Down
22 changes: 16 additions & 6 deletions bolt/lib/Core/BinaryContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1056,18 +1056,28 @@ void BinaryContext::adjustCodePadding() {
MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
uint64_t Size,
uint16_t Alignment,
unsigned Flags) {
unsigned Flags,
BinarySection *Section) {
// Register the name with MCContext.
MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name);
BinaryData *BD;

// Register out of section symbols only in GlobalSymbols map
if (Section && Section->getEndAddress() == Address) {
BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
*Section, Flags);
GlobalSymbols[Name] = BD;
return Symbol;
}

auto GAI = BinaryDataMap.find(Address);
BinaryData *BD;
if (GAI == BinaryDataMap.end()) {
ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address);
BinarySection &Section =
SectionOrErr ? SectionOrErr.get() : absoluteSection();
BinarySection &SectionRef = Section ? *Section
: SectionOrErr ? SectionOrErr.get()
: absoluteSection();
BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
Section, Flags);
SectionRef, Flags);
GAI = BinaryDataMap.emplace(Address, BD).first;
GlobalSymbols[Name] = BD;
updateObjectNesting(GAI);
Expand Down Expand Up @@ -1402,7 +1412,7 @@ void BinaryContext::postProcessSymbolTable() {
if ((BD->getName().starts_with("SYMBOLat") ||
BD->getName().starts_with("DATAat")) &&
!BD->getParent() && !BD->getSize() && !BD->isAbsolute() &&
BD->getSection()) {
BD->getSection().getSize()) {
this->errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD
<< "\n";
Valid = false;
Expand Down
51 changes: 44 additions & 7 deletions bolt/lib/Rewrite/RewriteInstance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -955,13 +955,13 @@ void RewriteInstance::discoverFileObjects() {
uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize();
uint64_t SymbolAlignment = Symbol.getAlignment();

auto registerName = [&](uint64_t FinalSize) {
auto registerName = [&](uint64_t FinalSize, BinarySection *Section = NULL) {
// Register names even if it's not a function, e.g. for an entry point.
BC->registerNameAtAddress(UniqueName, SymbolAddress, FinalSize,
SymbolAlignment, SymbolFlags);
SymbolAlignment, SymbolFlags, Section);
if (!AlternativeName.empty())
BC->registerNameAtAddress(AlternativeName, SymbolAddress, FinalSize,
SymbolAlignment, SymbolFlags);
SymbolAlignment, SymbolFlags, Section);
};

section_iterator Section =
Expand All @@ -986,12 +986,25 @@ void RewriteInstance::discoverFileObjects() {
<< " for function\n");

if (SymbolAddress == Section->getAddress() + Section->getSize()) {
ErrorOr<BinarySection &> SectionOrError =
BC->getSectionForAddress(Section->getAddress());

// Skip symbols from invalid sections
if (!SectionOrError) {
BC->errs() << "BOLT-WARNING: " << UniqueName << " (0x"
<< Twine::utohexstr(SymbolAddress)
<< ") does not have any section\n";
continue;
}

assert(SymbolSize == 0 &&
"unexpect non-zero sized symbol at end of section");
LLVM_DEBUG(
dbgs()
<< "BOLT-DEBUG: rejecting as symbol points to end of its section\n");
registerName(SymbolSize);
LLVM_DEBUG({
dbgs() << "BOLT-DEBUG: rejecting as symbol " << UniqueName
<< " points to end of " << SectionOrError->getName()
<< " section\n";
});
registerName(SymbolSize, &SectionOrError.get());
continue;
}

Expand Down Expand Up @@ -2614,6 +2627,30 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
}
}

if (Relocation::isGOT(RType) && !Relocation::isTLS(RType)) {
auto exitOnGotEndSymol = [&](StringRef Name) {
BC->errs() << "BOLT-ERROR: GOT table contains currently unsupported "
"section end symbol "
<< Name << "\n";
exit(1);
};

if (SymbolIter != InputFile->symbol_end() && ReferencedSection) {
if (cantFail(SymbolIter->getAddress()) ==
ReferencedSection->getEndAddress())
exitOnGotEndSymol(cantFail(SymbolIter->getName()));
} else {
// If no section and symbol are provided by relocation, try to find the
// symbol by its name, including the possibility that the symbol is local.
BinaryData *BD = BC->getBinaryDataByName(SymbolName);
if (!BD && NR.getUniquifiedNameCount(SymbolName) == 1)
BD = BC->getBinaryDataByName(NR.getUniqueName(SymbolName, 1));

if ((BD && BD->getAddress() == BD->getSection().getEndAddress()))
exitOnGotEndSymol(BD->getName());
}
}

if (!ReferencedSection)
ReferencedSection = BC->getSectionForAddress(SymbolAddress);

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
SECTIONS {
PROVIDE (__executable_start = SEGMENT_START("text-segment", 0x400000)); . = SEGMENT_START("text-segment", 0x400000) + SIZEOF_HEADERS;
.data : { *(.data) *(.array) }
.text : { *(.text) }
.got : { *(.got) *(.igot) }
}
28 changes: 28 additions & 0 deletions bolt/test/AArch64/got_end_of_section_symbol.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
# RUN: %s -o %t.o
# RUN: %clang %cflags -nostartfiles -nodefaultlibs -static -Wl,--no-relax \
# RUN: -Wl,-q -Wl,-T %S/Inputs/got_end_of_section_symbol.lld_script \
# RUN: %t.o -o %t.exe
# RUN: not llvm-bolt %t.exe -o %t.bolt 2>&1 | FileCheck %s

# CHECK: BOLT-ERROR: GOT table contains currently unsupported section end
# CHECK-SAME: symbol array_end

.section .array, "a", @progbits
.globl array_start
.globl array_end
array_start:
.word 0
array_end:

.section .text
.globl _start
.type _start, %function
_start:
adrp x1, #:got:array_start
ldr x1, [x1, #:got_lo12:array_start]
adrp x0, #:got:array_end
ldr x0, [x0, #:got_lo12:array_end]
adrp x2, #:got:_start
ldr x2, [x2, #:got_lo12:_start]
ret
4 changes: 2 additions & 2 deletions bolt/test/X86/section-end-sym.s
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
## Check that BOLT doesn't consider end-of-section symbols (e.g., _etext) as
## functions.

# REQUIRES: x86_64-linux, asserts
# REQUIRES: system-linux, asserts

# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o %t.o
# RUN: ld.lld %t.o -o %t.exe -q
# RUN: llvm-bolt %t.exe -o %t.null --print-cfg --debug-only=bolt 2>&1 \
# RUN: | FileCheck %s

# CHECK: considering symbol etext for function
# CHECK-NEXT: rejecting as symbol points to end of its section
# CHECK-NEXT: rejecting as symbol etext points to end of .text section
# CHECK-NOT: Binary Function "etext{{.*}}" after building cfg


Expand Down
Loading