-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[ELF] Detect convergence of output section addresses #93888
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ELF] Detect convergence of output section addresses #93888
Conversation
Created using spr 1.3.5-bogner
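@llvm/pr-subscribers-lld @llvm/pr-subscribers-lld-elf Author: Fangrui Song (MaskRay) Changes: Some linker scripts don't converge. https://reviews.llvm.org/D66279 ("[ELF] Make LinkerScript::assignAddresses iterative") detected convergence of symbol assignments. This patch detects convergence of output section addresses. While input sections might also have convergence issues, they are less common as expressions that could cause convergence issues typically involve output sections and symbol assignments. GNU ld has an error `non constant or forward reference address expression for section` that correctly rejects an output section address that directly references a later section (e.g. `.text ADDR(.data) + 0x1000 : { *(.text) }`),
but not the variant where the forward reference goes through a symbol assigned after the sections (e.g. `.text text : { *(.text) }` followed by `text = ADDR(.data) + 0x1000;`).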
Our approach consistently rejects both cases. Link: https://discourse.llvm.org/t/lld-and-layout-convergence/79232 Full diff: https://github.com/llvm/llvm-project/pull/93888.diff 5 Files Affected:
diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp
index bfc13b658f5bf..bd859a8a4c275 100644
--- a/lld/ELF/LinkerScript.cpp
+++ b/lld/ELF/LinkerScript.cpp
@@ -1025,13 +1025,14 @@ static OutputSection *findFirstSection(PhdrEntry *load) {
return nullptr;
}
-// This function assigns offsets to input sections and an output section
-// for a single sections command (e.g. ".text { *(.text); }").
-void LinkerScript::assignOffsets(OutputSection *sec) {
+// Assign addresses to an output section and offsets to its input sections and
+// symbol assignments. Return true if the output section's address has changed.
+bool LinkerScript::assignOffsets(OutputSection *sec) {
const bool isTbss = (sec->flags & SHF_TLS) && sec->type == SHT_NOBITS;
const bool sameMemRegion = state->memRegion == sec->memRegion;
const bool prevLMARegionIsDefault = state->lmaRegion == nullptr;
const uint64_t savedDot = dot;
+ bool addressChanged = false;
state->memRegion = sec->memRegion;
state->lmaRegion = sec->lmaRegion;
@@ -1068,6 +1069,7 @@ void LinkerScript::assignOffsets(OutputSection *sec) {
dot = alignToPowerOf2(dot, sec->addralign);
expandMemoryRegions(dot - pos);
}
+ addressChanged = sec->addr != dot;
sec->addr = dot;
// state->lmaOffset is LMA minus VMA. If LMA is explicitly specified via AT()
@@ -1151,6 +1153,7 @@ void LinkerScript::assignOffsets(OutputSection *sec) {
state->tbssAddr = dot;
dot = savedDot;
}
+ return addressChanged;
}
static bool isDiscardable(const OutputSection &sec) {
@@ -1389,7 +1392,8 @@ LinkerScript::AddressState::AddressState() {
// we also handle rest commands like symbol assignments and ASSERTs.
// Returns a symbol that has changed its section or value, or nullptr if no
// symbol has changed.
-const Defined *LinkerScript::assignAddresses() {
+std::pair<const OutputSection *, const Defined *>
+LinkerScript::assignAddresses() {
if (script->hasSectionsCommand) {
// With a linker script, assignment of addresses to headers is covered by
// allocateHeaders().
@@ -1402,6 +1406,7 @@ const Defined *LinkerScript::assignAddresses() {
dot += getHeaderSize();
}
+ OutputSection *changedOsec = nullptr;
AddressState st;
state = &st;
errorOnMissingSection = true;
@@ -1416,11 +1421,12 @@ const Defined *LinkerScript::assignAddresses() {
assign->size = dot - assign->addr;
continue;
}
- assignOffsets(&cast<OutputDesc>(cmd)->osec);
+ if (assignOffsets(&cast<OutputDesc>(cmd)->osec) && !changedOsec)
+ changedOsec = &cast<OutputDesc>(cmd)->osec;
}
state = nullptr;
- return getChangedSymbolAssignment(oldValues);
+ return {changedOsec, getChangedSymbolAssignment(oldValues)};
}
static bool hasRegionOverflowed(MemoryRegion *mr) {
diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h
index 734d4e7498aa2..36feab36e26ba 100644
--- a/lld/ELF/LinkerScript.h
+++ b/lld/ELF/LinkerScript.h
@@ -300,7 +300,7 @@ class LinkerScript final {
std::pair<MemoryRegion *, MemoryRegion *>
findMemoryRegion(OutputSection *sec, MemoryRegion *hint);
- void assignOffsets(OutputSection *sec);
+ bool assignOffsets(OutputSection *sec);
// This captures the local AddressState and makes it accessible
// deliberately. This is needed as there are some cases where we cannot just
@@ -334,7 +334,7 @@ class LinkerScript final {
bool needsInterpSection();
bool shouldKeep(InputSectionBase *s);
- const Defined *assignAddresses();
+ std::pair<const OutputSection *, const Defined *> assignAddresses();
bool spillSections();
void erasePotentialSpillSections();
void allocateHeaders(SmallVector<PhdrEntry *, 0> &phdrs);
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index c498153f3348b..c2ccc4f49ad2e 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -1479,16 +1479,22 @@ template <class ELFT> void Writer<ELFT>::finalizeAddressDependentContent() {
changed |= part.memtagGlobalDescriptors->updateAllocSize();
}
- const Defined *changedSym = script->assignAddresses();
+ std::pair<const OutputSection *, const Defined *> changes =
+ script->assignAddresses();
if (!changed) {
// Some symbols may be dependent on section addresses. When we break the
// loop, the symbol values are finalized because a previous
// assignAddresses() finalized section addresses.
- if (!changedSym)
+ if (!changes.first && !changes.second)
break;
if (++assignPasses == 5) {
- errorOrWarn("assignment to symbol " + toString(*changedSym) +
- " does not converge");
+ if (changes.first)
+ errorOrWarn("address (0x" + Twine::utohexstr(changes.first->addr) +
+ ") of section '" + changes.first->name +
+ "' does not converge");
+ if (changes.second)
+ errorOrWarn("assignment to symbol " + toString(*changes.second) +
+ " does not converge");
break;
}
} else if (spilled) {
diff --git a/lld/test/ELF/linkerscript/memory-err.s b/lld/test/ELF/linkerscript/memory-err.s
index 98e71e79f17d8..5ec190a415b29 100644
--- a/lld/test/ELF/linkerscript/memory-err.s
+++ b/lld/test/ELF/linkerscript/memory-err.s
@@ -68,8 +68,8 @@
# RUN: symbol = .; \
# RUN: .data : { *(.data) } > ram \
# RUN: }' > %t.script
-# RUN: not ld.lld -T %t.script %t.o -o /dev/null 2>&1 | FileCheck --check-prefix=ERR_OVERFLOW %s
-# ERR_OVERFLOW: error: section '.data' will not fit in region 'ram': overflowed by 2 bytes
+# RUN: not ld.lld -T %t.script %t.o -o /dev/null 2>&1 | FileCheck --check-prefix=NOT_CONVERGE %s
+# NOT_CONVERGE: error: address (0x14) of section '.text' does not converge
nop
diff --git a/lld/test/ELF/linkerscript/section-not-converge.test b/lld/test/ELF/linkerscript/section-not-converge.test
new file mode 100644
index 0000000000000..99e9eeb4f2d7a
--- /dev/null
+++ b/lld/test/ELF/linkerscript/section-not-converge.test
@@ -0,0 +1,37 @@
+# REQUIRES: x86
+# RUN: rm -rf %t && split-file %s %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64 a.s -o a.o
+
+# RUN: not ld.lld a.o -T a.lds 2>&1 | FileCheck %s --implicit-check-not=error:
+# CHECK: error: address (0x6014) of section '.text' does not converge
+
+# RUN: ld.lld a.o -T b.lds --noinhibit-exec 2>&1 | FileCheck %s --check-prefix=CHECK2 --implicit-check-not=warning:
+# CHECK2: warning: address (0x5014) of section '.text' does not converge
+# CHECK2: warning: assignment to symbol a does not converge
+
+#--- a.s
+.globl _start
+_start: .space 4
+.data; .byte 0
+
+#--- a.lds
+SECTIONS {
+ . = 0x1000;
+ .text ADDR(.data) + 0x1000 : { *(.text) }
+ .data : { *(.data) }
+}
+
+#--- b.lds
+SECTIONS {
+ . = 0x1000;
+ .text text : { *(.text) }
+ .data : {
+ *(.data)
+ x = ADDR(.text);
+ a = b;
+ b = c;
+ ## Absolute symbol; not converging
+ c = ABSOLUTE(ADDR(.text));
+ }
+ text = ADDR(.data) + 0x1000;
+}
|
Created using spr 1.3.5-bogner
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
# RUN: not ld.lld -T %t.script %t.o -o /dev/null 2>&1 | FileCheck --check-prefix=ERR_OVERFLOW %s | ||
# ERR_OVERFLOW: error: section '.data' will not fit in region 'ram': overflowed by 2 bytes | ||
# RUN: not ld.lld -T %t.script %t.o -o /dev/null 2>&1 | FileCheck --check-prefix=NOT_CONVERGE %s | ||
# NOT_CONVERGE: error: address (0x14) of section '.text' does not converge |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this example expected to not converge?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes. The memory region is `ram : ORIGIN = symbol, LENGTH = 4094`. The origin and the symbol address will be increased in each iteration step.
Some linker scripts don't converge. https://reviews.llvm.org/D66279
("[ELF] Make LinkerScript::assignAddresses iterative") detected
convergence of symbol assignments.
This patch detects convergence of output section addresses. While input
sections might also have convergence issues, they are less common as
expressions that could cause convergence issues typically involve output
sections and symbol assignments.
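GNU ld has an error
`non constant or forward reference address expression for section`
that correctly rejects an output section address that directly references a later section (e.g. `.text ADDR(.data) + 0x1000 : { *(.text) }`),
but not the variant where the forward reference goes through a symbol assigned after the sections (e.g. `.text text : { *(.text) }` followed by `text = ADDR(.data) + 0x1000;`).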
Our approach consistently rejects both cases.
Link: https://discourse.llvm.org/t/lld-and-layout-convergence/79232