Skip to content

[lld][COFF] Remove duplicate strtab entries #141197

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jun 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 31 additions & 18 deletions lld/COFF/Writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/Support/Parallel.h"
Expand Down Expand Up @@ -201,7 +202,8 @@ struct ChunkRange {
class Writer {
public:
Writer(COFFLinkerContext &c)
: buffer(c.e.outputBuffer), delayIdata(c), ctx(c) {}
: buffer(c.e.outputBuffer), strtab(StringTableBuilder::WinCOFF),
delayIdata(c), ctx(c) {}
void run();

private:
Expand Down Expand Up @@ -281,7 +283,7 @@ class Writer {

std::unique_ptr<FileOutputBuffer> &buffer;
std::map<PartialSectionKey, PartialSection *> partialSections;
std::vector<char> strtab;
StringTableBuilder strtab;
std::vector<llvm::object::coff_symbol16> outputSymtab;
std::vector<ECCodeMapEntry> codeMap;
IdataContents idata;
Expand Down Expand Up @@ -1434,14 +1436,6 @@ void Writer::assignOutputSectionIndices() {
sc->setOutputSectionIdx(mc->getOutputSectionIdx());
}

size_t Writer::addEntryToStringTable(StringRef str) {
assert(str.size() > COFF::NameSize);
size_t offsetOfEntry = strtab.size() + 4; // +4 for the size field
strtab.insert(strtab.end(), str.begin(), str.end());
strtab.push_back('\0');
return offsetOfEntry;
}

std::optional<coff_symbol16> Writer::createSymbol(Defined *def) {
coff_symbol16 sym;
switch (def->kind()) {
Expand Down Expand Up @@ -1482,7 +1476,8 @@ std::optional<coff_symbol16> Writer::createSymbol(Defined *def) {
StringRef name = def->getName();
if (name.size() > COFF::NameSize) {
sym.Name.Offset.Zeroes = 0;
sym.Name.Offset.Offset = addEntryToStringTable(name);
sym.Name.Offset.Offset = 0; // Filled in later.
strtab.add(name);
} else {
memset(sym.Name.ShortName, 0, COFF::NameSize);
memcpy(sym.Name.ShortName, name.data(), name.size());
Expand Down Expand Up @@ -1514,6 +1509,7 @@ void Writer::createSymbolAndStringTable() {
// solution where discardable sections have long names preserved and
// non-discardable sections have their names truncated, to ensure that any
// section which is mapped at runtime also has its name mapped at runtime.
SmallVector<OutputSection *> longNameSections;
for (OutputSection *sec : ctx.outputSections) {
if (sec->name.size() <= COFF::NameSize)
continue;
Expand All @@ -1525,9 +1521,13 @@ void Writer::createSymbolAndStringTable() {
<< " is longer than 8 characters and will use a non-standard string "
"table";
}
sec->setStringTableOff(addEntryToStringTable(sec->name));
// Put the section name in the begin of strtab so that its offset is less
// than Max7DecimalOffset otherwise lldb/gdb will not read it.
strtab.add(sec->name, /*Priority=*/UINT8_MAX);
longNameSections.push_back(sec);
}

std::vector<std::pair<size_t, StringRef>> longNameSymbols;
if (ctx.config.writeSymtab) {
for (ObjFile *file : ctx.objFileInstances) {
for (Symbol *b : file->getSymbols()) {
Expand All @@ -1542,15 +1542,22 @@ void Writer::createSymbolAndStringTable() {
continue;
}

if (std::optional<coff_symbol16> sym = createSymbol(d))
if (std::optional<coff_symbol16> sym = createSymbol(d)) {
if (d->getName().size() > COFF::NameSize)
longNameSymbols.emplace_back(outputSymtab.size(), d->getName());
outputSymtab.push_back(*sym);
}

if (auto *dthunk = dyn_cast<DefinedImportThunk>(d)) {
if (!dthunk->wrappedSym->writtenToSymtab) {
dthunk->wrappedSym->writtenToSymtab = true;
if (std::optional<coff_symbol16> sym =
createSymbol(dthunk->wrappedSym))
createSymbol(dthunk->wrappedSym)) {
if (d->getName().size() > COFF::NameSize)
longNameSymbols.emplace_back(outputSymtab.size(),
dthunk->wrappedSym->getName());
outputSymtab.push_back(*sym);
}
}
}
}
Expand All @@ -1560,11 +1567,19 @@ void Writer::createSymbolAndStringTable() {
if (outputSymtab.empty() && strtab.empty())
return;

strtab.finalize();
for (OutputSection *sec : longNameSections)
sec->setStringTableOff(strtab.getOffset(sec->name));
for (auto P : longNameSymbols) {
coff_symbol16 &sym = outputSymtab[P.first];
sym.Name.Offset.Offset = strtab.getOffset(P.second);
}

// We position the symbol table to be adjacent to the end of the last section.
uint64_t fileOff = fileSize;
pointerToSymbolTable = fileOff;
fileOff += outputSymtab.size() * sizeof(coff_symbol16);
fileOff += 4 + strtab.size();
fileOff += strtab.getSize();
fileSize = alignTo(fileOff, ctx.config.fileAlign);
}

Expand Down Expand Up @@ -1945,9 +1960,7 @@ template <typename PEHeaderTy> void Writer::writeHeader() {
// Create the string table, it follows immediately after the symbol table.
// The first 4 bytes is length including itself.
buf = reinterpret_cast<uint8_t *>(&symbolTable[numberOfSymbols]);
write32le(buf, strtab.size() + 4);
if (!strtab.empty())
memcpy(buf + 4, strtab.data(), strtab.size());
strtab.write(buf);
}

void Writer::openFile(StringRef path) {
Expand Down
29 changes: 29 additions & 0 deletions lld/test/COFF/strtab.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# RUN: llvm-mc -triple=x86_64-windows-msvc %s -filetype=obj -o %t.obj
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it work if x86 is not configured?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have marked the test as requiring x86 in 757c80d.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! I didn't aware of that.

# RUN: lld-link -out:%t.exe -entry:main %t.obj -debug:dwarf
# RUN: llvm-readobj --string-table %t.exe | FileCheck %s

# CHECK: StringTable {
# CHECK-NEXT: Length: 87
# CHECK-NEXT: [ 4] .debug_abbrev
# CHECK-NEXT: [ 12] .debug_line
# CHECK-NEXT: [ 1e] long_name_symbolz
# CHECK-NEXT: [ 30] .debug_abbrez
# CHECK-NEXT: [ 3e] __impl_long_name_symbolA
# CHECK-NEXT: }


.global main
.text
main:
long_name_symbolz:
long_name_symbolA:
__impl_long_name_symbolA:
name_symbolA:
.debug_abbrez:
ret

.section .debug_abbrev,"dr"
.byte 0

.section .debug_line,"dr"
.byte 0
18 changes: 13 additions & 5 deletions llvm/include/llvm/MC/StringTableBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ class StringTableBuilder {
};

private:
// Only non-zero priority will be recorded.
DenseMap<CachedHashStringRef, uint8_t> StringPriorityMap;
DenseMap<CachedHashStringRef, size_t> StringIndexMap;
size_t Size = 0;
Kind K;
Expand All @@ -51,11 +53,16 @@ class StringTableBuilder {
LLVM_ABI StringTableBuilder(Kind K, Align Alignment = Align(1));
LLVM_ABI ~StringTableBuilder();

/// Add a string to the builder. Returns the position of S in the
/// table. The position will be changed if finalize is used.
/// Can only be used before the table is finalized.
LLVM_ABI size_t add(CachedHashStringRef S);
size_t add(StringRef S) { return add(CachedHashStringRef(S)); }
/// Add a string to the builder. Returns the position of S in the table. The
/// position will be changed if finalize is used. Can only be used before the
/// table is finalized. Priority is only useful with reordering. Strings with
/// the same priority will be put together. Strings with higher priority are
/// placed closer to the begin of string table. When adding same string with
/// different priority, the maximum priority win.
LLVM_ABI size_t add(CachedHashStringRef S, uint8_t Priority = 0);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment should mention what if a string is added with different priorities. (std::max below)

size_t add(StringRef S, uint8_t Priority = 0) {
return add(CachedHashStringRef(S), Priority);
}

/// Analyze the strings and build the final table. No more strings can
/// be added after this point.
Expand All @@ -78,6 +85,7 @@ class StringTableBuilder {
bool contains(StringRef S) const { return contains(CachedHashStringRef(S)); }
bool contains(CachedHashStringRef S) const { return StringIndexMap.count(S); }

bool empty() const { return StringIndexMap.empty(); }
size_t getSize() const { return Size; }
LLVM_ABI void clear();

Expand Down
27 changes: 24 additions & 3 deletions llvm/lib/MC/StringTableBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,13 +138,31 @@ void StringTableBuilder::finalizeInOrder() {
void StringTableBuilder::finalizeStringTable(bool Optimize) {
Finalized = true;

if (Optimize) {
if (Optimize && StringIndexMap.size()) {
std::vector<StringPair *> Strings;
Strings.reserve(StringIndexMap.size());
for (StringPair &P : StringIndexMap)
Strings.push_back(&P);

multikeySort(Strings, 0);
size_t RangeBegin = 0;
MutableArrayRef<StringPair *> StringsRef(Strings);
if (StringPriorityMap.size()) {
llvm::sort(Strings,
[&](const StringPair *LHS, const StringPair *RHS) -> bool {
return StringPriorityMap.lookup(LHS->first) >
StringPriorityMap.lookup(RHS->first);
});
uint8_t RangePriority = StringPriorityMap.lookup(Strings[0]->first);
for (size_t I = 1, E = Strings.size(); I != E && RangePriority; ++I) {
uint8_t Priority = StringPriorityMap.lookup(Strings[I]->first);
if (Priority != RangePriority) {
multikeySort(StringsRef.slice(RangeBegin, I - RangeBegin), 0);
RangePriority = Priority;
RangeBegin = I;
}
}
}
multikeySort(StringsRef.slice(RangeBegin), 0);
initSize();

StringRef Previous;
Expand Down Expand Up @@ -199,11 +217,14 @@ size_t StringTableBuilder::getOffset(CachedHashStringRef S) const {
return I->second;
}

size_t StringTableBuilder::add(CachedHashStringRef S) {
size_t StringTableBuilder::add(CachedHashStringRef S, uint8_t Priority) {
if (K == WinCOFF)
assert(S.size() > COFF::NameSize && "Short string in COFF string table!");

assert(!isFinalized());
if (Priority)
StringPriorityMap[S] = std::max(Priority, StringPriorityMap[S]);

auto P = StringIndexMap.try_emplace(S);
if (P.second) {
size_t Start = alignTo(Size, Alignment);
Expand Down
Loading