-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[lld-macho] Support archives without index #132942
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[lld-macho] Support archives without index #132942
Conversation
@llvm/pr-subscribers-lld @llvm/pr-subscribers-lld-macho Author: Leonard Grey (speednoisemovement) ChangesThis is a ~port of https://reviews.llvm.org/D117284. Like in that change, archives without indices are treated as a collection of lazy object files (as in Porting the ELF follow-up to convert all archives to the lazy object code path (https://reviews.llvm.org/D119074) is a natural next step, but we would need to ensure the assertions about memory use hold for Mach-O. NB: without an index, we can't do the part of the Full diff: https://github.com/llvm/llvm-project/pull/132942.diff 5 Files Affected:
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index ee26fafb50b73..dbb6b9a50a82f 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -314,8 +314,6 @@ static InputFile *addFile(StringRef path, LoadType loadType,
std::unique_ptr<object::Archive> archive = CHECK(
object::Archive::create(mbref), path + ": failed to parse archive");
- if (!archive->isEmpty() && !archive->hasSymbolTable())
- error(path + ": archive has no index; run ranlib to add one");
file = make<ArchiveFile>(std::move(archive), isForceHidden);
if (tar && file->getArchive().isThin())
@@ -362,9 +360,11 @@ static InputFile *addFile(StringRef path, LoadType loadType,
": Archive::children failed: " + toString(std::move(e)));
}
} else if (isCommandLineLoad && config->forceLoadObjC) {
- for (const object::Archive::Symbol &sym : file->getArchive().symbols())
- if (sym.getName().starts_with(objc::symbol_names::klass))
- file->fetch(sym);
+ if (file->getArchive().getNumberOfSymbols() > 0) {
+ for (const object::Archive::Symbol &sym : file->getArchive().symbols())
+ if (sym.getName().starts_with(objc::symbol_names::klass))
+ file->fetch(sym);
+ }
// TODO: no need to look for ObjC sections for a given archive member if
// we already found that it contains an ObjC symbol.
@@ -394,7 +394,6 @@ static InputFile *addFile(StringRef path, LoadType loadType,
": Archive::children failed: " + toString(std::move(e)));
}
}
-
file->addLazySymbols();
loadedArchives[path] = ArchiveFileInfo{file, isCommandLineLoad};
newFile = file;
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 9adfbc9d3f6f5..31eba6b47140d 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -2156,9 +2156,30 @@ ArchiveFile::ArchiveFile(std::unique_ptr<object::Archive> &&f, bool forceHidden)
void ArchiveFile::addLazySymbols() {
// Avoid calling getMemoryBufferRef() on zero-symbol archive
// since that crashes.
- if (file->isEmpty() || file->getNumberOfSymbols() == 0)
+ if (file->isEmpty())
return;
+ if (file->getNumberOfSymbols() == 0) {
+ // No index, treat each child as a lazy object file.
+ Error e = Error::success();
+ for (const object::Archive::Child &c : file->children(e)) {
+ // Check `seen` but don't insert so a future eager load can still happen.
+ if (seen.contains(c.getChildOffset()))
+ continue;
+ if (!seenLazy.insert(c.getChildOffset()).second)
+ continue;
+ auto file = childToObjectFile(c, /*lazy=*/true);
+ if (!file)
+ error(toString(this) +
+ ": couldn't process child: " + toString(file.takeError()));
+ inputFiles.insert(*file);
+ }
+ if (e)
+ error(toString(this) +
+ ": Archive::children failed: " + toString(std::move(e)));
+ return;
+ }
+
Error err = Error::success();
auto child = file->child_begin(err);
// Ignore the I/O error here - will be reported later.
@@ -2188,16 +2209,17 @@ void ArchiveFile::addLazySymbols() {
static Expected<InputFile *>
loadArchiveMember(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
- uint64_t offsetInArchive, bool forceHidden, bool compatArch) {
+ uint64_t offsetInArchive, bool forceHidden, bool compatArch,
+ bool lazy) {
if (config->zeroModTime)
modTime = 0;
switch (identify_magic(mb.getBuffer())) {
case file_magic::macho_object:
- return make<ObjFile>(mb, modTime, archiveName, /*lazy=*/false, forceHidden,
+ return make<ObjFile>(mb, modTime, archiveName, lazy, forceHidden,
compatArch);
case file_magic::bitcode:
- return make<BitcodeFile>(mb, archiveName, offsetInArchive, /*lazy=*/false,
+ return make<BitcodeFile>(mb, archiveName, offsetInArchive, lazy,
forceHidden, compatArch);
default:
return createStringError(inconvertibleErrorCode(),
@@ -2209,19 +2231,7 @@ loadArchiveMember(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
Error ArchiveFile::fetch(const object::Archive::Child &c, StringRef reason) {
if (!seen.insert(c.getChildOffset()).second)
return Error::success();
-
- Expected<MemoryBufferRef> mb = c.getMemoryBufferRef();
- if (!mb)
- return mb.takeError();
-
- Expected<TimePoint<std::chrono::seconds>> modTime = c.getLastModified();
- if (!modTime)
- return modTime.takeError();
-
- Expected<InputFile *> file =
- loadArchiveMember(*mb, toTimeT(*modTime), getName(), c.getChildOffset(),
- forceHidden, compatArch);
-
+ auto file = childToObjectFile(c, /*lazy=*/false);
if (!file)
return file.takeError();
@@ -2248,6 +2258,21 @@ void ArchiveFile::fetch(const object::Archive::Symbol &sym) {
toMachOString(symCopy) + ": " + toString(std::move(e)));
}
+Expected<InputFile *>
+ArchiveFile::childToObjectFile(const llvm::object::Archive::Child &c,
+ bool lazy) {
+ Expected<MemoryBufferRef> mb = c.getMemoryBufferRef();
+ if (!mb)
+ return mb.takeError();
+
+ Expected<TimePoint<std::chrono::seconds>> modTime = c.getLastModified();
+ if (!modTime)
+ return modTime.takeError();
+
+ return loadArchiveMember(*mb, toTimeT(*modTime), getName(),
+ c.getChildOffset(), forceHidden, compatArch, lazy);
+}
+
static macho::Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &objSym,
BitcodeFile &file) {
StringRef name = saver().save(objSym.getName());
diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
index bc8c8038a39d1..2d5bceb160445 100644
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -297,10 +297,13 @@ class ArchiveFile final : public InputFile {
static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
private:
+ Expected<InputFile *> childToObjectFile(const llvm::object::Archive::Child &c,
+ bool lazy);
std::unique_ptr<llvm::object::Archive> file;
// Keep track of children fetched from the archive by tracking
// which address offsets have been fetched already.
llvm::DenseSet<uint64_t> seen;
+ llvm::DenseSet<uint64_t> seenLazy;
// Load all symbols with hidden visibility (-load_hidden).
bool forceHidden;
};
diff --git a/lld/test/MachO/archive-no-index.s b/lld/test/MachO/archive-no-index.s
new file mode 100644
index 0000000000000..37d28fda5c920
--- /dev/null
+++ b/lld/test/MachO/archive-no-index.s
@@ -0,0 +1,43 @@
+# REQUIRES: x86
+
+# RUN: rm -rf %t; split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/main.o %t/main.s
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/lib.o %t/lib.s
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/lib2.o %t/lib2.s
+# RUN: llvm-ar crST %t/lib.a %t/lib.o %t/lib2.o
+# RUN: %lld %t/main.o %t/lib.a -o %t/out
+# RUN: llvm-nm %t/out | FileCheck %s
+
+# CHECK-NOT: T _bar
+# CHECK: T _foo
+
+## Test that every kind of eager load mechanism still works.
+# RUN: %lld %t/main.o %t/lib.a -all_load -o %t/all_load
+# RUN: llvm-nm %t/all_load | FileCheck %s --check-prefix FORCED-LOAD
+# RUN: %lld %t/main.o -force_load %t/lib.a -o %t/force_load
+# RUN: llvm-nm %t/force_load | FileCheck %s --check-prefix FORCED-LOAD
+# RUN: %lld %t/main.o %t/lib.a -ObjC -o %t/objc
+# RUN: llvm-nm %t/objc | FileCheck %s --check-prefix FORCED-LOAD
+
+# FORCED-LOAD: T _bar
+
+#--- lib.s
+.global _foo
+_foo:
+ ret
+
+#--- lib2.s
+.section __DATA,__objc_catlist
+.quad 0x1234
+
+.section __TEXT,__text
+.global _bar
+_bar:
+ ret
+
+#--- main.s
+.global _main
+_main:
+ call _foo
+ mov $0, %rax
+ ret
diff --git a/lld/test/MachO/invalid/archive-no-index.s b/lld/test/MachO/invalid/archive-no-index.s
deleted file mode 100644
index 9cda945652500..0000000000000
--- a/lld/test/MachO/invalid/archive-no-index.s
+++ /dev/null
@@ -1,32 +0,0 @@
-# REQUIRES: x86
-# RUN: rm -rf %t; split-file %s %t
-# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/2.s -o %t/2.o
-# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/3.s -o %t/3.o
-# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/4.s -o %t/4.o
-# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/main.o
-
-# RUN: llvm-ar rcS %t/test.a %t/2.o %t/3.o %t/4.o
-
-# RUN: not %lld %t/test.o %t/test.a -o /dev/null 2>&1 | FileCheck %s
-# CHECK: error: {{.*}}.a: archive has no index; run ranlib to add one
-
-#--- 2.s
-.globl _boo
-_boo:
- ret
-
-#--- 3.s
-.globl _bar
-_bar:
- ret
-
-#--- 4.s
-.globl _undefined, _unused
-_unused:
- ret
-
-#--- main.s
-.global _main
-_main:
- mov $0, %rax
- ret
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks!
A few questions below, but this looks good :)
lld/MachO/InputFiles.cpp
Outdated
continue; | ||
} | ||
// First check seen. | ||
// Then, write to and check seenLazy |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These 3 lines of comments just repeat the code, I'd drop them.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Old diff that got cleaned up, this was my todo list :D
// No index, treat each child as a lazy object file. | ||
Error e = Error::success(); | ||
for (const object::Archive::Child &c : file->children(e)) { | ||
// Check `seen` but don't insert so a future eager load can still happen. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is for when we first read a .a but it's then -all_load
/ -force_load
ed later, yes?
Do we parse all .o files a second time then? (In any case, this isn't something that we expect to happen in practice, yes?)
(ps: good tests for this)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I was going off the logic in Driver.cpp, specifically:
// Command-line loads take precedence. If file is previously loaded via
// command line, or is loaded via LC_LINKER_OPTION and being loaded via
// LC_LINKER_OPTION again, using the cached archive is enough.
so I guess it's your scenario but s/force_load/LC_LINKER_OPTION. I think we would actually drop the force load in that case. Is that a bug?
@@ -0,0 +1,39 @@ | |||
# REQUIRES: x86 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we get a bitcode test too, since this adds support for that as well as far as I can tell?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
# RUN: llvm-nm %t/all_load | FileCheck %s --check-prefix FORCED-LOAD | ||
# RUN: %lld %t/main.o -force_load %t/lib.a -o %t/force_load | ||
# RUN: llvm-nm %t/force_load | FileCheck %s --check-prefix FORCED-LOAD | ||
# RUN: %lld %t/main.o %t/lib.a -ObjC -o %t/objc |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Re -ObjC: How expensive would it be to look at objc symbols in all LazyObjects?
(But fine to punt on this for now: As you say, it's not clear if it's needed, and the current behavior is consistent with -start-lib / -end-lib.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Relatively cheap if we do it ObjFile::parseLazy
. The annoying part would be going back and turning every lazy we just created into a defined partway through the iteration. I'm still confused on if this can actually happen though.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm
lld/test/MachO/archive-no-index.ll
Outdated
; RUN: rm -rf %t; split-file %s %t | ||
|
||
; RUN: llvm-as %t/lib.ll -o %t/lib.o | ||
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/main.o %t/main.s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What's with the mixed comment characters in front of the RUN lines?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Copy and paste! Interestingly, lit doesn't appear to care
39ff9eb
to
192c54e
Compare
This is a ~port of https://reviews.llvm.org/D117284. Like in that change, archives without indices are treated as a collection of lazy object files (as in `--start-lib/--end-lib`) Porting the ELF follow-up to convert *all* archives to the lazy object code path (https://reviews.llvm.org/D119074) is a natural next step, but we would need to ensure the assertions about memory use hold for Mach-O. NB: without an index, we can't do the part of the `-ObjC` scan where we check for Objective-C symbols directly. We *can* still check for `__obcj` sections so I wonder how much of a problem this actually is, since I'm not sure how the "symbols but no sections" case can appear in the wild.
This is a ~port of https://reviews.llvm.org/D117284. Like in that change, archives without indices are treated as a collection of lazy object files (as in
--start-lib/--end-lib
)Porting the ELF follow-up to convert all archives to the lazy object code path (https://reviews.llvm.org/D119074) is a natural next step, but we would need to ensure the assertions about memory use hold for Mach-O.
NB: without an index, we can't do the part of the
-ObjC
scan where we check for Objective-C symbols directly. We can still check for__obcj
sections so I wonder how much of a problem this actually is, since I'm not sure how the "symbols but no sections" case can appear in the wild.