llvm · pcc · Jul 18, 2024 · Jun 27, 2024 · Jul 9, 2024 · Jul 9, 2024
diff --git a/llvm/lib/Object/ArchiveWriter.cpp b/llvm/lib/Object/ArchiveWriter.cpp
@@ -482,16 +482,47 @@ static uint64_t computeHeadersSize(object::Archive::Kind Kind,
 }
 
 static Expected<std::unique_ptr<SymbolicFile>>
-getSymbolicFile(MemoryBufferRef Buf, LLVMContext &Context) {
+getSymbolicFile(MemoryBufferRef Buf, LLVMContext &Context,
+                object::Archive::Kind Kind) {
   const file_magic Type = identify_magic(Buf.getBuffer());
   // Don't attempt to read non-symbolic file types.
   if (!object::SymbolicFile::isSymbolicFile(Type, &Context))
     return nullptr;
   if (Type == file_magic::bitcode) {
     auto ObjOrErr = object::SymbolicFile::createSymbolicFile(
         Buf, file_magic::bitcode, &Context);
-    if (!ObjOrErr)
-      return ObjOrErr.takeError();
+    // An error reading a bitcode file most likely indicates that the file
+    // was created by a compiler from the future. Normally we don't try to
+    // implement forwards compatibility for bitcode files, but when creating an
+    // archive we can implement best-effort forwards compatibility by treating
+    // the file as a blob and not creating symbol index entries for it. lld and
+    // mold ignore the archive symbol index, so provided that you use one of
+    // these linkers, LTO will work as long as lld or the gold plugin is newer
+    // than the compiler. We only ignore errors if the archive format is one
+    // that is supported by a linker that is known to ignore the index;
+    // otherwise there's no chance of this working, so we may as well error out.
+    // We print a warning on read failure so that users of linkers that rely on
+    // the symbol index can diagnose the issue.
+    //
+    // This is the same behavior as GNU ar when the linker plugin returns an
+    // error when reading the input file. If the bitcode file is actually
+    // malformed, it will be diagnosed at link time.
+    if (!ObjOrErr) {
+      switch (Kind) {
+      case object::Archive::K_BSD:
+      case object::Archive::K_GNU:
+      case object::Archive::K_GNU64:
+        llvm::logAllUnhandledErrors(ObjOrErr.takeError(), llvm::errs(),
+                                    "warning: " + Buf.getBufferIdentifier() +
+                                        ": ");
+        return nullptr;
+      case object::Archive::K_AIXBIG:
+      case object::Archive::K_COFF:
+      case object::Archive::K_DARWIN:
+      case object::Archive::K_DARWIN64:
+        return ObjOrErr.takeError();
+      }
+    }
     return std::move(*ObjOrErr);
   } else {
     auto ObjOrErr = object::SymbolicFile::createSymbolicFile(Buf);
@@ -820,7 +851,7 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
   if (NeedSymbols != SymtabWritingMode::NoSymtab || isAIXBigArchive(Kind)) {
     for (const NewArchiveMember &M : NewMembers) {
       Expected<std::unique_ptr<SymbolicFile>> SymFileOrErr =
-          getSymbolicFile(M.Buf->getMemBufferRef(), Context);
+          getSymbolicFile(M.Buf->getMemBufferRef(), Context, Kind);
       if (!SymFileOrErr)
         return createFileError(M.MemberName, SymFileOrErr.takeError());
       SymFiles.push_back(std::move(*SymFileOrErr));

diff --git a/llvm/test/Object/archive-malformed-object.test b/llvm/test/Object/archive-malformed-object.test
@@ -1,5 +1,6 @@
 ## Show that the archive library emits error messages when adding malformed
-## objects.
+## object files and skips symbol tables for "malformed" bitcode files, which
+## are assumed to be bitcode files generated by compilers from the future.
 
 # RUN: rm -rf %t.dir
 # RUN: split-file %s %t.dir
@@ -9,37 +10,52 @@
 # RUN: llvm-as input.ll -o input.bc
 # RUN: cp input.bc good.bc
 # RUN: %python -c "with open('input.bc', 'a') as f: f.truncate(10)"
-# RUN: not llvm-ar rc bad.a input.bc 2>&1 | FileCheck %s --check-prefix=ERR1
+# RUN: llvm-ar rc bad.a input.bc 2>&1 | FileCheck %s --check-prefix=WARN1
+
+## llvm-nm will fail when it tries to read the malformed bitcode file, but
+## it's supposed to print the archive map first, which in this case it
+## won't because there won't be one.
+# RUN: not llvm-nm --print-armap bad.a | count 0
 
 ## Malformed bitcode object is the last file member of archive if the symbol table is required.
 # RUN: rm -rf bad.a
-# RUN: not llvm-ar rc bad.a good.bc input.bc 2>&1 | FileCheck %s --check-prefix=ERR1
+# RUN: llvm-ar rc bad.a good.bc input.bc 2>&1 | FileCheck %s --check-prefix=WARN1
+# RUN: not llvm-nm --print-armap bad.a | FileCheck %s --check-prefix=ARMAP
 
 ## Malformed bitcode object if the symbol table is not required for big archive.
+## For big archives we print an error instead of a warning because the AIX linker
+## presumably requires the index.
 # RUN: rm -rf bad.a
 # RUN: not llvm-ar --format=bigarchive rcS bad.a input.bc 2>&1 | FileCheck %s --check-prefix=ERR1
 # RUN: rm -rf bad.a
 # RUN: not llvm-ar --format=bigarchive rcS bad.a good.bc input.bc 2>&1 | FileCheck %s --check-prefix=ERR1
 
 # ERR1: error: bad.a: 'input.bc': Invalid bitcode signature
+# WARN1: warning: input.bc: Invalid bitcode signature
 
 ## Non-bitcode malformed file.
 # RUN: yaml2obj input.yaml -o input.o
 # RUN: not llvm-ar rc bad.a input.o 2>&1 | FileCheck %s --check-prefix=ERR2
 
 # ERR2: error: bad.a: 'input.o': section header table goes past the end of the file: e_shoff = 0x9999
 
-## Don't emit an error if the symbol table is not required for formats other than the big archive format.
-# RUN: llvm-ar --format=gnu rcS good.a input.o input.bc
+## Don't emit an error or warning if the symbol table is not required for formats other than the big archive format.
+# RUN: llvm-ar --format=gnu rcS good.a input.o input.bc 2>&1 | count 0
 # RUN: llvm-ar t good.a | FileCheck %s --check-prefix=CONTENTS
 
 # CONTENTS:      input.o
 # CONTENTS-NEXT: input.bc
 
+# ARMAP: Archive map
+# ARMAP-NEXT: foo in good.bc
+# ARMAP-EMPTY:
+
 #--- input.ll
 target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-pc-linux"
 
+@foo = global i32 1
+
 #--- input.yaml
 --- !ELF
 FileHeader: