Skip to content

Commit c675a9b

Browse files
authored
Object: Don't error out on malformed bitcode files.
An error reading a bitcode file most likely indicates that the file was created by a compiler from the future. Normally we don't try to implement forwards compatibility for bitcode files, but when creating an archive we can implement best-effort forwards compatibility by treating the file as a blob and not creating symbol index entries for it. lld and mold ignore the archive symbol index, so provided that you use one of these linkers, LTO will work as long as lld or the gold plugin is newer than the compiler. We only ignore errors if the archive format is one that is supported by a linker that is known to ignore the index; otherwise there's no chance of this working, so we may as well error out. We print a warning on read failure so that users of linkers that rely on the symbol index can diagnose the issue. This is the same behavior as GNU ar when the linker plugin returns an error when reading the input file. If the bitcode file is actually malformed, it will be diagnosed at link time. Reviewers: MaskRay, dwblaikie, jh7370. Reviewed By: MaskRay, dwblaikie, jh7370. Pull Request: #96848.
1 parent adacb50 commit c675a9b

File tree

3 files changed

+96
-23
lines changed

3 files changed

+96
-23
lines changed

llvm/include/llvm/Object/ArchiveWriter.h

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -48,25 +48,30 @@ enum class SymtabWritingMode {
4848
BigArchive64 // Only write the 64-bit symbol table.
4949
};
5050

51+
void warnToStderr(Error Err);
52+
5153
// Write an archive directly to an output stream.
5254
Error writeArchiveToStream(raw_ostream &Out,
5355
ArrayRef<NewArchiveMember> NewMembers,
5456
SymtabWritingMode WriteSymtab,
5557
object::Archive::Kind Kind, bool Deterministic,
56-
bool Thin, std::optional<bool> IsEC = std::nullopt);
58+
bool Thin, std::optional<bool> IsEC = std::nullopt,
59+
function_ref<void(Error)> Warn = warnToStderr);
5760

5861
Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
5962
SymtabWritingMode WriteSymtab, object::Archive::Kind Kind,
6063
bool Deterministic, bool Thin,
6164
std::unique_ptr<MemoryBuffer> OldArchiveBuf = nullptr,
62-
std::optional<bool> IsEC = std::nullopt);
65+
std::optional<bool> IsEC = std::nullopt,
66+
function_ref<void(Error)> Warn = warnToStderr);
6367

6468
// writeArchiveToBuffer is similar to writeArchive but returns the Archive in a
6569
// buffer instead of writing it out to a file.
6670
Expected<std::unique_ptr<MemoryBuffer>>
6771
writeArchiveToBuffer(ArrayRef<NewArchiveMember> NewMembers,
6872
SymtabWritingMode WriteSymtab, object::Archive::Kind Kind,
69-
bool Deterministic, bool Thin);
73+
bool Deterministic, bool Thin,
74+
function_ref<void(Error)> Warn = warnToStderr);
7075
}
7176

7277
#endif

llvm/lib/Object/ArchiveWriter.cpp

Lines changed: 51 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -482,16 +482,45 @@ static uint64_t computeHeadersSize(object::Archive::Kind Kind,
482482
}
483483

484484
static Expected<std::unique_ptr<SymbolicFile>>
485-
getSymbolicFile(MemoryBufferRef Buf, LLVMContext &Context) {
485+
getSymbolicFile(MemoryBufferRef Buf, LLVMContext &Context,
486+
object::Archive::Kind Kind, function_ref<void(Error)> Warn) {
486487
const file_magic Type = identify_magic(Buf.getBuffer());
487488
// Don't attempt to read non-symbolic file types.
488489
if (!object::SymbolicFile::isSymbolicFile(Type, &Context))
489490
return nullptr;
490491
if (Type == file_magic::bitcode) {
491492
auto ObjOrErr = object::SymbolicFile::createSymbolicFile(
492493
Buf, file_magic::bitcode, &Context);
493-
if (!ObjOrErr)
494-
return ObjOrErr.takeError();
494+
// An error reading a bitcode file most likely indicates that the file
495+
// was created by a compiler from the future. Normally we don't try to
496+
// implement forwards compatibility for bitcode files, but when creating an
497+
// archive we can implement best-effort forwards compatibility by treating
498+
// the file as a blob and not creating symbol index entries for it. lld and
499+
// mold ignore the archive symbol index, so provided that you use one of
500+
// these linkers, LTO will work as long as lld or the gold plugin is newer
501+
// than the compiler. We only ignore errors if the archive format is one
502+
// that is supported by a linker that is known to ignore the index,
503+
// otherwise there's no chance of this working so we may as well error out.
504+
// We print a warning on read failure so that users of linkers that rely on
505+
// the symbol index can diagnose the issue.
506+
//
507+
// This is the same behavior as GNU ar when the linker plugin returns an
508+
// error when reading the input file. If the bitcode file is actually
509+
// malformed, it will be diagnosed at link time.
510+
if (!ObjOrErr) {
511+
switch (Kind) {
512+
case object::Archive::K_BSD:
513+
case object::Archive::K_GNU:
514+
case object::Archive::K_GNU64:
515+
Warn(ObjOrErr.takeError());
516+
return nullptr;
517+
case object::Archive::K_AIXBIG:
518+
case object::Archive::K_COFF:
519+
case object::Archive::K_DARWIN:
520+
case object::Archive::K_DARWIN64:
521+
return ObjOrErr.takeError();
522+
}
523+
}
495524
return std::move(*ObjOrErr);
496525
} else {
497526
auto ObjOrErr = object::SymbolicFile::createSymbolicFile(Buf);
@@ -751,7 +780,7 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
751780
object::Archive::Kind Kind, bool Thin, bool Deterministic,
752781
SymtabWritingMode NeedSymbols, SymMap *SymMap,
753782
LLVMContext &Context, ArrayRef<NewArchiveMember> NewMembers,
754-
std::optional<bool> IsEC) {
783+
std::optional<bool> IsEC, function_ref<void(Error)> Warn) {
755784
static char PaddingData[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'};
756785
uint64_t MemHeadPadSize = 0;
757786
uint64_t Pos =
@@ -819,8 +848,10 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
819848

820849
if (NeedSymbols != SymtabWritingMode::NoSymtab || isAIXBigArchive(Kind)) {
821850
for (const NewArchiveMember &M : NewMembers) {
822-
Expected<std::unique_ptr<SymbolicFile>> SymFileOrErr =
823-
getSymbolicFile(M.Buf->getMemBufferRef(), Context);
851+
Expected<std::unique_ptr<SymbolicFile>> SymFileOrErr = getSymbolicFile(
852+
M.Buf->getMemBufferRef(), Context, Kind, [&](Error Err) {
853+
Warn(createFileError(M.MemberName, std::move(Err)));
854+
});
824855
if (!SymFileOrErr)
825856
return createFileError(M.MemberName, SymFileOrErr.takeError());
826857
SymFiles.push_back(std::move(*SymFileOrErr));
@@ -1001,7 +1032,8 @@ Error writeArchiveToStream(raw_ostream &Out,
10011032
ArrayRef<NewArchiveMember> NewMembers,
10021033
SymtabWritingMode WriteSymtab,
10031034
object::Archive::Kind Kind, bool Deterministic,
1004-
bool Thin, std::optional<bool> IsEC) {
1035+
bool Thin, std::optional<bool> IsEC,
1036+
function_ref<void(Error)> Warn) {
10051037
assert((!Thin || !isBSDLike(Kind)) && "Only the gnu format has a thin mode");
10061038

10071039
SmallString<0> SymNamesBuf;
@@ -1023,7 +1055,7 @@ Error writeArchiveToStream(raw_ostream &Out,
10231055

10241056
Expected<std::vector<MemberData>> DataOrErr = computeMemberData(
10251057
StringTable, SymNames, Kind, Thin, Deterministic, WriteSymtab,
1026-
isCOFFArchive(Kind) ? &SymMap : nullptr, Context, NewMembers, IsEC);
1058+
isCOFFArchive(Kind) ? &SymMap : nullptr, Context, NewMembers, IsEC, Warn);
10271059
if (Error E = DataOrErr.takeError())
10281060
return E;
10291061
std::vector<MemberData> &Data = *DataOrErr;
@@ -1266,19 +1298,23 @@ Error writeArchiveToStream(raw_ostream &Out,
12661298
return Error::success();
12671299
}
12681300

1301+
void warnToStderr(Error Err) {
1302+
llvm::logAllUnhandledErrors(std::move(Err), llvm::errs(), "warning: ");
1303+
}
1304+
12691305
Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
12701306
SymtabWritingMode WriteSymtab, object::Archive::Kind Kind,
12711307
bool Deterministic, bool Thin,
12721308
std::unique_ptr<MemoryBuffer> OldArchiveBuf,
1273-
std::optional<bool> IsEC) {
1309+
std::optional<bool> IsEC, function_ref<void(Error)> Warn) {
12741310
Expected<sys::fs::TempFile> Temp =
12751311
sys::fs::TempFile::create(ArcName + ".temp-archive-%%%%%%%.a");
12761312
if (!Temp)
12771313
return Temp.takeError();
12781314
raw_fd_ostream Out(Temp->FD, false);
12791315

12801316
if (Error E = writeArchiveToStream(Out, NewMembers, WriteSymtab, Kind,
1281-
Deterministic, Thin, IsEC)) {
1317+
Deterministic, Thin, IsEC, Warn)) {
12821318
if (Error DiscardError = Temp->discard())
12831319
return joinErrors(std::move(E), std::move(DiscardError));
12841320
return E;
@@ -1302,12 +1338,14 @@ Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
13021338
Expected<std::unique_ptr<MemoryBuffer>>
13031339
writeArchiveToBuffer(ArrayRef<NewArchiveMember> NewMembers,
13041340
SymtabWritingMode WriteSymtab, object::Archive::Kind Kind,
1305-
bool Deterministic, bool Thin) {
1341+
bool Deterministic, bool Thin,
1342+
function_ref<void(Error)> Warn) {
13061343
SmallVector<char, 0> ArchiveBufferVector;
13071344
raw_svector_ostream ArchiveStream(ArchiveBufferVector);
13081345

1309-
if (Error E = writeArchiveToStream(ArchiveStream, NewMembers, WriteSymtab,
1310-
Kind, Deterministic, Thin, std::nullopt))
1346+
if (Error E =
1347+
writeArchiveToStream(ArchiveStream, NewMembers, WriteSymtab, Kind,
1348+
Deterministic, Thin, std::nullopt, Warn))
13111349
return std::move(E);
13121350

13131351
return std::make_unique<SmallVectorMemoryBuffer>(

llvm/test/Object/archive-malformed-object.test

Lines changed: 37 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,45 +1,75 @@
11
## Show that the archive library emits error messages when adding malformed
2-
## objects.
2+
## object files and skips symbol tables for "malformed" bitcode files, which
3+
## are assumed to be bitcode files generated by compilers from the future.
34

45
# RUN: rm -rf %t.dir
56
# RUN: split-file %s %t.dir
67
# RUN: cd %t.dir
78

8-
## Malformed bitcode object is the first file member of archive if the symbol table is required.
9+
## Create a malformed bitcode object.
910
# RUN: llvm-as input.ll -o input.bc
1011
# RUN: cp input.bc good.bc
1112
# RUN: %python -c "with open('input.bc', 'a') as f: f.truncate(10)"
12-
# RUN: not llvm-ar rc bad.a input.bc 2>&1 | FileCheck %s --check-prefix=ERR1
1313

14-
## Malformed bitcode object is the last file member of archive if the symbol table is required.
14+
## Malformed bitcode objects either warn or error depending on the archive format
15+
## (see switch in getSymbolicFile). If the archive was created with a warning,
16+
## we want to check that the archive map is empty. llvm-nm will fail when it
17+
## tries to read the malformed bitcode file, but it's supposed to print the
18+
## archive map first, which in this case it won't because there won't be one.
1519
# RUN: rm -rf bad.a
16-
# RUN: not llvm-ar rc bad.a good.bc input.bc 2>&1 | FileCheck %s --check-prefix=ERR1
20+
# RUN: llvm-ar --format=bsd rc bad.a input.bc 2>&1 | FileCheck %s --check-prefix=WARN1
21+
# RUN: not llvm-nm --print-armap bad.a | count 0
22+
# RUN: rm -rf bad.a
23+
# RUN: llvm-ar --format=gnu rc bad.a input.bc 2>&1 | FileCheck %s --check-prefix=WARN1
24+
# RUN: not llvm-nm --print-armap bad.a | count 0
25+
# RUN: rm -rf bad.a
26+
# RUN: not llvm-ar --format=bigarchive rc bad.a input.bc 2>&1 | FileCheck %s --check-prefix=ERR1
27+
# RUN: rm -rf bad.a
28+
# RUN: not llvm-ar --format=coff rc bad.a input.bc 2>&1 | FileCheck %s --check-prefix=ERR1
29+
# RUN: rm -rf bad.a
30+
# RUN: not llvm-ar --format=darwin rc bad.a input.bc 2>&1 | FileCheck %s --check-prefix=ERR1
31+
32+
## Malformed bitcode object is the last file member of archive and
33+
## the symbol table is required. In this case we check that the
34+
## symbol table contains entries for the good object only.
35+
# RUN: rm -rf bad.a
36+
# RUN: llvm-ar rc bad.a good.bc input.bc 2>&1 | FileCheck %s --check-prefix=WARN1
37+
# RUN: not llvm-nm --print-armap bad.a | FileCheck %s --check-prefix=ARMAP
1738

1839
## Malformed bitcode object when the symbol table is not required, using the big archive format.
40+
## For big archives we print an error instead of a warning because the AIX linker
41+
## presumably requires the index.
1942
# RUN: rm -rf bad.a
2043
# RUN: not llvm-ar --format=bigarchive rcS bad.a input.bc 2>&1 | FileCheck %s --check-prefix=ERR1
2144
# RUN: rm -rf bad.a
2245
# RUN: not llvm-ar --format=bigarchive rcS bad.a good.bc input.bc 2>&1 | FileCheck %s --check-prefix=ERR1
2346

2447
# ERR1: error: bad.a: 'input.bc': Invalid bitcode signature
48+
# WARN1: warning: 'input.bc': Invalid bitcode signature
2549

2650
## Non-bitcode malformed file.
2751
# RUN: yaml2obj input.yaml -o input.o
2852
# RUN: not llvm-ar rc bad.a input.o 2>&1 | FileCheck %s --check-prefix=ERR2
2953

3054
# ERR2: error: bad.a: 'input.o': section header table goes past the end of the file: e_shoff = 0x9999
3155

32-
## Don't emit an error if the symbol table is not required for formats other than the big archive format.
33-
# RUN: llvm-ar --format=gnu rcS good.a input.o input.bc
56+
## Don't emit an error or warning if the symbol table is not required for formats other than the big archive format.
57+
# RUN: llvm-ar --format=gnu rcS good.a input.o input.bc 2>&1 | count 0
3458
# RUN: llvm-ar t good.a | FileCheck %s --check-prefix=CONTENTS
3559

3660
# CONTENTS: input.o
3761
# CONTENTS-NEXT: input.bc
3862

63+
# ARMAP: Archive map
64+
# ARMAP-NEXT: foo in good.bc
65+
# ARMAP-EMPTY:
66+
3967
#--- input.ll
4068
target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
4169
target triple = "x86_64-pc-linux"
4270

71+
@foo = global i32 1
72+
4373
#--- input.yaml
4474
--- !ELF
4575
FileHeader:

0 commit comments

Comments
 (0)