Skip to content

Commit f9d3e98

Browse files
authored
[lld-macho] Improve robustness of ObjC category merging (#112618)
This patch enhances the robustness of lld's Objective-C category merging. Currently, the category merger assumes it can fully parse and understand the format of all categories in the input, triggering an assert if any invalid category data is encountered. This will end up causing asserts in certain rare corner cases that are difficult to reproduce in small test cases. The proposed changes modify the behavior so that if invalid category data is detected, category merging is skipped for that specific class and all other categories sharing the same base class. This approach allows the linker to continue processing other categories without failing entirely due to a single problematic input. We also add a LIT test to where we corrupt category data and check that category merging for that class was skipped but the link was successful.
1 parent 5995e4b commit f9d3e98

File tree

2 files changed

+80
-32
lines changed

2 files changed

+80
-32
lines changed

lld/MachO/ObjC.cpp

Lines changed: 67 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -423,7 +423,7 @@ class ObjcCategoryMerger {
423423
private:
424424
DenseSet<const Symbol *> collectNlCategories();
425425
void collectAndValidateCategoriesData();
426-
void
426+
bool
427427
mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> &categories);
428428

429429
void eraseISec(ConcatInputSection *isec);
@@ -434,8 +434,8 @@ class ObjcCategoryMerger {
434434
catListToErasedOffsets);
435435
void collectSectionWriteInfoFromIsec(const InputSection *isec,
436436
InfoWriteSection &catWriteInfo);
437-
void collectCategoryWriterInfoFromCategory(const InfoInputCategory &catInfo);
438-
void parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
437+
bool collectCategoryWriterInfoFromCategory(const InfoInputCategory &catInfo);
438+
bool parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
439439
ClassExtensionInfo &extInfo);
440440

441441
void parseProtocolListInfo(const ConcatInputSection *isec, uint32_t secOffset,
@@ -446,7 +446,7 @@ class ObjcCategoryMerger {
446446
uint32_t secOffset,
447447
SourceLanguage sourceLang);
448448

449-
void parsePointerListInfo(const ConcatInputSection *isec, uint32_t secOffset,
449+
bool parsePointerListInfo(const ConcatInputSection *isec, uint32_t secOffset,
450450
PointerListInfo &ptrList);
451451

452452
void emitAndLinkPointerList(Defined *parentSym, uint32_t linkAtOffset,
@@ -474,7 +474,7 @@ class ObjcCategoryMerger {
474474
uint32_t offset);
475475
Defined *getClassRo(const Defined *classSym, bool getMetaRo);
476476
SourceLanguage getClassSymSourceLang(const Defined *classSym);
477-
void mergeCategoriesIntoBaseClass(const Defined *baseClass,
477+
bool mergeCategoriesIntoBaseClass(const Defined *baseClass,
478478
std::vector<InfoInputCategory> &categories);
479479
void eraseSymbolAtIsecOffset(ConcatInputSection *isec, uint32_t offset);
480480
void tryEraseDefinedAtIsecOffset(const ConcatInputSection *isec,
@@ -543,9 +543,9 @@ ObjcCategoryMerger::tryGetSymbolAtIsecOffset(const ConcatInputSection *isec,
543543
if (!reloc)
544544
return nullptr;
545545

546-
Symbol *sym = reloc->referent.get<Symbol *>();
546+
Symbol *sym = reloc->referent.dyn_cast<Symbol *>();
547547

548-
if (reloc->addend) {
548+
if (reloc->addend && sym) {
549549
assert(isa<Defined>(sym) && "Expected defined for non-zero addend");
550550
Defined *definedSym = cast<Defined>(sym);
551551
sym = tryFindDefinedOnIsec(definedSym->isec(),
@@ -618,7 +618,7 @@ void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset(
618618
}
619619
}
620620

621-
void ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
621+
bool ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
622622
const InfoInputCategory &catInfo) {
623623

624624
if (!infoCategoryWriter.catListInfo.valid)
@@ -631,7 +631,14 @@ void ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
631631
if (!infoCategoryWriter.catNameInfo.valid) {
632632
lld::macho::Defined *catNameSym =
633633
tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, catLayout.nameOffset);
634-
assert(catNameSym && "Category does not have a valid name Symbol");
634+
635+
if (!catNameSym) {
636+
// This is an unhandeled case where the category name is not a symbol but
637+
// instead points to an CStringInputSection (that doesn't have any symbol)
638+
// TODO: Find a small repro and either fix or add a test case for this
639+
// scenario
640+
return false;
641+
}
635642

636643
collectSectionWriteInfoFromIsec(catNameSym->isec(),
637644
infoCategoryWriter.catNameInfo);
@@ -651,6 +658,8 @@ void ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
651658
}
652659
}
653660
}
661+
662+
return true;
654663
}
655664

656665
// Parse a protocol list that might be linked to ConcatInputSection at a given
@@ -723,7 +732,7 @@ ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
723732
// Parse a pointer list that might be linked to ConcatInputSection at a given
724733
// offset. This can be used for instance methods, class methods, instance props
725734
// and class props since they have the same format.
726-
void ObjcCategoryMerger::parsePointerListInfo(const ConcatInputSection *isec,
735+
bool ObjcCategoryMerger::parsePointerListInfo(const ConcatInputSection *isec,
727736
uint32_t secOffset,
728737
PointerListInfo &ptrList) {
729738
assert(ptrList.pointersPerStruct == 2 || ptrList.pointersPerStruct == 3);
@@ -732,8 +741,9 @@ void ObjcCategoryMerger::parsePointerListInfo(const ConcatInputSection *isec,
732741
"Trying to read pointer list beyond section end");
733742

734743
const Reloc *reloc = isec->getRelocAt(secOffset);
744+
// Empty list is a valid case, return true.
735745
if (!reloc)
736-
return;
746+
return true;
737747

738748
auto *ptrListSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
739749
assert(ptrListSym && "Reloc does not have a valid Defined");
@@ -759,17 +769,24 @@ void ObjcCategoryMerger::parsePointerListInfo(const ConcatInputSection *isec,
759769
const Reloc *reloc = ptrListSym->isec()->getRelocAt(off);
760770
assert(reloc && "No reloc found at pointer list offset");
761771

762-
auto *listSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
763-
assert(listSym && "Reloc does not have a valid Defined");
772+
auto *listSym =
773+
dyn_cast_or_null<Defined>(reloc->referent.dyn_cast<Symbol *>());
774+
// Sometimes, the reloc points to a StringPiece (InputSection + addend)
775+
// instead of a symbol.
776+
// TODO: Skip these cases for now, but we should fix this.
777+
if (!listSym)
778+
return false;
764779

765780
ptrList.allPtrs.push_back(listSym);
766781
}
782+
783+
return true;
767784
}
768785

769786
// Here we parse all the information of an input category (catInfo) and
770787
// append the parsed info into the structure which will contain all the
771788
// information about how a class is extended (extInfo)
772-
void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
789+
bool ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
773790
ClassExtensionInfo &extInfo) {
774791
const Reloc *catNameReloc =
775792
catInfo.catBodyIsec->getRelocAt(catLayout.nameOffset);
@@ -808,20 +825,27 @@ void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
808825
"class");
809826
}
810827

811-
parsePointerListInfo(catInfo.catBodyIsec, catLayout.instanceMethodsOffset,
812-
extInfo.instanceMethods);
828+
if (!parsePointerListInfo(catInfo.catBodyIsec,
829+
catLayout.instanceMethodsOffset,
830+
extInfo.instanceMethods))
831+
return false;
813832

814-
parsePointerListInfo(catInfo.catBodyIsec, catLayout.classMethodsOffset,
815-
extInfo.classMethods);
833+
if (!parsePointerListInfo(catInfo.catBodyIsec, catLayout.classMethodsOffset,
834+
extInfo.classMethods))
835+
return false;
816836

817837
parseProtocolListInfo(catInfo.catBodyIsec, catLayout.protocolsOffset,
818838
extInfo.protocols, catInfo.sourceLanguage);
819839

820-
parsePointerListInfo(catInfo.catBodyIsec, catLayout.instancePropsOffset,
821-
extInfo.instanceProps);
840+
if (!parsePointerListInfo(catInfo.catBodyIsec, catLayout.instancePropsOffset,
841+
extInfo.instanceProps))
842+
return false;
822843

823-
parsePointerListInfo(catInfo.catBodyIsec, catLayout.classPropsOffset,
824-
extInfo.classProps);
844+
if (!parsePointerListInfo(catInfo.catBodyIsec, catLayout.classPropsOffset,
845+
extInfo.classProps))
846+
return false;
847+
848+
return true;
825849
}
826850

827851
// Generate a protocol list (including header) and link it into the parent at
@@ -1090,14 +1114,15 @@ Defined *ObjcCategoryMerger::emitCategory(const ClassExtensionInfo &extInfo) {
10901114

10911115
// This method merges all the categories (sharing a base class) into a single
10921116
// category.
1093-
void ObjcCategoryMerger::mergeCategoriesIntoSingleCategory(
1117+
bool ObjcCategoryMerger::mergeCategoriesIntoSingleCategory(
10941118
std::vector<InfoInputCategory> &categories) {
10951119
assert(categories.size() > 1 && "Expected at least 2 categories");
10961120

10971121
ClassExtensionInfo extInfo(catLayout);
10981122

10991123
for (auto &catInfo : categories)
1100-
parseCatInfoToExtInfo(catInfo, extInfo);
1124+
if (!parseCatInfoToExtInfo(catInfo, extInfo))
1125+
return false;
11011126

11021127
Defined *newCatDef = emitCategory(extInfo);
11031128
assert(newCatDef && "Failed to create a new category");
@@ -1107,6 +1132,8 @@ void ObjcCategoryMerger::mergeCategoriesIntoSingleCategory(
11071132

11081133
for (auto &catInfo : categories)
11091134
catInfo.wasMerged = true;
1135+
1136+
return true;
11101137
}
11111138

11121139
void ObjcCategoryMerger::createSymbolReference(Defined *refFrom,
@@ -1179,9 +1206,10 @@ void ObjcCategoryMerger::collectAndValidateCategoriesData() {
11791206
tryGetSymbolAtIsecOffset(catBodyIsec, catLayout.klassOffset);
11801207
assert(classSym && "Category does not have a valid base class");
11811208

1182-
categoryMap[classSym].push_back(catInputInfo);
1209+
if (!collectCategoryWriterInfoFromCategory(catInputInfo))
1210+
continue;
11831211

1184-
collectCategoryWriterInfoFromCategory(catInputInfo);
1212+
categoryMap[classSym].push_back(catInputInfo);
11851213
}
11861214
}
11871215
}
@@ -1309,13 +1337,17 @@ void ObjcCategoryMerger::doMerge() {
13091337
collectAndValidateCategoriesData();
13101338

13111339
for (auto &[baseClass, catInfos] : categoryMap) {
1340+
bool merged = false;
13121341
if (auto *baseClassDef = dyn_cast<Defined>(baseClass)) {
13131342
// Merge all categories into the base class
1314-
mergeCategoriesIntoBaseClass(baseClassDef, catInfos);
1343+
merged = mergeCategoriesIntoBaseClass(baseClassDef, catInfos);
13151344
} else if (catInfos.size() > 1) {
13161345
// Merge all categories into a new, single category
1317-
mergeCategoriesIntoSingleCategory(catInfos);
1346+
merged = mergeCategoriesIntoSingleCategory(catInfos);
13181347
}
1348+
if (!merged)
1349+
warn("ObjC category merging skipped for class symbol' " +
1350+
baseClass->getName().str() + "'\n");
13191351
}
13201352

13211353
// Erase all categories that were merged
@@ -1374,7 +1406,8 @@ ObjcCategoryMerger::getClassSymSourceLang(const Defined *classSym) {
13741406

13751407
llvm_unreachable("Unexpected class symbol name during category merging");
13761408
}
1377-
void ObjcCategoryMerger::mergeCategoriesIntoBaseClass(
1409+
1410+
bool ObjcCategoryMerger::mergeCategoriesIntoBaseClass(
13781411
const Defined *baseClass, std::vector<InfoInputCategory> &categories) {
13791412
assert(categories.size() >= 1 && "Expected at least one category to merge");
13801413

@@ -1383,9 +1416,9 @@ void ObjcCategoryMerger::mergeCategoriesIntoBaseClass(
13831416
extInfo.baseClass = baseClass;
13841417
extInfo.baseClassSourceLanguage = getClassSymSourceLang(baseClass);
13851418

1386-
for (auto &catInfo : categories) {
1387-
parseCatInfoToExtInfo(catInfo, extInfo);
1388-
}
1419+
for (auto &catInfo : categories)
1420+
if (!parseCatInfoToExtInfo(catInfo, extInfo))
1421+
return false;
13891422

13901423
// Get metadata for the base class
13911424
Defined *metaRo = getClassRo(baseClass, /*getMetaRo=*/true);
@@ -1452,6 +1485,8 @@ void ObjcCategoryMerger::mergeCategoriesIntoBaseClass(
14521485
// Mark all the categories as merged - this will be used to erase them later
14531486
for (auto &catInfo : categories)
14541487
catInfo.wasMerged = true;
1488+
1489+
return true;
14551490
}
14561491

14571492
// Erase the symbol at a given offset in an InputSection

lld/test/MachO/objc-category-merging-minimal.s

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,19 @@
2828
# RUN: %lld -no_objc_relative_method_lists -arch arm64 -dylib -o merge_base_class_swift_minimal_yes_merge.dylib -objc_category_merging MyBaseClassSwiftExtension.o merge_base_class_minimal.o
2929
# RUN: llvm-objdump --objc-meta-data --macho merge_base_class_swift_minimal_yes_merge.dylib | FileCheck %s --check-prefixes=YES_MERGE_INTO_BASE_SWIFT
3030

31+
############ Test merging skipped due to invalid category name ############
32+
# Modify __OBJC_$_CATEGORY_MyBaseClass_$_Category01's name to point to L_OBJC_IMAGE_INFO+3
33+
# RUN: sed -E '/^__OBJC_\$_CATEGORY_MyBaseClass_\$_Category01:/ { n; s/^[ \t]*\.quad[ \t]+l_OBJC_CLASS_NAME_$/\t.quad\tL_OBJC_IMAGE_INFO+3/ }' merge_cat_minimal.s > merge_cat_minimal_bad_name.s
34+
35+
# Assemble the modified source
36+
# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o merge_cat_minimal_bad_name.o merge_cat_minimal_bad_name.s
37+
38+
# Run lld and check for the specific warning
39+
# RUN: %no-fatal-warnings-lld -arch arm64 -dylib -objc_category_merging -o merge_cat_minimal_merge.dylib a64_fakedylib.dylib merge_cat_minimal_bad_name.o 2>&1 | FileCheck %s --check-prefix=MERGE_WARNING
40+
41+
# Check that lld emitted the warning about skipping category merging
42+
MERGE_WARNING: warning: ObjC category merging skipped for class symbol' _OBJC_CLASS_$_MyBaseClass'
43+
3144
#### Check merge categories enabled ###
3245
# Check that the original categories are not there
3346
MERGE_CATS-NOT: __OBJC_$_CATEGORY_MyBaseClass_$_Category01

0 commit comments

Comments
 (0)