-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[lld-macho][NFC] Track category merger input data source language for better verification #95473
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
3d0a559
to
4b020cc
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
4b020cc
to
063d302
Compare
063d302
to
64fbb87
Compare
@llvm/pr-subscribers-lld Author: None (alx32) ChangesThis change adds tracking for the source language of the various input structs used by the category merger. Identification is based on expected symbol names. It also adds checks to ensure we're dealing with the expected data in known scenarios. Full diff: https://github.com/llvm/llvm-project/pull/95473.diff 2 Files Affected:
diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index 0105aa547e173..a277c18c7b29f 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -349,11 +349,15 @@ void objc::checkCategories() {
namespace {
class ObjcCategoryMerger {
+ // In which language was a particular construct originally defined
+ enum SourceLanguage { SourceObjC, SourceSwift, SourceUnknown };
+
// Information about an input category
struct InfoInputCategory {
ConcatInputSection *catListIsec;
ConcatInputSection *catBodyIsec;
uint32_t offCatListIsec = 0;
+ SourceLanguage sourceLanguage = SourceUnknown;
bool wasMerged = false;
};
@@ -413,7 +417,9 @@ class ObjcCategoryMerger {
// Merged names of containers. Ex: base|firstCategory|secondCategory|...
std::string mergedContainerName;
std::string baseClassName;
- Symbol *baseClass = nullptr;
+ const Symbol *baseClass = nullptr;
+ SourceLanguage baseClassSourceLanguage = SourceUnknown;
+
CategoryLayout &catLayout;
// In case we generate new data, mark the new data as belonging to this file
@@ -456,10 +462,12 @@ class ObjcCategoryMerger {
ClassExtensionInfo &extInfo);
void parseProtocolListInfo(const ConcatInputSection *isec, uint32_t secOffset,
- PointerListInfo &ptrList);
+ PointerListInfo &ptrList,
+ SourceLanguage sourceLang);
PointerListInfo parseProtocolListInfo(const ConcatInputSection *isec,
- uint32_t secOffset);
+ uint32_t secOffset,
+ SourceLanguage sourceLang);
void parsePointerListInfo(const ConcatInputSection *isec, uint32_t secOffset,
PointerListInfo &ptrList);
@@ -653,9 +661,9 @@ void ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
// Parse a protocol list that might be linked to ConcatInputSection at a given
// offset. The format of the protocol list is different than other lists (prop
// lists, method lists) so we need to parse it differently
-void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
- uint32_t secOffset,
- PointerListInfo &ptrList) {
+void ObjcCategoryMerger::parseProtocolListInfo(
+ const ConcatInputSection *isec, uint32_t secOffset,
+ PointerListInfo &ptrList, [[maybe_unused]] SourceLanguage sourceLang) {
assert((isec && (secOffset + target->wordSize <= isec->data.size())) &&
"Tried to read pointer list beyond protocol section end");
@@ -684,8 +692,10 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
[[maybe_unused]] uint32_t expectedListSizeSwift =
expectedListSize - target->wordSize;
- assert((expectedListSize == ptrListSym->isec()->data.size() ||
- expectedListSizeSwift == ptrListSym->isec()->data.size()) &&
+ assert(((expectedListSize == ptrListSym->isec()->data.size() &&
+ sourceLang == SourceObjC) ||
+ (expectedListSizeSwift == ptrListSym->isec()->data.size() &&
+ sourceLang == SourceSwift)) &&
"Protocol list does not match expected size");
uint32_t off = protocolListHeaderLayout.totalSize;
@@ -708,9 +718,10 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
// Parse a protocol list and return the PointerListInfo for it
ObjcCategoryMerger::PointerListInfo
ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
- uint32_t secOffset) {
+ uint32_t secOffset,
+ SourceLanguage sourceLang) {
PointerListInfo ptrList;
- parseProtocolListInfo(isec, secOffset, ptrList);
+ parseProtocolListInfo(isec, secOffset, ptrList, sourceLang);
return ptrList;
}
@@ -772,10 +783,10 @@ void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
assert(catNameReloc && "Category does not have a reloc at 'nameOffset'");
// is this the first category we are parsing?
- if (extInfo.mergedContainerName.empty())
+ if (extInfo.mergedContainerName.empty()) {
extInfo.objFileForMergeData =
dyn_cast_or_null<ObjFile>(catInfo.catBodyIsec->getFile());
- else
+ } else
extInfo.mergedContainerName += "|";
assert(extInfo.objFileForMergeData &&
@@ -809,7 +820,7 @@ void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
extInfo.classMethods);
parseProtocolListInfo(catInfo.catBodyIsec, catLayout.protocolsOffset,
- extInfo.protocols);
+ extInfo.protocols, catInfo.sourceLanguage);
parsePointerListInfo(catInfo.catBodyIsec, catLayout.instancePropsOffset,
extInfo.instanceProps);
@@ -1151,14 +1162,19 @@ void ObjcCategoryMerger::collectAndValidateCategoriesData() {
if (nlCategories.count(categorySym))
continue;
- assert(categorySym->getName().starts_with(objc::symbol_names::category) ||
- categorySym->getName().starts_with(
- objc::symbol_names::swift_objc_category));
-
auto *catBodyIsec = dyn_cast<ConcatInputSection>(categorySym->isec());
assert(catBodyIsec &&
"Category data section is not an ConcatInputSection");
+ InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off};
+ if (categorySym->getName().starts_with(objc::symbol_names::category))
+ catInputInfo.sourceLanguage = SourceLanguage::SourceObjC;
+ else if (categorySym->getName().starts_with(
+ objc::symbol_names::swift_objc_category))
+ catInputInfo.sourceLanguage = SourceLanguage::SourceSwift;
+ else
+ llvm_unreachable("Unexpected category symbol name");
+
// Check that the category has a reloc at 'klassOffset' (which is
// a pointer to the class symbol)
@@ -1166,7 +1182,6 @@ void ObjcCategoryMerger::collectAndValidateCategoriesData() {
tryGetSymbolAtIsecOffset(catBodyIsec, catLayout.klassOffset);
assert(classSym && "Category does not have a valid base class");
- InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off};
categoryMap[classSym].push_back(catInputInfo);
collectCategoryWriterInfoFromCategory(catInputInfo);
@@ -1366,6 +1381,16 @@ void ObjcCategoryMerger::mergeCategoriesIntoBaseClass(
// Collect all the info from the categories
ClassExtensionInfo extInfo(catLayout);
+ extInfo.baseClass = baseClass;
+
+ if (baseClass->getName().starts_with(objc::symbol_names::klass))
+ extInfo.baseClassSourceLanguage = SourceLanguage::SourceObjC;
+ else if (baseClass->getName().starts_with(
+ objc::symbol_names::swift_objc_klass))
+ extInfo.baseClassSourceLanguage = SourceLanguage::SourceSwift;
+ else
+ llvm_unreachable("Unexpected base class symbol name");
+
for (auto &catInfo : categories) {
parseCatInfoToExtInfo(catInfo, extInfo);
}
@@ -1382,14 +1407,15 @@ void ObjcCategoryMerger::mergeCategoriesIntoBaseClass(
// Protocol lists are a special case - the same protocol list is in classRo
// and metaRo, so we only need to parse it once
parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,
- extInfo.protocols);
+ extInfo.protocols, extInfo.baseClassSourceLanguage);
// Check that the classRo and metaRo protocol lists are identical
- assert(
- parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset) ==
- parseProtocolListInfo(metaIsec, roClassLayout.baseProtocolsOffset) &&
- "Category merger expects classRo and metaRo to have the same protocol "
- "list");
+ assert(parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,
+ extInfo.baseClassSourceLanguage) ==
+ parseProtocolListInfo(metaIsec, roClassLayout.baseProtocolsOffset,
+ extInfo.baseClassSourceLanguage) &&
+ "Category merger expects classRo and metaRo to have the same protocol "
+ "list");
parsePointerListInfo(metaIsec, roClassLayout.baseMethodsOffset,
extInfo.classMethods);
diff --git a/lld/MachO/ObjC.h b/lld/MachO/ObjC.h
index 790e096caf760..db259a82fbbdb 100644
--- a/lld/MachO/ObjC.h
+++ b/lld/MachO/ObjC.h
@@ -34,6 +34,7 @@ constexpr const char categoryClassMethods[] =
constexpr const char categoryProtocols[] = "__OBJC_CATEGORY_PROTOCOLS_$_";
constexpr const char swift_objc_category[] = "__CATEGORY_";
+constexpr const char swift_objc_klass[] = "_$s";
} // namespace symbol_names
// Check for duplicate method names within related categories / classes.
|
@llvm/pr-subscribers-lld-macho Author: None (alx32) ChangesThis change adds tracking for the source language of the various input structs used by the category merger. Identification is based on expected symbol names. It also adds checks to ensure we're dealing with the expected data in known scenarios. Full diff: https://github.com/llvm/llvm-project/pull/95473.diff 2 Files Affected:
diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index 0105aa547e173..a277c18c7b29f 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -349,11 +349,15 @@ void objc::checkCategories() {
namespace {
class ObjcCategoryMerger {
+ // In which language was a particular construct originally defined
+ enum SourceLanguage { SourceObjC, SourceSwift, SourceUnknown };
+
// Information about an input category
struct InfoInputCategory {
ConcatInputSection *catListIsec;
ConcatInputSection *catBodyIsec;
uint32_t offCatListIsec = 0;
+ SourceLanguage sourceLanguage = SourceUnknown;
bool wasMerged = false;
};
@@ -413,7 +417,9 @@ class ObjcCategoryMerger {
// Merged names of containers. Ex: base|firstCategory|secondCategory|...
std::string mergedContainerName;
std::string baseClassName;
- Symbol *baseClass = nullptr;
+ const Symbol *baseClass = nullptr;
+ SourceLanguage baseClassSourceLanguage = SourceUnknown;
+
CategoryLayout &catLayout;
// In case we generate new data, mark the new data as belonging to this file
@@ -456,10 +462,12 @@ class ObjcCategoryMerger {
ClassExtensionInfo &extInfo);
void parseProtocolListInfo(const ConcatInputSection *isec, uint32_t secOffset,
- PointerListInfo &ptrList);
+ PointerListInfo &ptrList,
+ SourceLanguage sourceLang);
PointerListInfo parseProtocolListInfo(const ConcatInputSection *isec,
- uint32_t secOffset);
+ uint32_t secOffset,
+ SourceLanguage sourceLang);
void parsePointerListInfo(const ConcatInputSection *isec, uint32_t secOffset,
PointerListInfo &ptrList);
@@ -653,9 +661,9 @@ void ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
// Parse a protocol list that might be linked to ConcatInputSection at a given
// offset. The format of the protocol list is different than other lists (prop
// lists, method lists) so we need to parse it differently
-void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
- uint32_t secOffset,
- PointerListInfo &ptrList) {
+void ObjcCategoryMerger::parseProtocolListInfo(
+ const ConcatInputSection *isec, uint32_t secOffset,
+ PointerListInfo &ptrList, [[maybe_unused]] SourceLanguage sourceLang) {
assert((isec && (secOffset + target->wordSize <= isec->data.size())) &&
"Tried to read pointer list beyond protocol section end");
@@ -684,8 +692,10 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
[[maybe_unused]] uint32_t expectedListSizeSwift =
expectedListSize - target->wordSize;
- assert((expectedListSize == ptrListSym->isec()->data.size() ||
- expectedListSizeSwift == ptrListSym->isec()->data.size()) &&
+ assert(((expectedListSize == ptrListSym->isec()->data.size() &&
+ sourceLang == SourceObjC) ||
+ (expectedListSizeSwift == ptrListSym->isec()->data.size() &&
+ sourceLang == SourceSwift)) &&
"Protocol list does not match expected size");
uint32_t off = protocolListHeaderLayout.totalSize;
@@ -708,9 +718,10 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
// Parse a protocol list and return the PointerListInfo for it
ObjcCategoryMerger::PointerListInfo
ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
- uint32_t secOffset) {
+ uint32_t secOffset,
+ SourceLanguage sourceLang) {
PointerListInfo ptrList;
- parseProtocolListInfo(isec, secOffset, ptrList);
+ parseProtocolListInfo(isec, secOffset, ptrList, sourceLang);
return ptrList;
}
@@ -772,10 +783,10 @@ void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
assert(catNameReloc && "Category does not have a reloc at 'nameOffset'");
// is this the first category we are parsing?
- if (extInfo.mergedContainerName.empty())
+ if (extInfo.mergedContainerName.empty()) {
extInfo.objFileForMergeData =
dyn_cast_or_null<ObjFile>(catInfo.catBodyIsec->getFile());
- else
+ } else
extInfo.mergedContainerName += "|";
assert(extInfo.objFileForMergeData &&
@@ -809,7 +820,7 @@ void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
extInfo.classMethods);
parseProtocolListInfo(catInfo.catBodyIsec, catLayout.protocolsOffset,
- extInfo.protocols);
+ extInfo.protocols, catInfo.sourceLanguage);
parsePointerListInfo(catInfo.catBodyIsec, catLayout.instancePropsOffset,
extInfo.instanceProps);
@@ -1151,14 +1162,19 @@ void ObjcCategoryMerger::collectAndValidateCategoriesData() {
if (nlCategories.count(categorySym))
continue;
- assert(categorySym->getName().starts_with(objc::symbol_names::category) ||
- categorySym->getName().starts_with(
- objc::symbol_names::swift_objc_category));
-
auto *catBodyIsec = dyn_cast<ConcatInputSection>(categorySym->isec());
assert(catBodyIsec &&
"Category data section is not an ConcatInputSection");
+ InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off};
+ if (categorySym->getName().starts_with(objc::symbol_names::category))
+ catInputInfo.sourceLanguage = SourceLanguage::SourceObjC;
+ else if (categorySym->getName().starts_with(
+ objc::symbol_names::swift_objc_category))
+ catInputInfo.sourceLanguage = SourceLanguage::SourceSwift;
+ else
+ llvm_unreachable("Unexpected category symbol name");
+
// Check that the category has a reloc at 'klassOffset' (which is
// a pointer to the class symbol)
@@ -1166,7 +1182,6 @@ void ObjcCategoryMerger::collectAndValidateCategoriesData() {
tryGetSymbolAtIsecOffset(catBodyIsec, catLayout.klassOffset);
assert(classSym && "Category does not have a valid base class");
- InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off};
categoryMap[classSym].push_back(catInputInfo);
collectCategoryWriterInfoFromCategory(catInputInfo);
@@ -1366,6 +1381,16 @@ void ObjcCategoryMerger::mergeCategoriesIntoBaseClass(
// Collect all the info from the categories
ClassExtensionInfo extInfo(catLayout);
+ extInfo.baseClass = baseClass;
+
+ if (baseClass->getName().starts_with(objc::symbol_names::klass))
+ extInfo.baseClassSourceLanguage = SourceLanguage::SourceObjC;
+ else if (baseClass->getName().starts_with(
+ objc::symbol_names::swift_objc_klass))
+ extInfo.baseClassSourceLanguage = SourceLanguage::SourceSwift;
+ else
+ llvm_unreachable("Unexpected base class symbol name");
+
for (auto &catInfo : categories) {
parseCatInfoToExtInfo(catInfo, extInfo);
}
@@ -1382,14 +1407,15 @@ void ObjcCategoryMerger::mergeCategoriesIntoBaseClass(
// Protocol lists are a special case - the same protocol list is in classRo
// and metaRo, so we only need to parse it once
parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,
- extInfo.protocols);
+ extInfo.protocols, extInfo.baseClassSourceLanguage);
// Check that the classRo and metaRo protocol lists are identical
- assert(
- parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset) ==
- parseProtocolListInfo(metaIsec, roClassLayout.baseProtocolsOffset) &&
- "Category merger expects classRo and metaRo to have the same protocol "
- "list");
+ assert(parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,
+ extInfo.baseClassSourceLanguage) ==
+ parseProtocolListInfo(metaIsec, roClassLayout.baseProtocolsOffset,
+ extInfo.baseClassSourceLanguage) &&
+ "Category merger expects classRo and metaRo to have the same protocol "
+ "list");
parsePointerListInfo(metaIsec, roClassLayout.baseMethodsOffset,
extInfo.classMethods);
diff --git a/lld/MachO/ObjC.h b/lld/MachO/ObjC.h
index 790e096caf760..db259a82fbbdb 100644
--- a/lld/MachO/ObjC.h
+++ b/lld/MachO/ObjC.h
@@ -34,6 +34,7 @@ constexpr const char categoryClassMethods[] =
constexpr const char categoryProtocols[] = "__OBJC_CATEGORY_PROTOCOLS_$_";
constexpr const char swift_objc_category[] = "__CATEGORY_";
+constexpr const char swift_objc_klass[] = "_$s";
} // namespace symbol_names
// Check for duplicate method names within related categories / classes.
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm
This change adds tracking for the source language of the various input structs used by the category merger. Identification is based on expected symbol names. It also adds checks to ensure we're dealing with the expected data in known scenarios.