-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[clang][modules] Optimize construction and usage of the submodule index #113391
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang-modules Author: Jan Svoboda (jansvoboda11) ChangesThis patch avoids eagerly populating the submodule index on Moreover, this patch avoids performing qualified submodule lookup in This speeds up Full diff: https://github.com/llvm/llvm-project/pull/113391.diff 5 Files Affected:
diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h
index 9c5d33fbb562cc..484ceca9cb2036 100644
--- a/clang/include/clang/Basic/Module.h
+++ b/clang/include/clang/Basic/Module.h
@@ -227,7 +227,7 @@ class alignas(8) Module {
/// A mapping from the submodule name to the index into the
/// \c SubModules vector at which that submodule resides.
- llvm::StringMap<unsigned> SubModuleIndex;
+ mutable llvm::StringMap<unsigned> SubModuleIndex;
/// The AST file if this is a top-level module which has a
/// corresponding serialized AST file, or null otherwise.
@@ -595,7 +595,6 @@ class alignas(8) Module {
void setParent(Module *M) {
assert(!Parent);
Parent = M;
- Parent->SubModuleIndex[Name] = Parent->SubModules.size();
Parent->SubModules.push_back(this);
}
diff --git a/clang/include/clang/Lex/ModuleMap.h b/clang/include/clang/Lex/ModuleMap.h
index 75b567a347cb6c..b4a8e0e358ffbe 100644
--- a/clang/include/clang/Lex/ModuleMap.h
+++ b/clang/include/clang/Lex/ModuleMap.h
@@ -541,11 +541,11 @@ class ModuleMap {
///
/// \param IsExplicit Whether this is an explicit submodule.
///
- /// \returns The found or newly-created module, along with a boolean value
- /// that will be true if the module is newly-created.
- std::pair<Module *, bool> findOrCreateModule(StringRef Name, Module *Parent,
- bool IsFramework,
- bool IsExplicit);
+ /// \returns The found or newly-created module.
+ Module *findOrCreateModule(StringRef Name, Module *Parent, bool IsFramework,
+ bool IsExplicit);
+ Module *createModule(StringRef Name, Module *Parent, bool IsFramework,
+ bool IsExplicit);
/// Create a global module fragment for a C++ module unit.
///
diff --git a/clang/lib/Basic/Module.cpp b/clang/lib/Basic/Module.cpp
index ad52fccff5dc7f..785b819a08af8f 100644
--- a/clang/lib/Basic/Module.cpp
+++ b/clang/lib/Basic/Module.cpp
@@ -54,7 +54,6 @@ Module::Module(ModuleConstructorTag, StringRef Name,
NoUndeclaredIncludes = Parent->NoUndeclaredIncludes;
ModuleMapIsPrivate = Parent->ModuleMapIsPrivate;
- Parent->SubModuleIndex[Name] = Parent->SubModules.size();
Parent->SubModules.push_back(this);
}
}
@@ -351,11 +350,14 @@ void Module::markUnavailable(bool Unimportable) {
}
Module *Module::findSubmodule(StringRef Name) const {
- llvm::StringMap<unsigned>::const_iterator Pos = SubModuleIndex.find(Name);
- if (Pos == SubModuleIndex.end())
- return nullptr;
+ // Add new submodules into the index.
+ for (unsigned I = SubModuleIndex.size(), E = SubModules.size(); I != E; ++I)
+ SubModuleIndex[SubModules[I]->Name] = I;
- return SubModules[Pos->getValue()];
+ if (auto It = SubModuleIndex.find(Name); It != SubModuleIndex.end())
+ return SubModules[It->second];
+
+ return nullptr;
}
Module *Module::getGlobalModuleFragment() const {
diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp
index fd6bc17ae9cdac..f8dbc04b6768f3 100644
--- a/clang/lib/Lex/ModuleMap.cpp
+++ b/clang/lib/Lex/ModuleMap.cpp
@@ -655,8 +655,8 @@ ModuleMap::findOrCreateModuleForHeaderInUmbrellaDir(FileEntryRef File) {
SmallString<32> NameBuf;
StringRef Name = sanitizeFilenameAsIdentifier(
llvm::sys::path::stem(SkippedDir.getName()), NameBuf);
- Result = findOrCreateModule(Name, Result, /*IsFramework=*/false,
- Explicit).first;
+ Result =
+ findOrCreateModule(Name, Result, /*IsFramework=*/false, Explicit);
InferredModuleAllowedBy[Result] = UmbrellaModuleMap;
Result->IsInferred = true;
@@ -673,8 +673,8 @@ ModuleMap::findOrCreateModuleForHeaderInUmbrellaDir(FileEntryRef File) {
SmallString<32> NameBuf;
StringRef Name = sanitizeFilenameAsIdentifier(
llvm::sys::path::stem(File.getName()), NameBuf);
- Result = findOrCreateModule(Name, Result, /*IsFramework=*/false,
- Explicit).first;
+ Result =
+ findOrCreateModule(Name, Result, /*IsFramework=*/false, Explicit);
InferredModuleAllowedBy[Result] = UmbrellaModuleMap;
Result->IsInferred = true;
Result->addTopHeader(File);
@@ -859,15 +859,21 @@ Module *ModuleMap::lookupModuleQualified(StringRef Name, Module *Context) const{
return Context->findSubmodule(Name);
}
-std::pair<Module *, bool> ModuleMap::findOrCreateModule(StringRef Name,
- Module *Parent,
- bool IsFramework,
- bool IsExplicit) {
+Module *ModuleMap::findOrCreateModule(StringRef Name, Module *Parent,
+ bool IsFramework, bool IsExplicit) {
// Try to find an existing module with this name.
if (Module *Sub = lookupModuleQualified(Name, Parent))
- return std::make_pair(Sub, false);
+ return Sub;
// Create a new module with this name.
+ return createModule(Name, Parent, IsFramework, IsExplicit);
+}
+
+Module *ModuleMap::createModule(StringRef Name, Module *Parent,
+ bool IsFramework, bool IsExplicit) {
+ assert(lookupModuleQualified(Name, Parent) == nullptr &&
+ "Creating duplicate submodule");
+
Module *Result = new (ModulesAlloc.Allocate())
Module(ModuleConstructorTag{}, Name, SourceLocation(), Parent,
IsFramework, IsExplicit, NumCreatedModules++);
@@ -877,7 +883,7 @@ std::pair<Module *, bool> ModuleMap::findOrCreateModule(StringRef Name,
Modules[Name] = Result;
ModuleScopeIDs[Result] = CurrentModuleScopeID;
}
- return std::make_pair(Result, true);
+ return Result;
}
Module *ModuleMap::createGlobalModuleFragmentForModuleUnit(SourceLocation Loc,
@@ -2126,8 +2132,7 @@ void ModuleMapParser::parseModuleDecl() {
Map.createShadowedModule(ModuleName, Framework, ShadowingModule);
} else {
ActiveModule =
- Map.findOrCreateModule(ModuleName, ActiveModule, Framework, Explicit)
- .first;
+ Map.findOrCreateModule(ModuleName, ActiveModule, Framework, Explicit);
}
ActiveModule->DefinitionLoc = ModuleNameLoc;
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 7d9170e7f0b479..8a9528c43af6c7 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -5756,6 +5756,13 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
return Err;
ModuleMap &ModMap = PP.getHeaderSearchInfo().getModuleMap();
+ bool KnowsTopLevelModule = ModMap.findModule(F.ModuleName) != nullptr;
+ // If we don't know the top-level module, there's no point in doing qualified
+ // lookup of its submodules; it won't find anything anywhere within this tree.
+ // Let's skip that and avoid some string lookups.
+ auto CreateModule = !KnowsTopLevelModule ? &ModuleMap::createModule
+ : &ModuleMap::findOrCreateModule;
+
bool First = true;
Module *CurrentModule = nullptr;
RecordData Record;
@@ -5828,11 +5835,8 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
if (Parent)
ParentModule = getSubmodule(Parent);
- // Retrieve this (sub)module from the module map, creating it if
- // necessary.
- CurrentModule =
- ModMap.findOrCreateModule(Name, ParentModule, IsFramework, IsExplicit)
- .first;
+ CurrentModule = std::invoke(CreateModule, &ModMap, Name, ParentModule,
+ IsFramework, IsExplicit);
// FIXME: Call ModMap.setInferredModuleAllowedBy()
|
@llvm/pr-subscribers-clang Author: Jan Svoboda (jansvoboda11) ChangesThis patch avoids eagerly populating the submodule index on Moreover, this patch avoids performing qualified submodule lookup in This speeds up Full diff: https://github.com/llvm/llvm-project/pull/113391.diff 5 Files Affected:
diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h
index 9c5d33fbb562cc..484ceca9cb2036 100644
--- a/clang/include/clang/Basic/Module.h
+++ b/clang/include/clang/Basic/Module.h
@@ -227,7 +227,7 @@ class alignas(8) Module {
/// A mapping from the submodule name to the index into the
/// \c SubModules vector at which that submodule resides.
- llvm::StringMap<unsigned> SubModuleIndex;
+ mutable llvm::StringMap<unsigned> SubModuleIndex;
/// The AST file if this is a top-level module which has a
/// corresponding serialized AST file, or null otherwise.
@@ -595,7 +595,6 @@ class alignas(8) Module {
void setParent(Module *M) {
assert(!Parent);
Parent = M;
- Parent->SubModuleIndex[Name] = Parent->SubModules.size();
Parent->SubModules.push_back(this);
}
diff --git a/clang/include/clang/Lex/ModuleMap.h b/clang/include/clang/Lex/ModuleMap.h
index 75b567a347cb6c..b4a8e0e358ffbe 100644
--- a/clang/include/clang/Lex/ModuleMap.h
+++ b/clang/include/clang/Lex/ModuleMap.h
@@ -541,11 +541,11 @@ class ModuleMap {
///
/// \param IsExplicit Whether this is an explicit submodule.
///
- /// \returns The found or newly-created module, along with a boolean value
- /// that will be true if the module is newly-created.
- std::pair<Module *, bool> findOrCreateModule(StringRef Name, Module *Parent,
- bool IsFramework,
- bool IsExplicit);
+ /// \returns The found or newly-created module.
+ Module *findOrCreateModule(StringRef Name, Module *Parent, bool IsFramework,
+ bool IsExplicit);
+ Module *createModule(StringRef Name, Module *Parent, bool IsFramework,
+ bool IsExplicit);
/// Create a global module fragment for a C++ module unit.
///
diff --git a/clang/lib/Basic/Module.cpp b/clang/lib/Basic/Module.cpp
index ad52fccff5dc7f..785b819a08af8f 100644
--- a/clang/lib/Basic/Module.cpp
+++ b/clang/lib/Basic/Module.cpp
@@ -54,7 +54,6 @@ Module::Module(ModuleConstructorTag, StringRef Name,
NoUndeclaredIncludes = Parent->NoUndeclaredIncludes;
ModuleMapIsPrivate = Parent->ModuleMapIsPrivate;
- Parent->SubModuleIndex[Name] = Parent->SubModules.size();
Parent->SubModules.push_back(this);
}
}
@@ -351,11 +350,14 @@ void Module::markUnavailable(bool Unimportable) {
}
Module *Module::findSubmodule(StringRef Name) const {
- llvm::StringMap<unsigned>::const_iterator Pos = SubModuleIndex.find(Name);
- if (Pos == SubModuleIndex.end())
- return nullptr;
+ // Add new submodules into the index.
+ for (unsigned I = SubModuleIndex.size(), E = SubModules.size(); I != E; ++I)
+ SubModuleIndex[SubModules[I]->Name] = I;
- return SubModules[Pos->getValue()];
+ if (auto It = SubModuleIndex.find(Name); It != SubModuleIndex.end())
+ return SubModules[It->second];
+
+ return nullptr;
}
Module *Module::getGlobalModuleFragment() const {
diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp
index fd6bc17ae9cdac..f8dbc04b6768f3 100644
--- a/clang/lib/Lex/ModuleMap.cpp
+++ b/clang/lib/Lex/ModuleMap.cpp
@@ -655,8 +655,8 @@ ModuleMap::findOrCreateModuleForHeaderInUmbrellaDir(FileEntryRef File) {
SmallString<32> NameBuf;
StringRef Name = sanitizeFilenameAsIdentifier(
llvm::sys::path::stem(SkippedDir.getName()), NameBuf);
- Result = findOrCreateModule(Name, Result, /*IsFramework=*/false,
- Explicit).first;
+ Result =
+ findOrCreateModule(Name, Result, /*IsFramework=*/false, Explicit);
InferredModuleAllowedBy[Result] = UmbrellaModuleMap;
Result->IsInferred = true;
@@ -673,8 +673,8 @@ ModuleMap::findOrCreateModuleForHeaderInUmbrellaDir(FileEntryRef File) {
SmallString<32> NameBuf;
StringRef Name = sanitizeFilenameAsIdentifier(
llvm::sys::path::stem(File.getName()), NameBuf);
- Result = findOrCreateModule(Name, Result, /*IsFramework=*/false,
- Explicit).first;
+ Result =
+ findOrCreateModule(Name, Result, /*IsFramework=*/false, Explicit);
InferredModuleAllowedBy[Result] = UmbrellaModuleMap;
Result->IsInferred = true;
Result->addTopHeader(File);
@@ -859,15 +859,21 @@ Module *ModuleMap::lookupModuleQualified(StringRef Name, Module *Context) const{
return Context->findSubmodule(Name);
}
-std::pair<Module *, bool> ModuleMap::findOrCreateModule(StringRef Name,
- Module *Parent,
- bool IsFramework,
- bool IsExplicit) {
+Module *ModuleMap::findOrCreateModule(StringRef Name, Module *Parent,
+ bool IsFramework, bool IsExplicit) {
// Try to find an existing module with this name.
if (Module *Sub = lookupModuleQualified(Name, Parent))
- return std::make_pair(Sub, false);
+ return Sub;
// Create a new module with this name.
+ return createModule(Name, Parent, IsFramework, IsExplicit);
+}
+
+Module *ModuleMap::createModule(StringRef Name, Module *Parent,
+ bool IsFramework, bool IsExplicit) {
+ assert(lookupModuleQualified(Name, Parent) == nullptr &&
+ "Creating duplicate submodule");
+
Module *Result = new (ModulesAlloc.Allocate())
Module(ModuleConstructorTag{}, Name, SourceLocation(), Parent,
IsFramework, IsExplicit, NumCreatedModules++);
@@ -877,7 +883,7 @@ std::pair<Module *, bool> ModuleMap::findOrCreateModule(StringRef Name,
Modules[Name] = Result;
ModuleScopeIDs[Result] = CurrentModuleScopeID;
}
- return std::make_pair(Result, true);
+ return Result;
}
Module *ModuleMap::createGlobalModuleFragmentForModuleUnit(SourceLocation Loc,
@@ -2126,8 +2132,7 @@ void ModuleMapParser::parseModuleDecl() {
Map.createShadowedModule(ModuleName, Framework, ShadowingModule);
} else {
ActiveModule =
- Map.findOrCreateModule(ModuleName, ActiveModule, Framework, Explicit)
- .first;
+ Map.findOrCreateModule(ModuleName, ActiveModule, Framework, Explicit);
}
ActiveModule->DefinitionLoc = ModuleNameLoc;
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 7d9170e7f0b479..8a9528c43af6c7 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -5756,6 +5756,13 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
return Err;
ModuleMap &ModMap = PP.getHeaderSearchInfo().getModuleMap();
+ bool KnowsTopLevelModule = ModMap.findModule(F.ModuleName) != nullptr;
+ // If we don't know the top-level module, there's no point in doing qualified
+ // lookup of its submodules; it won't find anything anywhere within this tree.
+ // Let's skip that and avoid some string lookups.
+ auto CreateModule = !KnowsTopLevelModule ? &ModuleMap::createModule
+ : &ModuleMap::findOrCreateModule;
+
bool First = true;
Module *CurrentModule = nullptr;
RecordData Record;
@@ -5828,11 +5835,8 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
if (Parent)
ParentModule = getSubmodule(Parent);
- // Retrieve this (sub)module from the module map, creating it if
- // necessary.
- CurrentModule =
- ModMap.findOrCreateModule(Name, ParentModule, IsFramework, IsExplicit)
- .first;
+ CurrentModule = std::invoke(CreateModule, &ModMap, Name, ParentModule,
+ IsFramework, IsExplicit);
// FIXME: Call ModMap.setInferredModuleAllowedBy()
|
This patch avoids eagerly populating the submodule index on `Module` construction. The `StringMap` allocation shows up in my profiles of `clang-scan-deps`, while the index is not necessary most of the time. We still construct it on-demand. Moreover, this patch avoids performing qualified submodule lookup in `ASTReader` whenever we're serializing a module graph whose top-level module is unknown. This is pointless, since that's guaranteed to never find any existing submodules anyway. This speeds up `clang-scan-deps` by ~0.5% on my workload.
cc68b8b
to
21295bc
Compare
Instead of changing the return type of `ModuleMap::findOrCreateModule`, this patch adds a counterpart that only returns `Module *` and thus has the same signature as `createModule()`, which is important in `ASTReader`.
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/60/builds/11233 Here is the relevant piece of the build log for the reference
|
…ex (llvm#113391) This patch avoids eagerly populating the submodule index on `Module` construction. The `StringMap` allocation shows up in my profiles of `clang-scan-deps`, while the index is not necessary most of the time. We still construct it on-demand. Moreover, this patch avoids performing qualified submodule lookup in `ASTReader` whenever we're serializing a module graph whose top-level module is unknown. This is pointless, since that's guaranteed to never find any existing submodules anyway. This speeds up `clang-scan-deps` by ~0.5% on my workload.
Instead of changing the return type of `ModuleMap::findOrCreateModule`, this patch adds a counterpart that only returns `Module *` and thus has the same signature as `createModule()`, which is important in `ASTReader`.
…ex (llvm#113391) This patch avoids eagerly populating the submodule index on `Module` construction. The `StringMap` allocation shows up in my profiles of `clang-scan-deps`, while the index is not necessary most of the time. We still construct it on-demand. Moreover, this patch avoids performing qualified submodule lookup in `ASTReader` whenever we're serializing a module graph whose top-level module is unknown. This is pointless, since that's guaranteed to never find any existing submodules anyway. This speeds up `clang-scan-deps` by ~0.5% on my workload. (cherry picked from commit 6c6351e)
Instead of changing the return type of `ModuleMap::findOrCreateModule`, this patch adds a counterpart that only returns `Module *` and thus has the same signature as `createModule()`, which is important in `ASTReader`. (cherry picked from commit 19131c7)
This patch avoids eagerly populating the submodule index on
Module
construction. TheStringMap
allocation shows up in my profiles ofclang-scan-deps
, while the index is not necessary most of the time. We still construct it on-demand.Moreover, this patch avoids performing qualified submodule lookup in
ASTReader
whenever we're serializing a module graph whose top-level module is unknown. This is pointless, since that's guaranteed to never find any existing submodules anyway.This speeds up
clang-scan-deps
by ~0.5% on my workload.