Skip to content

Commit d09973b

Browse files
committed
[clang][deps][modules] Allocate input file paths lazily (llvm#114457)
This PR builds on top of llvm#113984 and attempts to avoid allocating input file paths eagerly. Instead, the `InputFileInfo` type used by `ASTReader` now only holds `StringRef`s that point into the PCM file buffer, and the full input file paths get resolved on demand. The dependency scanner makes use of this in a bit of a roundabout way: `ModuleDeps` now only holds (an owning copy of) the short unresolved input file paths, which get resolved lazily. This can be a big win, I'm seeing up to a 5% speedup. (cherry picked from commit 9d4837f)
1 parent e3f0c0d commit d09973b

29 files changed

+161
-151
lines changed

clang/include/clang/Serialization/ModuleFile.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,15 +62,20 @@ enum ModuleKind {
6262

6363
/// The input file info that has been loaded from an AST file.
6464
struct InputFileInfo {
65-
std::string FilenameAsRequested;
66-
std::string Filename;
65+
StringRef UnresolvedImportedFilenameAsRequested;
66+
StringRef UnresolvedImportedFilename;
67+
6768
uint64_t ContentHash;
6869
off_t StoredSize;
6970
time_t StoredTime;
7071
bool Overridden;
7172
bool Transient;
7273
bool TopLevel;
7374
bool ModuleMap;
75+
76+
bool isValid() const {
77+
return !UnresolvedImportedFilenameAsRequested.empty();
78+
}
7479
};
7580

7681
/// The input file that has been loaded from this AST file, along with

clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -123,10 +123,6 @@ struct ModuleDeps {
123123
/// additionally appear in \c FileDeps as a dependency.
124124
std::string ClangModuleMapFile;
125125

126-
/// A collection of absolute paths to files that this module directly depends
127-
/// on, not including transitive dependencies.
128-
llvm::StringSet<> FileDeps;
129-
130126
/// A collection of absolute paths to module map files that this module needs
131127
/// to know about. The ordering is significant.
132128
std::vector<std::string> ModuleMapFileDeps;
@@ -155,13 +151,25 @@ struct ModuleDeps {
155151
/// an entity from this module is used.
156152
llvm::SmallVector<Module::LinkLibrary, 2> LinkLibraries;
157153

154+
/// Invokes \c Cb for all file dependencies of this module. Each provided
155+
/// \c StringRef is only valid within the individual callback invocation.
156+
void forEachFileDep(llvm::function_ref<void(StringRef)> Cb) const;
157+
158158
/// Get (or compute) the compiler invocation that can be used to build this
159159
/// module. Does not include argv[0].
160160
const std::vector<std::string> &getBuildArguments();
161161

162162
private:
163+
friend class ModuleDepCollector;
163164
friend class ModuleDepCollectorPP;
164165

166+
/// The base directory for relative paths in \c FileDeps.
167+
std::string FileDepsBaseDir;
168+
169+
/// A collection of paths to files that this module directly depends on, not
170+
/// including transitive dependencies.
171+
std::vector<std::string> FileDeps;
172+
165173
std::variant<std::monostate, CowCompilerInvocation, std::vector<std::string>>
166174
BuildInfo;
167175
};

clang/lib/Serialization/ASTReader.cpp

Lines changed: 17 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2454,7 +2454,7 @@ InputFileInfo ASTReader::getInputFileInfo(ModuleFile &F, unsigned ID) {
24542454
return InputFileInfo();
24552455

24562456
// If we've already loaded this input file, return it.
2457-
if (!F.InputFileInfosLoaded[ID - 1].Filename.empty())
2457+
if (F.InputFileInfosLoaded[ID - 1].isValid())
24582458
return F.InputFileInfosLoaded[ID - 1];
24592459

24602460
// Go find this input file.
@@ -2491,21 +2491,11 @@ InputFileInfo ASTReader::getInputFileInfo(ModuleFile &F, unsigned ID) {
24912491
R.Transient = static_cast<bool>(Record[4]);
24922492
R.TopLevel = static_cast<bool>(Record[5]);
24932493
R.ModuleMap = static_cast<bool>(Record[6]);
2494-
std::tie(R.FilenameAsRequested, R.Filename) = [&]() {
2495-
uint16_t AsRequestedLength = Record[7];
2496-
2497-
StringRef NameAsRequestedRef = Blob.substr(0, AsRequestedLength);
2498-
StringRef NameRef = Blob.substr(AsRequestedLength);
2499-
2500-
std::string NameAsRequested =
2501-
ResolveImportedPathAndAllocate(PathBuf, NameAsRequestedRef, F);
2502-
std::string Name = ResolveImportedPathAndAllocate(PathBuf, NameRef, F);
2503-
2504-
if (Name.empty())
2505-
Name = NameAsRequested;
2506-
2507-
return std::make_pair(std::move(NameAsRequested), std::move(Name));
2508-
}();
2494+
uint16_t AsRequestedLength = Record[7];
2495+
R.UnresolvedImportedFilenameAsRequested = Blob.substr(0, AsRequestedLength);
2496+
R.UnresolvedImportedFilename = Blob.substr(AsRequestedLength);
2497+
if (R.UnresolvedImportedFilename.empty())
2498+
R.UnresolvedImportedFilename = R.UnresolvedImportedFilenameAsRequested;
25092499

25102500
Expected<llvm::BitstreamEntry> MaybeEntry = Cursor.advance();
25112501
if (!MaybeEntry) // FIXME this drops errors on the floor.
@@ -2557,7 +2547,8 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) {
25572547
time_t StoredTime = FI.StoredTime;
25582548
bool Overridden = FI.Overridden;
25592549
bool Transient = FI.Transient;
2560-
StringRef Filename = FI.FilenameAsRequested;
2550+
auto Filename =
2551+
ResolveImportedPath(PathBuf, FI.UnresolvedImportedFilenameAsRequested, F);
25612552
uint64_t StoredContentHash = FI.ContentHash;
25622553

25632554
// For standard C++ modules, we don't need to check the inputs.
@@ -2573,17 +2564,17 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) {
25732564
Overridden = false;
25742565
}
25752566

2576-
auto File = FileMgr.getOptionalFileRef(Filename, /*OpenFile=*/false);
2567+
auto File = FileMgr.getOptionalFileRef(*Filename, /*OpenFile=*/false);
25772568

25782569
// For an overridden file, create a virtual file with the stored
25792570
// size/timestamp.
25802571
if ((Overridden || Transient || SkipChecks) && !File)
2581-
File = FileMgr.getVirtualFileRef(Filename, StoredSize, StoredTime);
2572+
File = FileMgr.getVirtualFileRef(*Filename, StoredSize, StoredTime);
25822573

25832574
if (!File) {
25842575
if (Complain) {
25852576
std::string ErrorStr = "could not find file '";
2586-
ErrorStr += Filename;
2577+
ErrorStr += *Filename;
25872578
ErrorStr += "' referenced by AST file '";
25882579
ErrorStr += F.FileName;
25892580
ErrorStr += "'";
@@ -2603,7 +2594,7 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) {
26032594
if ((!Overridden && !Transient) && !SkipChecks &&
26042595
SM.isFileOverridden(*File)) {
26052596
if (Complain)
2606-
Error(diag::err_fe_pch_file_overridden, Filename);
2597+
Error(diag::err_fe_pch_file_overridden, *Filename);
26072598

26082599
// After emitting the diagnostic, bypass the overriding file to recover
26092600
// (this creates a separate FileEntry).
@@ -2699,7 +2690,7 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) {
26992690
// The top-level PCH is stale.
27002691
StringRef TopLevelPCHName(ImportStack.back()->FileName);
27012692
Diag(diag::err_fe_ast_file_modified)
2702-
<< Filename << moduleKindForDiagnostic(ImportStack.back()->Kind)
2693+
<< *Filename << moduleKindForDiagnostic(ImportStack.back()->Kind)
27032694
<< TopLevelPCHName << FileChange.Kind
27042695
<< (FileChange.Old && FileChange.New)
27052696
<< llvm::itostr(FileChange.Old.value_or(0))
@@ -2708,7 +2699,7 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) {
27082699
// Print the import stack.
27092700
if (ImportStack.size() > 1) {
27102701
Diag(diag::note_pch_required_by)
2711-
<< Filename << ImportStack[0]->FileName;
2702+
<< *Filename << ImportStack[0]->FileName;
27122703
for (unsigned I = 1; I < ImportStack.size(); ++I)
27132704
Diag(diag::note_pch_required_by)
27142705
<< ImportStack[I-1]->FileName << ImportStack[I]->FileName;
@@ -2948,8 +2939,10 @@ ASTReader::ReadControlBlock(ModuleFile &F,
29482939
for (unsigned I = 0; I < N; ++I) {
29492940
bool IsSystem = I >= NumUserInputs;
29502941
InputFileInfo FI = getInputFileInfo(F, I + 1);
2942+
auto FilenameAsRequested = ResolveImportedPath(
2943+
PathBuf, FI.UnresolvedImportedFilenameAsRequested, F);
29512944
Listener->visitInputFile(
2952-
FI.FilenameAsRequested, IsSystem, FI.Overridden,
2945+
*FilenameAsRequested, IsSystem, FI.Overridden,
29532946
F.Kind == MK_ExplicitModule || F.Kind == MK_PrebuiltModule);
29542947
}
29552948
}

clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp

Lines changed: 25 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,16 @@ using namespace clang;
2626
using namespace tooling;
2727
using namespace dependencies;
2828

29+
void ModuleDeps::forEachFileDep(llvm::function_ref<void(StringRef)> Cb) const {
30+
SmallString<0> PathBuf;
31+
PathBuf.reserve(256);
32+
for (StringRef FileDep : FileDeps) {
33+
auto ResolvedFileDep =
34+
ASTReader::ResolveImportedPath(PathBuf, FileDep, FileDepsBaseDir);
35+
Cb(*ResolvedFileDep);
36+
}
37+
}
38+
2939
const std::vector<std::string> &ModuleDeps::getBuildArguments() {
3040
assert(!std::holds_alternative<std::monostate>(BuildInfo) &&
3141
"Using uninitialized ModuleDeps");
@@ -665,6 +675,7 @@ ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
665675
serialization::ModuleFile *MF =
666676
MDC.ScanInstance.getASTReader()->getModuleManager().lookup(
667677
*M->getASTFile());
678+
MD.FileDepsBaseDir = MF->BaseDirectory;
668679
MDC.ScanInstance.getASTReader()->visitInputFileInfos(
669680
*MF, /*IncludeSystem=*/true,
670681
[&](const serialization::InputFileInfo &IFI, bool IsSystem) {
@@ -673,25 +684,30 @@ ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
673684
// actual on-disk module map file that allowed inferring the module,
674685
// which is what we need for building the module explicitly
675686
// Let's ignore this file.
676-
if (StringRef(IFI.Filename).ends_with("__inferred_module.map"))
687+
if (IFI.UnresolvedImportedFilename.ends_with("__inferred_module.map"))
677688
return;
678-
MDC.addFileDep(MD, IFI.Filename);
689+
MDC.addFileDep(MD, IFI.UnresolvedImportedFilename);
679690
});
680691

681692
llvm::DenseSet<const Module *> SeenDeps;
682693
addAllSubmodulePrebuiltDeps(M, MD, SeenDeps);
683694
addAllSubmoduleDeps(M, MD, SeenDeps);
684695
addAllAffectingClangModules(M, MD, SeenDeps);
685696

697+
SmallString<0> PathBuf;
698+
PathBuf.reserve(256);
686699
MDC.ScanInstance.getASTReader()->visitInputFileInfos(
687700
*MF, /*IncludeSystem=*/true,
688701
[&](const serialization::InputFileInfo &IFI, bool IsSystem) {
689702
if (!(IFI.TopLevel && IFI.ModuleMap))
690703
return;
691-
if (StringRef(IFI.FilenameAsRequested)
692-
.ends_with("__inferred_module.map"))
704+
if (IFI.UnresolvedImportedFilenameAsRequested.ends_with(
705+
"__inferred_module.map"))
693706
return;
694-
MD.ModuleMapFileDeps.emplace_back(IFI.FilenameAsRequested);
707+
auto ResolvedFilenameAsRequested = ASTReader::ResolveImportedPath(
708+
PathBuf, IFI.UnresolvedImportedFilenameAsRequested,
709+
MF->BaseDirectory);
710+
MD.ModuleMapFileDeps.emplace_back(*ResolvedFilenameAsRequested);
695711
});
696712

697713
if (!MF->IncludeTreeID.empty())
@@ -883,23 +899,16 @@ static StringRef makeAbsoluteAndPreferred(CompilerInstance &CI, StringRef Path,
883899
void ModuleDepCollector::addFileDep(StringRef Path) {
884900
if (IsStdModuleP1689Format) {
885901
// Within P1689 format, we don't want all the paths to be absolute path
886-
// since it may violate the tranditional make style dependencies info.
887-
FileDeps.push_back(std::string(Path));
902+
// since it may violate the traditional make style dependencies info.
903+
FileDeps.emplace_back(Path);
888904
return;
889905
}
890906

891907
llvm::SmallString<256> Storage;
892908
Path = makeAbsoluteAndPreferred(ScanInstance, Path, Storage);
893-
FileDeps.push_back(std::string(Path));
909+
FileDeps.emplace_back(Path);
894910
}
895911

896912
void ModuleDepCollector::addFileDep(ModuleDeps &MD, StringRef Path) {
897-
if (IsStdModuleP1689Format) {
898-
MD.FileDeps.insert(Path);
899-
return;
900-
}
901-
902-
llvm::SmallString<256> Storage;
903-
Path = makeAbsoluteAndPreferred(ScanInstance, Path, Storage);
904-
MD.FileDeps.insert(Path);
913+
MD.FileDeps.emplace_back(Path);
905914
}

clang/test/ClangScanDeps/diagnostics.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@ module mod { header "mod.h" }
3434
// CHECK: ],
3535
// CHECK-NEXT: "context-hash": "[[HASH_MOD:.*]]",
3636
// CHECK-NEXT: "file-deps": [
37+
// CHECK-NEXT: "[[PREFIX]]/module.modulemap",
3738
// CHECK-NEXT: "[[PREFIX]]/mod.h"
38-
// CHECK-NEXT: "[[PREFIX]]/module.modulemap"
3939
// CHECK-NEXT: ],
4040
// CHECK-NEXT: "link-libraries": [],
4141
// CHECK-NEXT: "name": "mod"

clang/test/ClangScanDeps/header-search-pruning-transitive.c

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,8 @@ module X { header "X.h" }
7272
// CHECK: ],
7373
// CHECK-NEXT: "context-hash": "[[HASH_X:.*]]",
7474
// CHECK-NEXT: "file-deps": [
75-
// CHECK-NEXT: "[[PREFIX]]/X.h",
76-
// CHECK-NEXT: "[[PREFIX]]/module.modulemap"
75+
// CHECK-NEXT: "[[PREFIX]]/module.modulemap",
76+
// CHECK-NEXT: "[[PREFIX]]/X.h"
7777
// CHECK-NEXT: ],
7878
// CHECK-NEXT: "link-libraries": [],
7979
// CHECK-NEXT: "name": "X"
@@ -85,11 +85,11 @@ module X { header "X.h" }
8585
// CHECK: ],
8686
// CHECK-NEXT: "context-hash": "[[HASH_Y_WITH_A]]",
8787
// CHECK-NEXT: "file-deps": [
88+
// CHECK-NEXT: "[[PREFIX]]/module.modulemap",
8889
// CHECK-NEXT: "[[PREFIX]]/Y.h",
89-
// CHECK-NEXT: "[[PREFIX]]/a/a.h",
9090
// CHECK-NEXT: "[[PREFIX]]/begin/begin.h",
91-
// CHECK-NEXT: "[[PREFIX]]/end/end.h",
92-
// CHECK-NEXT: "[[PREFIX]]/module.modulemap"
91+
// CHECK-NEXT: "[[PREFIX]]/a/a.h",
92+
// CHECK-NEXT: "[[PREFIX]]/end/end.h"
9393
// CHECK-NEXT: ],
9494
// CHECK-NEXT: "link-libraries": [],
9595
// CHECK-NEXT: "name": "Y"
@@ -128,8 +128,8 @@ module X { header "X.h" }
128128
// also has a different context hash from the first version of module X.
129129
// CHECK-NOT: "context-hash": "[[HASH_X]]",
130130
// CHECK: "file-deps": [
131-
// CHECK-NEXT: "[[PREFIX]]/X.h",
132-
// CHECK-NEXT: "[[PREFIX]]/module.modulemap"
131+
// CHECK-NEXT: "[[PREFIX]]/module.modulemap",
132+
// CHECK-NEXT: "[[PREFIX]]/X.h"
133133
// CHECK-NEXT: ],
134134
// CHECK-NEXT: "link-libraries": [],
135135
// CHECK-NEXT: "name": "X"
@@ -141,10 +141,10 @@ module X { header "X.h" }
141141
// CHECK: ],
142142
// CHECK-NEXT: "context-hash": "[[HASH_Y_WITHOUT_A]]",
143143
// CHECK-NEXT: "file-deps": [
144+
// CHECK-NEXT: "[[PREFIX]]/module.modulemap",
144145
// CHECK-NEXT: "[[PREFIX]]/Y.h",
145146
// CHECK-NEXT: "[[PREFIX]]/begin/begin.h",
146-
// CHECK-NEXT: "[[PREFIX]]/end/end.h",
147-
// CHECK-NEXT: "[[PREFIX]]/module.modulemap"
147+
// CHECK-NEXT: "[[PREFIX]]/end/end.h"
148148
// CHECK-NEXT: ],
149149
// CHECK-NEXT: "link-libraries": [],
150150
// CHECK-NEXT: "name": "Y"

clang/test/ClangScanDeps/link-libraries.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,8 @@ module transitive {
4444
// CHECK: ],
4545
// CHECK-NEXT: "context-hash": "{{.*}}",
4646
// CHECK-NEXT: "file-deps": [
47+
// CHECK-NEXT: "[[PREFIX]]/Inputs/frameworks/module.modulemap",
4748
// CHECK-NEXT: "[[PREFIX]]/Inputs/frameworks/Framework.framework/Headers/Framework.h"
48-
// CHECK-NEXT: "[[PREFIX]]/Inputs/frameworks/module.modulemap"
4949
// CHECK-NEXT: ],
5050
// CHECK-NEXT: "link-libraries": [
5151
// CHECK-NEXT: {
@@ -67,8 +67,8 @@ module transitive {
6767
// CHECK: ],
6868
// CHECK-NEXT: "context-hash": "{{.*}}",
6969
// CHECK-NEXT: "file-deps": [
70+
// CHECK-NEXT: "[[PREFIX]]/module.modulemap",
7071
// CHECK-NEXT: "[[PREFIX]]/direct.h"
71-
// CHECK-NEXT: "[[PREFIX]]/module.modulemap"
7272
// CHECK-NEXT: ],
7373
// CHECK-NEXT: "link-libraries": [],
7474
// CHECK-NEXT: "name": "direct"
@@ -89,10 +89,10 @@ module transitive {
8989
// CHECK: ],
9090
// CHECK-NEXT: "context-hash": "{{.*}}",
9191
// CHECK-NEXT: "file-deps": [
92+
// CHECK-NEXT: "[[PREFIX]]/module.modulemap",
93+
// CHECK-NEXT: "[[PREFIX]]/root.h",
94+
// CHECK-NEXT: "[[PREFIX]]/root/textual.h",
9295
// CHECK-NEXT: "[[PREFIX]]/Inputs/frameworks/module.modulemap"
93-
// CHECK-NEXT: "[[PREFIX]]/module.modulemap"
94-
// CHECK-NEXT: "[[PREFIX]]/root.h"
95-
// CHECK-NEXT: "[[PREFIX]]/root/textual.h"
9696
// CHECK-NEXT: ],
9797
// CHECK-NEXT: "link-libraries": [],
9898
// CHECK-NEXT: "name": "root"
@@ -104,7 +104,7 @@ module transitive {
104104
// CHECK: ],
105105
// CHECK-NEXT: "context-hash": "{{.*}}",
106106
// CHECK-NEXT: "file-deps": [
107-
// CHECK-NEXT: "[[PREFIX]]/module.modulemap"
107+
// CHECK-NEXT: "[[PREFIX]]/module.modulemap",
108108
// CHECK-NEXT: "[[PREFIX]]/transitive.h"
109109
// CHECK-NEXT: ],
110110
// CHECK-NEXT: "link-libraries": [

clang/test/ClangScanDeps/modules-context-hash.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,9 @@
3434
// CHECK: ],
3535
// CHECK-NEXT: "context-hash": "[[HASH_MOD_A:.*]]",
3636
// CHECK-NEXT: "file-deps": [
37-
// CHECK-NEXT: "[[PREFIX]]/a/dep.h",
37+
// CHECK-NEXT: "[[PREFIX]]/module.modulemap",
3838
// CHECK-NEXT: "[[PREFIX]]/mod.h",
39-
// CHECK-NEXT: "[[PREFIX]]/module.modulemap"
39+
// CHECK-NEXT: "[[PREFIX]]/a/dep.h"
4040
// CHECK-NEXT: ],
4141
// CHECK-NEXT: "link-libraries": [],
4242
// CHECK-NEXT: "name": "mod"
@@ -72,9 +72,9 @@
7272
// CHECK: ],
7373
// CHECK-NOT: "context-hash": "[[HASH_MOD_A]]",
7474
// CHECK: "file-deps": [
75-
// CHECK-NEXT: "[[PREFIX]]/b/dep.h",
75+
// CHECK-NEXT: "[[PREFIX]]/module.modulemap",
7676
// CHECK-NEXT: "[[PREFIX]]/mod.h",
77-
// CHECK-NEXT: "[[PREFIX]]/module.modulemap"
77+
// CHECK-NEXT: "[[PREFIX]]/b/dep.h"
7878
// CHECK-NEXT: ],
7979
// CHECK-NEXT: "link-libraries": [],
8080
// CHECK-NEXT: "name": "mod"

clang/test/ClangScanDeps/modules-dep-args.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,8 @@ module Direct { header "direct.h" }
5858
// CHECK: ],
5959
// CHECK-NEXT: "context-hash": "{{.*}}",
6060
// CHECK-NEXT: "file-deps": [
61-
// CHECK-NEXT: "[[PREFIX]]/direct.h",
62-
// CHECK-NEXT: "[[PREFIX]]/module.modulemap"
61+
// CHECK-NEXT: "[[PREFIX]]/module.modulemap",
62+
// CHECK-NEXT: "[[PREFIX]]/direct.h"
6363
// CHECK-NEXT: ],
6464
// CHECK-NEXT: "link-libraries": [],
6565
// CHECK-NEXT: "name": "Direct"

0 commit comments

Comments
 (0)