Skip to content

Commit 869f9b0

Browse files
committed
[clang][modules] Allow including module maps to be non-affecting (llvm#89992)
The dependency scanner only puts top-level affecting module map files on the command line for explicitly building a module. This is done because any affecting child module map files should be referenced by the top-level one, meaning listing them explicitly does not have any meaning and only makes the command lines longer. However, a problem arises whenever the definition of an affecting module lives in a module map that is not top-level. Considering the rules explained above, such module map file would not make it to the command line. That's why 83973cf started marking the parents of an affecting module map file as affecting too. This way, the top-level file does make it into the command line. This can be problematic, though. On macOS, for example, the Darwin module lives in "/usr/include/Darwin.modulemap" one of many module map files included by "/usr/include/module.modulemap". Reporting the parent on the command line forces explicit builds to parse all the other module map files included by it, which is not necessary and can get expensive in terms of file system traffic. This patch solves that performance issue by stopping marking parent module map files as affecting, and marking module map files as top-level whenever they are top-level among the set of affecting files, not among the set of all known files. This means that the top-level "/usr/include/module.modulemap" is now not marked as affecting and "/usr/include/Darwin.modulemap" is. (cherry picked from commit 477c705)
1 parent c50b77d commit 869f9b0

File tree

3 files changed

+75
-44
lines changed

3 files changed

+75
-44
lines changed

clang/include/clang/Serialization/ASTWriter.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,10 @@ class StoredDeclsList;
7676
class SwitchCase;
7777
class Token;
7878

79+
namespace SrcMgr {
80+
class FileInfo;
81+
} // namespace SrcMgr
82+
7983
/// Writes an AST file containing the contents of a translation unit.
8084
///
8185
/// The ASTWriter class produces a bitstream containing the serialized
@@ -472,6 +476,11 @@ class ASTWriter : public ASTDeserializationListener,
472476
/// during \c SourceManager serialization.
473477
void computeNonAffectingInputFiles();
474478

479+
/// Some affecting files can be included from files that are not affecting.
480+
/// This function erases source locations pointing into such files.
481+
SourceLocation getAffectingIncludeLoc(const SourceManager &SourceMgr,
482+
const SrcMgr::FileInfo &File);
483+
475484
/// Returns an adjusted \c FileID, accounting for any non-affecting input
476485
/// files.
477486
FileID getAdjustedFileID(FileID FID) const;

clang/lib/Serialization/ASTWriter.cpp

Lines changed: 58 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -170,54 +170,50 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
170170

171171
const HeaderSearch &HS = PP.getHeaderSearchInfo();
172172
const ModuleMap &MM = HS.getModuleMap();
173-
const SourceManager &SourceMgr = PP.getSourceManager();
174173

175174
std::set<const FileEntry *> ModuleMaps;
176-
auto CollectIncludingModuleMaps = [&](FileID FID, FileEntryRef F) {
177-
if (!ModuleMaps.insert(F).second)
175+
std::set<const Module *> ProcessedModules;
176+
auto CollectModuleMapsForHierarchy = [&](const Module *M) {
177+
M = M->getTopLevelModule();
178+
179+
if (!ProcessedModules.insert(M).second)
178180
return;
179-
SourceLocation Loc = SourceMgr.getIncludeLoc(FID);
180-
// The include location of inferred module maps can point into the header
181-
// file that triggered the inferring. Cut off the walk if that's the case.
182-
while (Loc.isValid() && isModuleMap(SourceMgr.getFileCharacteristic(Loc))) {
183-
FID = SourceMgr.getFileID(Loc);
184-
F = *SourceMgr.getFileEntryRefForID(FID);
185-
if (!ModuleMaps.insert(F).second)
186-
break;
187-
Loc = SourceMgr.getIncludeLoc(FID);
188-
}
189-
};
190181

191-
std::set<const Module *> ProcessedModules;
192-
auto CollectIncludingMapsFromAncestors = [&](const Module *M) {
193-
for (const Module *Mod = M; Mod; Mod = Mod->Parent) {
194-
if (!ProcessedModules.insert(Mod).second)
195-
break;
182+
std::queue<const Module *> Q;
183+
Q.push(M);
184+
while (!Q.empty()) {
185+
const Module *Mod = Q.front();
186+
Q.pop();
187+
196188
// The containing module map is affecting, because it's being pointed
197189
// into by Module::DefinitionLoc.
198-
if (FileID FID = MM.getContainingModuleMapFileID(Mod); FID.isValid())
199-
CollectIncludingModuleMaps(FID, *SourceMgr.getFileEntryRefForID(FID));
200-
// For inferred modules, the module map that allowed inferring is not in
201-
// the include chain of the virtual containing module map file. It did
202-
// affect the compilation, though.
203-
if (FileID FID = MM.getModuleMapFileIDForUniquing(Mod); FID.isValid())
204-
CollectIncludingModuleMaps(FID, *SourceMgr.getFileEntryRefForID(FID));
190+
if (auto FE = MM.getContainingModuleMapFile(Mod))
191+
ModuleMaps.insert(*FE);
192+
// For inferred modules, the module map that allowed inferring is not
193+
// related to the virtual containing module map file. It did affect the
194+
// compilation, though.
195+
if (auto FE = MM.getModuleMapFileForUniquing(Mod))
196+
ModuleMaps.insert(*FE);
197+
198+
for (auto *SubM : Mod->submodules())
199+
Q.push(SubM);
205200
}
206201
};
207202

208203
// Handle all the affecting modules referenced from the root module.
209204

205+
CollectModuleMapsForHierarchy(RootModule);
206+
210207
std::queue<const Module *> Q;
211208
Q.push(RootModule);
212209
while (!Q.empty()) {
213210
const Module *CurrentModule = Q.front();
214211
Q.pop();
215212

216-
CollectIncludingMapsFromAncestors(CurrentModule);
217213
for (const Module *ImportedModule : CurrentModule->Imports)
218-
CollectIncludingMapsFromAncestors(ImportedModule);
214+
CollectModuleMapsForHierarchy(ImportedModule);
219215
for (const Module *UndeclaredModule : CurrentModule->UndeclaredUses)
220-
CollectIncludingMapsFromAncestors(UndeclaredModule);
216+
CollectModuleMapsForHierarchy(UndeclaredModule);
221217

222218
for (auto *M : CurrentModule->submodules())
223219
Q.push(M);
@@ -246,9 +242,27 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
246242

247243
for (const auto &KH : HS.findResolvedModulesForHeader(File))
248244
if (const Module *M = KH.getModule())
249-
CollectIncludingMapsFromAncestors(M);
245+
CollectModuleMapsForHierarchy(M);
250246
}
251247

248+
// FIXME: This algorithm is not correct for module map hierarchies where
249+
// module map file defining a (sub)module of a top-level module X includes
250+
// a module map file that defines a (sub)module of another top-level module Y.
251+
// Whenever X is affecting and Y is not, "replaying" this PCM file will fail
252+
// when parsing module map files for X due to not knowing about the `extern`
253+
// module map for Y.
254+
//
255+
// We don't have a good way to fix it here. We could mark all children of
256+
// affecting module map files as being affecting as well, but that's
257+
// expensive. SourceManager does not model the edge from parent to child
258+
// SLocEntries, so instead, we would need to iterate over leaf module map
259+
// files, walk up their include hierarchy and check whether we arrive at an
260+
// affecting module map.
261+
//
262+
// Instead of complicating and slowing down this function, we should probably
263+
// just ban module map hierarchies where module map defining a (sub)module X
264+
// includes a module map defining a module that's not a submodule of X.
265+
252266
return ModuleMaps;
253267
}
254268

@@ -1648,6 +1662,18 @@ struct InputFileEntry {
16481662

16491663
} // namespace
16501664

1665+
SourceLocation ASTWriter::getAffectingIncludeLoc(const SourceManager &SourceMgr,
1666+
const SrcMgr::FileInfo &File) {
1667+
SourceLocation IncludeLoc = File.getIncludeLoc();
1668+
if (IncludeLoc.isValid()) {
1669+
FileID IncludeFID = SourceMgr.getFileID(IncludeLoc);
1670+
assert(IncludeFID.isValid() && "IncludeLoc in invalid file");
1671+
if (!IsSLocAffecting[IncludeFID.ID])
1672+
IncludeLoc = SourceLocation();
1673+
}
1674+
return IncludeLoc;
1675+
}
1676+
16511677
void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
16521678
HeaderSearchOptions &HSOpts) {
16531679
using namespace llvm;
@@ -1701,7 +1727,7 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
17011727
Entry.IsSystemFile = isSystem(File.getFileCharacteristic());
17021728
Entry.IsTransient = Cache->IsTransient;
17031729
Entry.BufferOverridden = Cache->BufferOverridden;
1704-
Entry.IsTopLevel = File.getIncludeLoc().isInvalid();
1730+
Entry.IsTopLevel = getAffectingIncludeLoc(SourceMgr, File).isInvalid();
17051731
Entry.IsModuleMap = isModuleMap(File.getFileCharacteristic());
17061732

17071733
auto ContentHash = hash_code(-1);
@@ -2227,7 +2253,7 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr,
22272253
SLocEntryOffsets.push_back(Offset);
22282254
// Starting offset of this entry within this module, so skip the dummy.
22292255
Record.push_back(getAdjustedOffset(SLoc->getOffset()) - 2);
2230-
AddSourceLocation(File.getIncludeLoc(), Record);
2256+
AddSourceLocation(getAffectingIncludeLoc(SourceMgr, File), Record);
22312257
Record.push_back(File.getFileCharacteristic()); // FIXME: stable encoding
22322258
Record.push_back(File.hasLineDirectives());
22332259

clang/test/ClangScanDeps/modules-extern-unrelated.m

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
// This test checks that only module map files defining affecting modules are
2+
// affecting.
3+
14
// RUN: rm -rf %t
25
// RUN: split-file %s %t
36

@@ -22,15 +25,8 @@
2225
//--- second/second.h
2326
#include "first_other.h"
2427

25-
//--- cdb.json.template
26-
[{
27-
"directory": "DIR",
28-
"file": "DIR/tu.m",
29-
"command": "clang -fmodules -fmodules-cache-path=DIR/cache -I DIR/zeroth -I DIR/first -I DIR/second -c DIR/tu.m -o DIR/tu.o"
30-
}]
31-
32-
// RUN: sed -e "s|DIR|%/t|g" -e "s|INPUTS|%/S/Inputs|g" %t/cdb.json.template > %t/cdb.json
33-
// RUN: clang-scan-deps -compilation-database %t/cdb.json -format experimental-full > %t/result.json
28+
// RUN: clang-scan-deps -format experimental-full -o %t/result.json \
29+
// RUN: -- %clang -fmodules -fmodules-cache-path=%t/cache -I %t/zeroth -I %t/first -I %t/second -c %t/tu.m -o %t/tu.o
3430
// RUN: cat %t/result.json | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t
3531

3632
// CHECK: {
@@ -67,11 +63,11 @@
6763
// CHECK-NEXT: ],
6864
// CHECK-NEXT: "clang-modulemap-file": "[[PREFIX]]/second/second.modulemap",
6965
// CHECK-NEXT: "command-line": [
66+
// CHECK-NOT: "-fmodule-map-file=[[PREFIX]]/second/module.modulemap"
7067
// CHECK: ],
7168
// CHECK-NEXT: "context-hash": "{{.*}}",
7269
// CHECK-NEXT: "file-deps": [
7370
// CHECK-NEXT: "[[PREFIX]]/first/module.modulemap",
74-
// CHECK-NEXT: "[[PREFIX]]/second/module.modulemap",
7571
// CHECK-NEXT: "[[PREFIX]]/second/second.h",
7672
// CHECK-NEXT: "[[PREFIX]]/second/second.modulemap"
7773
// CHECK-NEXT: ],
@@ -90,11 +86,11 @@
9086
// CHECK-NEXT: ],
9187
// CHECK-NEXT: "clang-modulemap-file": "[[PREFIX]]/zeroth/module.modulemap",
9288
// CHECK-NEXT: "command-line": [
89+
// CHECK-NOT: "-fmodule-map-file=[[PREFIX]]/second/module.modulemap"
9390
// CHECK: ],
9491
// CHECK-NEXT: "context-hash": "{{.*}}",
9592
// CHECK-NEXT: "file-deps": [
9693
// CHECK-NEXT: "[[PREFIX]]/first/module.modulemap",
97-
// CHECK-NEXT: "[[PREFIX]]/second/module.modulemap",
9894
// CHECK-NEXT: "[[PREFIX]]/second/second.modulemap",
9995
// CHECK-NEXT: "[[PREFIX]]/zeroth/module.modulemap",
10096
// CHECK-NEXT: "[[PREFIX]]/zeroth/zeroth.h"
@@ -115,7 +111,7 @@
115111
// CHECK-NEXT: ],
116112
// CHECK-NEXT: "command-line": [
117113
// CHECK: ],
118-
// CHECK-NEXT: "executable": "clang",
114+
// CHECK-NEXT: "executable": "{{.*}}",
119115
// CHECK-NEXT: "file-deps": [
120116
// CHECK-NEXT: "[[PREFIX]]/tu.m"
121117
// CHECK-NEXT: ],

0 commit comments

Comments
 (0)