Skip to content

Commit f1d81db

Browse files
[ASTWriter] Do not allocate source location space for module maps used only for textual headers (#116374)
This is a follow up to #112015 and it reduces the unnecessary duplication of source locations further. We do not need to allocate source location space in the serialized PCMs for module maps used only to find textual headers. Those module maps are never referenced from anywhere in the serialized ASTs and are re-read in other compilations. This change should not affect correctness of Clang compilations or clang-scan-deps in any way. We do need the InputFile entry in the serialized AST because clang-scan-deps relies on it. The previous patch introduced a mechanism to do exactly that. We have found that to finally remove any duplication of module maps we use internally in our build system.
1 parent 6b5c67b commit f1d81db

File tree

2 files changed

+137
-14
lines changed

2 files changed

+137
-14
lines changed

clang/lib/Serialization/ASTWriter.cpp

Lines changed: 33 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -184,14 +184,30 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
184184
const SourceManager &SM = PP.getSourceManager();
185185
const ModuleMap &MM = HS.getModuleMap();
186186

187-
llvm::DenseSet<FileID> ModuleMaps;
188-
189-
llvm::DenseSet<const Module *> ProcessedModules;
190-
auto CollectModuleMapsForHierarchy = [&](const Module *M) {
187+
// Module maps used only by textual headers are special. Their FileID is
188+
// non-affecting, but their FileEntry is (i.e. must be written as InputFile).
189+
enum AffectedReason : bool {
190+
AR_TextualHeader = 0,
191+
AR_ImportOrTextualHeader = 1,
192+
};
193+
auto AssignMostImportant = [](AffectedReason &LHS, AffectedReason RHS) {
194+
LHS = std::max(LHS, RHS);
195+
};
196+
llvm::DenseMap<FileID, AffectedReason> ModuleMaps;
197+
llvm::DenseMap<const Module *, AffectedReason> ProcessedModules;
198+
auto CollectModuleMapsForHierarchy = [&](const Module *M,
199+
AffectedReason Reason) {
191200
M = M->getTopLevelModule();
192201

193-
if (!ProcessedModules.insert(M).second)
202+
// We need to process the header either when it was not present or when we
203+
// previously flagged module map as textual headers and now we found a
204+
// proper import.
205+
if (auto [It, Inserted] = ProcessedModules.insert({M, Reason});
206+
!Inserted && Reason <= It->second) {
194207
return;
208+
} else {
209+
It->second = Reason;
210+
}
195211

196212
std::queue<const Module *> Q;
197213
Q.push(M);
@@ -202,12 +218,12 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
202218
// The containing module map is affecting, because it's being pointed
203219
// into by Module::DefinitionLoc.
204220
if (auto F = MM.getContainingModuleMapFileID(Mod); F.isValid())
205-
ModuleMaps.insert(F);
221+
AssignMostImportant(ModuleMaps[F], Reason);
206222
// For inferred modules, the module map that allowed inferring is not
207223
// related to the virtual containing module map file. It did affect the
208224
// compilation, though.
209225
if (auto UniqF = MM.getModuleMapFileIDForUniquing(Mod); UniqF.isValid())
210-
ModuleMaps.insert(UniqF);
226+
AssignMostImportant(ModuleMaps[UniqF], Reason);
211227

212228
for (auto *SubM : Mod->submodules())
213229
Q.push(SubM);
@@ -216,7 +232,7 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
216232

217233
// Handle all the affecting modules referenced from the root module.
218234

219-
CollectModuleMapsForHierarchy(RootModule);
235+
CollectModuleMapsForHierarchy(RootModule, AR_ImportOrTextualHeader);
220236

221237
std::queue<const Module *> Q;
222238
Q.push(RootModule);
@@ -225,9 +241,9 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
225241
Q.pop();
226242

227243
for (const Module *ImportedModule : CurrentModule->Imports)
228-
CollectModuleMapsForHierarchy(ImportedModule);
244+
CollectModuleMapsForHierarchy(ImportedModule, AR_ImportOrTextualHeader);
229245
for (const Module *UndeclaredModule : CurrentModule->UndeclaredUses)
230-
CollectModuleMapsForHierarchy(UndeclaredModule);
246+
CollectModuleMapsForHierarchy(UndeclaredModule, AR_ImportOrTextualHeader);
231247

232248
for (auto *M : CurrentModule->submodules())
233249
Q.push(M);
@@ -256,7 +272,7 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
256272

257273
for (const auto &KH : HS.findResolvedModulesForHeader(*File))
258274
if (const Module *M = KH.getModule())
259-
CollectModuleMapsForHierarchy(M);
275+
CollectModuleMapsForHierarchy(M, AR_TextualHeader);
260276
}
261277

262278
// FIXME: This algorithm is not correct for module map hierarchies where
@@ -278,13 +294,16 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
278294
// includes a module map defining a module that's not a submodule of X.
279295

280296
llvm::DenseSet<const FileEntry *> ModuleFileEntries;
281-
for (FileID MM : ModuleMaps) {
282-
if (auto *FE = SM.getFileEntryForID(MM))
297+
llvm::DenseSet<FileID> ModuleFileIDs;
298+
for (auto [FID, Reason] : ModuleMaps) {
299+
if (Reason == AR_ImportOrTextualHeader)
300+
ModuleFileIDs.insert(FID);
301+
if (auto *FE = SM.getFileEntryForID(FID))
283302
ModuleFileEntries.insert(FE);
284303
}
285304

286305
AffectingModuleMaps R;
287-
R.DefinitionFileIDs = std::move(ModuleMaps);
306+
R.DefinitionFileIDs = std::move(ModuleFileIDs);
288307
R.DefinitionFiles = std::move(ModuleFileEntries);
289308
return std::move(R);
290309
}
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
// Same as prune-non-affecting-module-map-repeated.cpp, but check that textual-only
2+
// inclusions do not cause duplication of the module map files they are defined in.
3+
4+
// RUN: rm -rf %t && mkdir %t
5+
// RUN: split-file %s %t
6+
7+
// RUN: %clang_cc1 -xc++ -fmodules -fno-implicit-modules \
8+
// RUN: -fmodule-map-file=%t/base.map -fmodule-map-file=%t/mixed.map\
9+
// RUN: -fmodule-map-file=%t/mod1.map \
10+
// RUN: -fmodule-name=mod1 -emit-module %t/mod1.map -o %t/mod1.pcm
11+
// RUN: %clang_cc1 -xc++ -fmodules -fno-implicit-modules \
12+
// RUN: -fmodule-map-file=%t/mixed.map\
13+
// RUN: -fmodule-name=mixed -emit-module %t/mixed.map -o %t/mixed.pcm
14+
// RUN: %clang_cc1 -xc++ -fmodules -fno-implicit-modules \
15+
// RUN: -fmodule-map-file=%t/base.map -fmodule-map-file=%t/mod1.map -fmodule-map-file=%t/mod2.map \
16+
// RUN: -fmodule-file=%t/mod1.pcm -fmodule-file=%t/mixed.pcm \
17+
// RUN: -fmodule-name=mod2 -emit-module %t/mod2.map -o %t/mod2.pcm
18+
// RUN: %clang_cc1 -xc++ -fmodules -fno-implicit-modules \
19+
// RUN: -fmodule-map-file=%t/base.map -fmodule-map-file=%t/mod2.map -fmodule-map-file=%t/mod3.map \
20+
// RUN: -fmodule-file=%t/mod2.pcm -fmodule-file=%t/mixed.pcm \
21+
// RUN: -fmodule-name=mod3 -emit-module %t/mod3.map -o %t/mod3.pcm
22+
// RUN: %clang_cc1 -xc++ -fmodules -fno-implicit-modules \
23+
// RUN: -fmodule-map-file=%t/base.map -fmodule-map-file=%t/mod3.map -fmodule-map-file=%t/mod4.map \
24+
// RUN: -fmodule-file=%t/mod3.pcm \
25+
// RUN: -fmodule-name=mod4 -emit-module %t/mod4.map -o %t/mod4.pcm
26+
// RUN: %clang_cc1 -xc++ -fmodules -fno-implicit-modules -fmodule-map-file=%t/base.map -fmodule-map-file=%t/mod4.map -fmodule-file=%t/mod4.pcm -fsyntax-only -verify %t/check_slocs.cc
27+
28+
//--- base.map
29+
module base { textual header "vector.h" }
30+
//--- mixed.map
31+
module mixed { textual header "mixed_text.h" header "mixed_mod.h"}
32+
//--- mod1.map
33+
module mod1 { header "mod1.h" }
34+
//--- mod2.map
35+
module mod2 { header "mod2.h" }
36+
//--- mod3.map
37+
module mod3 { header "mod3.h" }
38+
//--- mod4.map
39+
module mod4 { header "mod4.h" }
40+
//--- check_slocs.cc
41+
#include "mod4.h"
42+
#include "vector.h"
43+
#pragma clang __debug sloc_usage // expected-remark {{source manager location address space usage}}
44+
// expected-note@* {{% of available space}}
45+
46+
// base.map must only be present once, despite being used in each module.
47+
// Because its location in every module compile should be non-affecting.
48+
49+
// [email protected]:1 {{file entered 1 time}}
50+
51+
// different modules use either only textual header from mixed.map or both textual and modular
52+
// headers. Either combination must lead to only 1 use at the end, because the module is ultimately
53+
// in the import chain and any textual uses should not change that.
54+
55+
// [email protected]:1 {{file entered 1 time}}
56+
57+
// expected-note@* + {{file entered}}
58+
59+
60+
//--- vector.h
61+
#ifndef VECTOR_H
62+
#define VECTOR_H
63+
#endif
64+
65+
//--- mixed_text.h
66+
#ifndef MIXED_TEXT_H
67+
#define MIXED_TEXT_H
68+
#endif
69+
//--- mixed_mod.h
70+
#ifndef MIXED_MOD_H
71+
#define MIXED_MOD_H
72+
#endif
73+
74+
//--- mod1.h
75+
#ifndef MOD1
76+
#define MOD1
77+
#include "vector.h"
78+
#include "mixed_text.h"
79+
int mod1();
80+
#endif
81+
//--- mod2.h
82+
#ifndef MOD2
83+
#define MOD2
84+
#include "vector.h"
85+
#include "mod1.h"
86+
#include "mixed_mod.h"
87+
int mod2();
88+
#endif
89+
//--- mod3.h
90+
#ifndef MOD3
91+
#define MOD3
92+
#include "vector.h"
93+
#include "mod2.h"
94+
#include "mixed_text.h"
95+
#include "mixed_mod.h"
96+
int mod3();
97+
#endif
98+
//--- mod4.h
99+
#ifndef MOD4
100+
#define MOD4
101+
#include "vector.h"
102+
#include "mod3.h"
103+
int mod4();
104+
#endif

0 commit comments

Comments
 (0)