Skip to content

[ASTWriter] Do not allocate source location space for module maps used only for textual headers #116374

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Dec 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 33 additions & 14 deletions clang/lib/Serialization/ASTWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,14 +184,30 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
const SourceManager &SM = PP.getSourceManager();
const ModuleMap &MM = HS.getModuleMap();

llvm::DenseSet<FileID> ModuleMaps;

llvm::DenseSet<const Module *> ProcessedModules;
auto CollectModuleMapsForHierarchy = [&](const Module *M) {
// Module maps used only by textual headers are special. Their FileID is
// non-affecting, but their FileEntry is (i.e. must be written as InputFile).
enum AffectedReason : bool {
AR_TextualHeader = 0,
AR_ImportOrTextualHeader = 1,
};
auto AssignMostImportant = [](AffectedReason &LHS, AffectedReason RHS) {
LHS = std::max(LHS, RHS);
};
llvm::DenseMap<FileID, AffectedReason> ModuleMaps;
llvm::DenseMap<const Module *, AffectedReason> ProcessedModules;
auto CollectModuleMapsForHierarchy = [&](const Module *M,
AffectedReason Reason) {
M = M->getTopLevelModule();

if (!ProcessedModules.insert(M).second)
// We need to process the header either when it was not present or when we
// previously flagged module map as textual headers and now we found a
// proper import.
if (auto [It, Inserted] = ProcessedModules.insert({M, Reason});
!Inserted && Reason <= It->second) {
return;
} else {
It->second = Reason;
}
Comment on lines +208 to +210
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Avoid else after return.

Suggested change
} else {
It->second = Reason;
}
}
It->second = Reason;

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I cannot in that case, because It is only valid for the scope of the if.
Increasing the visibility scope of It seems like a worse trade-off. But let me know if you have other suggestions how to rewrite the code better, I might be missing some ideas.


std::queue<const Module *> Q;
Q.push(M);
Expand All @@ -202,12 +218,12 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
// The containing module map is affecting, because it's being pointed
// into by Module::DefinitionLoc.
if (auto F = MM.getContainingModuleMapFileID(Mod); F.isValid())
ModuleMaps.insert(F);
AssignMostImportant(ModuleMaps[F], Reason);
// For inferred modules, the module map that allowed inferring is not
// related to the virtual containing module map file. It did affect the
// compilation, though.
if (auto UniqF = MM.getModuleMapFileIDForUniquing(Mod); UniqF.isValid())
ModuleMaps.insert(UniqF);
AssignMostImportant(ModuleMaps[UniqF], Reason);

for (auto *SubM : Mod->submodules())
Q.push(SubM);
Expand All @@ -216,7 +232,7 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {

// Handle all the affecting modules referenced from the root module.

CollectModuleMapsForHierarchy(RootModule);
CollectModuleMapsForHierarchy(RootModule, AR_ImportOrTextualHeader);

std::queue<const Module *> Q;
Q.push(RootModule);
Expand All @@ -225,9 +241,9 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
Q.pop();

for (const Module *ImportedModule : CurrentModule->Imports)
CollectModuleMapsForHierarchy(ImportedModule);
CollectModuleMapsForHierarchy(ImportedModule, AR_ImportOrTextualHeader);
for (const Module *UndeclaredModule : CurrentModule->UndeclaredUses)
CollectModuleMapsForHierarchy(UndeclaredModule);
CollectModuleMapsForHierarchy(UndeclaredModule, AR_ImportOrTextualHeader);

for (auto *M : CurrentModule->submodules())
Q.push(M);
Expand Down Expand Up @@ -256,7 +272,7 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {

for (const auto &KH : HS.findResolvedModulesForHeader(*File))
if (const Module *M = KH.getModule())
CollectModuleMapsForHierarchy(M);
CollectModuleMapsForHierarchy(M, AR_TextualHeader);
}

// FIXME: This algorithm is not correct for module map hierarchies where
Expand All @@ -278,13 +294,16 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
// includes a module map defining a module that's not a submodule of X.

llvm::DenseSet<const FileEntry *> ModuleFileEntries;
for (FileID MM : ModuleMaps) {
if (auto *FE = SM.getFileEntryForID(MM))
llvm::DenseSet<FileID> ModuleFileIDs;
for (auto [FID, Reason] : ModuleMaps) {
if (Reason == AR_ImportOrTextualHeader)
ModuleFileIDs.insert(FID);
if (auto *FE = SM.getFileEntryForID(FID))
ModuleFileEntries.insert(FE);
}

AffectingModuleMaps R;
R.DefinitionFileIDs = std::move(ModuleMaps);
R.DefinitionFileIDs = std::move(ModuleFileIDs);
R.DefinitionFiles = std::move(ModuleFileEntries);
return std::move(R);
}
Expand Down
104 changes: 104 additions & 0 deletions clang/test/Modules/prune-non-affecting-module-map-repeated-textual.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
// Same as prune-non-affecting-module-map-repeated.cpp, but check that textual-only
// inclusions do not cause duplication of the module map files they are defined in.

// RUN: rm -rf %t && mkdir %t
// RUN: split-file %s %t

// RUN: %clang_cc1 -xc++ -fmodules -fno-implicit-modules \
// RUN: -fmodule-map-file=%t/base.map -fmodule-map-file=%t/mixed.map\
// RUN: -fmodule-map-file=%t/mod1.map \
// RUN: -fmodule-name=mod1 -emit-module %t/mod1.map -o %t/mod1.pcm
// RUN: %clang_cc1 -xc++ -fmodules -fno-implicit-modules \
// RUN: -fmodule-map-file=%t/mixed.map\
// RUN: -fmodule-name=mixed -emit-module %t/mixed.map -o %t/mixed.pcm
// RUN: %clang_cc1 -xc++ -fmodules -fno-implicit-modules \
// RUN: -fmodule-map-file=%t/base.map -fmodule-map-file=%t/mod1.map -fmodule-map-file=%t/mod2.map \
// RUN: -fmodule-file=%t/mod1.pcm -fmodule-file=%t/mixed.pcm \
// RUN: -fmodule-name=mod2 -emit-module %t/mod2.map -o %t/mod2.pcm
// RUN: %clang_cc1 -xc++ -fmodules -fno-implicit-modules \
// RUN: -fmodule-map-file=%t/base.map -fmodule-map-file=%t/mod2.map -fmodule-map-file=%t/mod3.map \
// RUN: -fmodule-file=%t/mod2.pcm -fmodule-file=%t/mixed.pcm \
// RUN: -fmodule-name=mod3 -emit-module %t/mod3.map -o %t/mod3.pcm
// RUN: %clang_cc1 -xc++ -fmodules -fno-implicit-modules \
// RUN: -fmodule-map-file=%t/base.map -fmodule-map-file=%t/mod3.map -fmodule-map-file=%t/mod4.map \
// RUN: -fmodule-file=%t/mod3.pcm \
// RUN: -fmodule-name=mod4 -emit-module %t/mod4.map -o %t/mod4.pcm
// RUN: %clang_cc1 -xc++ -fmodules -fno-implicit-modules -fmodule-map-file=%t/base.map -fmodule-map-file=%t/mod4.map -fmodule-file=%t/mod4.pcm -fsyntax-only -verify %t/check_slocs.cc

//--- base.map
module base { textual header "vector.h" }
//--- mixed.map
module mixed { textual header "mixed_text.h" header "mixed_mod.h"}
//--- mod1.map
module mod1 { header "mod1.h" }
//--- mod2.map
module mod2 { header "mod2.h" }
//--- mod3.map
module mod3 { header "mod3.h" }
//--- mod4.map
module mod4 { header "mod4.h" }
//--- check_slocs.cc
#include "mod4.h"
#include "vector.h"
#pragma clang __debug sloc_usage // expected-remark {{source manager location address space usage}}
// expected-note@* {{% of available space}}

// base.map must only be present once, despite being used in each module.
// Because its location in every module compile should be non-affecting.

// [email protected]:1 {{file entered 1 time}}

// different modules use either only textual header from mixed.map or both textual and modular
// headers. Either combination must lead to only 1 use at the end, because the module is ultimately
// in the import chain and any textual uses should not change that.

// [email protected]:1 {{file entered 1 time}}

// expected-note@* + {{file entered}}


//--- vector.h
#ifndef VECTOR_H
#define VECTOR_H
#endif

//--- mixed_text.h
#ifndef MIXED_TEXT_H
#define MIXED_TEXT_H
#endif
//--- mixed_mod.h
#ifndef MIXED_MOD_H
#define MIXED_MOD_H
#endif

//--- mod1.h
#ifndef MOD1
#define MOD1
#include "vector.h"
#include "mixed_text.h"
int mod1();
#endif
//--- mod2.h
#ifndef MOD2
#define MOD2
#include "vector.h"
#include "mod1.h"
#include "mixed_mod.h"
int mod2();
#endif
//--- mod3.h
#ifndef MOD3
#define MOD3
#include "vector.h"
#include "mod2.h"
#include "mixed_text.h"
#include "mixed_mod.h"
int mod3();
#endif
//--- mod4.h
#ifndef MOD4
#define MOD4
#include "vector.h"
#include "mod3.h"
int mod4();
#endif
Loading