Skip to content

[clang][modules] Track included files per submodule #71117

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/Module.h
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,8 @@ class alignas(8) Module {
/// to import but didn't because they are not direct uses.
llvm::SmallSetVector<const Module *, 2> UndeclaredUses;

llvm::DenseSet<const FileEntry *> Includes;

/// A library or framework to link against when an entity from this
/// module is used.
struct LinkLibrary {
Expand Down
39 changes: 31 additions & 8 deletions clang/include/clang/Lex/Preprocessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -982,6 +982,8 @@ class Preprocessor {
/// The set of modules that are visible within the submodule.
VisibleModuleSet VisibleModules;

/// The files that have been included.
IncludedFilesSet IncludedFiles;
// FIXME: CounterValue?
// FIXME: PragmaPushMacroInfo?
};
Expand All @@ -994,8 +996,8 @@ class Preprocessor {
/// in a submodule.
SubmoduleState *CurSubmoduleState;

/// The files that have been included.
IncludedFilesSet IncludedFiles;
/// The files that have been included outside of (sub)modules.
IncludedFilesSet Includes;

/// The set of top-level modules that affected preprocessing, but were not
/// imported.
Expand Down Expand Up @@ -1484,19 +1486,40 @@ class Preprocessor {
/// Mark the file as included.
/// Returns true if this is the first time the file was included.
bool markIncluded(FileEntryRef File) {
HeaderInfo.getFileInfo(File);
return IncludedFiles.insert(File).second;
bool AlreadyIncluded = alreadyIncluded(File);
CurSubmoduleState->IncludedFiles.insert(File);
if (!BuildingSubmoduleStack.empty())
BuildingSubmoduleStack.back().M->Includes.insert(File);
else if (Module *M = getCurrentModule())
M->Includes.insert(File);
else
Includes.insert(File);
return !AlreadyIncluded;
}

/// Return true if this header has already been included.
bool alreadyIncluded(FileEntryRef File) const {
HeaderInfo.getFileInfo(File);
return IncludedFiles.count(File);
if (CurSubmoduleState->IncludedFiles.contains(File))
return true;
// TODO: Do this more efficiently.
for (const auto &[Name, M] : HeaderInfo.getModuleMap().modules())
if (CurSubmoduleState->VisibleModules.isVisible(M))
if (M->Includes.contains(File))
return true;
return false;
}

/// Record \p File as included outside of any (sub)module, and also add it to
/// the included-files set of the current submodule state.
void markIncludedOnTopLevel(const FileEntry *File) {
  CurSubmoduleState->IncludedFiles.insert(File);
  Includes.insert(File);
}

/// Record \p File in the set of files included by module \p M.
void markIncludedInModule(Module *M, const FileEntry *File) {
  auto &ModuleIncludes = M->Includes;
  ModuleIncludes.insert(File);
}

/// Get the set of included files.
IncludedFilesSet &getIncludedFiles() { return IncludedFiles; }
const IncludedFilesSet &getIncludedFiles() const { return IncludedFiles; }
const IncludedFilesSet &getTopLevelIncludes() const { return Includes; }

/// Return the name of the macro defined before \p Loc that has
/// spelling \p Tokens. If there are multiple macros with same spelling,
Expand Down
52 changes: 28 additions & 24 deletions clang/lib/Serialization/ASTReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1975,19 +1975,15 @@ ASTReader::getGlobalPreprocessedEntityID(ModuleFile &M,
return LocalID + I->second;
}

const FileEntry *HeaderFileInfoTrait::getFile(const internal_key_type &Key) {
OptionalFileEntryRefDegradesToFileEntryPtr
HeaderFileInfoTrait::getFile(const internal_key_type &Key) {
FileManager &FileMgr = Reader.getFileManager();
if (!Key.Imported) {
if (auto File = FileMgr.getFile(Key.Filename))
return *File;
return nullptr;
}
if (!Key.Imported)
return FileMgr.getOptionalFileRef(Key.Filename);

std::string Resolved = std::string(Key.Filename);
Reader.ResolveImportedPath(M, Resolved);
if (auto File = FileMgr.getFile(Resolved))
return *File;
return nullptr;
return FileMgr.getOptionalFileRef(Resolved);
}

unsigned HeaderFileInfoTrait::ComputeHash(internal_key_ref ikey) {
Expand Down Expand Up @@ -2043,13 +2039,6 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d,
HeaderFileInfo HFI;
unsigned Flags = *d++;

bool Included = (Flags >> 6) & 0x01;
if (Included)
if (const FileEntry *FE = getFile(key))
// Not using \c Preprocessor::markIncluded(), since that would attempt to
// deserialize this header file info again.
Reader.getPreprocessor().getIncludedFiles().insert(FE);

// FIXME: Refactor with mergeHeaderFileInfo in HeaderSearch.cpp.
HFI.isImport |= (Flags >> 5) & 0x01;
HFI.isPragmaOnce |= (Flags >> 4) & 0x01;
Expand All @@ -2065,6 +2054,27 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d,
HFI.Framework = HS->getUniqueFrameworkName(FrameworkName);
}

OptionalFileEntryRefDegradesToFileEntryPtr FE = getFile(key);
Preprocessor &PP = Reader.getPreprocessor();
ModuleMap &ModMap = PP.getHeaderSearchInfo().getModuleMap();

unsigned IncludedCount =
endian::readNext<uint32_t, llvm::endianness::little, unaligned>(d);
for (unsigned I = 0; I < IncludedCount; ++I) {
uint32_t LocalSMID =
endian::readNext<uint32_t, llvm::endianness::little, unaligned>(d);
if (!FE)
continue;

if (LocalSMID == 0) {
PP.markIncludedOnTopLevel(*FE);
} else {
SubmoduleID GlobalSMID = Reader.getGlobalSubmoduleID(M, LocalSMID);
Module *Mod = Reader.getSubmodule(GlobalSMID);
PP.markIncludedInModule(Mod, *FE);
}
}

assert((End - d) % 4 == 0 &&
"Wrong data length in HeaderFileInfo deserialization");
while (d != End) {
Expand All @@ -2077,14 +2087,8 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d,
// implicit module import.
SubmoduleID GlobalSMID = Reader.getGlobalSubmoduleID(M, LocalSMID);
Module *Mod = Reader.getSubmodule(GlobalSMID);
FileManager &FileMgr = Reader.getFileManager();
ModuleMap &ModMap =
Reader.getPreprocessor().getHeaderSearchInfo().getModuleMap();

std::string Filename = std::string(key.Filename);
if (key.Imported)
Reader.ResolveImportedPath(M, Filename);
if (auto FE = FileMgr.getOptionalFileRef(Filename)) {

if (FE) {
// FIXME: NameAsWritten
Module::Header H = {std::string(key.Filename), "", *FE};
ModMap.addHeader(Mod, H, HeaderRole, /*Imported=*/true);
Expand Down
3 changes: 2 additions & 1 deletion clang/lib/Serialization/ASTReaderInternals.h
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,8 @@ class HeaderFileInfoTrait {
data_type ReadData(internal_key_ref,const unsigned char *d, unsigned DataLen);

private:
const FileEntry *getFile(const internal_key_type &Key);
OptionalFileEntryRefDegradesToFileEntryPtr
getFile(const internal_key_type &Key);
};

/// The on-disk hash table used for known header files.
Expand Down
39 changes: 33 additions & 6 deletions clang/lib/Serialization/ASTWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1842,7 +1842,7 @@ namespace {

struct data_type {
const HeaderFileInfo &HFI;
bool AlreadyIncluded;
std::vector<const Module *> Includers;
ArrayRef<ModuleMap::KnownHeader> KnownHeaders;
UnresolvedModule Unresolved;
};
Expand All @@ -1862,6 +1862,12 @@ namespace {
EmitKeyDataLength(raw_ostream& Out, key_type_ref key, data_type_ref Data) {
unsigned KeyLen = key.Filename.size() + 1 + 8 + 8;
unsigned DataLen = 1 + 4 + 4;

DataLen += 4;
for (const Module *M : Data.Includers)
if (!M || Writer.getLocalOrImportedSubmoduleID(M))
DataLen += 4;

for (auto ModInfo : Data.KnownHeaders)
if (Writer.getLocalOrImportedSubmoduleID(ModInfo.getModule()))
DataLen += 4;
Expand All @@ -1888,8 +1894,7 @@ namespace {
endian::Writer LE(Out, llvm::endianness::little);
uint64_t Start = Out.tell(); (void)Start;

unsigned char Flags = (Data.AlreadyIncluded << 6)
| (Data.HFI.isImport << 5)
unsigned char Flags = (Data.HFI.isImport << 5)
| (Data.HFI.isPragmaOnce << 4)
| (Data.HFI.DirInfo << 1)
| Data.HFI.IndexHeaderMapHeader;
Expand All @@ -1916,6 +1921,14 @@ namespace {
}
LE.write<uint32_t>(Offset);

LE.write<uint32_t>(Data.Includers.size());
for (const Module *M : Data.Includers) {
if (!M)
LE.write<uint32_t>(0);
else if (uint32_t ModID = Writer.getLocalOrImportedSubmoduleID(M))
LE.write<uint32_t>(ModID);
}

auto EmitModule = [&](Module *M, ModuleMap::ModuleHeaderRole Role) {
if (uint32_t ModID = Writer.getLocalOrImportedSubmoduleID(M)) {
uint32_t Value = (ModID << 3) | (unsigned)Role;
Expand Down Expand Up @@ -1990,7 +2003,7 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) {
HeaderFileInfoTrait::key_type Key = {
FilenameDup, *U.Size, IncludeTimestamps ? *U.ModTime : 0};
HeaderFileInfoTrait::data_type Data = {
Empty, false, {}, {M, ModuleMap::headerKindToRole(U.Kind)}};
Empty, {}, {}, {M, ModuleMap::headerKindToRole(U.Kind)}};
// FIXME: Deal with cases where there are multiple unresolved header
// directives in different submodules for the same header.
Generator.insert(Key, Data, GeneratorTrait);
Expand Down Expand Up @@ -2033,13 +2046,27 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) {
SavedStrings.push_back(Filename.data());
}

bool Included = PP->alreadyIncluded(*File);
std::vector<const Module *> Includers;
if (WritingModule) {
llvm::DenseSet<const Module *> Seen;
std::function<void(const Module *)> Visit = [&](const Module *M) {
if (!Seen.insert(M).second)
return;
if (M->Includes.contains(*File))
Includers.push_back(M);
for (const Module *SubM : M->submodules())
Visit(SubM);
};
Visit(WritingModule);
} else if (PP->getTopLevelIncludes().contains(*File)) {
Includers.push_back(nullptr);
}

HeaderFileInfoTrait::key_type Key = {
Filename, File->getSize(), getTimestampForOutput(*File)
};
HeaderFileInfoTrait::data_type Data = {
*HFI, Included, HS.getModuleMap().findResolvedModulesForHeader(*File), {}
*HFI, Includers, HS.getModuleMap().findResolvedModulesForHeader(*File), {}
};
Generator.insert(Key, Data, GeneratorTrait);
++NumHeaderSearchEntries;
Expand Down
53 changes: 53 additions & 0 deletions clang/test/Modules/per-submodule-includes.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// RUN: rm -rf %t
// RUN: split-file %s %t

//--- frameworks/Textual.framework/Headers/Header.h
static int symbol;

//--- frameworks/FW.framework/Modules/module.modulemap
framework module FW {
umbrella header "FW.h"
export *
module * { export * }
}
//--- frameworks/FW.framework/Headers/FW.h
#import <FW/Sub1.h>
#import <FW/Sub2.h>
//--- frameworks/FW.framework/Headers/Sub1.h
//--- frameworks/FW.framework/Headers/Sub2.h
#import <Textual/Header.h>

//--- pch.modulemap
module __PCH {
header "pch.h"
export *
}
//--- pch.h
#import <FW/Sub1.h>

//--- tu.m
#import <Textual/Header.h>
int fn() { return symbol; }

// Compilation using the PCH regularly succeeds. The import of FW/Sub1.h in the
// PCH is treated textually due to -fmodule-name=FW.
//
// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/cache -fimplicit-module-maps -F %t/frameworks -fmodule-name=FW \
// RUN: -emit-pch -x objective-c %t/pch.h -o %t/pch.h.gch
//
// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/cache -fimplicit-module-maps -F %t/frameworks -fmodule-name=FW \
// RUN: -include-pch %t/pch.h.gch -fsyntax-only %t/tu.m

// Compilation using the PCH as precompiled module fails. The import of FW/Sub1.h
// in the PCH is translated to an import. Nothing is preventing that now that
// -fmodule-name=FW has been replaced with -fmodule-name=__PCH.
//
// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/cache -fimplicit-module-maps -F %t/frameworks \
// RUN: -emit-module -fmodule-name=__PCH -x objective-c %t/pch.modulemap -o %t/pch.h.pcm
//
// Loading FW.pcm marks Textual/Header.h as imported (because it is imported in
// FW.Sub2), so the TU does not import it again. Its contents remain invisible,
// though.
//
// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/cache -fimplicit-module-maps -F %t/frameworks \
// RUN: -include %t/pch.h -fmodule-map-file=%t/pch.modulemap -fsyntax-only %t/tu.m