Skip to content

Commit 84df7a0

Browse files
authored
[clang][modules] Do not resolve HeaderFileInfo externally in ASTWriter (#87848)
Clang uses the `HeaderFileInfo` struct to track bits of information on header files, which gets used throughout the compiler. We also use this to compute the set of affecting module maps in `ASTWriter` and in the end serialize the information into the `HEADER_SEARCH_TABLE` record of a PCM file, allowing clients to learn about headers from the module. In doing so, Clang asks for existing `HeaderFileInfo` for all known `FileEntries`. Note that this asks the loaded PCM files for the information they have on each header file in question. This seems unnecessary: we only want to serialize information on header files that either belong to the current module or that got included textually. Loaded PCM files can't provide us with any useful information. For explicit modules with lazy loading (using `-fmodule-map-file=<path>` with `-fmodule-file=<name>=<path>`) the compiler knows about header files listed in the module map files on the command-line. This can be a large number. Asking for existing `HeaderFileInfo` can trigger deserialization of `HEADER_SEARCH_TABLE` from loaded PCM files. Keys of the on-disk hash table consist of the header file size and modification time. However, with explicit modules Clang zeroes out the modification time. Moreover, if you import lots of modules, some of their header files end up having identical sizes. This means lots of hash collisions that can only be resolved by running the serialized filename through `FileManager` and comparing equality of the `FileEntry`. This ends up being super expensive, essentially re-stating lots of the transitively loaded SDK header files. This patch cleans up the API for getting `HeaderFileInfo` and makes sure `ASTWriter` uses the version that doesn't ask loaded PCM files for more information. This removes the excessive stat traffic coming from `ASTWriter` hopefully without changing observable behavior.
1 parent a952c12 commit 84df7a0

File tree

4 files changed

+60
-54
lines changed

4 files changed

+60
-54
lines changed

clang-tools-extra/clangd/index/SymbolCollector.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -409,7 +409,7 @@ class SymbolCollector::HeaderFileURICache {
409409
// Framework headers are spelled as <FrameworkName/Foo.h>, not
410410
// "path/FrameworkName.framework/Headers/Foo.h".
411411
auto &HS = PP->getHeaderSearchInfo();
412-
if (const auto *HFI = HS.getExistingFileInfo(*FE, /*WantExternal*/ false))
412+
if (const auto *HFI = HS.getExistingLocalFileInfo(*FE))
413413
if (!HFI->Framework.empty())
414414
if (auto Spelling =
415415
getFrameworkHeaderIncludeSpelling(*FE, HFI->Framework, HS))

clang/include/clang/Lex/HeaderSearch.h

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -547,14 +547,15 @@ class HeaderSearch {
547547
/// Return whether the specified file is a normal header,
548548
/// a system header, or a C++ friendly system header.
549549
SrcMgr::CharacteristicKind getFileDirFlavor(FileEntryRef File) {
550-
return (SrcMgr::CharacteristicKind)getFileInfo(File).DirInfo;
550+
if (const HeaderFileInfo *HFI = getExistingFileInfo(File))
551+
return (SrcMgr::CharacteristicKind)HFI->DirInfo;
552+
return (SrcMgr::CharacteristicKind)HeaderFileInfo().DirInfo;
551553
}
552554

553555
/// Mark the specified file as a "once only" file due to
554556
/// \#pragma once.
555557
void MarkFileIncludeOnce(FileEntryRef File) {
556-
HeaderFileInfo &FI = getFileInfo(File);
557-
FI.isPragmaOnce = true;
558+
getFileInfo(File).isPragmaOnce = true;
558559
}
559560

560561
/// Mark the specified file as a system header, e.g. due to
@@ -834,16 +835,17 @@ class HeaderSearch {
834835

835836
unsigned header_file_size() const { return FileInfo.size(); }
836837

837-
/// Return the HeaderFileInfo structure for the specified FileEntry,
838-
/// in preparation for updating it in some way.
838+
/// Return the HeaderFileInfo structure for the specified FileEntry, in
839+
/// preparation for updating it in some way.
839840
HeaderFileInfo &getFileInfo(FileEntryRef FE);
840841

841-
/// Return the HeaderFileInfo structure for the specified FileEntry,
842-
/// if it has ever been filled in.
843-
/// \param WantExternal Whether the caller wants purely-external header file
844-
/// info (where \p External is true).
845-
const HeaderFileInfo *getExistingFileInfo(FileEntryRef FE,
846-
bool WantExternal = true) const;
842+
/// Return the HeaderFileInfo structure for the specified FileEntry, if it has
843+
/// ever been filled in (either locally or externally).
844+
const HeaderFileInfo *getExistingFileInfo(FileEntryRef FE) const;
845+
846+
/// Return the headerFileInfo structure for the specified FileEntry, if it has
847+
/// ever been filled in locally.
848+
const HeaderFileInfo *getExistingLocalFileInfo(FileEntryRef FE) const;
847849

848850
SearchDirIterator search_dir_begin() { return {*this, 0}; }
849851
SearchDirIterator search_dir_end() { return {*this, SearchDirs.size()}; }

clang/lib/Lex/HeaderSearch.cpp

Lines changed: 42 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -946,9 +946,13 @@ OptionalFileEntryRef HeaderSearch::LookupFile(
946946
// If we have no includer, that means we're processing a #include
947947
// from a module build. We should treat this as a system header if we're
948948
// building a [system] module.
949-
bool IncluderIsSystemHeader =
950-
Includer ? getFileInfo(*Includer).DirInfo != SrcMgr::C_User :
951-
BuildSystemModule;
949+
bool IncluderIsSystemHeader = [&]() {
950+
if (!Includer)
951+
return BuildSystemModule;
952+
const HeaderFileInfo *HFI = getExistingFileInfo(*Includer);
953+
assert(HFI && "includer without file info");
954+
return HFI->DirInfo != SrcMgr::C_User;
955+
}();
952956
if (OptionalFileEntryRef FE = getFileAndSuggestModule(
953957
TmpDir, IncludeLoc, IncluderAndDir.second, IncluderIsSystemHeader,
954958
RequestingModule, SuggestedModule)) {
@@ -963,10 +967,11 @@ OptionalFileEntryRef HeaderSearch::LookupFile(
963967
// Note that we only use one of FromHFI/ToHFI at once, due to potential
964968
// reallocation of the underlying vector potentially making the first
965969
// reference binding dangling.
966-
HeaderFileInfo &FromHFI = getFileInfo(*Includer);
967-
unsigned DirInfo = FromHFI.DirInfo;
968-
bool IndexHeaderMapHeader = FromHFI.IndexHeaderMapHeader;
969-
StringRef Framework = FromHFI.Framework;
970+
const HeaderFileInfo *FromHFI = getExistingFileInfo(*Includer);
971+
assert(FromHFI && "includer without file info");
972+
unsigned DirInfo = FromHFI->DirInfo;
973+
bool IndexHeaderMapHeader = FromHFI->IndexHeaderMapHeader;
974+
StringRef Framework = FromHFI->Framework;
970975

971976
HeaderFileInfo &ToHFI = getFileInfo(*FE);
972977
ToHFI.DirInfo = DirInfo;
@@ -1153,10 +1158,12 @@ OptionalFileEntryRef HeaderSearch::LookupFile(
11531158
// "Foo" is the name of the framework in which the including header was found.
11541159
if (!Includers.empty() && Includers.front().first && !isAngled &&
11551160
!Filename.contains('/')) {
1156-
HeaderFileInfo &IncludingHFI = getFileInfo(*Includers.front().first);
1157-
if (IncludingHFI.IndexHeaderMapHeader) {
1161+
const HeaderFileInfo *IncludingHFI =
1162+
getExistingFileInfo(*Includers.front().first);
1163+
assert(IncludingHFI && "includer without file info");
1164+
if (IncludingHFI->IndexHeaderMapHeader) {
11581165
SmallString<128> ScratchFilename;
1159-
ScratchFilename += IncludingHFI.Framework;
1166+
ScratchFilename += IncludingHFI->Framework;
11601167
ScratchFilename += '/';
11611168
ScratchFilename += Filename;
11621169

@@ -1286,11 +1293,11 @@ OptionalFileEntryRef HeaderSearch::LookupSubframeworkHeader(
12861293
}
12871294

12881295
// This file is a system header or C++ unfriendly if the old file is.
1289-
//
1290-
// Note that the temporary 'DirInfo' is required here, as either call to
1291-
// getFileInfo could resize the vector and we don't want to rely on order
1292-
// of evaluation.
1293-
unsigned DirInfo = getFileInfo(ContextFileEnt).DirInfo;
1296+
const HeaderFileInfo *ContextHFI = getExistingFileInfo(ContextFileEnt);
1297+
assert(ContextHFI && "context file without file info");
1298+
// Note that the temporary 'DirInfo' is required here, as the call to
1299+
// getFileInfo could resize the vector and might invalidate 'ContextHFI'.
1300+
unsigned DirInfo = ContextHFI->DirInfo;
12941301
getFileInfo(*File).DirInfo = DirInfo;
12951302

12961303
FrameworkName.pop_back(); // remove the trailing '/'
@@ -1348,8 +1355,6 @@ static void mergeHeaderFileInfo(HeaderFileInfo &HFI,
13481355
HFI.Framework = OtherHFI.Framework;
13491356
}
13501357

1351-
/// getFileInfo - Return the HeaderFileInfo structure for the specified
1352-
/// FileEntry.
13531358
HeaderFileInfo &HeaderSearch::getFileInfo(FileEntryRef FE) {
13541359
if (FE.getUID() >= FileInfo.size())
13551360
FileInfo.resize(FE.getUID() + 1);
@@ -1366,27 +1371,20 @@ HeaderFileInfo &HeaderSearch::getFileInfo(FileEntryRef FE) {
13661371
}
13671372

13681373
HFI->IsValid = true;
1369-
// We have local information about this header file, so it's no longer
1370-
// strictly external.
1374+
// We assume the caller has local information about this header file, so it's
1375+
// no longer strictly external.
13711376
HFI->External = false;
13721377
return *HFI;
13731378
}
13741379

1375-
const HeaderFileInfo *
1376-
HeaderSearch::getExistingFileInfo(FileEntryRef FE, bool WantExternal) const {
1377-
// If we have an external source, ensure we have the latest information.
1378-
// FIXME: Use a generation count to check whether this is really up to date.
1380+
const HeaderFileInfo *HeaderSearch::getExistingFileInfo(FileEntryRef FE) const {
13791381
HeaderFileInfo *HFI;
13801382
if (ExternalSource) {
1381-
if (FE.getUID() >= FileInfo.size()) {
1382-
if (!WantExternal)
1383-
return nullptr;
1383+
if (FE.getUID() >= FileInfo.size())
13841384
FileInfo.resize(FE.getUID() + 1);
1385-
}
13861385

13871386
HFI = &FileInfo[FE.getUID()];
1388-
if (!WantExternal && (!HFI->IsValid || HFI->External))
1389-
return nullptr;
1387+
// FIXME: Use a generation count to check whether this is really up to date.
13901388
if (!HFI->Resolved) {
13911389
auto ExternalHFI = ExternalSource->GetHeaderFileInfo(FE);
13921390
if (ExternalHFI.IsValid) {
@@ -1395,16 +1393,25 @@ HeaderSearch::getExistingFileInfo(FileEntryRef FE, bool WantExternal) const {
13951393
mergeHeaderFileInfo(*HFI, ExternalHFI);
13961394
}
13971395
}
1398-
} else if (FE.getUID() >= FileInfo.size()) {
1399-
return nullptr;
1400-
} else {
1396+
} else if (FE.getUID() < FileInfo.size()) {
14011397
HFI = &FileInfo[FE.getUID()];
1398+
} else {
1399+
HFI = nullptr;
14021400
}
14031401

1404-
if (!HFI->IsValid || (HFI->External && !WantExternal))
1405-
return nullptr;
1402+
return (HFI && HFI->IsValid) ? HFI : nullptr;
1403+
}
1404+
1405+
const HeaderFileInfo *
1406+
HeaderSearch::getExistingLocalFileInfo(FileEntryRef FE) const {
1407+
HeaderFileInfo *HFI;
1408+
if (FE.getUID() < FileInfo.size()) {
1409+
HFI = &FileInfo[FE.getUID()];
1410+
} else {
1411+
HFI = nullptr;
1412+
}
14061413

1407-
return HFI;
1414+
return (HFI && HFI->IsValid && !HFI->External) ? HFI : nullptr;
14081415
}
14091416

14101417
bool HeaderSearch::isFileMultipleIncludeGuarded(FileEntryRef File) const {

clang/lib/Serialization/ASTWriter.cpp

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -185,8 +185,7 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
185185
if (!File)
186186
continue;
187187

188-
const HeaderFileInfo *HFI =
189-
HS.getExistingFileInfo(*File, /*WantExternal*/ false);
188+
const HeaderFileInfo *HFI = HS.getExistingLocalFileInfo(*File);
190189
if (!HFI || (HFI->isModuleHeader && !HFI->isCompilingModuleHeader))
191190
continue;
192191

@@ -2052,14 +2051,12 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) {
20522051
if (!File)
20532052
continue;
20542053

2055-
// Get the file info. This will load info from the external source if
2056-
// necessary. Skip emitting this file if we have no information on it
2057-
// as a header file (in which case HFI will be null) or if it hasn't
2054+
// Get the file info. Skip emitting this file if we have no information on
2055+
// it as a header file (in which case HFI will be null) or if it hasn't
20582056
// changed since it was loaded. Also skip it if it's for a modular header
20592057
// from a different module; in that case, we rely on the module(s)
20602058
// containing the header to provide this information.
2061-
const HeaderFileInfo *HFI =
2062-
HS.getExistingFileInfo(*File, /*WantExternal*/!Chain);
2059+
const HeaderFileInfo *HFI = HS.getExistingLocalFileInfo(*File);
20632060
if (!HFI || (HFI->isModuleHeader && !HFI->isCompilingModuleHeader))
20642061
continue;
20652062

0 commit comments

Comments
 (0)