Skip to content

Commit 089d50d

Browse files
committed
[clang][modules] Deserialize included files lazily
In D114095, `HeaderFileInfo::NumIncludes` was moved into `Preprocessor`. This still makes sense, because we want to track this on the granularity of submodules (D112915, D114173), but the way this information is serialized is not ideal. In `ASTReader`, the set of included files gets deserialized eagerly, issuing lots of calls to `FileManager::getFile()` for input files the PCM consumer might not be interested in. This patch makes the information part of the header file info table, taking advantage of its lazy deserialization which typically happens when a file is about to be included. Reviewed By: benlangmuir Differential Revision: https://reviews.llvm.org/D155131 (cherry picked from commit 6504d87)
1 parent 531de5d commit 089d50d

File tree

7 files changed

+37
-82
lines changed

7 files changed

+37
-82
lines changed

clang/include/clang/Lex/Preprocessor.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1377,6 +1377,7 @@ class Preprocessor {
13771377

13781378
/// Return true if this header has already been included.
13791379
bool alreadyIncluded(const FileEntry *File) const {
1380+
HeaderInfo.getFileInfo(File);
13801381
return IncludedFiles.count(File);
13811382
}
13821383

clang/include/clang/Serialization/ASTBitCodes.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -707,8 +707,7 @@ enum ASTRecordTypes {
707707
/// Record code for \#pragma float_control options.
708708
FLOAT_CONTROL_PRAGMA_OPTIONS = 65,
709709

710-
/// Record code for included files.
711-
PP_INCLUDED_FILES = 66,
710+
/// ID 66 used to be the list of included files.
712711

713712
/// Record code for an unterminated \#pragma clang assume_nonnull begin
714713
/// recorded in a preamble.

clang/include/clang/Serialization/ASTReader.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1379,7 +1379,6 @@ class ASTReader
13791379
llvm::Error ReadSourceManagerBlock(ModuleFile &F);
13801380
llvm::BitstreamCursor &SLocCursorForID(int ID);
13811381
SourceLocation getImportLocation(ModuleFile *F);
1382-
void readIncludedFiles(ModuleFile &F, StringRef Blob, Preprocessor &PP);
13831382
ASTReadResult ReadModuleMapFileBlock(RecordData &Record, ModuleFile &F,
13841383
const ModuleFile *ImportedBy,
13851384
unsigned ClientLoadCapabilities);

clang/include/clang/Serialization/ASTWriter.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -500,7 +500,6 @@ class ASTWriter : public ASTDeserializationListener,
500500
void WriteInputFiles(SourceManager &SourceMgr, HeaderSearchOptions &HSOpts);
501501
void WriteSourceManagerBlock(SourceManager &SourceMgr,
502502
const Preprocessor &PP);
503-
void writeIncludedFiles(raw_ostream &Out, const Preprocessor &PP);
504503
void WritePreprocessor(const Preprocessor &PP, bool IsModule);
505504
void WriteHeaderSearch(const HeaderSearch &HS);
506505
void WritePreprocessorDetail(PreprocessingRecord &PPRec,

clang/lib/Serialization/ASTReader.cpp

Lines changed: 25 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1839,6 +1839,21 @@ ASTReader::getGlobalPreprocessedEntityID(ModuleFile &M,
18391839
return LocalID + I->second;
18401840
}
18411841

1842+
const FileEntry *HeaderFileInfoTrait::getFile(const internal_key_type &Key) {
1843+
FileManager &FileMgr = Reader.getFileManager();
1844+
if (!Key.Imported) {
1845+
if (auto File = FileMgr.getFile(Key.Filename))
1846+
return *File;
1847+
return nullptr;
1848+
}
1849+
1850+
std::string Resolved = std::string(Key.Filename);
1851+
Reader.ResolveImportedPath(M, Resolved);
1852+
if (auto File = FileMgr.getFile(Resolved))
1853+
return *File;
1854+
return nullptr;
1855+
}
1856+
18421857
unsigned HeaderFileInfoTrait::ComputeHash(internal_key_ref ikey) {
18431858
return llvm::hash_combine(ikey.Size, ikey.ModTime);
18441859
}
@@ -1859,23 +1874,8 @@ bool HeaderFileInfoTrait::EqualKey(internal_key_ref a, internal_key_ref b) {
18591874
return true;
18601875

18611876
// Determine whether the actual files are equivalent.
1862-
FileManager &FileMgr = Reader.getFileManager();
1863-
auto GetFile = [&](const internal_key_type &Key) -> const FileEntry* {
1864-
if (!Key.Imported) {
1865-
if (auto File = FileMgr.getFile(Key.Filename))
1866-
return *File;
1867-
return nullptr;
1868-
}
1869-
1870-
std::string Resolved = std::string(Key.Filename);
1871-
Reader.ResolveImportedPath(M, Resolved);
1872-
if (auto File = FileMgr.getFile(Resolved))
1873-
return *File;
1874-
return nullptr;
1875-
};
1876-
1877-
const FileEntry *FEA = GetFile(a);
1878-
const FileEntry *FEB = GetFile(b);
1877+
const FileEntry *FEA = getFile(a);
1878+
const FileEntry *FEB = getFile(b);
18791879
return FEA && FEA == FEB;
18801880
}
18811881

@@ -1904,6 +1904,14 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d,
19041904
const unsigned char *End = d + DataLen;
19051905
HeaderFileInfo HFI;
19061906
unsigned Flags = *d++;
1907+
1908+
bool Included = (Flags >> 6) & 0x01;
1909+
if (Included)
1910+
if (const FileEntry *FE = getFile(key))
1911+
// Not using \c Preprocessor::markIncluded(), since that would attempt to
1912+
// deserialize this header file info again.
1913+
Reader.getPreprocessor().getIncludedFiles().insert(FE);
1914+
19071915
// FIXME: Refactor with mergeHeaderFileInfo in HeaderSearch.cpp.
19081916
HFI.isImport |= (Flags >> 5) & 0x01;
19091917
HFI.isPragmaOnce |= (Flags >> 4) & 0x01;
@@ -3010,22 +3018,6 @@ ASTReader::ReadControlBlock(ModuleFile &F,
30103018
}
30113019
}
30123020

3013-
void ASTReader::readIncludedFiles(ModuleFile &F, StringRef Blob,
3014-
Preprocessor &PP) {
3015-
using namespace llvm::support;
3016-
3017-
const unsigned char *D = (const unsigned char *)Blob.data();
3018-
unsigned FileCount = endian::readNext<uint32_t, little, unaligned>(D);
3019-
3020-
for (unsigned I = 0; I < FileCount; ++I) {
3021-
size_t ID = endian::readNext<uint32_t, little, unaligned>(D);
3022-
InputFileInfo IFI = getInputFileInfo(F, ID);
3023-
if (llvm::ErrorOr<const FileEntry *> File =
3024-
PP.getFileManager().getFile(IFI.Filename))
3025-
PP.getIncludedFiles().insert(*File);
3026-
}
3027-
}
3028-
30293021
llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
30303022
unsigned ClientLoadCapabilities) {
30313023
BitstreamCursor &Stream = F.Stream;
@@ -3754,10 +3746,6 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
37543746
break;
37553747
}
37563748

3757-
case PP_INCLUDED_FILES:
3758-
readIncludedFiles(F, Blob, PP);
3759-
break;
3760-
37613749
case LATE_PARSED_TEMPLATE:
37623750
LateParsedTemplates.emplace_back(
37633751
std::piecewise_construct, std::forward_as_tuple(&F),

clang/lib/Serialization/ASTReaderInternals.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,9 @@ class HeaderFileInfoTrait {
276276
static internal_key_type ReadKey(const unsigned char *d, unsigned);
277277

278278
data_type ReadData(internal_key_ref,const unsigned char *d, unsigned DataLen);
279+
280+
private:
281+
const FileEntry *getFile(const internal_key_type &Key);
279282
};
280283

281284
/// The on-disk hash table used for known header files.

clang/lib/Serialization/ASTWriter.cpp

Lines changed: 7 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -864,7 +864,6 @@ void ASTWriter::WriteBlockInfoBlock() {
864864
RECORD(CUDA_PRAGMA_FORCE_HOST_DEVICE_DEPTH);
865865
RECORD(PP_CONDITIONAL_STACK);
866866
RECORD(DECLS_TO_CHECK_FOR_DEFERRED_DIAGS);
867-
RECORD(PP_INCLUDED_FILES);
868867
RECORD(PP_ASSUME_NONNULL_LOC);
869868

870869
// SourceManager Block.
@@ -1794,6 +1793,7 @@ namespace {
17941793

17951794
struct data_type {
17961795
const HeaderFileInfo &HFI;
1796+
bool AlreadyIncluded;
17971797
ArrayRef<ModuleMap::KnownHeader> KnownHeaders;
17981798
UnresolvedModule Unresolved;
17991799
};
@@ -1839,7 +1839,8 @@ namespace {
18391839
endian::Writer LE(Out, little);
18401840
uint64_t Start = Out.tell(); (void)Start;
18411841

1842-
unsigned char Flags = (Data.HFI.isImport << 5)
1842+
unsigned char Flags = (Data.AlreadyIncluded << 6)
1843+
| (Data.HFI.isImport << 5)
18431844
| (Data.HFI.isPragmaOnce << 4)
18441845
| (Data.HFI.DirInfo << 1)
18451846
| Data.HFI.IndexHeaderMapHeader;
@@ -1941,7 +1942,7 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) {
19411942
FilenameDup, *U.Size, IncludeTimestamps ? *U.ModTime : 0
19421943
};
19431944
HeaderFileInfoTrait::data_type Data = {
1944-
Empty, {}, {M, ModuleMap::headerKindToRole(U.Kind)}
1945+
Empty, false, {}, {M, ModuleMap::headerKindToRole(U.Kind)}
19451946
};
19461947
// FIXME: Deal with cases where there are multiple unresolved header
19471948
// directives in different submodules for the same header.
@@ -1985,11 +1986,13 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) {
19851986
SavedStrings.push_back(Filename.data());
19861987
}
19871988

1989+
bool Included = PP->alreadyIncluded(File);
1990+
19881991
HeaderFileInfoTrait::key_type Key = {
19891992
Filename, File->getSize(), getTimestampForOutput(File)
19901993
};
19911994
HeaderFileInfoTrait::data_type Data = {
1992-
*HFI, HS.getModuleMap().findResolvedModulesForHeader(File), {}
1995+
*HFI, Included, HS.getModuleMap().findResolvedModulesForHeader(File), {}
19931996
};
19941997
Generator.insert(Key, Data, GeneratorTrait);
19951998
++NumHeaderSearchEntries;
@@ -2287,29 +2290,6 @@ static bool shouldIgnoreMacro(MacroDirective *MD, bool IsModule,
22872290
return false;
22882291
}
22892292

2290-
void ASTWriter::writeIncludedFiles(raw_ostream &Out, const Preprocessor &PP) {
2291-
using namespace llvm::support;
2292-
2293-
const Preprocessor::IncludedFilesSet &IncludedFiles = PP.getIncludedFiles();
2294-
2295-
std::vector<uint32_t> IncludedInputFileIDs;
2296-
IncludedInputFileIDs.reserve(IncludedFiles.size());
2297-
2298-
for (const FileEntry *File : IncludedFiles) {
2299-
auto InputFileIt = InputFileIDs.find(File);
2300-
if (InputFileIt == InputFileIDs.end())
2301-
continue;
2302-
IncludedInputFileIDs.push_back(InputFileIt->second);
2303-
}
2304-
2305-
llvm::sort(IncludedInputFileIDs);
2306-
2307-
endian::Writer LE(Out, little);
2308-
LE.write<uint32_t>(IncludedInputFileIDs.size());
2309-
for (uint32_t ID : IncludedInputFileIDs)
2310-
LE.write<uint32_t>(ID);
2311-
}
2312-
23132293
/// Writes the block containing the serialized form of the
23142294
/// preprocessor.
23152295
void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) {
@@ -2555,20 +2535,6 @@ void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) {
25552535
MacroOffsetsBase - ASTBlockStartOffset};
25562536
Stream.EmitRecordWithBlob(MacroOffsetAbbrev, Record, bytes(MacroOffsets));
25572537
}
2558-
2559-
{
2560-
auto Abbrev = std::make_shared<BitCodeAbbrev>();
2561-
Abbrev->Add(BitCodeAbbrevOp(PP_INCLUDED_FILES));
2562-
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
2563-
unsigned IncludedFilesAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
2564-
2565-
SmallString<2048> Buffer;
2566-
raw_svector_ostream Out(Buffer);
2567-
writeIncludedFiles(Out, PP);
2568-
RecordData::value_type Record[] = {PP_INCLUDED_FILES};
2569-
Stream.EmitRecordWithBlob(IncludedFilesAbbrev, Record, Buffer.data(),
2570-
Buffer.size());
2571-
}
25722538
}
25732539

25742540
void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec,

0 commit comments

Comments
 (0)