Skip to content

Commit 6504d87

Browse files
committed
[clang][modules] Deserialize included files lazily
In D114095, `HeaderFileInfo::NumIncludes` was moved into `Preprocessor`. This still makes sense, because we want to track this on the granularity of submodules (D112915, D114173), but the way this information is serialized is not ideal. In `ASTReader`, the set of included files gets deserialized eagerly, issuing lots of calls to `FileManager::getFile()` for input files the PCM consumer might not be interested in. This patch makes the information part of the header file info table, taking advantage of its lazy deserialization which typically happens when a file is about to be included. Reviewed By: benlangmuir Differential Revision: https://reviews.llvm.org/D155131
1 parent 816141c commit 6504d87

File tree

7 files changed

+38
-83
lines changed

7 files changed

+38
-83
lines changed

clang/include/clang/Lex/Preprocessor.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1486,6 +1486,7 @@ class Preprocessor {
14861486

14871487
/// Return true if this header has already been included.
14881488
bool alreadyIncluded(const FileEntry *File) const {
1489+
HeaderInfo.getFileInfo(File);
14891490
return IncludedFiles.count(File);
14901491
}
14911492

clang/include/clang/Serialization/ASTBitCodes.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ namespace serialization {
4141
/// Version 4 of AST files also requires that the version control branch and
4242
/// revision match exactly, since there is no backward compatibility of
4343
/// AST files at this time.
44-
const unsigned VERSION_MAJOR = 25;
44+
const unsigned VERSION_MAJOR = 26;
4545

4646
/// AST file minor version number supported by this version of
4747
/// Clang.
@@ -696,8 +696,7 @@ enum ASTRecordTypes {
696696
/// Record code for \#pragma float_control options.
697697
FLOAT_CONTROL_PRAGMA_OPTIONS = 65,
698698

699-
/// Record code for included files.
700-
PP_INCLUDED_FILES = 66,
699+
/// ID 66 used to be the list of included files.
701700

702701
/// Record code for an unterminated \#pragma clang assume_nonnull begin
703702
/// recorded in a preamble.

clang/include/clang/Serialization/ASTReader.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1391,7 +1391,6 @@ class ASTReader
13911391
void ParseLineTable(ModuleFile &F, const RecordData &Record);
13921392
llvm::Error ReadSourceManagerBlock(ModuleFile &F);
13931393
SourceLocation getImportLocation(ModuleFile *F);
1394-
void readIncludedFiles(ModuleFile &F, StringRef Blob, Preprocessor &PP);
13951394
ASTReadResult ReadModuleMapFileBlock(RecordData &Record, ModuleFile &F,
13961395
const ModuleFile *ImportedBy,
13971396
unsigned ClientLoadCapabilities);

clang/include/clang/Serialization/ASTWriter.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -500,7 +500,6 @@ class ASTWriter : public ASTDeserializationListener,
500500
void WriteInputFiles(SourceManager &SourceMgr, HeaderSearchOptions &HSOpts);
501501
void WriteSourceManagerBlock(SourceManager &SourceMgr,
502502
const Preprocessor &PP);
503-
void writeIncludedFiles(raw_ostream &Out, const Preprocessor &PP);
504503
void WritePreprocessor(const Preprocessor &PP, bool IsModule);
505504
void WriteHeaderSearch(const HeaderSearch &HS);
506505
void WritePreprocessorDetail(PreprocessingRecord &PPRec,

clang/lib/Serialization/ASTReader.cpp

Lines changed: 25 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1875,6 +1875,21 @@ ASTReader::getGlobalPreprocessedEntityID(ModuleFile &M,
18751875
return LocalID + I->second;
18761876
}
18771877

1878+
const FileEntry *HeaderFileInfoTrait::getFile(const internal_key_type &Key) {
1879+
FileManager &FileMgr = Reader.getFileManager();
1880+
if (!Key.Imported) {
1881+
if (auto File = FileMgr.getFile(Key.Filename))
1882+
return *File;
1883+
return nullptr;
1884+
}
1885+
1886+
std::string Resolved = std::string(Key.Filename);
1887+
Reader.ResolveImportedPath(M, Resolved);
1888+
if (auto File = FileMgr.getFile(Resolved))
1889+
return *File;
1890+
return nullptr;
1891+
}
1892+
18781893
unsigned HeaderFileInfoTrait::ComputeHash(internal_key_ref ikey) {
18791894
return llvm::hash_combine(ikey.Size, ikey.ModTime);
18801895
}
@@ -1895,23 +1910,8 @@ bool HeaderFileInfoTrait::EqualKey(internal_key_ref a, internal_key_ref b) {
18951910
return true;
18961911

18971912
// Determine whether the actual files are equivalent.
1898-
FileManager &FileMgr = Reader.getFileManager();
1899-
auto GetFile = [&](const internal_key_type &Key) -> const FileEntry* {
1900-
if (!Key.Imported) {
1901-
if (auto File = FileMgr.getFile(Key.Filename))
1902-
return *File;
1903-
return nullptr;
1904-
}
1905-
1906-
std::string Resolved = std::string(Key.Filename);
1907-
Reader.ResolveImportedPath(M, Resolved);
1908-
if (auto File = FileMgr.getFile(Resolved))
1909-
return *File;
1910-
return nullptr;
1911-
};
1912-
1913-
const FileEntry *FEA = GetFile(a);
1914-
const FileEntry *FEB = GetFile(b);
1913+
const FileEntry *FEA = getFile(a);
1914+
const FileEntry *FEB = getFile(b);
19151915
return FEA && FEA == FEB;
19161916
}
19171917

@@ -1940,6 +1940,14 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d,
19401940
const unsigned char *End = d + DataLen;
19411941
HeaderFileInfo HFI;
19421942
unsigned Flags = *d++;
1943+
1944+
bool Included = (Flags >> 6) & 0x01;
1945+
if (Included)
1946+
if (const FileEntry *FE = getFile(key))
1947+
// Not using \c Preprocessor::markIncluded(), since that would attempt to
1948+
// deserialize this header file info again.
1949+
Reader.getPreprocessor().getIncludedFiles().insert(FE);
1950+
19431951
// FIXME: Refactor with mergeHeaderFileInfo in HeaderSearch.cpp.
19441952
HFI.isImport |= (Flags >> 5) & 0x01;
19451953
HFI.isPragmaOnce |= (Flags >> 4) & 0x01;
@@ -3028,22 +3036,6 @@ ASTReader::ReadControlBlock(ModuleFile &F,
30283036
}
30293037
}
30303038

3031-
void ASTReader::readIncludedFiles(ModuleFile &F, StringRef Blob,
3032-
Preprocessor &PP) {
3033-
using namespace llvm::support;
3034-
3035-
const unsigned char *D = (const unsigned char *)Blob.data();
3036-
unsigned FileCount = endian::readNext<uint32_t, little, unaligned>(D);
3037-
3038-
for (unsigned I = 0; I < FileCount; ++I) {
3039-
size_t ID = endian::readNext<uint32_t, little, unaligned>(D);
3040-
InputFileInfo IFI = getInputFileInfo(F, ID);
3041-
if (llvm::ErrorOr<const FileEntry *> File =
3042-
PP.getFileManager().getFile(IFI.Filename))
3043-
PP.getIncludedFiles().insert(*File);
3044-
}
3045-
}
3046-
30473039
llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
30483040
unsigned ClientLoadCapabilities) {
30493041
BitstreamCursor &Stream = F.Stream;
@@ -3795,10 +3787,6 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
37953787
break;
37963788
}
37973789

3798-
case PP_INCLUDED_FILES:
3799-
readIncludedFiles(F, Blob, PP);
3800-
break;
3801-
38023790
case LATE_PARSED_TEMPLATE:
38033791
LateParsedTemplates.emplace_back(
38043792
std::piecewise_construct, std::forward_as_tuple(&F),

clang/lib/Serialization/ASTReaderInternals.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,9 @@ class HeaderFileInfoTrait {
276276
static internal_key_type ReadKey(const unsigned char *d, unsigned);
277277

278278
data_type ReadData(internal_key_ref,const unsigned char *d, unsigned DataLen);
279+
280+
private:
281+
const FileEntry *getFile(const internal_key_type &Key);
279282
};
280283

281284
/// The on-disk hash table used for known header files.

clang/lib/Serialization/ASTWriter.cpp

Lines changed: 7 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -866,7 +866,6 @@ void ASTWriter::WriteBlockInfoBlock() {
866866
RECORD(CUDA_PRAGMA_FORCE_HOST_DEVICE_DEPTH);
867867
RECORD(PP_CONDITIONAL_STACK);
868868
RECORD(DECLS_TO_CHECK_FOR_DEFERRED_DIAGS);
869-
RECORD(PP_INCLUDED_FILES);
870869
RECORD(PP_ASSUME_NONNULL_LOC);
871870

872871
// SourceManager Block.
@@ -1763,6 +1762,7 @@ namespace {
17631762

17641763
struct data_type {
17651764
const HeaderFileInfo &HFI;
1765+
bool AlreadyIncluded;
17661766
ArrayRef<ModuleMap::KnownHeader> KnownHeaders;
17671767
UnresolvedModule Unresolved;
17681768
};
@@ -1808,7 +1808,8 @@ namespace {
18081808
endian::Writer LE(Out, little);
18091809
uint64_t Start = Out.tell(); (void)Start;
18101810

1811-
unsigned char Flags = (Data.HFI.isImport << 5)
1811+
unsigned char Flags = (Data.AlreadyIncluded << 6)
1812+
| (Data.HFI.isImport << 5)
18121813
| (Data.HFI.isPragmaOnce << 4)
18131814
| (Data.HFI.DirInfo << 1)
18141815
| Data.HFI.IndexHeaderMapHeader;
@@ -1909,7 +1910,7 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) {
19091910
HeaderFileInfoTrait::key_type Key = {
19101911
FilenameDup, *U.Size, IncludeTimestamps ? *U.ModTime : 0};
19111912
HeaderFileInfoTrait::data_type Data = {
1912-
Empty, {}, {M, ModuleMap::headerKindToRole(U.Kind)}};
1913+
Empty, false, {}, {M, ModuleMap::headerKindToRole(U.Kind)}};
19131914
// FIXME: Deal with cases where there are multiple unresolved header
19141915
// directives in different submodules for the same header.
19151916
Generator.insert(Key, Data, GeneratorTrait);
@@ -1952,11 +1953,13 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) {
19521953
SavedStrings.push_back(Filename.data());
19531954
}
19541955

1956+
bool Included = PP->alreadyIncluded(File);
1957+
19551958
HeaderFileInfoTrait::key_type Key = {
19561959
Filename, File->getSize(), getTimestampForOutput(File)
19571960
};
19581961
HeaderFileInfoTrait::data_type Data = {
1959-
*HFI, HS.getModuleMap().findResolvedModulesForHeader(File), {}
1962+
*HFI, Included, HS.getModuleMap().findResolvedModulesForHeader(File), {}
19601963
};
19611964
Generator.insert(Key, Data, GeneratorTrait);
19621965
++NumHeaderSearchEntries;
@@ -2262,29 +2265,6 @@ static bool shouldIgnoreMacro(MacroDirective *MD, bool IsModule,
22622265
return false;
22632266
}
22642267

2265-
void ASTWriter::writeIncludedFiles(raw_ostream &Out, const Preprocessor &PP) {
2266-
using namespace llvm::support;
2267-
2268-
const Preprocessor::IncludedFilesSet &IncludedFiles = PP.getIncludedFiles();
2269-
2270-
std::vector<uint32_t> IncludedInputFileIDs;
2271-
IncludedInputFileIDs.reserve(IncludedFiles.size());
2272-
2273-
for (const FileEntry *File : IncludedFiles) {
2274-
auto InputFileIt = InputFileIDs.find(File);
2275-
if (InputFileIt == InputFileIDs.end())
2276-
continue;
2277-
IncludedInputFileIDs.push_back(InputFileIt->second);
2278-
}
2279-
2280-
llvm::sort(IncludedInputFileIDs);
2281-
2282-
endian::Writer LE(Out, little);
2283-
LE.write<uint32_t>(IncludedInputFileIDs.size());
2284-
for (uint32_t ID : IncludedInputFileIDs)
2285-
LE.write<uint32_t>(ID);
2286-
}
2287-
22882268
/// Writes the block containing the serialized form of the
22892269
/// preprocessor.
22902270
void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) {
@@ -2533,20 +2513,6 @@ void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) {
25332513
MacroOffsetsBase - ASTBlockStartOffset};
25342514
Stream.EmitRecordWithBlob(MacroOffsetAbbrev, Record, bytes(MacroOffsets));
25352515
}
2536-
2537-
{
2538-
auto Abbrev = std::make_shared<BitCodeAbbrev>();
2539-
Abbrev->Add(BitCodeAbbrevOp(PP_INCLUDED_FILES));
2540-
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
2541-
unsigned IncludedFilesAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
2542-
2543-
SmallString<2048> Buffer;
2544-
raw_svector_ostream Out(Buffer);
2545-
writeIncludedFiles(Out, PP);
2546-
RecordData::value_type Record[] = {PP_INCLUDED_FILES};
2547-
Stream.EmitRecordWithBlob(IncludedFilesAbbrev, Record, Buffer.data(),
2548-
Buffer.size());
2549-
}
25502516
}
25512517

25522518
void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec,

0 commit comments

Comments
 (0)