Skip to content

Commit c68ba12

Browse files
committed
[clang][modules] Mark fewer identifiers as out-of-date
In `clang-scan-deps` contexts, the number of interesting identifiers in PCM files is fairly low (only macros), while the number of identifiers in the importing instance is high (builtins). Marking the whole identifier table out-of-date triggers lots of benign and expensive calls to `ASTReader::updateOutOfDateIdentifiers()`. (That unfortunately happens even for unused identifiers due to `SemaRef.IdResolver.begin(II)` line in `ASTWriter::WriteASTCore()`.) This patch makes the main code path more similar to C++ modules, where the PCM files have `INTERESTING_IDENTIFIERS` section which lists identifiers that get created in the identifier table of the importing instance and marked as out-of-date. The only difference is that the main code path doesn't *create* identifiers in the table and relies on the importing instance calling `ASTReader::get()` when creating new identifier on-demand. It only marks existing identifiers as out-of-date. This speeds up `clang-scan-deps` by 5-10%. Reviewed By: Bigcheese, benlangmuir Differential Revision: https://reviews.llvm.org/D151277
1 parent 7f5b15a commit c68ba12

File tree

2 files changed

+41
-30
lines changed

2 files changed

+41
-30
lines changed

clang/lib/Serialization/ASTReader.cpp

Lines changed: 35 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -4386,27 +4386,56 @@ ASTReader::ASTReadResult ASTReader::ReadAST(StringRef FileName,
43864386
if (F.OriginalSourceFileID.isValid())
43874387
F.OriginalSourceFileID = TranslateFileID(F, F.OriginalSourceFileID);
43884388

4389-
// Preload all the pending interesting identifiers by marking them out of
4390-
// date.
43914389
for (auto Offset : F.PreloadIdentifierOffsets) {
43924390
const unsigned char *Data = F.IdentifierTableData + Offset;
43934391

43944392
ASTIdentifierLookupTrait Trait(*this, F);
43954393
auto KeyDataLen = Trait.ReadKeyDataLength(Data);
43964394
auto Key = Trait.ReadKey(Data, KeyDataLen.first);
4397-
auto &II = PP.getIdentifierTable().getOwn(Key);
4398-
II.setOutOfDate(true);
4395+
4396+
IdentifierInfo *II;
4397+
if (!PP.getLangOpts().CPlusPlus) {
4398+
// Identifiers present in both the module file and the importing
4399+
// instance are marked out-of-date so that they can be deserialized
4400+
// on next use via ASTReader::updateOutOfDateIdentifier().
4401+
// Identifiers present in the module file but not in the importing
4402+
// instance are ignored for now, preventing growth of the identifier
4403+
// table. They will be deserialized on first use via ASTReader::get().
4404+
auto It = PP.getIdentifierTable().find(Key);
4405+
if (It == PP.getIdentifierTable().end())
4406+
continue;
4407+
II = It->second;
4408+
} else {
4409+
// With C++ modules, not many identifiers are considered interesting.
4410+
// All identifiers in the module file can be placed into the identifier
4411+
// table of the importing instance and marked as out-of-date. This makes
4412+
// ASTReader::get() a no-op, and deserialization will take place on
4413+
// first/next use via ASTReader::updateOutOfDateIdentifier().
4414+
II = &PP.getIdentifierTable().getOwn(Key);
4415+
}
4416+
4417+
II->setOutOfDate(true);
43994418

44004419
// Mark this identifier as being from an AST file so that we can track
44014420
// whether we need to serialize it.
4402-
markIdentifierFromAST(*this, II);
4421+
markIdentifierFromAST(*this, *II);
44034422

44044423
// Associate the ID with the identifier so that the writer can reuse it.
44054424
auto ID = Trait.ReadIdentifierID(Data + KeyDataLen.first);
4406-
SetIdentifierInfo(ID, &II);
4425+
SetIdentifierInfo(ID, II);
44074426
}
44084427
}
44094428

4429+
// Builtins and library builtins have already been initialized. Mark all
4430+
// identifiers as out-of-date, so that they are deserialized on first use.
4431+
if (Type == MK_PCH || Type == MK_Preamble || Type == MK_MainFile)
4432+
for (auto &Id : PP.getIdentifierTable())
4433+
Id.second->setOutOfDate(true);
4434+
4435+
// Mark selectors as out of date.
4436+
for (const auto &Sel : SelectorGeneration)
4437+
SelectorOutOfDate[Sel.first] = true;
4438+
44104439
// Setup the import locations and notify the module manager that we've
44114440
// committed to these module files.
44124441
for (ImportedModule &M : Loaded) {
@@ -4424,25 +4453,6 @@ ASTReader::ASTReadResult ASTReader::ReadAST(StringRef FileName,
44244453
F.ImportLoc = TranslateSourceLocation(*M.ImportedBy, M.ImportLoc);
44254454
}
44264455

4427-
if (!PP.getLangOpts().CPlusPlus ||
4428-
(Type != MK_ImplicitModule && Type != MK_ExplicitModule &&
4429-
Type != MK_PrebuiltModule)) {
4430-
// Mark all of the identifiers in the identifier table as being out of date,
4431-
// so that various accessors know to check the loaded modules when the
4432-
// identifier is used.
4433-
//
4434-
// For C++ modules, we don't need information on many identifiers (just
4435-
// those that provide macros or are poisoned), so we mark all of
4436-
// the interesting ones via PreloadIdentifierOffsets.
4437-
for (IdentifierTable::iterator Id = PP.getIdentifierTable().begin(),
4438-
IdEnd = PP.getIdentifierTable().end();
4439-
Id != IdEnd; ++Id)
4440-
Id->second->setOutOfDate(true);
4441-
}
4442-
// Mark selectors as out of date.
4443-
for (const auto &Sel : SelectorGeneration)
4444-
SelectorOutOfDate[Sel.first] = true;
4445-
44464456
// Resolve any unresolved module exports.
44474457
for (unsigned I = 0, N = UnresolvedModuleRefs.size(); I != N; ++I) {
44484458
UnresolvedModuleRef &Unresolved = UnresolvedModuleRefs[I];

clang/lib/Serialization/ASTWriter.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3573,14 +3573,16 @@ class ASTIdentifierTableTrait {
35733573
// the mapping from persistent IDs to strings.
35743574
Writer.SetIdentifierOffset(II, Out.tell());
35753575

3576+
auto MacroOffset = Writer.getMacroDirectivesOffset(II);
3577+
35763578
// Emit the offset of the key/data length information to the interesting
35773579
// identifiers table if necessary.
3578-
if (InterestingIdentifierOffsets && isInterestingIdentifier(II))
3580+
if (InterestingIdentifierOffsets &&
3581+
isInterestingIdentifier(II, MacroOffset))
35793582
InterestingIdentifierOffsets->push_back(Out.tell());
35803583

35813584
unsigned KeyLen = II->getLength() + 1;
35823585
unsigned DataLen = 4; // 4 bytes for the persistent ID << 1
3583-
auto MacroOffset = Writer.getMacroDirectivesOffset(II);
35843586
if (isInterestingIdentifier(II, MacroOffset)) {
35853587
DataLen += 2; // 2 bytes for builtin ID
35863588
DataLen += 2; // 2 bytes for flags
@@ -3659,9 +3661,8 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP,
36593661
// strings.
36603662
{
36613663
llvm::OnDiskChainedHashTableGenerator<ASTIdentifierTableTrait> Generator;
3662-
ASTIdentifierTableTrait Trait(
3663-
*this, PP, IdResolver, IsModule,
3664-
(getLangOpts().CPlusPlus && IsModule) ? &InterestingIdents : nullptr);
3664+
ASTIdentifierTableTrait Trait(*this, PP, IdResolver, IsModule,
3665+
IsModule ? &InterestingIdents : nullptr);
36653666

36663667
// Look for any identifiers that were named while processing the
36673668
// headers, but are otherwise not needed. We add these to the hash

0 commit comments

Comments
 (0)