Skip to content

Commit 531de5d

Browse files
committed
[clang][modules] Mark fewer identifiers as out-of-date
In `clang-scan-deps` contexts, the number of interesting identifiers in PCM files is fairly low (only macros), while the number of identifiers in the importing instance is high (builtins). Marking the whole identifier table out-of-date triggers lots of benign and expensive calls to `ASTReader::updateOutOfDateIdentifiers()`. (That unfortunately happens even for unused identifiers due to `SemaRef.IdResolver.begin(II)` line in `ASTWriter::WriteASTCore()`.) This patch makes the main code path more similar to C++ modules, where the PCM files have `INTERESTING_IDENTIFIERS` section which lists identifiers that get created in the identifier table of the importing instance and marked as out-of-date. The only difference is that the main code path doesn't *create* identifiers in the table and relies on the importing instance calling `ASTReader::get()` when creating new identifier on-demand. It only marks existing identifiers as out-of-date. This speeds up `clang-scan-deps` by 5-10%. Reviewed By: Bigcheese, benlangmuir Differential Revision: https://reviews.llvm.org/D151277 (cherry picked from commit c68ba12)
1 parent 23a80f8 commit 531de5d

File tree

2 files changed

+41
-30
lines changed

2 files changed

+41
-30
lines changed

clang/lib/Serialization/ASTReader.cpp

Lines changed: 35 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -4343,27 +4343,56 @@ ASTReader::ASTReadResult ASTReader::ReadAST(StringRef FileName,
43434343
if (F.OriginalSourceFileID.isValid())
43444344
F.OriginalSourceFileID = TranslateFileID(F, F.OriginalSourceFileID);
43454345

4346-
// Preload all the pending interesting identifiers by marking them out of
4347-
// date.
43484346
for (auto Offset : F.PreloadIdentifierOffsets) {
43494347
const unsigned char *Data = F.IdentifierTableData + Offset;
43504348

43514349
ASTIdentifierLookupTrait Trait(*this, F);
43524350
auto KeyDataLen = Trait.ReadKeyDataLength(Data);
43534351
auto Key = Trait.ReadKey(Data, KeyDataLen.first);
4354-
auto &II = PP.getIdentifierTable().getOwn(Key);
4355-
II.setOutOfDate(true);
4352+
4353+
IdentifierInfo *II;
4354+
if (!PP.getLangOpts().CPlusPlus) {
4355+
// Identifiers present in both the module file and the importing
4356+
// instance are marked out-of-date so that they can be deserialized
4357+
// on next use via ASTReader::updateOutOfDateIdentifier().
4358+
// Identifiers present in the module file but not in the importing
4359+
// instance are ignored for now, preventing growth of the identifier
4360+
// table. They will be deserialized on first use via ASTReader::get().
4361+
auto It = PP.getIdentifierTable().find(Key);
4362+
if (It == PP.getIdentifierTable().end())
4363+
continue;
4364+
II = It->second;
4365+
} else {
4366+
// With C++ modules, not many identifiers are considered interesting.
4367+
// All identifiers in the module file can be placed into the identifier
4368+
// table of the importing instance and marked as out-of-date. This makes
4369+
// ASTReader::get() a no-op, and deserialization will take place on
4370+
// first/next use via ASTReader::updateOutOfDateIdentifier().
4371+
II = &PP.getIdentifierTable().getOwn(Key);
4372+
}
4373+
4374+
II->setOutOfDate(true);
43564375

43574376
// Mark this identifier as being from an AST file so that we can track
43584377
// whether we need to serialize it.
4359-
markIdentifierFromAST(*this, II);
4378+
markIdentifierFromAST(*this, *II);
43604379

43614380
// Associate the ID with the identifier so that the writer can reuse it.
43624381
auto ID = Trait.ReadIdentifierID(Data + KeyDataLen.first);
4363-
SetIdentifierInfo(ID, &II);
4382+
SetIdentifierInfo(ID, II);
43644383
}
43654384
}
43664385

4386+
// Builtins and library builtins have already been initialized. Mark all
4387+
// identifiers as out-of-date, so that they are deserialized on first use.
4388+
if (Type == MK_PCH || Type == MK_Preamble || Type == MK_MainFile)
4389+
for (auto &Id : PP.getIdentifierTable())
4390+
Id.second->setOutOfDate(true);
4391+
4392+
// Mark selectors as out of date.
4393+
for (const auto &Sel : SelectorGeneration)
4394+
SelectorOutOfDate[Sel.first] = true;
4395+
43674396
// Setup the import locations and notify the module manager that we've
43684397
// committed to these module files.
43694398
for (ImportedModule &M : Loaded) {
@@ -4381,25 +4410,6 @@ ASTReader::ASTReadResult ASTReader::ReadAST(StringRef FileName,
43814410
F.ImportLoc = TranslateSourceLocation(*M.ImportedBy, M.ImportLoc);
43824411
}
43834412

4384-
if (!PP.getLangOpts().CPlusPlus ||
4385-
(Type != MK_ImplicitModule && Type != MK_ExplicitModule &&
4386-
Type != MK_PrebuiltModule)) {
4387-
// Mark all of the identifiers in the identifier table as being out of date,
4388-
// so that various accessors know to check the loaded modules when the
4389-
// identifier is used.
4390-
//
4391-
// For C++ modules, we don't need information on many identifiers (just
4392-
// those that provide macros or are poisoned), so we mark all of
4393-
// the interesting ones via PreloadIdentifierOffsets.
4394-
for (IdentifierTable::iterator Id = PP.getIdentifierTable().begin(),
4395-
IdEnd = PP.getIdentifierTable().end();
4396-
Id != IdEnd; ++Id)
4397-
Id->second->setOutOfDate(true);
4398-
}
4399-
// Mark selectors as out of date.
4400-
for (auto Sel : SelectorGeneration)
4401-
SelectorOutOfDate[Sel.first] = true;
4402-
44034413
// Resolve any unresolved module exports.
44044414
for (unsigned I = 0, N = UnresolvedModuleRefs.size(); I != N; ++I) {
44054415
UnresolvedModuleRef &Unresolved = UnresolvedModuleRefs[I];

clang/lib/Serialization/ASTWriter.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3575,14 +3575,16 @@ class ASTIdentifierTableTrait {
35753575
// the mapping from persistent IDs to strings.
35763576
Writer.SetIdentifierOffset(II, Out.tell());
35773577

3578+
auto MacroOffset = Writer.getMacroDirectivesOffset(II);
3579+
35783580
// Emit the offset of the key/data length information to the interesting
35793581
// identifiers table if necessary.
3580-
if (InterestingIdentifierOffsets && isInterestingIdentifier(II))
3582+
if (InterestingIdentifierOffsets &&
3583+
isInterestingIdentifier(II, MacroOffset))
35813584
InterestingIdentifierOffsets->push_back(Out.tell());
35823585

35833586
unsigned KeyLen = II->getLength() + 1;
35843587
unsigned DataLen = 4; // 4 bytes for the persistent ID << 1
3585-
auto MacroOffset = Writer.getMacroDirectivesOffset(II);
35863588
if (isInterestingIdentifier(II, MacroOffset)) {
35873589
DataLen += 2; // 2 bytes for builtin ID
35883590
DataLen += 2; // 2 bytes for flags
@@ -3666,9 +3668,8 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP,
36663668
// strings.
36673669
{
36683670
llvm::OnDiskChainedHashTableGenerator<ASTIdentifierTableTrait> Generator;
3669-
ASTIdentifierTableTrait Trait(
3670-
*this, PP, IdResolver, IsModule,
3671-
(getLangOpts().CPlusPlus && IsModule) ? &InterestingIdents : nullptr);
3671+
ASTIdentifierTableTrait Trait(*this, PP, IdResolver, IsModule,
3672+
IsModule ? &InterestingIdents : nullptr);
36723673

36733674
// Look for any identifiers that were named while processing the
36743675
// headers, but are otherwise not needed. We add these to the hash

0 commit comments

Comments
 (0)