Skip to content

Commit 33e0f7e

Browse files
committed
[modules] Stop performing PCM lookups for all identifiers when building with C++ modules. Instead, serialize a list of interesting identifiers and mark those ones out of date on module import. Avoiding the identifier lookups here gives a 20-30% speedup in builds with large numbers of modules. No functionality change intended.
llvm-svn: 242868
1 parent ccc025b commit 33e0f7e

File tree

7 files changed

+115
-24
lines changed

7 files changed

+115
-24
lines changed

clang/include/clang/Serialization/ASTBitCodes.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -543,7 +543,10 @@ namespace clang {
543543
/// macro definition.
544544
MACRO_OFFSET = 47,
545545

546-
// ID 48 used to be a table of macros.
546+
/// \brief A list of "interesting" identifiers. Only used in C++ (where we
547+
/// don't normally do lookups into the serialized identifier table). These
548+
/// are eagerly deserialized.
549+
INTERESTING_IDENTIFIERS = 48,
547550

548551
/// \brief Record code for undefined but used functions and variables that
549552
/// need a definition in this TU.

clang/include/clang/Serialization/Module.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,10 @@ class ModuleFile {
270270
/// IdentifierHashTable.
271271
void *IdentifierLookupTable;
272272

273+
/// \brief Offsets of identifiers that we're going to preload within
274+
/// IdentifierTableData.
275+
std::vector<unsigned> PreloadIdentifierOffsets;
276+
273277
// === Macros ===
274278

275279
/// \brief The cursor to the start of the preprocessor block, which stores

clang/include/clang/Serialization/ModuleManager.h

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,19 @@ namespace serialization {
3030

3131
/// \brief Manages the set of modules loaded by an AST reader.
3232
class ModuleManager {
33-
/// \brief The chain of AST files. The first entry is the one named by the
34-
/// user, the last one is the one that doesn't depend on anything further.
33+
/// \brief The chain of AST files, in the order in which we started to load
34+
/// them (this order isn't really useful for anything).
3535
SmallVector<ModuleFile *, 2> Chain;
3636

37+
/// \brief The chain of non-module PCH files. The first entry is the one named
38+
/// by the user, the last one is the one that doesn't depend on anything
39+
/// further.
40+
SmallVector<ModuleFile *, 2> PCHChain;
41+
3742
// \brief The roots of the dependency DAG of AST files. This is used
3843
// to implement short-circuiting logic when running DFS over the dependencies.
3944
SmallVector<ModuleFile *, 2> Roots;
40-
45+
4146
/// \brief All loaded modules, indexed by name.
4247
llvm::DenseMap<const FileEntry *, ModuleFile *> Modules;
4348

@@ -138,6 +143,11 @@ class ModuleManager {
138143
ModuleReverseIterator rbegin() { return Chain.rbegin(); }
139144
/// \brief Reverse iterator end-point to traverse all loaded modules.
140145
ModuleReverseIterator rend() { return Chain.rend(); }
146+
147+
/// \brief A range covering the PCH and preamble module files loaded.
148+
llvm::iterator_range<ModuleConstIterator> pch_modules() const {
149+
return llvm::make_range(PCHChain.begin(), PCHChain.end());
150+
}
141151

142152
/// \brief Returns the primary module associated with the manager, that is,
143153
/// the first module loaded

clang/lib/Sema/Sema.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,9 @@ void Sema::Initialize() {
154154
// will not be able to merge any duplicate __va_list_tag decls correctly.
155155
VAListTagName = PP.getIdentifierInfo("__va_list_tag");
156156

157+
if (!TUScope)
158+
return;
159+
157160
// Initialize predefined 128-bit integer types, if needed.
158161
if (Context.getTargetInfo().hasInt128Type()) {
159162
// If either of the 128-bit integer types are unavailable to name lookup,

clang/lib/Serialization/ASTReader.cpp

Lines changed: 53 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2564,6 +2564,10 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
25642564
break;
25652565
}
25662566

2567+
case INTERESTING_IDENTIFIERS:
2568+
F.PreloadIdentifierOffsets.assign(Record.begin(), Record.end());
2569+
break;
2570+
25672571
case EAGERLY_DESERIALIZED_DECLS:
25682572
// FIXME: Skip reading this record if our ASTConsumer doesn't care
25692573
// about "interesting" decls (for instance, if we're building a module).
@@ -3410,6 +3414,18 @@ ASTReader::ASTReadResult ASTReader::ReadAST(const std::string &FileName,
34103414
// SourceManager.
34113415
SourceMgr.getLoadedSLocEntryByID(Index);
34123416
}
3417+
3418+
// Preload all the pending interesting identifiers by marking them out of
3419+
// date.
3420+
for (auto Offset : F.PreloadIdentifierOffsets) {
3421+
const unsigned char *Data = reinterpret_cast<const unsigned char *>(
3422+
F.IdentifierTableData + Offset);
3423+
3424+
ASTIdentifierLookupTrait Trait(*this, F);
3425+
auto KeyDataLen = Trait.ReadKeyDataLength(Data);
3426+
auto Key = Trait.ReadKey(Data, KeyDataLen.first);
3427+
PP.getIdentifierTable().getOwn(Key).setOutOfDate(true);
3428+
}
34133429
}
34143430

34153431
// Setup the import locations and notify the module manager that we've
@@ -3430,13 +3446,20 @@ ASTReader::ASTReadResult ASTReader::ReadAST(const std::string &FileName,
34303446
M->ImportLoc.getRawEncoding());
34313447
}
34323448

3433-
// Mark all of the identifiers in the identifier table as being out of date,
3434-
// so that various accessors know to check the loaded modules when the
3435-
// identifier is used.
3436-
for (IdentifierTable::iterator Id = PP.getIdentifierTable().begin(),
3437-
IdEnd = PP.getIdentifierTable().end();
3438-
Id != IdEnd; ++Id)
3439-
Id->second->setOutOfDate(true);
3449+
if (!Context.getLangOpts().CPlusPlus ||
3450+
(Type != MK_ImplicitModule && Type != MK_ExplicitModule)) {
3451+
// Mark all of the identifiers in the identifier table as being out of date,
3452+
// so that various accessors know to check the loaded modules when the
3453+
// identifier is used.
3454+
//
3455+
// For C++ modules, we don't need information on many identifiers (just
3456+
// those that provide macros or are poisoned), so we mark all of
3457+
// the interesting ones via PreloadIdentifierOffsets.
3458+
for (IdentifierTable::iterator Id = PP.getIdentifierTable().begin(),
3459+
IdEnd = PP.getIdentifierTable().end();
3460+
Id != IdEnd; ++Id)
3461+
Id->second->setOutOfDate(true);
3462+
}
34403463

34413464
// Resolve any unresolved module exports.
34423465
for (unsigned I = 0, N = UnresolvedModuleRefs.size(); I != N; ++I) {
@@ -6827,19 +6850,32 @@ IdentifierInfo *ASTReader::get(StringRef Name) {
68276850
// Note that we are loading an identifier.
68286851
Deserializing AnIdentifier(this);
68296852

6830-
// If there is a global index, look there first to determine which modules
6831-
// provably do not have any results for this identifier.
6832-
GlobalModuleIndex::HitSet Hits;
6833-
GlobalModuleIndex::HitSet *HitsPtr = nullptr;
6834-
if (!loadGlobalIndex()) {
6835-
if (GlobalIndex->lookupIdentifier(Name, Hits)) {
6836-
HitsPtr = &Hits;
6837-
}
6838-
}
68396853
IdentifierLookupVisitor Visitor(Name, /*PriorGeneration=*/0,
68406854
NumIdentifierLookups,
68416855
NumIdentifierLookupHits);
6842-
ModuleMgr.visit(IdentifierLookupVisitor::visit, &Visitor, HitsPtr);
6856+
6857+
// We don't need to do identifier table lookups in C++ modules (we preload
6858+
// all interesting declarations, and don't need to use the scope for name
6859+
// lookups). Perform the lookup in PCH files, though, since we don't build
6860+
// a complete initial identifier table if we're carrying on from a PCH.
6861+
if (Context.getLangOpts().CPlusPlus) {
6862+
for (auto F : ModuleMgr.pch_modules())
6863+
if (Visitor.visit(*F, &Visitor))
6864+
break;
6865+
} else {
6866+
// If there is a global index, look there first to determine which modules
6867+
// provably do not have any results for this identifier.
6868+
GlobalModuleIndex::HitSet Hits;
6869+
GlobalModuleIndex::HitSet *HitsPtr = nullptr;
6870+
if (!loadGlobalIndex()) {
6871+
if (GlobalIndex->lookupIdentifier(Name, Hits)) {
6872+
HitsPtr = &Hits;
6873+
}
6874+
}
6875+
6876+
ModuleMgr.visit(IdentifierLookupVisitor::visit, &Visitor, HitsPtr);
6877+
}
6878+
68436879
IdentifierInfo *II = Visitor.getIdentifierInfo();
68446880
markIdentifierUpToDate(II);
68456881
return II;

clang/lib/Serialization/ASTWriter.cpp

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3104,6 +3104,7 @@ class ASTIdentifierTableTrait {
31043104
IdentifierResolver &IdResolver;
31053105
bool IsModule;
31063106
bool NeedDecls;
3107+
ASTWriter::RecordData *InterestingIdentifierOffsets;
31073108

31083109
/// \brief Determines whether this is an "interesting" identifier that needs a
31093110
/// full IdentifierInfo structure written into the hash table. Notably, this
@@ -3131,14 +3132,20 @@ class ASTIdentifierTableTrait {
31313132
typedef unsigned offset_type;
31323133

31333134
ASTIdentifierTableTrait(ASTWriter &Writer, Preprocessor &PP,
3134-
IdentifierResolver &IdResolver, bool IsModule)
3135+
IdentifierResolver &IdResolver, bool IsModule,
3136+
ASTWriter::RecordData *InterestingIdentifierOffsets)
31353137
: Writer(Writer), PP(PP), IdResolver(IdResolver), IsModule(IsModule),
3136-
NeedDecls(!IsModule || !Writer.getLangOpts().CPlusPlus) {}
3138+
NeedDecls(!IsModule || !Writer.getLangOpts().CPlusPlus),
3139+
InterestingIdentifierOffsets(InterestingIdentifierOffsets) {}
31373140

31383141
static hash_value_type ComputeHash(const IdentifierInfo* II) {
31393142
return llvm::HashString(II->getName());
31403143
}
31413144

3145+
bool isInterestingIdentifier(const IdentifierInfo *II) {
3146+
auto MacroOffset = Writer.getMacroDirectivesOffset(II);
3147+
return isInterestingIdentifier(II, MacroOffset);
3148+
}
31423149
bool isInterestingNonMacroIdentifier(const IdentifierInfo *II) {
31433150
return isInterestingIdentifier(II, 0);
31443151
}
@@ -3178,6 +3185,12 @@ class ASTIdentifierTableTrait {
31783185
// Record the location of the key data. This is used when generating
31793186
// the mapping from persistent IDs to strings.
31803187
Writer.SetIdentifierOffset(II, Out.tell());
3188+
3189+
// Emit the offset of the key/data length information to the interesting
3190+
// identifiers table if necessary.
3191+
if (InterestingIdentifierOffsets && isInterestingIdentifier(II))
3192+
InterestingIdentifierOffsets->push_back(Out.tell() - 4);
3193+
31813194
Out.write(II->getNameStart(), KeyLen);
31823195
}
31833196

@@ -3238,11 +3251,15 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP,
32383251
bool IsModule) {
32393252
using namespace llvm;
32403253

3254+
RecordData InterestingIdents;
3255+
32413256
// Create and write out the blob that contains the identifier
32423257
// strings.
32433258
{
32443259
llvm::OnDiskChainedHashTableGenerator<ASTIdentifierTableTrait> Generator;
3245-
ASTIdentifierTableTrait Trait(*this, PP, IdResolver, IsModule);
3260+
ASTIdentifierTableTrait Trait(
3261+
*this, PP, IdResolver, IsModule,
3262+
(getLangOpts().CPlusPlus && IsModule) ? &InterestingIdents : nullptr);
32463263

32473264
// Look for any identifiers that were named while processing the
32483265
// headers, but are otherwise not needed. We add these to the hash
@@ -3316,6 +3333,11 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP,
33163333
Record.push_back(FirstIdentID - NUM_PREDEF_IDENT_IDS);
33173334
Stream.EmitRecordWithBlob(IdentifierOffsetAbbrev, Record,
33183335
bytes(IdentifierOffsets));
3336+
3337+
// In C++, write the list of interesting identifiers (those that are
3338+
// defined as macros, poisoned, or similar unusual things).
3339+
if (!InterestingIdents.empty())
3340+
Stream.EmitRecord(INTERESTING_IDENTIFIERS, InterestingIdents);
33193341
}
33203342

33213343
//===----------------------------------------------------------------------===//

clang/lib/Serialization/ModuleManager.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@ ModuleManager::addModule(StringRef FileName, ModuleKind Type,
9595
New->File = Entry;
9696
New->ImportLoc = ImportLoc;
9797
Chain.push_back(New);
98+
if (!New->isModule())
99+
PCHChain.push_back(New);
98100
if (!ImportedBy)
99101
Roots.push_back(New);
100102
NewModule = true;
@@ -159,6 +161,8 @@ ModuleManager::addModule(StringRef FileName, ModuleKind Type,
159161
Modules.erase(Entry);
160162
assert(Chain.back() == ModuleEntry);
161163
Chain.pop_back();
164+
if (!ModuleEntry->isModule())
165+
PCHChain.pop_back();
162166
if (Roots.back() == ModuleEntry)
163167
Roots.pop_back();
164168
else
@@ -225,6 +229,15 @@ void ModuleManager::removeModules(
225229

226230
// Remove the modules from the chain.
227231
Chain.erase(first, last);
232+
233+
// Also remove them from the PCH chain.
234+
for (auto I = first; I != last; ++I) {
235+
if (!(*I)->isModule()) {
236+
PCHChain.erase(std::find(PCHChain.begin(), PCHChain.end(), *I),
237+
PCHChain.end());
238+
break;
239+
}
240+
}
228241
}
229242

230243
void

0 commit comments

Comments
 (0)