Skip to content

Commit 9c25418

Browse files
committed
[modules] Don't save uninteresting identifiers, and don't consider identifiers
to be interesting just because they are the name of a builtin. Reduces the size of an empty module by over 80% (~100KB). llvm-svn: 242650
1 parent ea4ad5a commit 9c25418

File tree

7 files changed

+53
-15
lines changed

7 files changed

+53
-15
lines changed

clang/include/clang/Basic/IdentifierTable.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ class IdentifierInfo {
161161
/// TokenID is normally read-only but there are 2 instances where we revert it
162162
/// to tok::identifier for libstdc++ 4.2. Keep track of when this happens
163163
/// using this method so we can inform serialization about it.
164-
void RevertTokenIDToIdentifier() {
164+
void revertTokenIDToIdentifier() {
165165
assert(TokenID != tok::identifier && "Already at tok::identifier");
166166
TokenID = tok::identifier;
167167
RevertedTokenID = true;
@@ -183,6 +183,18 @@ class IdentifierInfo {
183183
}
184184
void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; }
185185

186+
/// \brief True if revertBuiltin() was called.
187+
bool hasRevertedBuiltin() const {
188+
return ObjCOrBuiltinID == tok::NUM_OBJC_KEYWORDS;
189+
}
190+
191+
/// \brief Revert the identifier to a non-builtin identifier. We do this if
192+
/// the name of a known builtin library function is used to declare that
193+
/// function, but an unexpected type is specified.
194+
void revertBuiltin() {
195+
setBuiltinID(0);
196+
}
197+
186198
/// \brief Return a value indicating whether this is a builtin function.
187199
///
188200
/// 0 is not-built-in. 1 is builtin-for-some-nonprimary-target.

clang/include/clang/Serialization/Module.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -476,6 +476,11 @@ class ModuleFile {
476476
/// any point during translation.
477477
bool isDirectlyImported() const { return DirectlyImported; }
478478

479+
/// \brief Is this a module file for a module (rather than a PCH or similar).
480+
bool isModule() const {
481+
return Kind == MK_ImplicitModule || Kind == MK_ExplicitModule;
482+
}
483+
479484
/// \brief Dump debugging output for this module.
480485
void dump();
481486
};

clang/lib/Parse/Parser.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1489,7 +1489,7 @@ bool Parser::TryKeywordIdentFallback(bool DisableKeyword) {
14891489
<< PP.getSpelling(Tok)
14901490
<< DisableKeyword;
14911491
if (DisableKeyword)
1492-
Tok.getIdentifierInfo()->RevertTokenIDToIdentifier();
1492+
Tok.getIdentifierInfo()->revertTokenIDToIdentifier();
14931493
Tok.setKind(tok::identifier);
14941494
return true;
14951495
}

clang/lib/Sema/SemaDecl.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3115,7 +3115,7 @@ bool Sema::MergeFunctionDecl(FunctionDecl *New, NamedDecl *&OldD,
31153115
// remain visible, a single bogus local redeclaration (which is
31163116
// actually only a warning) could break all the downstream code.
31173117
if (!New->getLexicalDeclContext()->isFunctionOrMethod())
3118-
New->getIdentifier()->setBuiltinID(Builtin::NotBuiltin);
3118+
New->getIdentifier()->revertBuiltin();
31193119

31203120
return false;
31213121
}

clang/lib/Serialization/ASTReader.cpp

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -735,10 +735,10 @@ ASTIdentifierLookupTraitBase::ReadKey(const unsigned char* d, unsigned n) {
735735
}
736736

737737
/// \brief Whether the given identifier is "interesting".
738-
static bool isInterestingIdentifier(IdentifierInfo &II) {
738+
static bool isInterestingIdentifier(IdentifierInfo &II, bool IsModule) {
739739
return II.hadMacroDefinition() ||
740740
II.isPoisoned() ||
741-
II.getObjCOrBuiltinID() ||
741+
(IsModule ? II.hasRevertedBuiltin() : II.getObjCOrBuiltinID()) ||
742742
II.hasRevertedTokenIDToIdentifier() ||
743743
II.getFETokenInfo<void>();
744744
}
@@ -767,7 +767,7 @@ IdentifierInfo *ASTIdentifierLookupTrait::ReadData(const internal_key_type& k,
767767
}
768768
if (!II->isFromAST()) {
769769
II->setIsFromAST();
770-
if (isInterestingIdentifier(*II))
770+
if (isInterestingIdentifier(*II, F.isModule()))
771771
II->setChangedSinceDeserialization();
772772
}
773773
Reader.markIdentifierUpToDate(II);
@@ -784,6 +784,7 @@ IdentifierInfo *ASTIdentifierLookupTrait::ReadData(const internal_key_type& k,
784784
unsigned Bits = endian::readNext<uint16_t, little, unaligned>(d);
785785
bool CPlusPlusOperatorKeyword = readBit(Bits);
786786
bool HasRevertedTokenIDToIdentifier = readBit(Bits);
787+
bool HasRevertedBuiltin = readBit(Bits);
787788
bool Poisoned = readBit(Bits);
788789
bool ExtensionToken = readBit(Bits);
789790
bool HadMacroDefinition = readBit(Bits);
@@ -794,8 +795,15 @@ IdentifierInfo *ASTIdentifierLookupTrait::ReadData(const internal_key_type& k,
794795
// Set or check the various bits in the IdentifierInfo structure.
795796
// Token IDs are read-only.
796797
if (HasRevertedTokenIDToIdentifier && II->getTokenID() != tok::identifier)
797-
II->RevertTokenIDToIdentifier();
798-
II->setObjCOrBuiltinID(ObjCOrBuiltinID);
798+
II->revertTokenIDToIdentifier();
799+
if (!F.isModule())
800+
II->setObjCOrBuiltinID(ObjCOrBuiltinID);
801+
else if (HasRevertedBuiltin && II->getBuiltinID()) {
802+
II->revertBuiltin();
803+
assert((II->hasRevertedBuiltin() ||
804+
II->getObjCOrBuiltinID() == ObjCOrBuiltinID) &&
805+
"Incorrect ObjC keyword or builtin ID");
806+
}
799807
assert(II->isExtensionToken() == ExtensionToken &&
800808
"Incorrect extension token flag");
801809
(void)ExtensionToken;

clang/lib/Serialization/ASTWriter.cpp

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3102,15 +3102,16 @@ class ASTIdentifierTableTrait {
31023102
ASTWriter &Writer;
31033103
Preprocessor &PP;
31043104
IdentifierResolver &IdResolver;
3105+
bool IsModule;
31053106

31063107
/// \brief Determines whether this is an "interesting" identifier that needs a
31073108
/// full IdentifierInfo structure written into the hash table. Notably, this
31083109
/// doesn't check whether the name has macros defined; use PublicMacroIterator
31093110
/// to check that.
3110-
bool isInterestingIdentifier(IdentifierInfo *II, uint64_t MacroOffset) {
3111+
bool isInterestingIdentifier(const IdentifierInfo *II, uint64_t MacroOffset) {
31113112
if (MacroOffset ||
31123113
II->isPoisoned() ||
3113-
II->getObjCOrBuiltinID() ||
3114+
(IsModule ? II->hasRevertedBuiltin() : II->getObjCOrBuiltinID()) ||
31143115
II->hasRevertedTokenIDToIdentifier() ||
31153116
II->getFETokenInfo<void>())
31163117
return true;
@@ -3129,13 +3130,17 @@ class ASTIdentifierTableTrait {
31293130
typedef unsigned offset_type;
31303131

31313132
ASTIdentifierTableTrait(ASTWriter &Writer, Preprocessor &PP,
3132-
IdentifierResolver &IdResolver)
3133-
: Writer(Writer), PP(PP), IdResolver(IdResolver) {}
3133+
IdentifierResolver &IdResolver, bool IsModule)
3134+
: Writer(Writer), PP(PP), IdResolver(IdResolver), IsModule(IsModule) {}
31343135

31353136
static hash_value_type ComputeHash(const IdentifierInfo* II) {
31363137
return llvm::HashString(II->getName());
31373138
}
31383139

3140+
bool isInterestingNonMacroIdentifier(const IdentifierInfo *II) {
3141+
return isInterestingIdentifier(II, 0);
3142+
}
3143+
31393144
std::pair<unsigned,unsigned>
31403145
EmitKeyDataLength(raw_ostream& Out, IdentifierInfo* II, IdentID ID) {
31413146
unsigned KeyLen = II->getLength() + 1;
@@ -3192,6 +3197,7 @@ class ASTIdentifierTableTrait {
31923197
Bits = (Bits << 1) | unsigned(HadMacroDefinition);
31933198
Bits = (Bits << 1) | unsigned(II->isExtensionToken());
31943199
Bits = (Bits << 1) | unsigned(II->isPoisoned());
3200+
Bits = (Bits << 1) | unsigned(II->hasRevertedBuiltin());
31953201
Bits = (Bits << 1) | unsigned(II->hasRevertedTokenIDToIdentifier());
31963202
Bits = (Bits << 1) | unsigned(II->isCPlusPlusOperatorKeyword());
31973203
LE.write<uint16_t>(Bits);
@@ -3229,7 +3235,7 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP,
32293235
// strings.
32303236
{
32313237
llvm::OnDiskChainedHashTableGenerator<ASTIdentifierTableTrait> Generator;
3232-
ASTIdentifierTableTrait Trait(*this, PP, IdResolver);
3238+
ASTIdentifierTableTrait Trait(*this, PP, IdResolver, IsModule);
32333239

32343240
// Look for any identifiers that were named while processing the
32353241
// headers, but are otherwise not needed. We add these to the hash
@@ -3245,7 +3251,8 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP,
32453251
// that their order is stable.
32463252
std::sort(IIs.begin(), IIs.end(), llvm::less_ptr<IdentifierInfo>());
32473253
for (const IdentifierInfo *II : IIs)
3248-
getIdentifierRef(II);
3254+
if (Trait.isInterestingNonMacroIdentifier(II))
3255+
getIdentifierRef(II);
32493256

32503257
// Create the on-disk hash table representation. We only store offsets
32513258
// for identifiers that appear here for the first time.
@@ -4444,6 +4451,7 @@ void ASTWriter::WriteASTCore(Sema &SemaRef,
44444451
WriteHeaderSearch(PP.getHeaderSearchInfo());
44454452
WriteSelectors(SemaRef);
44464453
WriteReferencedSelectorsPool(SemaRef);
4454+
WriteLateParsedTemplates(SemaRef);
44474455
WriteIdentifierTable(PP, SemaRef.IdResolver, isModule);
44484456
WriteFPPragmaOptions(SemaRef.getFPOptions());
44494457
WriteOpenCLExtensions(SemaRef);
@@ -4559,7 +4567,6 @@ void ASTWriter::WriteASTCore(Sema &SemaRef,
45594567
WriteDeclReplacementsBlock();
45604568
WriteRedeclarations();
45614569
WriteObjCCategories();
4562-
WriteLateParsedTemplates(SemaRef);
45634570
if(!WritingModule)
45644571
WriteOptimizePragmaOptions(SemaRef);
45654572

clang/test/Modules/empty.modulemap

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,12 @@
1010
// RUN: -emit-module -fmodule-name=empty -o %t/check.pcm \
1111
// RUN: %s
1212
//
13+
// The module file should be identical each time we produce it.
1314
// RUN: diff %t/base.pcm %t/check.pcm
15+
//
16+
// We expect an empty module to be less than 30KB.
17+
// REQUIRES: shell
18+
// RUN: wc -c %t/base.pcm | FileCheck --check-prefix=CHECK-SIZE %s
19+
// CHECK-SIZE: {{^[12][0-9]{4} }}
1420

1521
module empty { header "Inputs/empty.h" export * }

0 commit comments

Comments
 (0)