Skip to content

Commit d4d95ee

Browse files
committed
[Serialization] Register identifiers in ahead and don't emit predefined decls
See the added test for the motivation example. In that example, we add a new function declaration in `a.cppm` and this is not used in the reduced BMI of `b.cppm`. We expect that the change won't affect the BMI of `b.cppm`. But it is the not the case. There are 2 reason for unexpected result: 1. We would register the interesting identifiers in a pretty late phase. This may cause some some predefined identifier ID change due to we insert other identifiers during emitting decls and types. 2. In `GenerateNameLookup`, we would generate information for predefined decls. This may not be intended. Since every predefined decl doesn't belong to any module. And this patch solves the first issue by registering the identifiers in the very early posititon to make sure the ID won't get affected by the process to emit decls and types. And we solve the second question by filtering predefined decls simply.
1 parent 2b5d1fb commit d4d95ee

File tree

4 files changed

+105
-46
lines changed

4 files changed

+105
-46
lines changed

clang/include/clang/Serialization/ASTWriter.h

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,11 @@ class ASTWriter : public ASTDeserializationListener,
228228
/// unit, while 0 is reserved for NULL.
229229
llvm::DenseMap<const Decl *, LocalDeclID> DeclIDs;
230230

231+
/// Set of predefined decls. This is a helper data to determine if a decl
232+
/// is predefined. It should be more clear and safer to query the set
233+
/// instead of comparing the result of `getDeclID()` or `GetDeclRef()`.
234+
llvm::SmallPtrSet<const Decl *, 32> PredefinedDecls;
235+
231236
/// Offset of each declaration in the bitstream, indexed by
232237
/// the declaration's ID.
233238
std::vector<serialization::DeclOffset> DeclOffsets;
@@ -563,8 +568,6 @@ class ASTWriter : public ASTDeserializationListener,
563568
void WriteType(QualType T);
564569

565570
bool isLookupResultExternal(StoredDeclsList &Result, DeclContext *DC);
566-
bool isLookupResultEntirelyExternalOrUnreachable(StoredDeclsList &Result,
567-
DeclContext *DC);
568571

569572
void GenerateNameLookupTable(const DeclContext *DC,
570573
llvm::SmallVectorImpl<char> &LookupTable);
@@ -844,6 +847,8 @@ class ASTWriter : public ASTDeserializationListener,
844847
bool hasChain() const { return Chain; }
845848
ASTReader *getChain() const { return Chain; }
846849

850+
bool isWritingModule() const { return WritingModule; }
851+
847852
bool isWritingStdCXXNamedModules() const {
848853
return WritingModule && WritingModule->isNamedModule();
849854
}
@@ -852,6 +857,10 @@ class ASTWriter : public ASTDeserializationListener,
852857

853858
bool getDoneWritingDeclsAndTypes() const { return DoneWritingDeclsAndTypes; }
854859

860+
bool isDeclPredefined(const Decl *D) const {
861+
return PredefinedDecls.count(D);
862+
}
863+
855864
void handleVTable(CXXRecordDecl *RD);
856865

857866
private:

clang/lib/Serialization/ASTWriter.cpp

Lines changed: 58 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -3728,6 +3728,29 @@ static NamedDecl *getDeclForLocalLookup(const LangOptions &LangOpts,
37283728

37293729
namespace {
37303730

3731+
bool IsInterestingIdentifier(const IdentifierInfo *II, uint64_t MacroOffset,
3732+
bool IsModule, bool IsCPlusPlus) {
3733+
bool NeedDecls = !IsModule || !IsCPlusPlus;
3734+
3735+
bool IsInteresting =
3736+
II->getNotableIdentifierID() != tok::NotableIdentifierKind::not_notable ||
3737+
II->getBuiltinID() != Builtin::ID::NotBuiltin ||
3738+
II->getObjCKeywordID() != tok::ObjCKeywordKind::objc_not_keyword;
3739+
if (MacroOffset || II->isPoisoned() || (!IsModule && IsInteresting) ||
3740+
II->hasRevertedTokenIDToIdentifier() ||
3741+
(NeedDecls && II->getFETokenInfo()))
3742+
return true;
3743+
3744+
return false;
3745+
}
3746+
3747+
bool IsInterestingNonMacroIdentifier(const IdentifierInfo *II,
3748+
ASTWriter &Writer) {
3749+
bool IsModule = Writer.isWritingModule();
3750+
bool IsCPlusPlus = Writer.getLangOpts().CPlusPlus;
3751+
return IsInterestingIdentifier(II, /*MacroOffset=*/0, IsModule, IsCPlusPlus);
3752+
}
3753+
37313754
class ASTIdentifierTableTrait {
37323755
ASTWriter &Writer;
37333756
Preprocessor &PP;
@@ -3741,17 +3764,8 @@ class ASTIdentifierTableTrait {
37413764
/// doesn't check whether the name has macros defined; use PublicMacroIterator
37423765
/// to check that.
37433766
bool isInterestingIdentifier(const IdentifierInfo *II, uint64_t MacroOffset) {
3744-
bool IsInteresting =
3745-
II->getNotableIdentifierID() !=
3746-
tok::NotableIdentifierKind::not_notable ||
3747-
II->getBuiltinID() != Builtin::ID::NotBuiltin ||
3748-
II->getObjCKeywordID() != tok::ObjCKeywordKind::objc_not_keyword;
3749-
if (MacroOffset || II->isPoisoned() || (!IsModule && IsInteresting) ||
3750-
II->hasRevertedTokenIDToIdentifier() ||
3751-
(NeedDecls && II->getFETokenInfo()))
3752-
return true;
3753-
3754-
return false;
3767+
return IsInterestingIdentifier(II, MacroOffset, IsModule,
3768+
Writer.getLangOpts().CPlusPlus);
37553769
}
37563770

37573771
public:
@@ -3782,10 +3796,6 @@ class ASTIdentifierTableTrait {
37823796
return isInterestingIdentifier(II, MacroOffset);
37833797
}
37843798

3785-
bool isInterestingNonMacroIdentifier(const IdentifierInfo *II) {
3786-
return isInterestingIdentifier(II, 0);
3787-
}
3788-
37893799
std::pair<unsigned, unsigned>
37903800
EmitKeyDataLength(raw_ostream &Out, const IdentifierInfo *II, IdentifierID ID) {
37913801
// Record the location of the identifier data. This is used when generating
@@ -3887,21 +3897,6 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP,
38873897
ASTIdentifierTableTrait Trait(*this, PP, IdResolver, IsModule,
38883898
IsModule ? &InterestingIdents : nullptr);
38893899

3890-
// Look for any identifiers that were named while processing the
3891-
// headers, but are otherwise not needed. We add these to the hash
3892-
// table to enable checking of the predefines buffer in the case
3893-
// where the user adds new macro definitions when building the AST
3894-
// file.
3895-
SmallVector<const IdentifierInfo *, 128> IIs;
3896-
for (const auto &ID : PP.getIdentifierTable())
3897-
if (Trait.isInterestingNonMacroIdentifier(ID.second))
3898-
IIs.push_back(ID.second);
3899-
// Sort the identifiers lexicographically before getting the references so
3900-
// that their order is stable.
3901-
llvm::sort(IIs, llvm::deref<std::less<>>());
3902-
for (const IdentifierInfo *II : IIs)
3903-
getIdentifierRef(II);
3904-
39053900
// Create the on-disk hash table representation. We only store offsets
39063901
// for identifiers that appear here for the first time.
39073902
IdentifierOffsets.resize(NextIdentID - FirstIdentID);
@@ -4125,16 +4120,24 @@ bool ASTWriter::isLookupResultExternal(StoredDeclsList &Result,
41254120
DC->hasNeedToReconcileExternalVisibleStorage();
41264121
}
41274122

4128-
bool ASTWriter::isLookupResultEntirelyExternalOrUnreachable(
4129-
StoredDeclsList &Result, DeclContext *DC) {
4123+
/// Returns ture if all of the lookup result are either external, not emitted or
4124+
/// predefined. In such cases, the lookup result is not interesting and we don't
4125+
/// need to record the result in the current being written module. Return false
4126+
/// otherwise.
4127+
static bool isLookupResultNotInteresting(ASTWriter &Writer,
4128+
StoredDeclsList &Result) {
41304129
for (auto *D : Result.getLookupResult()) {
4131-
auto *LocalD = getDeclForLocalLookup(getLangOpts(), D);
4130+
auto *LocalD = getDeclForLocalLookup(Writer.getLangOpts(), D);
41324131
if (LocalD->isFromASTFile())
41334132
continue;
41344133

41354134
// We can only be sure whether the local declaration is reachable
41364135
// after we done writing the declarations and types.
4137-
if (DoneWritingDeclsAndTypes && !wasDeclEmitted(LocalD))
4136+
if (Writer.getDoneWritingDeclsAndTypes() && !Writer.wasDeclEmitted(LocalD))
4137+
continue;
4138+
4139+
// We don't need to emit the predefined decls.
4140+
if (Writer.isDeclPredefined(LocalD))
41384141
continue;
41394142

41404143
return false;
@@ -4182,11 +4185,11 @@ ASTWriter::GenerateNameLookupTable(const DeclContext *ConstDC,
41824185
// that entirely external or unreachable.
41834186
//
41844187
// FIMXE: It looks sufficient to test
4185-
// isLookupResultEntirelyExternalOrUnreachable here. But due to bug we have
4188+
// isLookupResultNotInteresting here. But due to bug we have
41864189
// to test isLookupResultExternal here. See
41874190
// https://github.com/llvm/llvm-project/issues/61065 for details.
41884191
if ((GeneratingReducedBMI || isLookupResultExternal(Result, DC)) &&
4189-
isLookupResultEntirelyExternalOrUnreachable(Result, DC))
4192+
isLookupResultNotInteresting(*this, Result))
41904193
continue;
41914194

41924195
// We also skip empty results. If any of the results could be external and
@@ -5007,6 +5010,7 @@ void ASTWriter::PrepareWritingSpecialDecls(Sema &SemaRef) {
50075010
if (D) {
50085011
assert(D->isCanonicalDecl() && "predefined decl is not canonical");
50095012
DeclIDs[D] = ID;
5013+
PredefinedDecls.insert(D);
50105014
}
50115015
};
50125016
RegisterPredefDecl(Context.getTranslationUnitDecl(),
@@ -5355,6 +5359,24 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
53555359

53565360
writeUnhashedControlBlock(PP, Context);
53575361

5362+
// Look for any identifiers that were named while processing the
5363+
// headers, but are otherwise not needed. We add these to the hash
5364+
// table to enable checking of the predefines buffer in the case
5365+
// where the user adds new macro definitions when building the AST
5366+
// file.
5367+
//
5368+
// We do this before emitting any Decl and Types to make sure the
5369+
// Identifier ID is stable.
5370+
SmallVector<const IdentifierInfo *, 128> IIs;
5371+
for (const auto &ID : PP.getIdentifierTable())
5372+
if (IsInterestingNonMacroIdentifier(ID.second, *this))
5373+
IIs.push_back(ID.second);
5374+
// Sort the identifiers lexicographically before getting the references so
5375+
// that their order is stable.
5376+
llvm::sort(IIs, llvm::deref<std::less<>>());
5377+
for (const IdentifierInfo *II : IIs)
5378+
getIdentifierRef(II);
5379+
53585380
// Write the set of weak, undeclared identifiers. We always write the
53595381
// entire table, since later PCH files in a PCH chain are only interested in
53605382
// the results at the end of the chain.

clang/test/Modules/decl-params-determinisim.m

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,23 +28,23 @@
2828

2929
// CHECK: <TYPE_FUNCTION_PROTO
3030
// CHECK-NEXT: <DECL_PARM_VAR
31-
// CHECK-SAME: op5=2
31+
// CHECK-SAME: op5=13
3232
// CHECK-NEXT: <DECL_PARM_VAR
33-
// CHECK-SAME: op5=3
33+
// CHECK-SAME: op5=14
3434
// CHECK-NEXT: <DECL_PARM_VAR
35-
// CHECK-SAME: op5=4
35+
// CHECK-SAME: op5=15
3636
// CHECK-NEXT: <DECL_PARM_VAR
37-
// CHECK-SAME: op5=5
37+
// CHECK-SAME: op5=16
3838

3939
/// Decl records start at 43
4040
// CHECK: <DECL_RECORD
41-
// CHECK-SAME: op5=43
41+
// CHECK-SAME: op5=54
4242
// CHECK-NEXT: <DECL_RECORD
43-
// CHECK-SAME: op5=44
43+
// CHECK-SAME: op5=55
4444
// CHECK-NEXT: <DECL_RECORD
45-
// CHECK-SAME: op5=45
45+
// CHECK-SAME: op5=56
4646
// CHECK-NEXT: <DECL_RECORD
47-
// CHECK-SAME: op5=46
47+
// CHECK-SAME: op5=57
4848

4949
//--- headers/a.h
5050
void f(struct A0 *a0,
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// Test that adding a new identifier within reduced BMI may not produce a transitive change.
2+
//
3+
// RUN: rm -rf %t
4+
// RUN: split-file %s %t
5+
//
6+
// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/A.cppm -o %t/A.pcm
7+
// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/B.cppm -o %t/B.pcm \
8+
// RUN: -fmodule-file=A=%t/A.pcm
9+
//
10+
// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/A.v1.cppm -o %t/A.v1.pcm
11+
// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/B.cppm -o %t/B.v1.pcm \
12+
// RUN: -fmodule-file=A=%t/A.v1.pcm
13+
//
14+
// RUN: diff %t/B.pcm %t/B.v1.pcm &> /dev/null
15+
16+
//--- A.cppm
17+
export module A;
18+
export int a();
19+
20+
//--- A.v1.cppm
21+
export module A;
22+
export int a();
23+
export int a2();
24+
25+
//--- B.cppm
26+
export module B;
27+
import A;
28+
export int b() { return a(); }

0 commit comments

Comments
 (0)