[C++20] [Modules] [Serialization] Don't reuse type ID and identifier ID from imported modules

ChuanqiXu9 · AlexisPerry · commit f38b2cb6ed5d · 2024-06-27T14:49:54.000-06:00
To support no-transitive-change model for named modules, we can't reuse
type ID and identifier ID from imported modules arbitrarily. Since the
theory for no-transitive-change model is,
for a user of a named module, the user can only access the
indirectly imported decls via the directly imported module. So that it is
possible to control what matters to the users when writing the module.

And it will be unsafe to do so if the users can reuse the type IDs and
identifier IDs from the indirectly imported modules not via the directly
imported modules.

So in this patch, we don't reuse the type ID and identifier ID in the
AST writer to avoid the problematic case.
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
@@ -5355,6 +5355,20 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
 
   writeUnhashedControlBlock(PP, Context);
 
+  // Don't reuse type ID and Identifier ID from readers for C++ standard named
+  // modules since we want to support no-transitive-change model for named
+  // modules. The theory for no-transitive-change model is,
+  // for a user of a named module, the user can only access the indirectly
+  // imported decls via the directly imported module. So that it is possible to
+  // control what matters to the users when writing the module. It would be
+  // problematic if the users can reuse the type IDs and identifier IDs from
+  // indirectly imported modules arbitrarily. So we choose to clear these ID
+  // here.
+  if (isWritingStdCXXNamedModules()) {
+    TypeIdxs.clear();
+    IdentifierIDs.clear();
+  }
+
   // Look for any identifiers that were named while processing the
   // headers, but are otherwise not needed. We add these to the hash
   // table to enable checking of the predefines buffer in the case
@@ -6686,6 +6700,11 @@ void ASTWriter::ReaderInitialized(ASTReader *Reader) {
 }
 
 void ASTWriter::IdentifierRead(IdentifierID ID, IdentifierInfo *II) {
+  // Don't reuse Type ID from external modules for named modules. See the
+  // comments in WriteASTCore for details.
+  if (isWritingStdCXXNamedModules())
+    return;
+
   IdentifierID &StoredID = IdentifierIDs[II];
   unsigned OriginalModuleFileIndex = StoredID >> 32;
 
@@ -6708,6 +6727,11 @@ void ASTWriter::MacroRead(serialization::MacroID ID, MacroInfo *MI) {
 }
 
 void ASTWriter::TypeRead(TypeIdx Idx, QualType T) {
+  // Don't reuse Type ID from external modules for named modules. See the
+  // comments in WriteASTCore for details.
+  if (isWritingStdCXXNamedModules())
+    return;
+
   // Always take the type index that comes in later module files.
   // This copes with an interesting
   // case for chained AST writing where we schedule writing the type and then,
diff --git a/clang/test/Modules/no-external-identifier-id.cppm b/clang/test/Modules/no-external-identifier-id.cppm
@@ -0,0 +1,37 @@
+// Testing that we won't record the identifier ID from external modules.
+//
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: cd %t
+//
+// RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-module-interface -o %t/a.pcm
+// RUN: %clang_cc1 -std=c++20 %t/b.cppm -emit-module-interface -o %t/b.pcm \
+// RUN:     -fmodule-file=a=%t/a.pcm
+// RUN: llvm-bcanalyzer --dump --disable-histogram %t/b.pcm | FileCheck %t/b.cppm
+//
+// RUN: %clang_cc1 -std=c++20 %t/a.v1.cppm -emit-module-interface -o %t/a.v1.pcm
+// RUN: %clang_cc1 -std=c++20 %t/b.cppm -emit-module-interface -o %t/b.v1.pcm \
+// RUN:     -fmodule-file=a=%t/a.v1.pcm
+// RUN: diff %t/b.pcm %t/b.v1.pcm &> /dev/null
+
+//--- a.cppm
+export module a;
+export inline int a() {
+    int foo = 43;
+    return foo;
+}
+
+//--- b.cppm
+export module b;
+import a;
+export inline int b() {
+    int foo = 43;
+    return foo;
+}
+
+// CHECK: <DECL_VAR {{.*}} op5=4
+
+//--- a.v1.cppm
+// We remove the unused the function and testing if the format of the BMI of B will change.
+export module a;
+
diff --git a/clang/test/Modules/no-external-type-id.cppm b/clang/test/Modules/no-external-type-id.cppm
@@ -0,0 +1,32 @@
+// Testing that we won't record the type ID from external modules.
+//
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: cd %t
+//
+// RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-module-interface -o %t/a.pcm
+// RUN: %clang_cc1 -std=c++20 %t/b.cppm -emit-module-interface -o %t/b.pcm \
+// RUN:     -fmodule-file=a=%t/a.pcm
+// RUN: llvm-bcanalyzer --dump --disable-histogram %t/b.pcm | FileCheck %t/b.cppm
+//
+// RUN: %clang_cc1 -std=c++20 %t/a.v1.cppm -emit-module-interface -o %t/a.v1.pcm
+// RUN: %clang_cc1 -std=c++20 %t/b.cppm -emit-module-interface -o %t/b.v1.pcm \
+// RUN:     -fmodule-file=a=%t/a.v1.pcm
+// RUN: diff %t/b.pcm %t/b.v1.pcm &> /dev/null
+
+//--- a.cppm
+export module a;
+export int a();
+
+//--- b.cppm
+export module b;
+import a;
+export int b();
+
+// CHECK: <DECL_FUNCTION {{.*}} op8=4048
+// CHECK: <TYPE_FUNCTION_PROTO
+
+//--- a.v1.cppm
+// We remove the unused the function and testing if the format of the BMI of B will change.
+export module a;
+