Skip to content

Commit 6fb08d8

Browse files
committed
Reland "[clang][modules] Move UNHASHED_CONTROL_BLOCK up in the AST file"
This reverts commit b6ba804, effectively relanding commit 7d15657. The original commit incorrectly called `ASTWriter::writeUnhashedControlBlock()` before `ASTWriter::collectNonAffectingInputFiles()`, causing SourceLocations/FileIDs in the pragma diagnostic mappings block to be invalid. This is now tested by `clang/test/Modules/diag-mappings-affecting.c`.
1 parent d4a9121 commit 6fb08d8

File tree

8 files changed

+157
-60
lines changed

8 files changed

+157
-60
lines changed

clang/include/clang/Basic/Module.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,12 @@ struct ASTFileSignature : std::array<uint8_t, 20> {
8181
return Sentinel;
8282
}
8383

84+
static ASTFileSignature createDummy() {
85+
ASTFileSignature Dummy;
86+
Dummy.fill(0x00);
87+
return Dummy;
88+
}
89+
8490
template <typename InputIt>
8591
static ASTFileSignature create(InputIt First, InputIt Last) {
8692
assert(std::distance(First, Last) == size &&

clang/include/clang/Serialization/ASTBitCodes.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ namespace serialization {
4141
/// Version 4 of AST files also requires that the version control branch and
4242
/// revision match exactly, since there is no backward compatibility of
4343
/// AST files at this time.
44-
const unsigned VERSION_MAJOR = 28;
44+
const unsigned VERSION_MAJOR = 29;
4545

4646
/// AST file minor version number supported by this version of
4747
/// Clang.

clang/include/clang/Serialization/ASTWriter.h

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -128,10 +128,17 @@ class ASTWriter : public ASTDeserializationListener,
128128
/// The module we're currently writing, if any.
129129
Module *WritingModule = nullptr;
130130

131-
/// The offset of the first bit inside the AST_BLOCK.
131+
/// The byte range representing all the UNHASHED_CONTROL_BLOCK.
132+
std::pair<uint64_t, uint64_t> UnhashedControlBlockRange;
133+
/// The bit offset of the AST block hash blob.
134+
uint64_t ASTBlockHashOffset = 0;
135+
/// The bit offset of the signature blob.
136+
uint64_t SignatureOffset = 0;
137+
138+
/// The bit offset of the first bit inside the AST_BLOCK.
132139
uint64_t ASTBlockStartOffset = 0;
133140

134-
/// The range representing all the AST_BLOCK.
141+
/// The byte range representing all the AST_BLOCK.
135142
std::pair<uint64_t, uint64_t> ASTBlockRange;
136143

137144
/// The base directory for any relative paths we emit.
@@ -495,12 +502,11 @@ class ASTWriter : public ASTDeserializationListener,
495502
StringRef isysroot);
496503

497504
/// Write out the signature and diagnostic options, and return the signature.
498-
ASTFileSignature writeUnhashedControlBlock(Preprocessor &PP,
499-
ASTContext &Context);
505+
void writeUnhashedControlBlock(Preprocessor &PP, ASTContext &Context);
506+
ASTFileSignature backpatchSignature();
500507

501508
/// Calculate hash of the pcm content.
502-
static std::pair<ASTFileSignature, ASTFileSignature>
503-
createSignature(StringRef AllBytes, StringRef ASTBlockBytes);
509+
std::pair<ASTFileSignature, ASTFileSignature> createSignature() const;
504510

505511
void WriteInputFiles(SourceManager &SourceMgr, HeaderSearchOptions &HSOpts);
506512
void WriteSourceManagerBlock(SourceManager &SourceMgr,

clang/lib/Serialization/ASTReader.cpp

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4734,12 +4734,6 @@ ASTReader::ReadASTCore(StringRef FileName,
47344734
ShouldFinalizePCM = true;
47354735
return Success;
47364736

4737-
case UNHASHED_CONTROL_BLOCK_ID:
4738-
// This block is handled using look-ahead during ReadControlBlock. We
4739-
// shouldn't get here!
4740-
Error("malformed block record in AST file");
4741-
return Failure;
4742-
47434737
default:
47444738
if (llvm::Error Err = Stream.SkipBlock()) {
47454739
Error(std::move(Err));
@@ -4859,13 +4853,18 @@ ASTReader::ASTReadResult ASTReader::readUnhashedControlBlockImpl(
48594853
}
48604854
switch ((UnhashedControlBlockRecordTypes)MaybeRecordType.get()) {
48614855
case SIGNATURE:
4862-
if (F)
4863-
F->Signature = ASTFileSignature::create(Record.begin(), Record.end());
4856+
if (F) {
4857+
F->Signature = ASTFileSignature::create(Blob.begin(), Blob.end());
4858+
assert(F->Signature != ASTFileSignature::createDummy() &&
4859+
"Dummy AST file signature not backpatched in ASTWriter.");
4860+
}
48644861
break;
48654862
case AST_BLOCK_HASH:
4866-
if (F)
4867-
F->ASTBlockHash =
4868-
ASTFileSignature::create(Record.begin(), Record.end());
4863+
if (F) {
4864+
F->ASTBlockHash = ASTFileSignature::create(Blob.begin(), Blob.end());
4865+
assert(F->ASTBlockHash != ASTFileSignature::createDummy() &&
4866+
"Dummy AST block hash not backpatched in ASTWriter.");
4867+
}
48694868
break;
48704869
case DIAGNOSTIC_OPTIONS: {
48714870
bool Complain = (ClientLoadCapabilities & ARR_OutOfDate) == 0;
@@ -5167,9 +5166,12 @@ static ASTFileSignature readASTFileSignature(StringRef PCH) {
51675166
consumeError(MaybeRecord.takeError());
51685167
return ASTFileSignature();
51695168
}
5170-
if (SIGNATURE == MaybeRecord.get())
5171-
return ASTFileSignature::create(Record.begin(),
5172-
Record.begin() + ASTFileSignature::size);
5169+
if (SIGNATURE == MaybeRecord.get()) {
5170+
auto Signature = ASTFileSignature::create(Blob.begin(), Blob.end());
5171+
assert(Signature != ASTFileSignature::createDummy() &&
5172+
"Dummy AST file signature not backpatched in ASTWriter.");
5173+
return Signature;
5174+
}
51735175
}
51745176
}
51755177

clang/lib/Serialization/ASTWriter.cpp

Lines changed: 77 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1120,50 +1120,97 @@ adjustFilenameForRelocatableAST(const char *Filename, StringRef BaseDir) {
11201120
}
11211121

11221122
std::pair<ASTFileSignature, ASTFileSignature>
1123-
ASTWriter::createSignature(StringRef AllBytes, StringRef ASTBlockBytes) {
1123+
ASTWriter::createSignature() const {
1124+
StringRef AllBytes(Buffer.data(), Buffer.size());
1125+
11241126
llvm::SHA1 Hasher;
1125-
Hasher.update(ASTBlockBytes);
1127+
Hasher.update(AllBytes.slice(ASTBlockRange.first, ASTBlockRange.second));
11261128
ASTFileSignature ASTBlockHash = ASTFileSignature::create(Hasher.result());
11271129

1128-
// Add the remaining bytes (i.e. bytes before the unhashed control block that
1129-
// are not part of the AST block).
1130-
Hasher.update(
1131-
AllBytes.take_front(ASTBlockBytes.bytes_end() - AllBytes.bytes_begin()));
1130+
// Add the remaining bytes:
1131+
// 1. Before the unhashed control block.
1132+
Hasher.update(AllBytes.slice(0, UnhashedControlBlockRange.first));
1133+
// 2. Between the unhashed control block and the AST block.
11321134
Hasher.update(
1133-
AllBytes.take_back(AllBytes.bytes_end() - ASTBlockBytes.bytes_end()));
1135+
AllBytes.slice(UnhashedControlBlockRange.second, ASTBlockRange.first));
1136+
// 3. After the AST block.
1137+
Hasher.update(AllBytes.slice(ASTBlockRange.second, StringRef::npos));
11341138
ASTFileSignature Signature = ASTFileSignature::create(Hasher.result());
11351139

11361140
return std::make_pair(ASTBlockHash, Signature);
11371141
}
11381142

1139-
ASTFileSignature ASTWriter::writeUnhashedControlBlock(Preprocessor &PP,
1140-
ASTContext &Context) {
1143+
ASTFileSignature ASTWriter::backpatchSignature() {
1144+
if (!WritingModule ||
1145+
!PP->getHeaderSearchInfo().getHeaderSearchOpts().ModulesHashContent)
1146+
return {};
1147+
1148+
// For implicit modules, write the hash of the PCM as its signature.
1149+
1150+
auto BackpatchSignatureAt = [&](const ASTFileSignature &S, uint64_t BitNo) {
1151+
using WordT = unsigned;
1152+
std::array<WordT, sizeof(ASTFileSignature) / sizeof(WordT)> Words;
1153+
static_assert(sizeof(Words) == sizeof(S));
1154+
std::memcpy(Words.data(), S.data(), sizeof(ASTFileSignature));
1155+
for (WordT Word : Words) {
1156+
Stream.BackpatchWord(BitNo, Word);
1157+
BitNo += sizeof(WordT) * 8;
1158+
}
1159+
};
1160+
1161+
ASTFileSignature ASTBlockHash;
1162+
ASTFileSignature Signature;
1163+
std::tie(ASTBlockHash, Signature) = createSignature();
1164+
1165+
BackpatchSignatureAt(ASTBlockHash, ASTBlockHashOffset);
1166+
BackpatchSignatureAt(Signature, SignatureOffset);
1167+
1168+
return Signature;
1169+
}
1170+
1171+
void ASTWriter::writeUnhashedControlBlock(Preprocessor &PP,
1172+
ASTContext &Context) {
11411173
using namespace llvm;
11421174

11431175
// Flush first to prepare the PCM hash (signature).
11441176
Stream.FlushToWord();
1145-
auto StartOfUnhashedControl = Stream.GetCurrentBitNo() >> 3;
1177+
UnhashedControlBlockRange.first = Stream.GetCurrentBitNo() >> 3;
11461178

11471179
// Enter the block and prepare to write records.
11481180
RecordData Record;
11491181
Stream.EnterSubblock(UNHASHED_CONTROL_BLOCK_ID, 5);
11501182

11511183
// For implicit modules, write the hash of the PCM as its signature.
1152-
ASTFileSignature Signature;
11531184
if (WritingModule &&
11541185
PP.getHeaderSearchInfo().getHeaderSearchOpts().ModulesHashContent) {
1155-
ASTFileSignature ASTBlockHash;
1156-
auto ASTBlockStartByte = ASTBlockRange.first >> 3;
1157-
auto ASTBlockByteLength = (ASTBlockRange.second >> 3) - ASTBlockStartByte;
1158-
std::tie(ASTBlockHash, Signature) = createSignature(
1159-
StringRef(Buffer.begin(), StartOfUnhashedControl),
1160-
StringRef(Buffer.begin() + ASTBlockStartByte, ASTBlockByteLength));
1161-
1162-
Record.append(ASTBlockHash.begin(), ASTBlockHash.end());
1163-
Stream.EmitRecord(AST_BLOCK_HASH, Record);
1186+
// At this point, we don't know the actual signature of the file or the AST
1187+
// block - we're only able to compute those at the end of the serialization
1188+
// process. Let's store dummy signatures for now, and replace them with the
1189+
// real ones later on.
1190+
// The bitstream VBR-encodes record elements, which makes backpatching them
1191+
// really difficult. Let's store the signatures as blobs instead - they are
1192+
// guaranteed to be word-aligned, and we control their format/encoding.
1193+
auto Dummy = ASTFileSignature::createDummy();
1194+
SmallString<128> Blob{Dummy.begin(), Dummy.end()};
1195+
1196+
auto Abbrev = std::make_shared<BitCodeAbbrev>();
1197+
Abbrev->Add(BitCodeAbbrevOp(AST_BLOCK_HASH));
1198+
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
1199+
unsigned ASTBlockHashAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
1200+
1201+
Abbrev = std::make_shared<BitCodeAbbrev>();
1202+
Abbrev->Add(BitCodeAbbrevOp(SIGNATURE));
1203+
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
1204+
unsigned SignatureAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
1205+
1206+
Record.push_back(AST_BLOCK_HASH);
1207+
Stream.EmitRecordWithBlob(ASTBlockHashAbbrev, Record, Blob);
1208+
ASTBlockHashOffset = Stream.GetCurrentBitNo() - Blob.size() * 8;
11641209
Record.clear();
1165-
Record.append(Signature.begin(), Signature.end());
1166-
Stream.EmitRecord(SIGNATURE, Record);
1210+
1211+
Record.push_back(SIGNATURE);
1212+
Stream.EmitRecordWithBlob(SignatureAbbrev, Record, Blob);
1213+
SignatureOffset = Stream.GetCurrentBitNo() - Blob.size() * 8;
11671214
Record.clear();
11681215
}
11691216

@@ -1232,7 +1279,7 @@ ASTFileSignature ASTWriter::writeUnhashedControlBlock(Preprocessor &PP,
12321279

12331280
// Leave the options block.
12341281
Stream.ExitBlock();
1235-
return Signature;
1282+
UnhashedControlBlockRange.second = Stream.GetCurrentBitNo() >> 3;
12361283
}
12371284

12381285
/// Write the control block.
@@ -4691,8 +4738,12 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
46914738
ASTContext &Context = SemaRef.Context;
46924739
Preprocessor &PP = SemaRef.PP;
46934740

4741+
// This needs to be done very early, since everything that writes
4742+
// SourceLocations or FileIDs depends on it.
46944743
collectNonAffectingInputFiles();
46954744

4745+
writeUnhashedControlBlock(PP, Context);
4746+
46964747
// Set up predefined declaration IDs.
46974748
auto RegisterPredefDecl = [&] (Decl *D, PredefinedDeclIDs ID) {
46984749
if (D) {
@@ -4838,7 +4889,7 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
48384889

48394890
// Write the remaining AST contents.
48404891
Stream.FlushToWord();
4841-
ASTBlockRange.first = Stream.GetCurrentBitNo();
4892+
ASTBlockRange.first = Stream.GetCurrentBitNo() >> 3;
48424893
Stream.EnterSubblock(AST_BLOCK_ID, 5);
48434894
ASTBlockStartOffset = Stream.GetCurrentBitNo();
48444895

@@ -5191,13 +5242,13 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
51915242
Stream.EmitRecord(STATISTICS, Record);
51925243
Stream.ExitBlock();
51935244
Stream.FlushToWord();
5194-
ASTBlockRange.second = Stream.GetCurrentBitNo();
5245+
ASTBlockRange.second = Stream.GetCurrentBitNo() >> 3;
51955246

51965247
// Write the module file extension blocks.
51975248
for (const auto &ExtWriter : ModuleFileExtensionWriters)
51985249
WriteModuleFileExtension(SemaRef, *ExtWriter);
51995250

5200-
return writeUnhashedControlBlock(PP, Context);
5251+
return backpatchSignature();
52015252
}
52025253

52035254
void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) {

clang/lib/Serialization/GlobalModuleIndex.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -697,9 +697,12 @@ llvm::Error GlobalModuleIndexBuilder::loadModuleFile(const FileEntry *File) {
697697
}
698698

699699
// Get Signature.
700-
if (State == DiagnosticOptionsBlock && Code == SIGNATURE)
701-
getModuleFileInfo(File).Signature = ASTFileSignature::create(
702-
Record.begin(), Record.begin() + ASTFileSignature::size);
700+
if (State == DiagnosticOptionsBlock && Code == SIGNATURE) {
701+
auto Signature = ASTFileSignature::create(Blob.begin(), Blob.end());
702+
assert(Signature != ASTFileSignature::createDummy() &&
703+
"Dummy AST file signature not backpatched in ASTWriter.");
704+
getModuleFileInfo(File).Signature = Signature;
705+
}
703706

704707
// We don't care about this record.
705708
}

clang/test/Modules/ASTSignature.c

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,25 @@
11
// RUN: rm -rf %t
2-
// RUN: %clang_cc1 -iquote %S/Inputs/ASTHash/ -fsyntax-only -fmodules \
3-
// RUN: -fimplicit-module-maps -fmodules-strict-context-hash \
2+
// RUN: %clang_cc1 -iquote %S/Inputs/ASTHash/ -fsyntax-only \
3+
// RUN: -fmodules -fimplicit-module-maps -fmodules-strict-context-hash \
44
// RUN: -fmodules-cache-path=%t -fdisable-module-hash %s
55
// RUN: cp %t/MyHeader2.pcm %t1.pcm
66
// RUN: rm -rf %t
77
// RUN: %clang_cc1 -nostdinc++ -iquote %S/Inputs/ASTHash/ -fsyntax-only \
88
// RUN: -fmodules -fimplicit-module-maps -fmodules-strict-context-hash \
99
// RUN: -fmodules-cache-path=%t -fdisable-module-hash %s
1010
// RUN: cp %t/MyHeader2.pcm %t2.pcm
11-
// RUN: llvm-bcanalyzer --dump --disable-histogram %t1.pcm > %t1.dump
12-
// RUN: llvm-bcanalyzer --dump --disable-histogram %t2.pcm > %t2.dump
11+
// RUN: llvm-bcanalyzer --dump --disable-histogram --show-binary-blobs %t1.pcm > %t1.dump
12+
// RUN: llvm-bcanalyzer --dump --disable-histogram --show-binary-blobs %t2.pcm > %t2.dump
1313
// RUN: cat %t1.dump %t2.dump | FileCheck %s
1414

1515
#include "my_header_2.h"
1616

1717
my_int var = 42;
1818

19-
// CHECK: [[AST_BLOCK_HASH:<AST_BLOCK_HASH .*>]]
20-
// CHECK: [[SIGNATURE:<SIGNATURE .*>]]
21-
// CHECK: [[AST_BLOCK_HASH]]
22-
// CHECK-NOT: [[SIGNATURE]]
23-
// The modules built by this test are designed to yield the same AST. If this
24-
// test fails, it means that the AST block is has become non-relocatable.
19+
// CHECK: <AST_BLOCK_HASH abbrevid={{[0-9]+}}/> blob data = '[[AST_BLOCK_HASH:.*]]'
20+
// CHECK: <SIGNATURE abbrevid={{[0-9]+}}/> blob data = '[[SIGNATURE:.*]]'
21+
// CHECK: <AST_BLOCK_HASH abbrevid={{[0-9]+}}/> blob data = '[[AST_BLOCK_HASH]]'
22+
// CHECK-NOT: <SIGNATURE abbrevid={{[0-9]+}}/> blob data = '[[SIGNATURE]]'
23+
// The modules built by this test are designed to yield the same AST but distinct AST files.
24+
// If this test fails, it means that either the AST block has become non-relocatable,
25+
// or the file signature stopped hashing some parts of the AST file.
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// Test that pruning non-affecting input files happens before serializing
2+
// diagnostic pragma mappings.
3+
4+
// RUN: rm -rf %t
5+
// RUN: split-file %s %t
6+
7+
// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/cache \
8+
// RUN: -I %t/include_a -I %t/include_textual -fsyntax-only %t/tu.c
9+
10+
//--- tu.c
11+
#include "a1.h"
12+
13+
//--- include_a/module.modulemap
14+
module A {
15+
header "a1.h"
16+
header "a2.h"
17+
}
18+
//--- include_a/a1.h
19+
#include "textual.h" // This will also load the non-affecting
20+
// include_textual/module.modulemap.
21+
#include "a2.h"
22+
//--- include_a/a2.h
23+
#pragma clang diagnostic push
24+
#pragma clang diagnostic ignored "-Wfloat-equal"
25+
#pragma clang diagnostic pop
26+
27+
//--- include_textual/module.modulemap
28+
//--- include_textual/textual.h

0 commit comments

Comments
 (0)