Skip to content

Commit 6dd00a2

Browse files
committed
Reland "[clang][modules] Move UNHASHED_CONTROL_BLOCK up in the AST file"
This reverts commit b6ba804, effectively relanding commit 7d15657. The original commit incorrectly called `ASTWriter::writeUnhashedControlBlock()` before `ASTWriter::collectNonAffectingInputFiles()`, causing SourceLocations/FileIDs in the pragma diagnostic mappings block to be invalid. This is now tested by `clang/test/Modules/diag-mappings-affecting.c`.
1 parent 49eb67c commit 6dd00a2

File tree

8 files changed

+157
-60
lines changed

8 files changed

+157
-60
lines changed

clang/include/clang/Basic/Module.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,12 @@ struct ASTFileSignature : std::array<uint8_t, 20> {
8181
return Sentinel;
8282
}
8383

84+
static ASTFileSignature createDummy() {
85+
ASTFileSignature Dummy;
86+
Dummy.fill(0x00);
87+
return Dummy;
88+
}
89+
8490
template <typename InputIt>
8591
static ASTFileSignature create(InputIt First, InputIt Last) {
8692
assert(std::distance(First, Last) == size &&

clang/include/clang/Serialization/ASTBitCodes.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ namespace serialization {
4141
/// Version 4 of AST files also requires that the version control branch and
4242
/// revision match exactly, since there is no backward compatibility of
4343
/// AST files at this time.
44-
const unsigned VERSION_MAJOR = 28;
44+
const unsigned VERSION_MAJOR = 29;
4545

4646
/// AST file minor version number supported by this version of
4747
/// Clang.

clang/include/clang/Serialization/ASTWriter.h

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -128,10 +128,17 @@ class ASTWriter : public ASTDeserializationListener,
128128
/// The module we're currently writing, if any.
129129
Module *WritingModule = nullptr;
130130

131-
/// The offset of the first bit inside the AST_BLOCK.
131+
/// The byte range representing all the UNHASHED_CONTROL_BLOCK.
132+
std::pair<uint64_t, uint64_t> UnhashedControlBlockRange;
133+
/// The bit offset of the AST block hash blob.
134+
uint64_t ASTBlockHashOffset = 0;
135+
/// The bit offset of the signature blob.
136+
uint64_t SignatureOffset = 0;
137+
138+
/// The bit offset of the first bit inside the AST_BLOCK.
132139
uint64_t ASTBlockStartOffset = 0;
133140

134-
/// The range representing all the AST_BLOCK.
141+
/// The byte range representing all the AST_BLOCK.
135142
std::pair<uint64_t, uint64_t> ASTBlockRange;
136143

137144
/// The base directory for any relative paths we emit.
@@ -495,12 +502,11 @@ class ASTWriter : public ASTDeserializationListener,
495502
StringRef isysroot);
496503

497504
/// Write out the signature and diagnostic options, and return the signature.
498-
ASTFileSignature writeUnhashedControlBlock(Preprocessor &PP,
499-
ASTContext &Context);
505+
void writeUnhashedControlBlock(Preprocessor &PP, ASTContext &Context);
506+
ASTFileSignature backpatchSignature();
500507

501508
/// Calculate hash of the pcm content.
502-
static std::pair<ASTFileSignature, ASTFileSignature>
503-
createSignature(StringRef AllBytes, StringRef ASTBlockBytes);
509+
std::pair<ASTFileSignature, ASTFileSignature> createSignature() const;
504510

505511
void WriteInputFiles(SourceManager &SourceMgr, HeaderSearchOptions &HSOpts);
506512
void WriteSourceManagerBlock(SourceManager &SourceMgr,

clang/lib/Serialization/ASTReader.cpp

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4680,12 +4680,6 @@ ASTReader::ReadASTCore(StringRef FileName,
46804680
ShouldFinalizePCM = true;
46814681
return Success;
46824682

4683-
case UNHASHED_CONTROL_BLOCK_ID:
4684-
// This block is handled using look-ahead during ReadControlBlock. We
4685-
// shouldn't get here!
4686-
Error("malformed block record in AST file");
4687-
return Failure;
4688-
46894683
default:
46904684
if (llvm::Error Err = Stream.SkipBlock()) {
46914685
Error(std::move(Err));
@@ -4810,13 +4804,18 @@ ASTReader::ASTReadResult ASTReader::readUnhashedControlBlockImpl(
48104804
}
48114805
switch ((UnhashedControlBlockRecordTypes)MaybeRecordType.get()) {
48124806
case SIGNATURE:
4813-
if (F)
4814-
F->Signature = ASTFileSignature::create(Record.begin(), Record.end());
4807+
if (F) {
4808+
F->Signature = ASTFileSignature::create(Blob.begin(), Blob.end());
4809+
assert(F->Signature != ASTFileSignature::createDummy() &&
4810+
"Dummy AST file signature not backpatched in ASTWriter.");
4811+
}
48154812
break;
48164813
case AST_BLOCK_HASH:
4817-
if (F)
4818-
F->ASTBlockHash =
4819-
ASTFileSignature::create(Record.begin(), Record.end());
4814+
if (F) {
4815+
F->ASTBlockHash = ASTFileSignature::create(Blob.begin(), Blob.end());
4816+
assert(F->ASTBlockHash != ASTFileSignature::createDummy() &&
4817+
"Dummy AST block hash not backpatched in ASTWriter.");
4818+
}
48204819
break;
48214820
case DIAGNOSTIC_OPTIONS: {
48224821
bool Complain = (ClientLoadCapabilities & ARR_OutOfDate) == 0;
@@ -5126,9 +5125,12 @@ static ASTFileSignature readASTFileSignature(StringRef PCH) {
51265125
consumeError(MaybeRecord.takeError());
51275126
return ASTFileSignature();
51285127
}
5129-
if (SIGNATURE == MaybeRecord.get())
5130-
return ASTFileSignature::create(Record.begin(),
5131-
Record.begin() + ASTFileSignature::size);
5128+
if (SIGNATURE == MaybeRecord.get()) {
5129+
auto Signature = ASTFileSignature::create(Blob.begin(), Blob.end());
5130+
assert(Signature != ASTFileSignature::createDummy() &&
5131+
"Dummy AST file signature not backpatched in ASTWriter.");
5132+
return Signature;
5133+
}
51325134
}
51335135
}
51345136

clang/lib/Serialization/ASTWriter.cpp

Lines changed: 77 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1119,50 +1119,97 @@ adjustFilenameForRelocatableAST(const char *Filename, StringRef BaseDir) {
11191119
}
11201120

11211121
std::pair<ASTFileSignature, ASTFileSignature>
1122-
ASTWriter::createSignature(StringRef AllBytes, StringRef ASTBlockBytes) {
1122+
ASTWriter::createSignature() const {
1123+
StringRef AllBytes(Buffer.data(), Buffer.size());
1124+
11231125
llvm::SHA1 Hasher;
1124-
Hasher.update(ASTBlockBytes);
1126+
Hasher.update(AllBytes.slice(ASTBlockRange.first, ASTBlockRange.second));
11251127
ASTFileSignature ASTBlockHash = ASTFileSignature::create(Hasher.result());
11261128

1127-
// Add the remaining bytes (i.e. bytes before the unhashed control block that
1128-
// are not part of the AST block).
1129-
Hasher.update(
1130-
AllBytes.take_front(ASTBlockBytes.bytes_end() - AllBytes.bytes_begin()));
1129+
// Add the remaining bytes:
1130+
// 1. Before the unhashed control block.
1131+
Hasher.update(AllBytes.slice(0, UnhashedControlBlockRange.first));
1132+
// 2. Between the unhashed control block and the AST block.
11311133
Hasher.update(
1132-
AllBytes.take_back(AllBytes.bytes_end() - ASTBlockBytes.bytes_end()));
1134+
AllBytes.slice(UnhashedControlBlockRange.second, ASTBlockRange.first));
1135+
// 3. After the AST block.
1136+
Hasher.update(AllBytes.slice(ASTBlockRange.second, StringRef::npos));
11331137
ASTFileSignature Signature = ASTFileSignature::create(Hasher.result());
11341138

11351139
return std::make_pair(ASTBlockHash, Signature);
11361140
}
11371141

1138-
ASTFileSignature ASTWriter::writeUnhashedControlBlock(Preprocessor &PP,
1139-
ASTContext &Context) {
1142+
ASTFileSignature ASTWriter::backpatchSignature() {
1143+
if (!WritingModule ||
1144+
!PP->getHeaderSearchInfo().getHeaderSearchOpts().ModulesHashContent)
1145+
return {};
1146+
1147+
// For implicit modules, write the hash of the PCM as its signature.
1148+
1149+
auto BackpatchSignatureAt = [&](const ASTFileSignature &S, uint64_t BitNo) {
1150+
using WordT = unsigned;
1151+
std::array<WordT, sizeof(ASTFileSignature) / sizeof(WordT)> Words;
1152+
static_assert(sizeof(Words) == sizeof(S));
1153+
std::memcpy(Words.data(), S.data(), sizeof(ASTFileSignature));
1154+
for (WordT Word : Words) {
1155+
Stream.BackpatchWord(BitNo, Word);
1156+
BitNo += sizeof(WordT) * 8;
1157+
}
1158+
};
1159+
1160+
ASTFileSignature ASTBlockHash;
1161+
ASTFileSignature Signature;
1162+
std::tie(ASTBlockHash, Signature) = createSignature();
1163+
1164+
BackpatchSignatureAt(ASTBlockHash, ASTBlockHashOffset);
1165+
BackpatchSignatureAt(Signature, SignatureOffset);
1166+
1167+
return Signature;
1168+
}
1169+
1170+
void ASTWriter::writeUnhashedControlBlock(Preprocessor &PP,
1171+
ASTContext &Context) {
11401172
using namespace llvm;
11411173

11421174
// Flush first to prepare the PCM hash (signature).
11431175
Stream.FlushToWord();
1144-
auto StartOfUnhashedControl = Stream.GetCurrentBitNo() >> 3;
1176+
UnhashedControlBlockRange.first = Stream.GetCurrentBitNo() >> 3;
11451177

11461178
// Enter the block and prepare to write records.
11471179
RecordData Record;
11481180
Stream.EnterSubblock(UNHASHED_CONTROL_BLOCK_ID, 5);
11491181

11501182
// For implicit modules, write the hash of the PCM as its signature.
1151-
ASTFileSignature Signature;
11521183
if (WritingModule &&
11531184
PP.getHeaderSearchInfo().getHeaderSearchOpts().ModulesHashContent) {
1154-
ASTFileSignature ASTBlockHash;
1155-
auto ASTBlockStartByte = ASTBlockRange.first >> 3;
1156-
auto ASTBlockByteLength = (ASTBlockRange.second >> 3) - ASTBlockStartByte;
1157-
std::tie(ASTBlockHash, Signature) = createSignature(
1158-
StringRef(Buffer.begin(), StartOfUnhashedControl),
1159-
StringRef(Buffer.begin() + ASTBlockStartByte, ASTBlockByteLength));
1160-
1161-
Record.append(ASTBlockHash.begin(), ASTBlockHash.end());
1162-
Stream.EmitRecord(AST_BLOCK_HASH, Record);
1185+
// At this point, we don't know the actual signature of the file or the AST
1186+
// block - we're only able to compute those at the end of the serialization
1187+
// process. Let's store dummy signatures for now, and replace them with the
1188+
// real ones later on.
1189+
// The bitstream VBR-encodes record elements, which makes backpatching them
1190+
// really difficult. Let's store the signatures as blobs instead - they are
1191+
// guaranteed to be word-aligned, and we control their format/encoding.
1192+
auto Dummy = ASTFileSignature::createDummy();
1193+
SmallString<128> Blob{Dummy.begin(), Dummy.end()};
1194+
1195+
auto Abbrev = std::make_shared<BitCodeAbbrev>();
1196+
Abbrev->Add(BitCodeAbbrevOp(AST_BLOCK_HASH));
1197+
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
1198+
unsigned ASTBlockHashAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
1199+
1200+
Abbrev = std::make_shared<BitCodeAbbrev>();
1201+
Abbrev->Add(BitCodeAbbrevOp(SIGNATURE));
1202+
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
1203+
unsigned SignatureAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
1204+
1205+
Record.push_back(AST_BLOCK_HASH);
1206+
Stream.EmitRecordWithBlob(ASTBlockHashAbbrev, Record, Blob);
1207+
ASTBlockHashOffset = Stream.GetCurrentBitNo() - Blob.size() * 8;
11631208
Record.clear();
1164-
Record.append(Signature.begin(), Signature.end());
1165-
Stream.EmitRecord(SIGNATURE, Record);
1209+
1210+
Record.push_back(SIGNATURE);
1211+
Stream.EmitRecordWithBlob(SignatureAbbrev, Record, Blob);
1212+
SignatureOffset = Stream.GetCurrentBitNo() - Blob.size() * 8;
11661213
Record.clear();
11671214
}
11681215

@@ -1242,7 +1289,7 @@ ASTFileSignature ASTWriter::writeUnhashedControlBlock(Preprocessor &PP,
12421289

12431290
// Leave the options block.
12441291
Stream.ExitBlock();
1245-
return Signature;
1292+
UnhashedControlBlockRange.second = Stream.GetCurrentBitNo() >> 3;
12461293
}
12471294

12481295
/// Write the control block.
@@ -4656,8 +4703,12 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
46564703
ASTContext &Context = SemaRef.Context;
46574704
Preprocessor &PP = SemaRef.PP;
46584705

4706+
// This needs to be done very early, since everything that writes
4707+
// SourceLocations or FileIDs depends on it.
46594708
collectNonAffectingInputFiles();
46604709

4710+
writeUnhashedControlBlock(PP, Context);
4711+
46614712
// Set up predefined declaration IDs.
46624713
auto RegisterPredefDecl = [&] (Decl *D, PredefinedDeclIDs ID) {
46634714
if (D) {
@@ -4803,7 +4854,7 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
48034854

48044855
// Write the remaining AST contents.
48054856
Stream.FlushToWord();
4806-
ASTBlockRange.first = Stream.GetCurrentBitNo();
4857+
ASTBlockRange.first = Stream.GetCurrentBitNo() >> 3;
48074858
Stream.EnterSubblock(AST_BLOCK_ID, 5);
48084859
ASTBlockStartOffset = Stream.GetCurrentBitNo();
48094860

@@ -5160,13 +5211,13 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
51605211
Stream.EmitRecord(STATISTICS, Record);
51615212
Stream.ExitBlock();
51625213
Stream.FlushToWord();
5163-
ASTBlockRange.second = Stream.GetCurrentBitNo();
5214+
ASTBlockRange.second = Stream.GetCurrentBitNo() >> 3;
51645215

51655216
// Write the module file extension blocks.
51665217
for (const auto &ExtWriter : ModuleFileExtensionWriters)
51675218
WriteModuleFileExtension(SemaRef, *ExtWriter);
51685219

5169-
return writeUnhashedControlBlock(PP, Context);
5220+
return backpatchSignature();
51705221
}
51715222

51725223
void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) {

clang/lib/Serialization/GlobalModuleIndex.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -706,9 +706,12 @@ llvm::Error GlobalModuleIndexBuilder::loadModuleFile(const FileEntry *File) {
706706
}
707707

708708
// Get Signature.
709-
if (State == DiagnosticOptionsBlock && Code == SIGNATURE)
710-
getModuleFileInfo(File).Signature = ASTFileSignature::create(
711-
Record.begin(), Record.begin() + ASTFileSignature::size);
709+
if (State == DiagnosticOptionsBlock && Code == SIGNATURE) {
710+
auto Signature = ASTFileSignature::create(Blob.begin(), Blob.end());
711+
assert(Signature != ASTFileSignature::createDummy() &&
712+
"Dummy AST file signature not backpatched in ASTWriter.");
713+
getModuleFileInfo(File).Signature = Signature;
714+
}
712715

713716
// We don't care about this record.
714717
}

clang/test/Modules/ASTSignature.c

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,25 @@
11
// RUN: rm -rf %t
2-
// RUN: %clang_cc1 -iquote %S/Inputs/ASTHash/ -fsyntax-only -fmodules \
3-
// RUN: -fimplicit-module-maps -fmodules-strict-context-hash \
2+
// RUN: %clang_cc1 -iquote %S/Inputs/ASTHash/ -fsyntax-only \
3+
// RUN: -fmodules -fimplicit-module-maps -fmodules-strict-context-hash \
44
// RUN: -fmodules-cache-path=%t -fdisable-module-hash %s
55
// RUN: cp %t/MyHeader2.pcm %t1.pcm
66
// RUN: rm -rf %t
77
// RUN: %clang_cc1 -nostdinc++ -iquote %S/Inputs/ASTHash/ -fsyntax-only \
88
// RUN: -fmodules -fimplicit-module-maps -fmodules-strict-context-hash \
99
// RUN: -fmodules-cache-path=%t -fdisable-module-hash %s
1010
// RUN: cp %t/MyHeader2.pcm %t2.pcm
11-
// RUN: llvm-bcanalyzer --dump --disable-histogram %t1.pcm > %t1.dump
12-
// RUN: llvm-bcanalyzer --dump --disable-histogram %t2.pcm > %t2.dump
11+
// RUN: llvm-bcanalyzer --dump --disable-histogram --show-binary-blobs %t1.pcm > %t1.dump
12+
// RUN: llvm-bcanalyzer --dump --disable-histogram --show-binary-blobs %t2.pcm > %t2.dump
1313
// RUN: cat %t1.dump %t2.dump | FileCheck %s
1414

1515
#include "my_header_2.h"
1616

1717
my_int var = 42;
1818

19-
// CHECK: [[AST_BLOCK_HASH:<AST_BLOCK_HASH .*>]]
20-
// CHECK: [[SIGNATURE:<SIGNATURE .*>]]
21-
// CHECK: [[AST_BLOCK_HASH]]
22-
// CHECK-NOT: [[SIGNATURE]]
23-
// The modules built by this test are designed to yield the same AST. If this
24-
// test fails, it means that the AST block is has become non-relocatable.
19+
// CHECK: <AST_BLOCK_HASH abbrevid={{[0-9]+}}/> blob data = '[[AST_BLOCK_HASH:.*]]'
20+
// CHECK: <SIGNATURE abbrevid={{[0-9]+}}/> blob data = '[[SIGNATURE:.*]]'
21+
// CHECK: <AST_BLOCK_HASH abbrevid={{[0-9]+}}/> blob data = '[[AST_BLOCK_HASH]]'
22+
// CHECK-NOT: <SIGNATURE abbrevid={{[0-9]+}}/> blob data = '[[SIGNATURE]]'
23+
// The modules built by this test are designed to yield the same AST but distinct AST files.
24+
// If this test fails, it means that either the AST block has become non-relocatable,
25+
// or the file signature stopped hashing some parts of the AST file.
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// Test that pruning non-affecting input files happens before serializing
2+
// diagnostic pragma mappings.
3+
4+
// RUN: rm -rf %t
5+
// RUN: split-file %s %t
6+
7+
// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/cache \
8+
// RUN: -I %t/include_a -I %t/include_textual -fsyntax-only %t/tu.c
9+
10+
//--- tu.c
11+
#include "a1.h"
12+
13+
//--- include_a/module.modulemap
14+
module A {
15+
header "a1.h"
16+
header "a2.h"
17+
}
18+
//--- include_a/a1.h
19+
#include "textual.h" // This will also load the non-affecting
20+
// include_textual/module.modulemap.
21+
#include "a2.h"
22+
//--- include_a/a2.h
23+
#pragma clang diagnostic push
24+
#pragma clang diagnostic ignored "-Wfloat-equal"
25+
#pragma clang diagnostic pop
26+
27+
//--- include_textual/module.modulemap
28+
//--- include_textual/textual.h

0 commit comments

Comments
 (0)