Skip to content

Commit 7d15657

Browse files
committed
[clang][modules] Move UNHASHED_CONTROL_BLOCK up in the AST file
When loading (transitively) imported AST file, `ModuleManager::addModule()` first checks it has the expected signature via `readASTFileSignature()`. The signature is part of `UNHASHED_CONTROL_BLOCK`, which is placed at the end of the AST file. This means that just to verify signature of an AST file, we need to skip over all top-level blocks, paging in the whole AST file from disk. This is pretty slow. This patch moves `UNHASHED_CONTROL_BLOCK` to the start of the AST file, so that it can be read more efficiently. To achieve this, we use dummy signature when first emitting the unhashed control block, and then backpatch the real signature at the end of the serialization process. This speeds up dependency scanning by over 9% and significantly reduces run-to-run variability of my benchmarks. Depends on D158572. Reviewed By: benlangmuir Differential Revision: https://reviews.llvm.org/D158573
1 parent b9d78bd commit 7d15657

File tree

7 files changed

+126
-60
lines changed

7 files changed

+126
-60
lines changed

clang/include/clang/Basic/Module.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,12 @@ struct ASTFileSignature : std::array<uint8_t, 20> {
8181
return Sentinel;
8282
}
8383

84+
static ASTFileSignature createDummy() {
85+
ASTFileSignature Dummy;
86+
Dummy.fill(0x00);
87+
return Dummy;
88+
}
89+
8490
template <typename InputIt>
8591
static ASTFileSignature create(InputIt First, InputIt Last) {
8692
assert(std::distance(First, Last) == size &&

clang/include/clang/Serialization/ASTBitCodes.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ namespace serialization {
4141
/// Version 4 of AST files also requires that the version control branch and
4242
/// revision match exactly, since there is no backward compatibility of
4343
/// AST files at this time.
44-
const unsigned VERSION_MAJOR = 28;
44+
const unsigned VERSION_MAJOR = 29;
4545

4646
/// AST file minor version number supported by this version of
4747
/// Clang.

clang/include/clang/Serialization/ASTWriter.h

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -128,10 +128,17 @@ class ASTWriter : public ASTDeserializationListener,
128128
/// The module we're currently writing, if any.
129129
Module *WritingModule = nullptr;
130130

131-
/// The offset of the first bit inside the AST_BLOCK.
131+
/// The byte range representing all the UNHASHED_CONTROL_BLOCK.
132+
std::pair<uint64_t, uint64_t> UnhashedControlBlockRange;
133+
/// The bit offset of the AST block hash blob.
134+
uint64_t ASTBlockHashOffset = 0;
135+
/// The bit offset of the signature blob.
136+
uint64_t SignatureOffset = 0;
137+
138+
/// The bit offset of the first bit inside the AST_BLOCK.
132139
uint64_t ASTBlockStartOffset = 0;
133140

134-
/// The range representing all the AST_BLOCK.
141+
/// The byte range representing all the AST_BLOCK.
135142
std::pair<uint64_t, uint64_t> ASTBlockRange;
136143

137144
/// The base directory for any relative paths we emit.
@@ -495,12 +502,11 @@ class ASTWriter : public ASTDeserializationListener,
495502
StringRef isysroot);
496503

497504
/// Write out the signature and diagnostic options, and return the signature.
498-
ASTFileSignature writeUnhashedControlBlock(Preprocessor &PP,
499-
ASTContext &Context);
505+
void writeUnhashedControlBlock(Preprocessor &PP, ASTContext &Context);
506+
ASTFileSignature backpatchSignature();
500507

501508
/// Calculate hash of the pcm content.
502-
static std::pair<ASTFileSignature, ASTFileSignature>
503-
createSignature(StringRef AllBytes, StringRef ASTBlockBytes);
509+
std::pair<ASTFileSignature, ASTFileSignature> createSignature() const;
504510

505511
void WriteInputFiles(SourceManager &SourceMgr, HeaderSearchOptions &HSOpts);
506512
void WriteSourceManagerBlock(SourceManager &SourceMgr,

clang/lib/Serialization/ASTReader.cpp

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4734,12 +4734,6 @@ ASTReader::ReadASTCore(StringRef FileName,
47344734
ShouldFinalizePCM = true;
47354735
return Success;
47364736

4737-
case UNHASHED_CONTROL_BLOCK_ID:
4738-
// This block is handled using look-ahead during ReadControlBlock. We
4739-
// shouldn't get here!
4740-
Error("malformed block record in AST file");
4741-
return Failure;
4742-
47434737
default:
47444738
if (llvm::Error Err = Stream.SkipBlock()) {
47454739
Error(std::move(Err));
@@ -4859,13 +4853,18 @@ ASTReader::ASTReadResult ASTReader::readUnhashedControlBlockImpl(
48594853
}
48604854
switch ((UnhashedControlBlockRecordTypes)MaybeRecordType.get()) {
48614855
case SIGNATURE:
4862-
if (F)
4863-
F->Signature = ASTFileSignature::create(Record.begin(), Record.end());
4856+
if (F) {
4857+
F->Signature = ASTFileSignature::create(Blob.begin(), Blob.end());
4858+
assert(F->Signature != ASTFileSignature::createDummy() &&
4859+
"Dummy AST file signature not backpatched in ASTWriter.");
4860+
}
48644861
break;
48654862
case AST_BLOCK_HASH:
4866-
if (F)
4867-
F->ASTBlockHash =
4868-
ASTFileSignature::create(Record.begin(), Record.end());
4863+
if (F) {
4864+
F->ASTBlockHash = ASTFileSignature::create(Blob.begin(), Blob.end());
4865+
assert(F->ASTBlockHash != ASTFileSignature::createDummy() &&
4866+
"Dummy AST block hash not backpatched in ASTWriter.");
4867+
}
48694868
break;
48704869
case DIAGNOSTIC_OPTIONS: {
48714870
bool Complain = (ClientLoadCapabilities & ARR_OutOfDate) == 0;
@@ -5167,9 +5166,12 @@ static ASTFileSignature readASTFileSignature(StringRef PCH) {
51675166
consumeError(MaybeRecord.takeError());
51685167
return ASTFileSignature();
51695168
}
5170-
if (SIGNATURE == MaybeRecord.get())
5171-
return ASTFileSignature::create(Record.begin(),
5172-
Record.begin() + ASTFileSignature::size);
5169+
if (SIGNATURE == MaybeRecord.get()) {
5170+
auto Signature = ASTFileSignature::create(Blob.begin(), Blob.end());
5171+
assert(Signature != ASTFileSignature::createDummy() &&
5172+
"Dummy AST file signature not backpatched in ASTWriter.");
5173+
return Signature;
5174+
}
51735175
}
51745176
}
51755177

clang/lib/Serialization/ASTWriter.cpp

Lines changed: 74 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1120,50 +1120,97 @@ adjustFilenameForRelocatableAST(const char *Filename, StringRef BaseDir) {
11201120
}
11211121

11221122
std::pair<ASTFileSignature, ASTFileSignature>
1123-
ASTWriter::createSignature(StringRef AllBytes, StringRef ASTBlockBytes) {
1123+
ASTWriter::createSignature() const {
1124+
StringRef AllBytes(Buffer.data(), Buffer.size());
1125+
11241126
llvm::SHA1 Hasher;
1125-
Hasher.update(ASTBlockBytes);
1127+
Hasher.update(AllBytes.slice(ASTBlockRange.first, ASTBlockRange.second));
11261128
ASTFileSignature ASTBlockHash = ASTFileSignature::create(Hasher.result());
11271129

1128-
// Add the remaining bytes (i.e. bytes before the unhashed control block that
1129-
// are not part of the AST block).
1130-
Hasher.update(
1131-
AllBytes.take_front(ASTBlockBytes.bytes_end() - AllBytes.bytes_begin()));
1130+
// Add the remaining bytes:
1131+
// 1. Before the unhashed control block.
1132+
Hasher.update(AllBytes.slice(0, UnhashedControlBlockRange.first));
1133+
// 2. Between the unhashed control block and the AST block.
11321134
Hasher.update(
1133-
AllBytes.take_back(AllBytes.bytes_end() - ASTBlockBytes.bytes_end()));
1135+
AllBytes.slice(UnhashedControlBlockRange.second, ASTBlockRange.first));
1136+
// 3. After the AST block.
1137+
Hasher.update(AllBytes.slice(ASTBlockRange.second, StringRef::npos));
11341138
ASTFileSignature Signature = ASTFileSignature::create(Hasher.result());
11351139

11361140
return std::make_pair(ASTBlockHash, Signature);
11371141
}
11381142

1139-
ASTFileSignature ASTWriter::writeUnhashedControlBlock(Preprocessor &PP,
1140-
ASTContext &Context) {
1143+
ASTFileSignature ASTWriter::backpatchSignature() {
1144+
if (!WritingModule ||
1145+
!PP->getHeaderSearchInfo().getHeaderSearchOpts().ModulesHashContent)
1146+
return {};
1147+
1148+
// For implicit modules, write the hash of the PCM as its signature.
1149+
1150+
auto BackpatchSignatureAt = [&](const ASTFileSignature &S, uint64_t BitNo) {
1151+
using WordT = unsigned;
1152+
std::array<WordT, sizeof(ASTFileSignature) / sizeof(WordT)> Words;
1153+
static_assert(sizeof(Words) == sizeof(S));
1154+
std::memcpy(Words.data(), S.data(), sizeof(ASTFileSignature));
1155+
for (WordT Word : Words) {
1156+
Stream.BackpatchWord(BitNo, Word);
1157+
BitNo += sizeof(WordT) * 8;
1158+
}
1159+
};
1160+
1161+
ASTFileSignature ASTBlockHash;
1162+
ASTFileSignature Signature;
1163+
std::tie(ASTBlockHash, Signature) = createSignature();
1164+
1165+
BackpatchSignatureAt(ASTBlockHash, ASTBlockHashOffset);
1166+
BackpatchSignatureAt(Signature, SignatureOffset);
1167+
1168+
return Signature;
1169+
}
1170+
1171+
void ASTWriter::writeUnhashedControlBlock(Preprocessor &PP,
1172+
ASTContext &Context) {
11411173
using namespace llvm;
11421174

11431175
// Flush first to prepare the PCM hash (signature).
11441176
Stream.FlushToWord();
1145-
auto StartOfUnhashedControl = Stream.GetCurrentBitNo() >> 3;
1177+
UnhashedControlBlockRange.first = Stream.GetCurrentBitNo() >> 3;
11461178

11471179
// Enter the block and prepare to write records.
11481180
RecordData Record;
11491181
Stream.EnterSubblock(UNHASHED_CONTROL_BLOCK_ID, 5);
11501182

11511183
// For implicit modules, write the hash of the PCM as its signature.
1152-
ASTFileSignature Signature;
11531184
if (WritingModule &&
11541185
PP.getHeaderSearchInfo().getHeaderSearchOpts().ModulesHashContent) {
1155-
ASTFileSignature ASTBlockHash;
1156-
auto ASTBlockStartByte = ASTBlockRange.first >> 3;
1157-
auto ASTBlockByteLength = (ASTBlockRange.second >> 3) - ASTBlockStartByte;
1158-
std::tie(ASTBlockHash, Signature) = createSignature(
1159-
StringRef(Buffer.begin(), StartOfUnhashedControl),
1160-
StringRef(Buffer.begin() + ASTBlockStartByte, ASTBlockByteLength));
1161-
1162-
Record.append(ASTBlockHash.begin(), ASTBlockHash.end());
1163-
Stream.EmitRecord(AST_BLOCK_HASH, Record);
1186+
// At this point, we don't know the actual signature of the file or the AST
1187+
// block - we're only able to compute those at the end of the serialization
1188+
// process. Let's store dummy signatures for now, and replace them with the
1189+
// real ones later on.
1190+
// The bitstream VBR-encodes record elements, which makes backpatching them
1191+
// really difficult. Let's store the signatures as blobs instead - they are
1192+
// guaranteed to be word-aligned, and we control their format/encoding.
1193+
auto Dummy = ASTFileSignature::createDummy();
1194+
SmallString<128> Blob{Dummy.begin(), Dummy.end()};
1195+
1196+
auto Abbrev = std::make_shared<BitCodeAbbrev>();
1197+
Abbrev->Add(BitCodeAbbrevOp(AST_BLOCK_HASH));
1198+
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
1199+
unsigned ASTBlockHashAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
1200+
1201+
Abbrev = std::make_shared<BitCodeAbbrev>();
1202+
Abbrev->Add(BitCodeAbbrevOp(SIGNATURE));
1203+
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
1204+
unsigned SignatureAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
1205+
1206+
Record.push_back(AST_BLOCK_HASH);
1207+
Stream.EmitRecordWithBlob(ASTBlockHashAbbrev, Record, Blob);
1208+
ASTBlockHashOffset = Stream.GetCurrentBitNo() - Blob.size() * 8;
11641209
Record.clear();
1165-
Record.append(Signature.begin(), Signature.end());
1166-
Stream.EmitRecord(SIGNATURE, Record);
1210+
1211+
Record.push_back(SIGNATURE);
1212+
Stream.EmitRecordWithBlob(SignatureAbbrev, Record, Blob);
1213+
SignatureOffset = Stream.GetCurrentBitNo() - Blob.size() * 8;
11671214
Record.clear();
11681215
}
11691216

@@ -1232,7 +1279,7 @@ ASTFileSignature ASTWriter::writeUnhashedControlBlock(Preprocessor &PP,
12321279

12331280
// Leave the options block.
12341281
Stream.ExitBlock();
1235-
return Signature;
1282+
UnhashedControlBlockRange.second = Stream.GetCurrentBitNo() >> 3;
12361283
}
12371284

12381285
/// Write the control block.
@@ -4690,6 +4737,7 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
46904737

46914738
ASTContext &Context = SemaRef.Context;
46924739
Preprocessor &PP = SemaRef.PP;
4740+
writeUnhashedControlBlock(PP, Context);
46934741

46944742
collectNonAffectingInputFiles();
46954743

@@ -4838,7 +4886,7 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
48384886

48394887
// Write the remaining AST contents.
48404888
Stream.FlushToWord();
4841-
ASTBlockRange.first = Stream.GetCurrentBitNo();
4889+
ASTBlockRange.first = Stream.GetCurrentBitNo() >> 3;
48424890
Stream.EnterSubblock(AST_BLOCK_ID, 5);
48434891
ASTBlockStartOffset = Stream.GetCurrentBitNo();
48444892

@@ -5191,13 +5239,13 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
51915239
Stream.EmitRecord(STATISTICS, Record);
51925240
Stream.ExitBlock();
51935241
Stream.FlushToWord();
5194-
ASTBlockRange.second = Stream.GetCurrentBitNo();
5242+
ASTBlockRange.second = Stream.GetCurrentBitNo() >> 3;
51955243

51965244
// Write the module file extension blocks.
51975245
for (const auto &ExtWriter : ModuleFileExtensionWriters)
51985246
WriteModuleFileExtension(SemaRef, *ExtWriter);
51995247

5200-
return writeUnhashedControlBlock(PP, Context);
5248+
return backpatchSignature();
52015249
}
52025250

52035251
void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) {

clang/lib/Serialization/GlobalModuleIndex.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -697,9 +697,12 @@ llvm::Error GlobalModuleIndexBuilder::loadModuleFile(const FileEntry *File) {
697697
}
698698

699699
// Get Signature.
700-
if (State == DiagnosticOptionsBlock && Code == SIGNATURE)
701-
getModuleFileInfo(File).Signature = ASTFileSignature::create(
702-
Record.begin(), Record.begin() + ASTFileSignature::size);
700+
if (State == DiagnosticOptionsBlock && Code == SIGNATURE) {
701+
auto Signature = ASTFileSignature::create(Blob.begin(), Blob.end());
702+
assert(Signature != ASTFileSignature::createDummy() &&
703+
"Dummy AST file signature not backpatched in ASTWriter.");
704+
getModuleFileInfo(File).Signature = Signature;
705+
}
703706

704707
// We don't care about this record.
705708
}

clang/test/Modules/ASTSignature.c

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,25 @@
11
// RUN: rm -rf %t
2-
// RUN: %clang_cc1 -iquote %S/Inputs/ASTHash/ -fsyntax-only -fmodules \
3-
// RUN: -fimplicit-module-maps -fmodules-strict-context-hash \
2+
// RUN: %clang_cc1 -iquote %S/Inputs/ASTHash/ -fsyntax-only \
3+
// RUN: -fmodules -fimplicit-module-maps -fmodules-strict-context-hash \
44
// RUN: -fmodules-cache-path=%t -fdisable-module-hash %s
55
// RUN: cp %t/MyHeader2.pcm %t1.pcm
66
// RUN: rm -rf %t
77
// RUN: %clang_cc1 -nostdinc++ -iquote %S/Inputs/ASTHash/ -fsyntax-only \
88
// RUN: -fmodules -fimplicit-module-maps -fmodules-strict-context-hash \
99
// RUN: -fmodules-cache-path=%t -fdisable-module-hash %s
1010
// RUN: cp %t/MyHeader2.pcm %t2.pcm
11-
// RUN: llvm-bcanalyzer --dump --disable-histogram %t1.pcm > %t1.dump
12-
// RUN: llvm-bcanalyzer --dump --disable-histogram %t2.pcm > %t2.dump
11+
// RUN: llvm-bcanalyzer --dump --disable-histogram --show-binary-blobs %t1.pcm > %t1.dump
12+
// RUN: llvm-bcanalyzer --dump --disable-histogram --show-binary-blobs %t2.pcm > %t2.dump
1313
// RUN: cat %t1.dump %t2.dump | FileCheck %s
1414

1515
#include "my_header_2.h"
1616

1717
my_int var = 42;
1818

19-
// CHECK: [[AST_BLOCK_HASH:<AST_BLOCK_HASH .*>]]
20-
// CHECK: [[SIGNATURE:<SIGNATURE .*>]]
21-
// CHECK: [[AST_BLOCK_HASH]]
22-
// CHECK-NOT: [[SIGNATURE]]
23-
// The modules built by this test are designed to yield the same AST. If this
24-
// test fails, it means that the AST block is has become non-relocatable.
19+
// CHECK: <AST_BLOCK_HASH abbrevid={{[0-9]+}}/> blob data = '[[AST_BLOCK_HASH:.*]]'
20+
// CHECK: <SIGNATURE abbrevid={{[0-9]+}}/> blob data = '[[SIGNATURE:.*]]'
21+
// CHECK: <AST_BLOCK_HASH abbrevid={{[0-9]+}}/> blob data = '[[AST_BLOCK_HASH]]'
22+
// CHECK-NOT: <SIGNATURE abbrevid={{[0-9]+}}/> blob data = '[[SIGNATURE]]'
23+
// The modules built by this test are designed to yield the same AST but distinct AST files.
24+
// If this test fails, it means that either the AST block has become non-relocatable,
25+
// or the file signature stopped hashing some parts of the AST file.

0 commit comments

Comments
 (0)