Skip to content

Commit 104f729

Browse files
authored
Merge pull request #7395 from apple/jan_svoboda/20221013-control-block
[clang][deps] Cherry-pick changes to the unhashed control block
2 parents 68375da + e99d21a commit 104f729

File tree

10 files changed

+191
-78
lines changed

10 files changed

+191
-78
lines changed

clang/include/clang/Basic/Module.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,12 @@ struct ASTFileSignature : std::array<uint8_t, 20> {
8181
return Sentinel;
8282
}
8383

84+
static ASTFileSignature createDummy() {
85+
ASTFileSignature Dummy;
86+
Dummy.fill(0x00);
87+
return Dummy;
88+
}
89+
8490
template <typename InputIt>
8591
static ASTFileSignature create(InputIt First, InputIt Last) {
8692
assert(std::distance(First, Last) == size &&

clang/include/clang/Serialization/ASTBitCodes.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ namespace serialization {
4141
/// Version 4 of AST files also requires that the version control branch and
4242
/// revision match exactly, since there is no backward compatibility of
4343
/// AST files at this time.
44-
const unsigned VERSION_MAJOR = 28;
44+
const unsigned VERSION_MAJOR = 29;
4545

4646
/// AST file minor version number supported by this version of
4747
/// Clang.

clang/include/clang/Serialization/ASTWriter.h

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -128,10 +128,17 @@ class ASTWriter : public ASTDeserializationListener,
128128
/// The module we're currently writing, if any.
129129
Module *WritingModule = nullptr;
130130

131-
/// The offset of the first bit inside the AST_BLOCK.
131+
/// The byte range spanning the entire UNHASHED_CONTROL_BLOCK.
132+
std::pair<uint64_t, uint64_t> UnhashedControlBlockRange;
133+
/// The bit offset of the AST block hash blob.
134+
uint64_t ASTBlockHashOffset = 0;
135+
/// The bit offset of the signature blob.
136+
uint64_t SignatureOffset = 0;
137+
138+
/// The bit offset of the first bit inside the AST_BLOCK.
132139
uint64_t ASTBlockStartOffset = 0;
133140

134-
/// The range representing all the AST_BLOCK.
141+
/// The byte range spanning the entire AST_BLOCK.
135142
std::pair<uint64_t, uint64_t> ASTBlockRange;
136143

137144
/// The base directory for any relative paths we emit.
@@ -495,12 +502,11 @@ class ASTWriter : public ASTDeserializationListener,
495502
StringRef isysroot);
496503

497504
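/// Write out the unhashed control block (placeholder signatures and diagnostic options); the real signatures are filled in afterwards by backpatchSignature(), which returns the file signature.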
498-
ASTFileSignature writeUnhashedControlBlock(Preprocessor &PP,
499-
ASTContext &Context);
505+
void writeUnhashedControlBlock(Preprocessor &PP, ASTContext &Context);
506+
ASTFileSignature backpatchSignature();
500507

501508
/// Calculate hash of the pcm content.
502-
static std::pair<ASTFileSignature, ASTFileSignature>
503-
createSignature(StringRef AllBytes, StringRef ASTBlockBytes);
509+
std::pair<ASTFileSignature, ASTFileSignature> createSignature() const;
504510

505511
void WriteInputFiles(SourceManager &SourceMgr, HeaderSearchOptions &HSOpts);
506512
void WriteSourceManagerBlock(SourceManager &SourceMgr,

clang/include/clang/Serialization/ModuleFile.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,10 @@ class ModuleFile {
253253
/// The cursor to the start of the input-files block.
254254
llvm::BitstreamCursor InputFilesCursor;
255255

256-
/// Offsets for all of the input file entries in the AST file.
256+
/// Absolute bit offset of the start of the input-files block.
257+
uint64_t InputFilesOffsetBase = 0;
258+
259+
/// Bit offsets, relative to InputFilesOffsetBase, for all of the input file entries in the AST file.
257260
const llvm::support::unaligned_uint64_t *InputFileOffsets = nullptr;
258261

259262
/// The input files that have been loaded from this AST file.

clang/lib/Serialization/ASTReader.cpp

Lines changed: 26 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2278,7 +2278,8 @@ InputFileInfo ASTReader::getInputFileInfo(ModuleFile &F, unsigned ID) {
22782278
// Go find this input file.
22792279
BitstreamCursor &Cursor = F.InputFilesCursor;
22802280
SavedStreamPosition SavedPosition(Cursor);
2281-
if (llvm::Error Err = Cursor.JumpToBit(F.InputFileOffsets[ID - 1])) {
2281+
if (llvm::Error Err = Cursor.JumpToBit(F.InputFilesOffsetBase +
2282+
F.InputFileOffsets[ID - 1])) {
22822283
// FIXME this drops errors on the floor.
22832284
consumeError(std::move(Err));
22842285
}
@@ -2362,7 +2363,8 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) {
23622363
// Go find this input file.
23632364
BitstreamCursor &Cursor = F.InputFilesCursor;
23642365
SavedStreamPosition SavedPosition(Cursor);
2365-
if (llvm::Error Err = Cursor.JumpToBit(F.InputFileOffsets[ID - 1])) {
2366+
if (llvm::Error Err = Cursor.JumpToBit(F.InputFilesOffsetBase +
2367+
F.InputFileOffsets[ID - 1])) {
23662368
// FIXME this drops errors on the floor.
23672369
consumeError(std::move(Err));
23682370
}
@@ -2732,6 +2734,7 @@ ASTReader::ReadControlBlock(ModuleFile &F,
27322734
Error("malformed block record in AST file");
27332735
return Failure;
27342736
}
2737+
F.InputFilesOffsetBase = F.InputFilesCursor.GetCurrentBitNo();
27352738
continue;
27362739

27372740
case OPTIONS_BLOCK_ID:
@@ -4677,12 +4680,6 @@ ASTReader::ReadASTCore(StringRef FileName,
46774680
ShouldFinalizePCM = true;
46784681
return Success;
46794682

4680-
case UNHASHED_CONTROL_BLOCK_ID:
4681-
// This block is handled using look-ahead during ReadControlBlock. We
4682-
// shouldn't get here!
4683-
Error("malformed block record in AST file");
4684-
return Failure;
4685-
46864683
default:
46874684
if (llvm::Error Err = Stream.SkipBlock()) {
46884685
Error(std::move(Err));
@@ -4807,13 +4804,18 @@ ASTReader::ASTReadResult ASTReader::readUnhashedControlBlockImpl(
48074804
}
48084805
switch ((UnhashedControlBlockRecordTypes)MaybeRecordType.get()) {
48094806
case SIGNATURE:
4810-
if (F)
4811-
F->Signature = ASTFileSignature::create(Record.begin(), Record.end());
4807+
if (F) {
4808+
F->Signature = ASTFileSignature::create(Blob.begin(), Blob.end());
4809+
assert(F->Signature != ASTFileSignature::createDummy() &&
4810+
"Dummy AST file signature not backpatched in ASTWriter.");
4811+
}
48124812
break;
48134813
case AST_BLOCK_HASH:
4814-
if (F)
4815-
F->ASTBlockHash =
4816-
ASTFileSignature::create(Record.begin(), Record.end());
4814+
if (F) {
4815+
F->ASTBlockHash = ASTFileSignature::create(Blob.begin(), Blob.end());
4816+
assert(F->ASTBlockHash != ASTFileSignature::createDummy() &&
4817+
"Dummy AST block hash not backpatched in ASTWriter.");
4818+
}
48174819
break;
48184820
case DIAGNOSTIC_OPTIONS: {
48194821
bool Complain = (ClientLoadCapabilities & ARR_OutOfDate) == 0;
@@ -5123,9 +5125,12 @@ static ASTFileSignature readASTFileSignature(StringRef PCH) {
51235125
consumeError(MaybeRecord.takeError());
51245126
return ASTFileSignature();
51255127
}
5126-
if (SIGNATURE == MaybeRecord.get())
5127-
return ASTFileSignature::create(Record.begin(),
5128-
Record.begin() + ASTFileSignature::size);
5128+
if (SIGNATURE == MaybeRecord.get()) {
5129+
auto Signature = ASTFileSignature::create(Blob.begin(), Blob.end());
5130+
assert(Signature != ASTFileSignature::createDummy() &&
5131+
"Dummy AST file signature not backpatched in ASTWriter.");
5132+
return Signature;
5133+
}
51295134
}
51305135
}
51315136

@@ -5287,6 +5292,7 @@ bool ASTReader::readASTFileControlBlock(
52875292
bool NeedsSystemInputFiles = Listener.needsSystemInputFileVisitation();
52885293
bool NeedsImports = Listener.needsImportVisitation();
52895294
BitstreamCursor InputFilesCursor;
5295+
uint64_t InputFilesOffsetBase = 0;
52905296

52915297
RecordData Record;
52925298
std::string ModuleDir;
@@ -5322,6 +5328,7 @@ bool ASTReader::readASTFileControlBlock(
53225328
if (NeedsInputFiles &&
53235329
ReadBlockAbbrevs(InputFilesCursor, INPUT_FILES_BLOCK_ID))
53245330
return true;
5331+
InputFilesOffsetBase = InputFilesCursor.GetCurrentBitNo();
53255332
break;
53265333

53275334
default:
@@ -5394,7 +5401,8 @@ bool ASTReader::readASTFileControlBlock(
53945401

53955402
BitstreamCursor &Cursor = InputFilesCursor;
53965403
SavedStreamPosition SavedPosition(Cursor);
5397-
if (llvm::Error Err = Cursor.JumpToBit(InputFileOffs[I])) {
5404+
if (llvm::Error Err =
5405+
Cursor.JumpToBit(InputFilesOffsetBase + InputFileOffs[I])) {
53985406
// FIXME this drops errors on the floor.
53995407
consumeError(std::move(Err));
54005408
}
@@ -6476,8 +6484,7 @@ void ASTReader::ReadPragmaDiagnosticMappings(DiagnosticsEngine &Diag) {
64766484
// Read the final state.
64776485
assert(Idx < Record.size() &&
64786486
"Invalid data, missing final pragma diagnostic state");
6479-
SourceLocation CurStateLoc =
6480-
ReadSourceLocation(F, F.PragmaDiagMappings[Idx++]);
6487+
SourceLocation CurStateLoc = ReadSourceLocation(F, Record[Idx++]);
64816488
auto *CurState = ReadDiagState(*FirstState, false);
64826489

64836490
if (!F.isModule()) {

clang/lib/Serialization/ASTWriter.cpp

Lines changed: 76 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1119,50 +1119,93 @@ adjustFilenameForRelocatableAST(const char *Filename, StringRef BaseDir) {
11191119
}
11201120

11211121
std::pair<ASTFileSignature, ASTFileSignature>
1122-
ASTWriter::createSignature(StringRef AllBytes, StringRef ASTBlockBytes) {
1122+
ASTWriter::createSignature() const {
1123+
StringRef AllBytes(Buffer.data(), Buffer.size());
1124+
11231125
llvm::SHA1 Hasher;
1124-
Hasher.update(ASTBlockBytes);
1126+
Hasher.update(AllBytes.slice(ASTBlockRange.first, ASTBlockRange.second));
11251127
ASTFileSignature ASTBlockHash = ASTFileSignature::create(Hasher.result());
11261128

1127-
// Add the remaining bytes (i.e. bytes before the unhashed control block that
1128-
// are not part of the AST block).
1129-
Hasher.update(
1130-
AllBytes.take_front(ASTBlockBytes.bytes_end() - AllBytes.bytes_begin()));
1129+
// Add the remaining bytes:
1130+
// 1. Before the unhashed control block.
1131+
Hasher.update(AllBytes.slice(0, UnhashedControlBlockRange.first));
1132+
// 2. Between the unhashed control block and the AST block.
11311133
Hasher.update(
1132-
AllBytes.take_back(AllBytes.bytes_end() - ASTBlockBytes.bytes_end()));
1134+
AllBytes.slice(UnhashedControlBlockRange.second, ASTBlockRange.first));
1135+
// 3. After the AST block.
1136+
Hasher.update(AllBytes.slice(ASTBlockRange.second, StringRef::npos));
11331137
ASTFileSignature Signature = ASTFileSignature::create(Hasher.result());
11341138

11351139
return std::make_pair(ASTBlockHash, Signature);
11361140
}
11371141

1138-
ASTFileSignature ASTWriter::writeUnhashedControlBlock(Preprocessor &PP,
1139-
ASTContext &Context) {
1142+
ASTFileSignature ASTWriter::backpatchSignature() {
1143+
if (!WritingModule ||
1144+
!PP->getHeaderSearchInfo().getHeaderSearchOpts().ModulesHashContent)
1145+
return {};
1146+
1147+
// For implicit modules, write the hash of the PCM as its signature.
1148+
1149+
auto BackpatchSignatureAt = [&](const ASTFileSignature &S, uint64_t BitNo) {
1150+
for (uint8_t Byte : S) {
1151+
Stream.BackpatchByte(BitNo, Byte);
1152+
BitNo += 8;
1153+
}
1154+
};
1155+
1156+
ASTFileSignature ASTBlockHash;
1157+
ASTFileSignature Signature;
1158+
std::tie(ASTBlockHash, Signature) = createSignature();
1159+
1160+
BackpatchSignatureAt(ASTBlockHash, ASTBlockHashOffset);
1161+
BackpatchSignatureAt(Signature, SignatureOffset);
1162+
1163+
return Signature;
1164+
}
1165+
1166+
void ASTWriter::writeUnhashedControlBlock(Preprocessor &PP,
1167+
ASTContext &Context) {
11401168
using namespace llvm;
11411169

11421170
// Flush first to prepare the PCM hash (signature).
11431171
Stream.FlushToWord();
1144-
auto StartOfUnhashedControl = Stream.GetCurrentBitNo() >> 3;
1172+
UnhashedControlBlockRange.first = Stream.GetCurrentBitNo() >> 3;
11451173

11461174
// Enter the block and prepare to write records.
11471175
RecordData Record;
11481176
Stream.EnterSubblock(UNHASHED_CONTROL_BLOCK_ID, 5);
11491177

11501178
// For implicit modules, write the hash of the PCM as its signature.
1151-
ASTFileSignature Signature;
11521179
if (WritingModule &&
11531180
PP.getHeaderSearchInfo().getHeaderSearchOpts().ModulesHashContent) {
1154-
ASTFileSignature ASTBlockHash;
1155-
auto ASTBlockStartByte = ASTBlockRange.first >> 3;
1156-
auto ASTBlockByteLength = (ASTBlockRange.second >> 3) - ASTBlockStartByte;
1157-
std::tie(ASTBlockHash, Signature) = createSignature(
1158-
StringRef(Buffer.begin(), StartOfUnhashedControl),
1159-
StringRef(Buffer.begin() + ASTBlockStartByte, ASTBlockByteLength));
1160-
1161-
Record.append(ASTBlockHash.begin(), ASTBlockHash.end());
1162-
Stream.EmitRecord(AST_BLOCK_HASH, Record);
1181+
// At this point, we don't know the actual signature of the file or the AST
1182+
// block - we're only able to compute those at the end of the serialization
1183+
// process. Let's store dummy signatures for now, and replace them with the
1184+
// real ones later on.
1185+
// The bitstream VBR-encodes record elements, which makes backpatching them
1186+
// really difficult. Let's store the signatures as blobs instead - they are
1187+
// guaranteed to be word-aligned, and we control their format/encoding.
1188+
auto Dummy = ASTFileSignature::createDummy();
1189+
SmallString<128> Blob{Dummy.begin(), Dummy.end()};
1190+
1191+
auto Abbrev = std::make_shared<BitCodeAbbrev>();
1192+
Abbrev->Add(BitCodeAbbrevOp(AST_BLOCK_HASH));
1193+
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
1194+
unsigned ASTBlockHashAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
1195+
1196+
Abbrev = std::make_shared<BitCodeAbbrev>();
1197+
Abbrev->Add(BitCodeAbbrevOp(SIGNATURE));
1198+
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
1199+
unsigned SignatureAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
1200+
1201+
Record.push_back(AST_BLOCK_HASH);
1202+
Stream.EmitRecordWithBlob(ASTBlockHashAbbrev, Record, Blob);
1203+
ASTBlockHashOffset = Stream.GetCurrentBitNo() - Blob.size() * 8;
11631204
Record.clear();
1164-
Record.append(Signature.begin(), Signature.end());
1165-
Stream.EmitRecord(SIGNATURE, Record);
1205+
1206+
Record.push_back(SIGNATURE);
1207+
Stream.EmitRecordWithBlob(SignatureAbbrev, Record, Blob);
1208+
SignatureOffset = Stream.GetCurrentBitNo() - Blob.size() * 8;
11661209
Record.clear();
11671210
}
11681211

@@ -1242,7 +1285,7 @@ ASTFileSignature ASTWriter::writeUnhashedControlBlock(Preprocessor &PP,
12421285

12431286
// Leave the unhashed control block.
12441287
Stream.ExitBlock();
1245-
return Signature;
1288+
UnhashedControlBlockRange.second = Stream.GetCurrentBitNo() >> 3;
12461289
}
12471290

12481291
/// Write the control block.
@@ -1600,6 +1643,8 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
16001643
IFHAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
16011644
unsigned IFHAbbrevCode = Stream.EmitAbbrev(std::move(IFHAbbrev));
16021645

1646+
uint64_t InputFilesOffsetBase = Stream.GetCurrentBitNo();
1647+
16031648
// Get all ContentCache objects for files.
16041649
std::vector<InputFileEntry> UserFiles;
16051650
std::vector<InputFileEntry> SystemFiles;
@@ -1663,7 +1708,7 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
16631708
continue; // already recorded this file.
16641709

16651710
// Record this entry's offset.
1666-
InputFileOffsets.push_back(Stream.GetCurrentBitNo());
1711+
InputFileOffsets.push_back(Stream.GetCurrentBitNo() - InputFilesOffsetBase);
16671712

16681713
InputFileID = InputFileOffsets.size();
16691714

@@ -4654,8 +4699,12 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
46544699
ASTContext &Context = SemaRef.Context;
46554700
Preprocessor &PP = SemaRef.PP;
46564701

4702+
// This needs to be done very early, since everything that writes
4703+
// SourceLocations or FileIDs depends on it.
46574704
collectNonAffectingInputFiles();
46584705

4706+
writeUnhashedControlBlock(PP, Context);
4707+
46594708
// Set up predefined declaration IDs.
46604709
auto RegisterPredefDecl = [&] (Decl *D, PredefinedDeclIDs ID) {
46614710
if (D) {
@@ -4801,7 +4850,7 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
48014850

48024851
// Write the remaining AST contents.
48034852
Stream.FlushToWord();
4804-
ASTBlockRange.first = Stream.GetCurrentBitNo();
4853+
ASTBlockRange.first = Stream.GetCurrentBitNo() >> 3;
48054854
Stream.EnterSubblock(AST_BLOCK_ID, 5);
48064855
ASTBlockStartOffset = Stream.GetCurrentBitNo();
48074856

@@ -5158,13 +5207,13 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
51585207
Stream.EmitRecord(STATISTICS, Record);
51595208
Stream.ExitBlock();
51605209
Stream.FlushToWord();
5161-
ASTBlockRange.second = Stream.GetCurrentBitNo();
5210+
ASTBlockRange.second = Stream.GetCurrentBitNo() >> 3;
51625211

51635212
// Write the module file extension blocks.
51645213
for (const auto &ExtWriter : ModuleFileExtensionWriters)
51655214
WriteModuleFileExtension(SemaRef, *ExtWriter);
51665215

5167-
return writeUnhashedControlBlock(PP, Context);
5216+
return backpatchSignature();
51685217
}
51695218

51705219
void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) {

clang/lib/Serialization/GlobalModuleIndex.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -706,9 +706,12 @@ llvm::Error GlobalModuleIndexBuilder::loadModuleFile(const FileEntry *File) {
706706
}
707707

708708
// Get Signature.
709-
if (State == DiagnosticOptionsBlock && Code == SIGNATURE)
710-
getModuleFileInfo(File).Signature = ASTFileSignature::create(
711-
Record.begin(), Record.begin() + ASTFileSignature::size);
709+
if (State == DiagnosticOptionsBlock && Code == SIGNATURE) {
710+
auto Signature = ASTFileSignature::create(Blob.begin(), Blob.end());
711+
assert(Signature != ASTFileSignature::createDummy() &&
712+
"Dummy AST file signature not backpatched in ASTWriter.");
713+
getModuleFileInfo(File).Signature = Signature;
714+
}
712715

713716
// We don't care about this record.
714717
}

0 commit comments

Comments
 (0)