Skip to content

[clang][deps] Cherry-pick changes to the unhashed control block #7395

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Sep 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions clang/include/clang/Basic/Module.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,12 @@ struct ASTFileSignature : std::array<uint8_t, 20> {
return Sentinel;
}

/// Build a signature whose bytes are all zero.
///
/// The AST writer emits this value as a stand-in for the real hashes and
/// overwrites it later; readers assert that they never observe it.
static ASTFileSignature createDummy() {
  ASTFileSignature Zeroed;
  Zeroed.fill(0x00);
  return Zeroed;
}

template <typename InputIt>
static ASTFileSignature create(InputIt First, InputIt Last) {
assert(std::distance(First, Last) == size &&
Expand Down
2 changes: 1 addition & 1 deletion clang/include/clang/Serialization/ASTBitCodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ namespace serialization {
/// Version 4 of AST files also requires that the version control branch and
/// revision match exactly, since there is no backward compatibility of
/// AST files at this time.
const unsigned VERSION_MAJOR = 28;
const unsigned VERSION_MAJOR = 29;

/// AST file minor version number supported by this version of
/// Clang.
Expand Down
18 changes: 12 additions & 6 deletions clang/include/clang/Serialization/ASTWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,10 +128,17 @@ class ASTWriter : public ASTDeserializationListener,
/// The module we're currently writing, if any.
Module *WritingModule = nullptr;

/// The offset of the first bit inside the AST_BLOCK.
/// The byte range spanning the entire UNHASHED_CONTROL_BLOCK.
std::pair<uint64_t, uint64_t> UnhashedControlBlockRange;
/// The bit offset of the AST block hash blob.
uint64_t ASTBlockHashOffset = 0;
/// The bit offset of the signature blob.
uint64_t SignatureOffset = 0;

/// The bit offset of the first bit inside the AST_BLOCK.
uint64_t ASTBlockStartOffset = 0;

/// The range representing all the AST_BLOCK.
/// The byte range spanning the entire AST_BLOCK.
std::pair<uint64_t, uint64_t> ASTBlockRange;

/// The base directory for any relative paths we emit.
Expand Down Expand Up @@ -495,12 +502,11 @@ class ASTWriter : public ASTDeserializationListener,
StringRef isysroot);

/// Write out the (placeholder) signature and diagnostic options. The real
/// signature is filled in and returned later by backpatchSignature().
ASTFileSignature writeUnhashedControlBlock(Preprocessor &PP,
ASTContext &Context);
void writeUnhashedControlBlock(Preprocessor &PP, ASTContext &Context);
ASTFileSignature backpatchSignature();

/// Calculate hash of the pcm content.
static std::pair<ASTFileSignature, ASTFileSignature>
createSignature(StringRef AllBytes, StringRef ASTBlockBytes);
std::pair<ASTFileSignature, ASTFileSignature> createSignature() const;

void WriteInputFiles(SourceManager &SourceMgr, HeaderSearchOptions &HSOpts);
void WriteSourceManagerBlock(SourceManager &SourceMgr,
Expand Down
5 changes: 4 additions & 1 deletion clang/include/clang/Serialization/ModuleFile.h
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,10 @@ class ModuleFile {
/// The cursor to the start of the input-files block.
llvm::BitstreamCursor InputFilesCursor;

/// Offsets for all of the input file entries in the AST file.
/// Absolute offset of the start of the input-files block.
uint64_t InputFilesOffsetBase = 0;

/// Relative offsets for all of the input file entries in the AST file.
const llvm::support::unaligned_uint64_t *InputFileOffsets = nullptr;

/// The input files that have been loaded from this AST file.
Expand Down
45 changes: 26 additions & 19 deletions clang/lib/Serialization/ASTReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2278,7 +2278,8 @@ InputFileInfo ASTReader::getInputFileInfo(ModuleFile &F, unsigned ID) {
// Go find this input file.
BitstreamCursor &Cursor = F.InputFilesCursor;
SavedStreamPosition SavedPosition(Cursor);
if (llvm::Error Err = Cursor.JumpToBit(F.InputFileOffsets[ID - 1])) {
if (llvm::Error Err = Cursor.JumpToBit(F.InputFilesOffsetBase +
F.InputFileOffsets[ID - 1])) {
// FIXME this drops errors on the floor.
consumeError(std::move(Err));
}
Expand Down Expand Up @@ -2362,7 +2363,8 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) {
// Go find this input file.
BitstreamCursor &Cursor = F.InputFilesCursor;
SavedStreamPosition SavedPosition(Cursor);
if (llvm::Error Err = Cursor.JumpToBit(F.InputFileOffsets[ID - 1])) {
if (llvm::Error Err = Cursor.JumpToBit(F.InputFilesOffsetBase +
F.InputFileOffsets[ID - 1])) {
// FIXME this drops errors on the floor.
consumeError(std::move(Err));
}
Expand Down Expand Up @@ -2732,6 +2734,7 @@ ASTReader::ReadControlBlock(ModuleFile &F,
Error("malformed block record in AST file");
return Failure;
}
F.InputFilesOffsetBase = F.InputFilesCursor.GetCurrentBitNo();
continue;

case OPTIONS_BLOCK_ID:
Expand Down Expand Up @@ -4677,12 +4680,6 @@ ASTReader::ReadASTCore(StringRef FileName,
ShouldFinalizePCM = true;
return Success;

case UNHASHED_CONTROL_BLOCK_ID:
// This block is handled using look-ahead during ReadControlBlock. We
// shouldn't get here!
Error("malformed block record in AST file");
return Failure;

default:
if (llvm::Error Err = Stream.SkipBlock()) {
Error(std::move(Err));
Expand Down Expand Up @@ -4807,13 +4804,18 @@ ASTReader::ASTReadResult ASTReader::readUnhashedControlBlockImpl(
}
switch ((UnhashedControlBlockRecordTypes)MaybeRecordType.get()) {
case SIGNATURE:
if (F)
F->Signature = ASTFileSignature::create(Record.begin(), Record.end());
if (F) {
F->Signature = ASTFileSignature::create(Blob.begin(), Blob.end());
assert(F->Signature != ASTFileSignature::createDummy() &&
"Dummy AST file signature not backpatched in ASTWriter.");
}
break;
case AST_BLOCK_HASH:
if (F)
F->ASTBlockHash =
ASTFileSignature::create(Record.begin(), Record.end());
if (F) {
F->ASTBlockHash = ASTFileSignature::create(Blob.begin(), Blob.end());
assert(F->ASTBlockHash != ASTFileSignature::createDummy() &&
"Dummy AST block hash not backpatched in ASTWriter.");
}
break;
case DIAGNOSTIC_OPTIONS: {
bool Complain = (ClientLoadCapabilities & ARR_OutOfDate) == 0;
Expand Down Expand Up @@ -5123,9 +5125,12 @@ static ASTFileSignature readASTFileSignature(StringRef PCH) {
consumeError(MaybeRecord.takeError());
return ASTFileSignature();
}
if (SIGNATURE == MaybeRecord.get())
return ASTFileSignature::create(Record.begin(),
Record.begin() + ASTFileSignature::size);
if (SIGNATURE == MaybeRecord.get()) {
auto Signature = ASTFileSignature::create(Blob.begin(), Blob.end());
assert(Signature != ASTFileSignature::createDummy() &&
"Dummy AST file signature not backpatched in ASTWriter.");
return Signature;
}
}
}

Expand Down Expand Up @@ -5287,6 +5292,7 @@ bool ASTReader::readASTFileControlBlock(
bool NeedsSystemInputFiles = Listener.needsSystemInputFileVisitation();
bool NeedsImports = Listener.needsImportVisitation();
BitstreamCursor InputFilesCursor;
uint64_t InputFilesOffsetBase = 0;

RecordData Record;
std::string ModuleDir;
Expand Down Expand Up @@ -5322,6 +5328,7 @@ bool ASTReader::readASTFileControlBlock(
if (NeedsInputFiles &&
ReadBlockAbbrevs(InputFilesCursor, INPUT_FILES_BLOCK_ID))
return true;
InputFilesOffsetBase = InputFilesCursor.GetCurrentBitNo();
break;

default:
Expand Down Expand Up @@ -5394,7 +5401,8 @@ bool ASTReader::readASTFileControlBlock(

BitstreamCursor &Cursor = InputFilesCursor;
SavedStreamPosition SavedPosition(Cursor);
if (llvm::Error Err = Cursor.JumpToBit(InputFileOffs[I])) {
if (llvm::Error Err =
Cursor.JumpToBit(InputFilesOffsetBase + InputFileOffs[I])) {
// FIXME this drops errors on the floor.
consumeError(std::move(Err));
}
Expand Down Expand Up @@ -6476,8 +6484,7 @@ void ASTReader::ReadPragmaDiagnosticMappings(DiagnosticsEngine &Diag) {
// Read the final state.
assert(Idx < Record.size() &&
"Invalid data, missing final pragma diagnostic state");
SourceLocation CurStateLoc =
ReadSourceLocation(F, F.PragmaDiagMappings[Idx++]);
SourceLocation CurStateLoc = ReadSourceLocation(F, Record[Idx++]);
auto *CurState = ReadDiagState(*FirstState, false);

if (!F.isModule()) {
Expand Down
103 changes: 76 additions & 27 deletions clang/lib/Serialization/ASTWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1119,50 +1119,93 @@ adjustFilenameForRelocatableAST(const char *Filename, StringRef BaseDir) {
}

std::pair<ASTFileSignature, ASTFileSignature>
ASTWriter::createSignature(StringRef AllBytes, StringRef ASTBlockBytes) {
ASTWriter::createSignature() const {
StringRef AllBytes(Buffer.data(), Buffer.size());

llvm::SHA1 Hasher;
Hasher.update(ASTBlockBytes);
Hasher.update(AllBytes.slice(ASTBlockRange.first, ASTBlockRange.second));
ASTFileSignature ASTBlockHash = ASTFileSignature::create(Hasher.result());

// Add the remaining bytes (i.e. bytes before the unhashed control block that
// are not part of the AST block).
Hasher.update(
AllBytes.take_front(ASTBlockBytes.bytes_end() - AllBytes.bytes_begin()));
// Add the remaining bytes:
// 1. Before the unhashed control block.
Hasher.update(AllBytes.slice(0, UnhashedControlBlockRange.first));
// 2. Between the unhashed control block and the AST block.
Hasher.update(
AllBytes.take_back(AllBytes.bytes_end() - ASTBlockBytes.bytes_end()));
AllBytes.slice(UnhashedControlBlockRange.second, ASTBlockRange.first));
// 3. After the AST block.
Hasher.update(AllBytes.slice(ASTBlockRange.second, StringRef::npos));
ASTFileSignature Signature = ASTFileSignature::create(Hasher.result());

return std::make_pair(ASTBlockHash, Signature);
}

ASTFileSignature ASTWriter::writeUnhashedControlBlock(Preprocessor &PP,
ASTContext &Context) {
/// Replace the placeholder hashes stored in the unhashed control block with
/// the real ones, now that the whole file has been serialized.
///
/// \returns the file signature that was patched in, or an empty signature
/// when no module is being written or content hashing is disabled.
ASTFileSignature ASTWriter::backpatchSignature() {
  // Nothing to patch unless we are writing a module with content hashing on.
  // The && keeps PP from being consulted when WritingModule is null.
  if (!(WritingModule &&
        PP->getHeaderSearchInfo().getHeaderSearchOpts().ModulesHashContent))
    return {};

  // For implicit modules, write the hash of the PCM as its signature.

  // Overwrite one byte of the stream per signature byte, starting at the
  // given bit offset and advancing eight bits each time.
  auto PatchBytesAt = [&](const ASTFileSignature &Sig, uint64_t StartBit) {
    uint64_t Bit = StartBit;
    for (auto It = Sig.begin(), End = Sig.end(); It != End; ++It, Bit += 8)
      Stream.BackpatchByte(Bit, *It);
  };

  // first = hash of the AST block, second = signature of the whole file.
  std::pair<ASTFileSignature, ASTFileSignature> Hashes = createSignature();

  PatchBytesAt(Hashes.first, ASTBlockHashOffset);
  PatchBytesAt(Hashes.second, SignatureOffset);

  return Hashes.second;
}

void ASTWriter::writeUnhashedControlBlock(Preprocessor &PP,
ASTContext &Context) {
using namespace llvm;

// Flush first to prepare the PCM hash (signature).
Stream.FlushToWord();
auto StartOfUnhashedControl = Stream.GetCurrentBitNo() >> 3;
UnhashedControlBlockRange.first = Stream.GetCurrentBitNo() >> 3;

// Enter the block and prepare to write records.
RecordData Record;
Stream.EnterSubblock(UNHASHED_CONTROL_BLOCK_ID, 5);

// For implicit modules, write the hash of the PCM as its signature.
ASTFileSignature Signature;
if (WritingModule &&
PP.getHeaderSearchInfo().getHeaderSearchOpts().ModulesHashContent) {
ASTFileSignature ASTBlockHash;
auto ASTBlockStartByte = ASTBlockRange.first >> 3;
auto ASTBlockByteLength = (ASTBlockRange.second >> 3) - ASTBlockStartByte;
std::tie(ASTBlockHash, Signature) = createSignature(
StringRef(Buffer.begin(), StartOfUnhashedControl),
StringRef(Buffer.begin() + ASTBlockStartByte, ASTBlockByteLength));

Record.append(ASTBlockHash.begin(), ASTBlockHash.end());
Stream.EmitRecord(AST_BLOCK_HASH, Record);
// At this point, we don't know the actual signature of the file or the AST
// block - we're only able to compute those at the end of the serialization
// process. Let's store dummy signatures for now, and replace them with the
// real ones later on.
// The bitstream VBR-encodes record elements, which makes backpatching them
// really difficult. Let's store the signatures as blobs instead - they are
// guaranteed to be word-aligned, and we control their format/encoding.
auto Dummy = ASTFileSignature::createDummy();
SmallString<128> Blob{Dummy.begin(), Dummy.end()};

auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(AST_BLOCK_HASH));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
unsigned ASTBlockHashAbbrev = Stream.EmitAbbrev(std::move(Abbrev));

Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SIGNATURE));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
unsigned SignatureAbbrev = Stream.EmitAbbrev(std::move(Abbrev));

Record.push_back(AST_BLOCK_HASH);
Stream.EmitRecordWithBlob(ASTBlockHashAbbrev, Record, Blob);
ASTBlockHashOffset = Stream.GetCurrentBitNo() - Blob.size() * 8;
Record.clear();
Record.append(Signature.begin(), Signature.end());
Stream.EmitRecord(SIGNATURE, Record);

Record.push_back(SIGNATURE);
Stream.EmitRecordWithBlob(SignatureAbbrev, Record, Blob);
SignatureOffset = Stream.GetCurrentBitNo() - Blob.size() * 8;
Record.clear();
}

Expand Down Expand Up @@ -1242,7 +1285,7 @@ ASTFileSignature ASTWriter::writeUnhashedControlBlock(Preprocessor &PP,

// Leave the options block.
Stream.ExitBlock();
return Signature;
UnhashedControlBlockRange.second = Stream.GetCurrentBitNo() >> 3;
}

/// Write the control block.
Expand Down Expand Up @@ -1600,6 +1643,8 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
IFHAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
unsigned IFHAbbrevCode = Stream.EmitAbbrev(std::move(IFHAbbrev));

uint64_t InputFilesOffsetBase = Stream.GetCurrentBitNo();

// Get all ContentCache objects for files.
std::vector<InputFileEntry> UserFiles;
std::vector<InputFileEntry> SystemFiles;
Expand Down Expand Up @@ -1663,7 +1708,7 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
continue; // already recorded this file.

// Record this entry's offset.
InputFileOffsets.push_back(Stream.GetCurrentBitNo());
InputFileOffsets.push_back(Stream.GetCurrentBitNo() - InputFilesOffsetBase);

InputFileID = InputFileOffsets.size();

Expand Down Expand Up @@ -4654,8 +4699,12 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
ASTContext &Context = SemaRef.Context;
Preprocessor &PP = SemaRef.PP;

// This needs to be done very early, since everything that writes
// SourceLocations or FileIDs depends on it.
collectNonAffectingInputFiles();

writeUnhashedControlBlock(PP, Context);

// Set up predefined declaration IDs.
auto RegisterPredefDecl = [&] (Decl *D, PredefinedDeclIDs ID) {
if (D) {
Expand Down Expand Up @@ -4801,7 +4850,7 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,

// Write the remaining AST contents.
Stream.FlushToWord();
ASTBlockRange.first = Stream.GetCurrentBitNo();
ASTBlockRange.first = Stream.GetCurrentBitNo() >> 3;
Stream.EnterSubblock(AST_BLOCK_ID, 5);
ASTBlockStartOffset = Stream.GetCurrentBitNo();

Expand Down Expand Up @@ -5158,13 +5207,13 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
Stream.EmitRecord(STATISTICS, Record);
Stream.ExitBlock();
Stream.FlushToWord();
ASTBlockRange.second = Stream.GetCurrentBitNo();
ASTBlockRange.second = Stream.GetCurrentBitNo() >> 3;

// Write the module file extension blocks.
for (const auto &ExtWriter : ModuleFileExtensionWriters)
WriteModuleFileExtension(SemaRef, *ExtWriter);

return writeUnhashedControlBlock(PP, Context);
return backpatchSignature();
}

void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) {
Expand Down
9 changes: 6 additions & 3 deletions clang/lib/Serialization/GlobalModuleIndex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -706,9 +706,12 @@ llvm::Error GlobalModuleIndexBuilder::loadModuleFile(const FileEntry *File) {
}

// Get Signature.
if (State == DiagnosticOptionsBlock && Code == SIGNATURE)
getModuleFileInfo(File).Signature = ASTFileSignature::create(
Record.begin(), Record.begin() + ASTFileSignature::size);
if (State == DiagnosticOptionsBlock && Code == SIGNATURE) {
auto Signature = ASTFileSignature::create(Blob.begin(), Blob.end());
assert(Signature != ASTFileSignature::createDummy() &&
"Dummy AST file signature not backpatched in ASTWriter.");
getModuleFileInfo(File).Signature = Signature;
}

// We don't care about this record.
}
Expand Down
Loading