Skip to content

Commit bb8c7e7

Browse files
Add AST_SIGNATURE record to unhashed control block of PCM files
Summary: This record is constructed by hashing the bytes of the AST block in a similar fashion to the SIGNATURE record. This new signature only means anything if the AST block is fully relocatable, i.e. it does not embed absolute offsets within the PCM file. This change ensures this does not happen by replacing these offsets with offsets relative to the nearest relevant subblock of the AST block. Reviewers: Bigcheese, dexonsmith Subscribers: dexonsmith, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D80383
1 parent fb80e67 commit bb8c7e7

File tree

12 files changed

+153
-46
lines changed

12 files changed

+153
-46
lines changed

clang/include/clang/Serialization/ASTBitCodes.h

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ namespace serialization {
4141
/// Version 4 of AST files also requires that the version control branch and
4242
/// revision match exactly, since there is no backward compatibility of
4343
/// AST files at this time.
44-
const unsigned VERSION_MAJOR = 10;
44+
const unsigned VERSION_MAJOR = 11;
4545

4646
/// AST file minor version number supported by this version of
4747
/// Clang.
@@ -242,14 +242,16 @@ class TypeIdx {
242242
/// Raw source location.
243243
unsigned Loc = 0;
244244

245-
/// Offset in the AST file. Keep structure alignment 32-bit and avoid
246-
/// padding gap because undefined value in the padding affects AST hash.
245+
/// Offset relative to the start of the DECLTYPES_BLOCK block. Keep
246+
/// structure alignment 32-bit and avoid padding gap because undefined
247+
/// value in the padding affects AST hash.
247248
UnderalignedInt64 BitOffset;
248249

249250
DeclOffset() = default;
250-
DeclOffset(SourceLocation Loc, uint64_t BitOffset) {
251+
DeclOffset(SourceLocation Loc, uint64_t BitOffset,
252+
uint64_t DeclTypesBlockStartOffset) {
251253
setLocation(Loc);
252-
setBitOffset(BitOffset);
254+
setBitOffset(BitOffset, DeclTypesBlockStartOffset);
253255
}
254256

255257
void setLocation(SourceLocation L) {
@@ -260,12 +262,13 @@ class TypeIdx {
260262
return SourceLocation::getFromRawEncoding(Loc);
261263
}
262264

263-
void setBitOffset(uint64_t Offset) {
264-
BitOffset.setBitOffset(Offset);
265+
void setBitOffset(uint64_t Offset,
266+
const uint64_t DeclTypesBlockStartOffset) {
267+
BitOffset.setBitOffset(Offset - DeclTypesBlockStartOffset);
265268
}
266269

267-
uint64_t getBitOffset() const {
268-
return BitOffset.getBitOffset();
270+
uint64_t getBitOffset(const uint64_t DeclTypesBlockStartOffset) const {
271+
return BitOffset.getBitOffset() + DeclTypesBlockStartOffset;
269272
}
270273
};
271274

@@ -394,6 +397,9 @@ class TypeIdx {
394397
/// Record code for the signature that identifies this AST file.
395398
SIGNATURE = 1,
396399

400+
/// Record code for the content hash of the AST block.
401+
AST_BLOCK_HASH,
402+
397403
/// Record code for the diagnostic options table.
398404
DIAGNOSTIC_OPTIONS,
399405

clang/include/clang/Serialization/ASTReader.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1890,7 +1890,8 @@ class ASTReader
18901890
/// ReadBlockAbbrevs - Enter a subblock of the specified BlockID with the
18911891
/// specified cursor. Read the abbreviations that are at the top of the block
18921892
/// and then leave the cursor pointing into the block.
1893-
static bool ReadBlockAbbrevs(llvm::BitstreamCursor &Cursor, unsigned BlockID);
1893+
static bool ReadBlockAbbrevs(llvm::BitstreamCursor &Cursor, unsigned BlockID,
1894+
uint64_t *StartOfBlockOffset = nullptr);
18941895

18951896
/// Finds all the visible declarations with a given name.
18961897
/// The current implementation of this method just loads the entire

clang/include/clang/Serialization/ASTWriter.h

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "llvm/ADT/DenseMap.h"
2828
#include "llvm/ADT/DenseSet.h"
2929
#include "llvm/ADT/MapVector.h"
30+
#include "llvm/ADT/STLExtras.h"
3031
#include "llvm/ADT/SetVector.h"
3132
#include "llvm/ADT/SmallVector.h"
3233
#include "llvm/ADT/StringRef.h"
@@ -137,6 +138,12 @@ class ASTWriter : public ASTDeserializationListener,
137138
/// The module we're currently writing, if any.
138139
Module *WritingModule = nullptr;
139140

141+
/// The offset of the first bit inside the AST_BLOCK.
142+
uint64_t ASTBlockStartOffset = 0;
143+
144+
/// The range representing the entire AST_BLOCK.
145+
std::pair<uint64_t, uint64_t> ASTBlockRange;
146+
140147
/// The base directory for any relative paths we emit.
141148
std::string BaseDirectory;
142149

@@ -206,6 +213,10 @@ class ASTWriter : public ASTDeserializationListener,
206213
/// the declaration's ID.
207214
std::vector<serialization::DeclOffset> DeclOffsets;
208215

216+
/// The offset of the DECLTYPES_BLOCK. The offsets in DeclOffsets
217+
/// are relative to this value.
218+
uint64_t DeclTypesBlockStartOffset = 0;
219+
209220
/// Sorted (by file offset) vector of pairs of file offset/DeclID.
210221
using LocDeclIDsTy =
211222
SmallVector<std::pair<unsigned, serialization::DeclID>, 64>;
@@ -441,7 +452,7 @@ class ASTWriter : public ASTDeserializationListener,
441452

442453
/// A list of the module file extension writers.
443454
std::vector<std::unique_ptr<ModuleFileExtensionWriter>>
444-
ModuleFileExtensionWriters;
455+
ModuleFileExtensionWriters;
445456

446457
/// Retrieve or create a submodule ID for this module.
447458
unsigned getSubmoduleID(Module *Mod);
@@ -458,7 +469,8 @@ class ASTWriter : public ASTDeserializationListener,
458469
ASTContext &Context);
459470

460471
/// Calculate hash of the pcm content.
461-
static ASTFileSignature createSignature(StringRef Bytes);
472+
static std::pair<ASTFileSignature, ASTFileSignature>
473+
createSignature(StringRef AllBytes, StringRef ASTBlockBytes);
462474

463475
void WriteInputFiles(SourceManager &SourceMgr, HeaderSearchOptions &HSOpts,
464476
bool Modules);

clang/include/clang/Serialization/ModuleFile.h

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,10 @@ class ModuleFile {
168168
/// and modification time to identify this particular file.
169169
ASTFileSignature Signature;
170170

171+
/// The signature of the AST block of the module file, this can be used to
172+
/// unique module files based on AST contents.
173+
ASTFileSignature ASTBlockHash;
174+
171175
/// Whether this module has been directly imported by the
172176
/// user.
173177
bool DirectlyImported = false;
@@ -185,6 +189,9 @@ class ModuleFile {
185189
/// The global bit offset (or base) of this module
186190
uint64_t GlobalBitOffset = 0;
187191

192+
/// The bit offset of the AST block of this module.
193+
uint64_t ASTBlockStartOffset = 0;
194+
188195
/// The serialized bitstream data for this file.
189196
StringRef Data;
190197

@@ -242,6 +249,9 @@ class ModuleFile {
242249
/// Cursor used to read source location entries.
243250
llvm::BitstreamCursor SLocEntryCursor;
244251

252+
/// The bit offset to the start of the SOURCE_MANAGER_BLOCK.
253+
uint64_t SourceManagerBlockStartOffset = 0;
254+
245255
/// The number of source location entries in this AST file.
246256
unsigned LocalNumSLocEntries = 0;
247257

@@ -409,11 +419,14 @@ class ModuleFile {
409419

410420
// === Declarations ===
411421

412-
/// DeclsCursor - This is a cursor to the start of the DECLS_BLOCK block. It
413-
/// has read all the abbreviations at the start of the block and is ready to
414-
/// jump around with these in context.
422+
/// DeclsCursor - This is a cursor to the start of the DECLTYPES_BLOCK block.
423+
/// It has read all the abbreviations at the start of the block and is ready
424+
/// to jump around with these in context.
415425
llvm::BitstreamCursor DeclsCursor;
416426

427+
/// The offset to the start of the DECLTYPES_BLOCK block.
428+
uint64_t DeclsBlockStartOffset = 0;
429+
417430
/// The number of declarations in this AST file.
418431
unsigned LocalNumDecls = 0;
419432

clang/lib/Serialization/ASTReader.cpp

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1336,6 +1336,7 @@ bool ASTReader::ReadSourceManagerBlock(ModuleFile &F) {
13361336
Error(std::move(Err));
13371337
return true;
13381338
}
1339+
F.SourceManagerBlockStartOffset = SLocEntryCursor.GetCurrentBitNo();
13391340

13401341
RecordData Record;
13411342
while (true) {
@@ -1628,13 +1629,17 @@ SourceLocation ASTReader::getImportLocation(ModuleFile *F) {
16281629
/// Enter a subblock of the specified BlockID with the specified cursor. Read
16291630
/// the abbreviations that are at the top of the block and then leave the cursor
16301631
/// pointing into the block.
1631-
bool ASTReader::ReadBlockAbbrevs(BitstreamCursor &Cursor, unsigned BlockID) {
1632+
bool ASTReader::ReadBlockAbbrevs(BitstreamCursor &Cursor, unsigned BlockID,
1633+
uint64_t *StartOfBlockOffset) {
16321634
if (llvm::Error Err = Cursor.EnterSubBlock(BlockID)) {
16331635
// FIXME this drops errors on the floor.
16341636
consumeError(std::move(Err));
16351637
return true;
16361638
}
16371639

1640+
if (StartOfBlockOffset)
1641+
*StartOfBlockOffset = Cursor.GetCurrentBitNo();
1642+
16381643
while (true) {
16391644
uint64_t Offset = Cursor.GetCurrentBitNo();
16401645
Expected<unsigned> MaybeCode = Cursor.ReadCode();
@@ -2933,6 +2938,7 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
29332938
Error(std::move(Err));
29342939
return Failure;
29352940
}
2941+
F.ASTBlockStartOffset = Stream.GetCurrentBitNo();
29362942

29372943
// Read all of the records and blocks for the AST file.
29382944
RecordData Record;
@@ -2973,7 +2979,8 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
29732979
Error(std::move(Err));
29742980
return Failure;
29752981
}
2976-
if (ReadBlockAbbrevs(F.DeclsCursor, DECLTYPES_BLOCK_ID)) {
2982+
if (ReadBlockAbbrevs(F.DeclsCursor, DECLTYPES_BLOCK_ID,
2983+
&F.DeclsBlockStartOffset)) {
29772984
Error("malformed block record in AST file");
29782985
return Failure;
29792986
}
@@ -3377,7 +3384,7 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
33773384
F.SLocEntryOffsets = (const uint32_t *)Blob.data();
33783385
F.LocalNumSLocEntries = Record[0];
33793386
unsigned SLocSpaceSize = Record[1];
3380-
F.SLocEntryOffsetsBase = Record[2];
3387+
F.SLocEntryOffsetsBase = Record[2] + F.SourceManagerBlockStartOffset;
33813388
std::tie(F.SLocEntryBaseID, F.SLocEntryBaseOffset) =
33823389
SourceMgr.AllocateLoadedSLocEntries(F.LocalNumSLocEntries,
33833390
SLocSpaceSize);
@@ -3696,7 +3703,7 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
36963703
F.MacroOffsets = (const uint32_t *)Blob.data();
36973704
F.LocalNumMacros = Record[0];
36983705
unsigned LocalBaseMacroID = Record[1];
3699-
F.MacroOffsetsBase = Record[2];
3706+
F.MacroOffsetsBase = Record[2] + F.ASTBlockStartOffset;
37003707
F.BaseMacroID = getTotalNumMacros();
37013708

37023709
if (F.LocalNumMacros > 0) {
@@ -3837,17 +3844,18 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const {
38373844

38383845
while (Data < DataEnd) {
38393846
// FIXME: Looking up dependency modules by filename is horrible. Let's
3840-
// start fixing this with prebuilt and explicit modules and see how it
3841-
// goes...
3847+
// start fixing this with prebuilt, explicit and implicit modules and see
3848+
// how it goes...
38423849
using namespace llvm::support;
38433850
ModuleKind Kind = static_cast<ModuleKind>(
38443851
endian::readNext<uint8_t, little, unaligned>(Data));
38453852
uint16_t Len = endian::readNext<uint16_t, little, unaligned>(Data);
38463853
StringRef Name = StringRef((const char*)Data, Len);
38473854
Data += Len;
3848-
ModuleFile *OM = (Kind == MK_PrebuiltModule || Kind == MK_ExplicitModule
3849-
? ModuleMgr.lookupByModuleName(Name)
3850-
: ModuleMgr.lookupByFileName(Name));
3855+
ModuleFile *OM = (Kind == MK_PrebuiltModule || Kind == MK_ExplicitModule ||
3856+
Kind == MK_ImplicitModule
3857+
? ModuleMgr.lookupByModuleName(Name)
3858+
: ModuleMgr.lookupByFileName(Name));
38513859
if (!OM) {
38523860
std::string Msg =
38533861
"SourceLocation remap refers to unknown module, cannot find ";
@@ -4736,6 +4744,11 @@ ASTReader::ASTReadResult ASTReader::readUnhashedControlBlockImpl(
47364744
if (F)
47374745
F->Signature = ASTFileSignature::create(Record.begin(), Record.end());
47384746
break;
4747+
case AST_BLOCK_HASH:
4748+
if (F)
4749+
F->ASTBlockHash =
4750+
ASTFileSignature::create(Record.begin(), Record.end());
4751+
break;
47394752
case DIAGNOSTIC_OPTIONS: {
47404753
bool Complain = (ClientLoadCapabilities & ARR_OutOfDate) == 0;
47414754
if (Listener && ValidateDiagnosticOptions &&
@@ -6350,7 +6363,8 @@ ASTReader::RecordLocation ASTReader::TypeCursorForIndex(unsigned Index) {
63506363
assert(I != GlobalTypeMap.end() && "Corrupted global type map");
63516364
ModuleFile *M = I->second;
63526365
return RecordLocation(
6353-
M, M->TypeOffsets[Index - M->BaseTypeIndex].getBitOffset());
6366+
M, M->TypeOffsets[Index - M->BaseTypeIndex].getBitOffset() +
6367+
M->DeclsBlockStartOffset);
63546368
}
63556369

63566370
static llvm::Optional<Type::TypeClass> getTypeClassForCode(TypeCode code) {

clang/lib/Serialization/ASTReaderDecl.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2869,7 +2869,7 @@ ASTReader::DeclCursorForID(DeclID ID, SourceLocation &Loc) {
28692869
const DeclOffset &DOffs =
28702870
M->DeclOffsets[ID - M->BaseDeclID - NUM_PREDEF_DECL_IDS];
28712871
Loc = TranslateSourceLocation(*M, DOffs.getLocation());
2872-
return RecordLocation(M, DOffs.getBitOffset());
2872+
return RecordLocation(M, DOffs.getBitOffset(M->DeclsBlockStartOffset));
28732873
}
28742874

28752875
ASTReader::RecordLocation ASTReader::getLocalBitOffset(uint64_t GlobalOffset) {

0 commit comments

Comments
 (0)