Skip to content

Commit e90ab0e

Browse files
authored
[Serialization] Remove delta encoding optimization (#145670)
See the discussion in #145529. This slightly increases the PCM size (~5%). Some data (in-memory preamble size in clangd): SemaExpr.cpp: 77MB -> 80MB; FindTarget.cpp: 71MB -> 75MB.
1 parent 183ff08 commit e90ab0e

File tree

8 files changed

+53
-238
lines changed

8 files changed

+53
-238
lines changed

clang/include/clang/Serialization/ASTReader.h

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -464,8 +464,6 @@ class ASTReader
464464
using ModuleReverseIterator = ModuleManager::ModuleReverseIterator;
465465

466466
private:
467-
using LocSeq = SourceLocationSequence;
468-
469467
/// The receiver of some callbacks invoked by ASTReader.
470468
std::unique_ptr<ASTReaderListener> Listener;
471469

@@ -2445,18 +2443,16 @@ class ASTReader
24452443
/// Read a source location from raw form and return it in its
24462444
/// originating module file's source location space.
24472445
std::pair<SourceLocation, unsigned>
2448-
ReadUntranslatedSourceLocation(RawLocEncoding Raw,
2449-
LocSeq *Seq = nullptr) const {
2450-
return SourceLocationEncoding::decode(Raw, Seq);
2446+
ReadUntranslatedSourceLocation(RawLocEncoding Raw) const {
2447+
return SourceLocationEncoding::decode(Raw);
24512448
}
24522449

24532450
/// Read a source location from raw form.
2454-
SourceLocation ReadSourceLocation(ModuleFile &MF, RawLocEncoding Raw,
2455-
LocSeq *Seq = nullptr) const {
2451+
SourceLocation ReadSourceLocation(ModuleFile &MF, RawLocEncoding Raw) const {
24562452
if (!MF.ModuleOffsetMap.empty())
24572453
ReadModuleOffsetMap(MF);
24582454

2459-
auto [Loc, ModuleFileIndex] = ReadUntranslatedSourceLocation(Raw, Seq);
2455+
auto [Loc, ModuleFileIndex] = ReadUntranslatedSourceLocation(Raw);
24602456
ModuleFile *OwningModuleFile =
24612457
ModuleFileIndex == 0 ? &MF : MF.TransitiveImports[ModuleFileIndex - 1];
24622458

@@ -2484,9 +2480,9 @@ class ASTReader
24842480

24852481
/// Read a source location.
24862482
SourceLocation ReadSourceLocation(ModuleFile &ModuleFile,
2487-
const RecordDataImpl &Record, unsigned &Idx,
2488-
LocSeq *Seq = nullptr) {
2489-
return ReadSourceLocation(ModuleFile, Record[Idx++], Seq);
2483+
const RecordDataImpl &Record,
2484+
unsigned &Idx) {
2485+
return ReadSourceLocation(ModuleFile, Record[Idx++]);
24902486
}
24912487

24922488
/// Read a FileID.
@@ -2505,7 +2501,7 @@ class ASTReader
25052501

25062502
/// Read a source range.
25072503
SourceRange ReadSourceRange(ModuleFile &F, const RecordData &Record,
2508-
unsigned &Idx, LocSeq *Seq = nullptr);
2504+
unsigned &Idx);
25092505

25102506
static llvm::BitVector ReadBitVector(const RecordData &Record,
25112507
const StringRef Blob);

clang/include/clang/Serialization/ASTRecordReader.h

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ class OMPChildren;
3232
class ASTRecordReader
3333
: public serialization::DataStreamBasicReader<ASTRecordReader> {
3434
using ModuleFile = serialization::ModuleFile;
35-
using LocSeq = SourceLocationSequence;
3635

3736
ASTReader *Reader;
3837
ModuleFile *F;
@@ -160,7 +159,7 @@ class ASTRecordReader
160159
TypeSourceInfo *readTypeSourceInfo();
161160

162161
/// Reads the location information for a type.
163-
void readTypeLoc(TypeLoc TL, LocSeq *Seq = nullptr);
162+
void readTypeLoc(TypeLoc TL);
164163

165164
/// Map a local type ID within a given AST file to a global type ID.
166165
serialization::TypeID getGlobalTypeID(serialization::TypeID LocalID) const {
@@ -287,13 +286,13 @@ class ASTRecordReader
287286
void readOpenACCRoutineDeclAttr(OpenACCRoutineDeclAttr *A);
288287

289288
/// Read a source location, advancing Idx.
290-
SourceLocation readSourceLocation(LocSeq *Seq = nullptr) {
291-
return Reader->ReadSourceLocation(*F, Record, Idx, Seq);
289+
SourceLocation readSourceLocation() {
290+
return Reader->ReadSourceLocation(*F, Record, Idx);
292291
}
293292

294293
/// Read a source range, advancing Idx.
295-
SourceRange readSourceRange(LocSeq *Seq = nullptr) {
296-
return Reader->ReadSourceRange(*F, Record, Idx, Seq);
294+
SourceRange readSourceRange() {
295+
return Reader->ReadSourceRange(*F, Record, Idx);
297296
}
298297

299298
/// Read an arbitrary constant value, advancing Idx.

clang/include/clang/Serialization/ASTRecordWriter.h

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ class TypeLoc;
2929
/// An object for streaming information to a record.
3030
class ASTRecordWriter
3131
: public serialization::DataStreamBasicWriter<ASTRecordWriter> {
32-
using LocSeq = SourceLocationSequence;
3332

3433
ASTWriter *Writer;
3534
ASTWriter::RecordDataImpl *Record;
@@ -147,8 +146,8 @@ class ASTRecordWriter
147146
void AddFunctionDefinition(const FunctionDecl *FD);
148147

149148
/// Emit a source location.
150-
void AddSourceLocation(SourceLocation Loc, LocSeq *Seq = nullptr) {
151-
return Writer->AddSourceLocation(Loc, *Record, Seq);
149+
void AddSourceLocation(SourceLocation Loc) {
150+
return Writer->AddSourceLocation(Loc, *Record);
152151
}
153152
void writeSourceLocation(SourceLocation Loc) {
154153
AddSourceLocation(Loc);
@@ -174,8 +173,8 @@ class ASTRecordWriter
174173
}
175174

176175
/// Emit a source range.
177-
void AddSourceRange(SourceRange Range, LocSeq *Seq = nullptr) {
178-
return Writer->AddSourceRange(Range, *Record, Seq);
176+
void AddSourceRange(SourceRange Range) {
177+
return Writer->AddSourceRange(Range, *Record);
179178
}
180179

181180
void writeBool(bool Value) {
@@ -245,7 +244,7 @@ class ASTRecordWriter
245244
void AddTypeSourceInfo(TypeSourceInfo *TInfo);
246245

247246
/// Emits source location information for a type. Does not emit the type.
248-
void AddTypeLoc(TypeLoc TL, LocSeq *Seq = nullptr);
247+
void AddTypeLoc(TypeLoc TL);
249248

250249
/// Emits a template argument location info.
251250
void AddTemplateArgumentLocInfo(TemplateArgument::ArgKind Kind,

clang/include/clang/Serialization/ASTWriter.h

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -115,8 +115,6 @@ class ASTWriter : public ASTDeserializationListener,
115115
using TypeIdxMap = llvm::DenseMap<QualType, serialization::TypeIdx,
116116
serialization::UnsafeQualTypeDenseMapInfo>;
117117

118-
using LocSeq = SourceLocationSequence;
119-
120118
/// The bitstream writer used to emit this precompiled header.
121119
llvm::BitstreamWriter &Stream;
122120

@@ -733,16 +731,14 @@ class ASTWriter : public ASTDeserializationListener,
733731
void AddFileID(FileID FID, RecordDataImpl &Record);
734732

735733
/// Emit a source location.
736-
void AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record,
737-
LocSeq *Seq = nullptr);
734+
void AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record);
738735

739736
/// Return the raw encodings for source locations.
740737
SourceLocationEncoding::RawLocEncoding
741-
getRawSourceLocationEncoding(SourceLocation Loc, LocSeq *Seq = nullptr);
738+
getRawSourceLocationEncoding(SourceLocation Loc);
742739

743740
/// Emit a source range.
744-
void AddSourceRange(SourceRange Range, RecordDataImpl &Record,
745-
LocSeq *Seq = nullptr);
741+
void AddSourceRange(SourceRange Range, RecordDataImpl &Record);
746742

747743
/// Emit a reference to an identifier.
748744
void AddIdentifierRef(const IdentifierInfo *II, RecordDataImpl &Record);

clang/include/clang/Serialization/SourceLocationEncoding.h

Lines changed: 6 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,6 @@
2525
// * C: The macro bit. We rotate it to the lowest bit so that we can save some
2626
// space in case the index of the module file is 0.
2727
//
28-
// Specially, if the index of the module file is 0, we allow to encode a
29-
// sequence of locations we store only differences between successive elements.
3028
//
3129
//===----------------------------------------------------------------------===//
3230

@@ -38,7 +36,6 @@
3836
#define LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H
3937

4038
namespace clang {
41-
class SourceLocationSequence;
4239

4340
/// Serialized encoding of SourceLocations without context.
4441
/// Optimized to have small unsigned values (=> small after VBR encoding).
@@ -54,119 +51,22 @@ class SourceLocationEncoding {
5451
static UIntTy decodeRaw(UIntTy Raw) {
5552
return (Raw >> 1) | (Raw << (UIntBits - 1));
5653
}
57-
friend SourceLocationSequence;
5854

5955
public:
6056
using RawLocEncoding = uint64_t;
6157

6258
static RawLocEncoding encode(SourceLocation Loc, UIntTy BaseOffset,
63-
unsigned BaseModuleFileIndex,
64-
SourceLocationSequence * = nullptr);
65-
static std::pair<SourceLocation, unsigned>
66-
decode(RawLocEncoding, SourceLocationSequence * = nullptr);
67-
};
68-
69-
/// Serialized encoding of a sequence of SourceLocations.
70-
///
71-
/// Optimized to produce small values when locations with the sequence are
72-
/// similar. Each element can be delta-encoded against the last nonzero element.
73-
///
74-
/// Sequences should be started by creating a SourceLocationSequence::State,
75-
/// and then passed around as SourceLocationSequence*. Example:
76-
///
77-
/// // establishes a sequence
78-
/// void EmitTopLevelThing() {
79-
/// SourceLocationSequence::State Seq;
80-
/// EmitContainedThing(Seq);
81-
/// EmitRecursiveThing(Seq);
82-
/// }
83-
///
84-
/// // optionally part of a sequence
85-
/// void EmitContainedThing(SourceLocationSequence *Seq = nullptr) {
86-
/// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq));
87-
/// }
88-
///
89-
/// // establishes a sequence if there isn't one already
90-
/// void EmitRecursiveThing(SourceLocationSequence *ParentSeq = nullptr) {
91-
/// SourceLocationSequence::State Seq(ParentSeq);
92-
/// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq));
93-
/// EmitRecursiveThing(Seq);
94-
/// }
95-
///
96-
class SourceLocationSequence {
97-
using UIntTy = SourceLocation::UIntTy;
98-
using EncodedTy = uint64_t;
99-
constexpr static auto UIntBits = SourceLocationEncoding::UIntBits;
100-
static_assert(sizeof(EncodedTy) > sizeof(UIntTy), "Need one extra bit!");
101-
102-
// Prev stores the rotated last nonzero location.
103-
UIntTy &Prev;
104-
105-
// Zig-zag encoding turns small signed integers into small unsigned integers.
106-
// 0 => 0, -1 => 1, 1 => 2, -2 => 3, ...
107-
static UIntTy zigZag(UIntTy V) {
108-
UIntTy Sign = (V & (1 << (UIntBits - 1))) ? UIntTy(-1) : UIntTy(0);
109-
return Sign ^ (V << 1);
110-
}
111-
static UIntTy zagZig(UIntTy V) { return (V >> 1) ^ -(V & 1); }
112-
113-
SourceLocationSequence(UIntTy &Prev) : Prev(Prev) {}
114-
115-
EncodedTy encodeRaw(UIntTy Raw) {
116-
if (Raw == 0)
117-
return 0;
118-
UIntTy Rotated = SourceLocationEncoding::encodeRaw(Raw);
119-
if (Prev == 0)
120-
return Prev = Rotated;
121-
UIntTy Delta = Rotated - Prev;
122-
Prev = Rotated;
123-
// Exactly one 33 bit value is possible! (1 << 32).
124-
// This is because we have two representations of zero: trivial & relative.
125-
return 1 + EncodedTy{zigZag(Delta)};
126-
}
127-
UIntTy decodeRaw(EncodedTy Encoded) {
128-
if (Encoded == 0)
129-
return 0;
130-
if (Prev == 0)
131-
return SourceLocationEncoding::decodeRaw(Prev = Encoded);
132-
return SourceLocationEncoding::decodeRaw(Prev += zagZig(Encoded - 1));
133-
}
134-
135-
public:
136-
SourceLocation decode(EncodedTy Encoded) {
137-
return SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
138-
}
139-
EncodedTy encode(SourceLocation Loc) {
140-
return encodeRaw(Loc.getRawEncoding());
141-
}
142-
143-
class State;
144-
};
145-
146-
/// This object establishes a SourceLocationSequence.
147-
class SourceLocationSequence::State {
148-
UIntTy Prev = 0;
149-
SourceLocationSequence Seq;
150-
151-
public:
152-
// If Parent is provided and non-null, then this root becomes part of that
153-
// enclosing sequence instead of establishing a new one.
154-
State(SourceLocationSequence *Parent = nullptr)
155-
: Seq(Parent ? Parent->Prev : Prev) {}
156-
157-
// Implicit conversion for uniform use of roots vs propagated sequences.
158-
operator SourceLocationSequence *() { return &Seq; }
59+
unsigned BaseModuleFileIndex);
60+
static std::pair<SourceLocation, unsigned> decode(RawLocEncoding);
15961
};
16062

16163
inline SourceLocationEncoding::RawLocEncoding
16264
SourceLocationEncoding::encode(SourceLocation Loc, UIntTy BaseOffset,
163-
unsigned BaseModuleFileIndex,
164-
SourceLocationSequence *Seq) {
65+
unsigned BaseModuleFileIndex) {
16566
// If the source location is a local source location (BaseOffset is zero),
16667
// encode its raw value directly; delta encoding is no longer applied.
16768
if (!BaseOffset)
168-
return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding());
169-
69+
return encodeRaw(Loc.getRawEncoding());
17070
if (Loc.isInvalid())
17171
return 0;
17272

@@ -183,13 +83,11 @@ SourceLocationEncoding::encode(SourceLocation Loc, UIntTy BaseOffset,
18383
return Encoded;
18484
}
18585
inline std::pair<SourceLocation, unsigned>
186-
SourceLocationEncoding::decode(RawLocEncoding Encoded,
187-
SourceLocationSequence *Seq) {
86+
SourceLocationEncoding::decode(RawLocEncoding Encoded) {
18887
unsigned ModuleFileIndex = Encoded >> 32;
18988

19089
if (!ModuleFileIndex)
191-
return {Seq ? Seq->decode(Encoded)
192-
: SourceLocation::getFromRawEncoding(decodeRaw(Encoded)),
90+
return {SourceLocation::getFromRawEncoding(decodeRaw(Encoded)),
19391
ModuleFileIndex};
19492

19593
Encoded &= llvm::maskTrailingOnes<RawLocEncoding>(32);

clang/lib/Serialization/ASTReader.cpp

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1914,10 +1914,9 @@ bool ASTReader::ReadSLocEntry(int ID) {
19141914
}
19151915

19161916
case SM_SLOC_EXPANSION_ENTRY: {
1917-
LocSeq::State Seq;
1918-
SourceLocation SpellingLoc = ReadSourceLocation(*F, Record[1], Seq);
1919-
SourceLocation ExpansionBegin = ReadSourceLocation(*F, Record[2], Seq);
1920-
SourceLocation ExpansionEnd = ReadSourceLocation(*F, Record[3], Seq);
1917+
SourceLocation SpellingLoc = ReadSourceLocation(*F, Record[1]);
1918+
SourceLocation ExpansionBegin = ReadSourceLocation(*F, Record[2]);
1919+
SourceLocation ExpansionEnd = ReadSourceLocation(*F, Record[3]);
19211920
SourceMgr.createExpansionLoc(SpellingLoc, ExpansionBegin, ExpansionEnd,
19221921
Record[5], Record[4], ID,
19231922
BaseOffset + Record[0]);
@@ -7072,13 +7071,10 @@ QualType ASTReader::readTypeRecord(TypeID ID) {
70727071
namespace clang {
70737072

70747073
class TypeLocReader : public TypeLocVisitor<TypeLocReader> {
7075-
using LocSeq = SourceLocationSequence;
7076-
70777074
ASTRecordReader &Reader;
7078-
LocSeq *Seq;
70797075

7080-
SourceLocation readSourceLocation() { return Reader.readSourceLocation(Seq); }
7081-
SourceRange readSourceRange() { return Reader.readSourceRange(Seq); }
7076+
SourceLocation readSourceLocation() { return Reader.readSourceLocation(); }
7077+
SourceRange readSourceRange() { return Reader.readSourceRange(); }
70827078

70837079
TypeSourceInfo *GetTypeSourceInfo() {
70847080
return Reader.readTypeSourceInfo();
@@ -7093,8 +7089,7 @@ class TypeLocReader : public TypeLocVisitor<TypeLocReader> {
70937089
}
70947090

70957091
public:
7096-
TypeLocReader(ASTRecordReader &Reader, LocSeq *Seq)
7097-
: Reader(Reader), Seq(Seq) {}
7092+
TypeLocReader(ASTRecordReader &Reader) : Reader(Reader) {}
70987093

70997094
// We want compile-time assurance that we've enumerated all of
71007095
// these, so unfortunately we have to declare them first, then
@@ -7458,9 +7453,8 @@ void TypeLocReader::VisitDependentBitIntTypeLoc(
74587453
TL.setNameLoc(readSourceLocation());
74597454
}
74607455

7461-
void ASTRecordReader::readTypeLoc(TypeLoc TL, LocSeq *ParentSeq) {
7462-
LocSeq::State Seq(ParentSeq);
7463-
TypeLocReader TLR(*this, Seq);
7456+
void ASTRecordReader::readTypeLoc(TypeLoc TL) {
7457+
TypeLocReader TLR(*this);
74647458
for (; !TL.isNull(); TL = TL.getNextTypeLoc())
74657459
TLR.Visit(TL);
74667460
}
@@ -10025,9 +10019,9 @@ ASTRecordReader::readNestedNameSpecifierLoc() {
1002510019
}
1002610020

1002710021
SourceRange ASTReader::ReadSourceRange(ModuleFile &F, const RecordData &Record,
10028-
unsigned &Idx, LocSeq *Seq) {
10029-
SourceLocation beg = ReadSourceLocation(F, Record, Idx, Seq);
10030-
SourceLocation end = ReadSourceLocation(F, Record, Idx, Seq);
10022+
unsigned &Idx) {
10023+
SourceLocation beg = ReadSourceLocation(F, Record, Idx);
10024+
SourceLocation end = ReadSourceLocation(F, Record, Idx);
1003110025
return SourceRange(beg, end);
1003210026
}
1003310027

0 commit comments

Comments
 (0)