Skip to content

[Serialization] Remove delta encoding optimization #145670

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 8 additions & 12 deletions clang/include/clang/Serialization/ASTReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -464,8 +464,6 @@ class ASTReader
using ModuleReverseIterator = ModuleManager::ModuleReverseIterator;

private:
using LocSeq = SourceLocationSequence;

/// The receiver of some callbacks invoked by ASTReader.
std::unique_ptr<ASTReaderListener> Listener;

Expand Down Expand Up @@ -2445,18 +2443,16 @@ class ASTReader
/// Read a source location from raw form and return it in its
/// originating module file's source location space.
std::pair<SourceLocation, unsigned>
ReadUntranslatedSourceLocation(RawLocEncoding Raw,
LocSeq *Seq = nullptr) const {
return SourceLocationEncoding::decode(Raw, Seq);
ReadUntranslatedSourceLocation(RawLocEncoding Raw) const {
return SourceLocationEncoding::decode(Raw);
}

/// Read a source location from raw form.
SourceLocation ReadSourceLocation(ModuleFile &MF, RawLocEncoding Raw,
LocSeq *Seq = nullptr) const {
SourceLocation ReadSourceLocation(ModuleFile &MF, RawLocEncoding Raw) const {
if (!MF.ModuleOffsetMap.empty())
ReadModuleOffsetMap(MF);

auto [Loc, ModuleFileIndex] = ReadUntranslatedSourceLocation(Raw, Seq);
auto [Loc, ModuleFileIndex] = ReadUntranslatedSourceLocation(Raw);
ModuleFile *OwningModuleFile =
ModuleFileIndex == 0 ? &MF : MF.TransitiveImports[ModuleFileIndex - 1];

Expand Down Expand Up @@ -2484,9 +2480,9 @@ class ASTReader

/// Read a source location.
SourceLocation ReadSourceLocation(ModuleFile &ModuleFile,
const RecordDataImpl &Record, unsigned &Idx,
LocSeq *Seq = nullptr) {
return ReadSourceLocation(ModuleFile, Record[Idx++], Seq);
const RecordDataImpl &Record,
unsigned &Idx) {
return ReadSourceLocation(ModuleFile, Record[Idx++]);
}

/// Read a FileID.
Expand All @@ -2505,7 +2501,7 @@ class ASTReader

/// Read a source range.
SourceRange ReadSourceRange(ModuleFile &F, const RecordData &Record,
unsigned &Idx, LocSeq *Seq = nullptr);
unsigned &Idx);

static llvm::BitVector ReadBitVector(const RecordData &Record,
const StringRef Blob);
Expand Down
11 changes: 5 additions & 6 deletions clang/include/clang/Serialization/ASTRecordReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ class OMPChildren;
class ASTRecordReader
: public serialization::DataStreamBasicReader<ASTRecordReader> {
using ModuleFile = serialization::ModuleFile;
using LocSeq = SourceLocationSequence;

ASTReader *Reader;
ModuleFile *F;
Expand Down Expand Up @@ -160,7 +159,7 @@ class ASTRecordReader
TypeSourceInfo *readTypeSourceInfo();

/// Reads the location information for a type.
void readTypeLoc(TypeLoc TL, LocSeq *Seq = nullptr);
void readTypeLoc(TypeLoc TL);

/// Map a local type ID within a given AST file to a global type ID.
serialization::TypeID getGlobalTypeID(serialization::TypeID LocalID) const {
Expand Down Expand Up @@ -287,13 +286,13 @@ class ASTRecordReader
void readOpenACCRoutineDeclAttr(OpenACCRoutineDeclAttr *A);

/// Read a source location, advancing Idx.
SourceLocation readSourceLocation(LocSeq *Seq = nullptr) {
return Reader->ReadSourceLocation(*F, Record, Idx, Seq);
SourceLocation readSourceLocation() {
return Reader->ReadSourceLocation(*F, Record, Idx);
}

/// Read a source range, advancing Idx.
SourceRange readSourceRange(LocSeq *Seq = nullptr) {
return Reader->ReadSourceRange(*F, Record, Idx, Seq);
SourceRange readSourceRange() {
return Reader->ReadSourceRange(*F, Record, Idx);
}

/// Read an arbitrary constant value, advancing Idx.
Expand Down
11 changes: 5 additions & 6 deletions clang/include/clang/Serialization/ASTRecordWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ class TypeLoc;
/// An object for streaming information to a record.
class ASTRecordWriter
: public serialization::DataStreamBasicWriter<ASTRecordWriter> {
using LocSeq = SourceLocationSequence;

ASTWriter *Writer;
ASTWriter::RecordDataImpl *Record;
Expand Down Expand Up @@ -147,8 +146,8 @@ class ASTRecordWriter
void AddFunctionDefinition(const FunctionDecl *FD);

/// Emit a source location.
void AddSourceLocation(SourceLocation Loc, LocSeq *Seq = nullptr) {
return Writer->AddSourceLocation(Loc, *Record, Seq);
void AddSourceLocation(SourceLocation Loc) {
return Writer->AddSourceLocation(Loc, *Record);
}
void writeSourceLocation(SourceLocation Loc) {
AddSourceLocation(Loc);
Expand All @@ -174,8 +173,8 @@ class ASTRecordWriter
}

/// Emit a source range.
void AddSourceRange(SourceRange Range, LocSeq *Seq = nullptr) {
return Writer->AddSourceRange(Range, *Record, Seq);
void AddSourceRange(SourceRange Range) {
return Writer->AddSourceRange(Range, *Record);
}

void writeBool(bool Value) {
Expand Down Expand Up @@ -245,7 +244,7 @@ class ASTRecordWriter
void AddTypeSourceInfo(TypeSourceInfo *TInfo);

/// Emits source location information for a type. Does not emit the type.
void AddTypeLoc(TypeLoc TL, LocSeq *Seq = nullptr);
void AddTypeLoc(TypeLoc TL);

/// Emits a template argument location info.
void AddTemplateArgumentLocInfo(TemplateArgument::ArgKind Kind,
Expand Down
10 changes: 3 additions & 7 deletions clang/include/clang/Serialization/ASTWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,6 @@ class ASTWriter : public ASTDeserializationListener,
using TypeIdxMap = llvm::DenseMap<QualType, serialization::TypeIdx,
serialization::UnsafeQualTypeDenseMapInfo>;

using LocSeq = SourceLocationSequence;

/// The bitstream writer used to emit this precompiled header.
llvm::BitstreamWriter &Stream;

Expand Down Expand Up @@ -733,16 +731,14 @@ class ASTWriter : public ASTDeserializationListener,
void AddFileID(FileID FID, RecordDataImpl &Record);

/// Emit a source location.
void AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record,
LocSeq *Seq = nullptr);
void AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record);

/// Return the raw encodings for source locations.
SourceLocationEncoding::RawLocEncoding
getRawSourceLocationEncoding(SourceLocation Loc, LocSeq *Seq = nullptr);
getRawSourceLocationEncoding(SourceLocation Loc);

/// Emit a source range.
void AddSourceRange(SourceRange Range, RecordDataImpl &Record,
LocSeq *Seq = nullptr);
void AddSourceRange(SourceRange Range, RecordDataImpl &Record);

/// Emit a reference to an identifier.
void AddIdentifierRef(const IdentifierInfo *II, RecordDataImpl &Record);
Expand Down
114 changes: 6 additions & 108 deletions clang/include/clang/Serialization/SourceLocationEncoding.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@
// * C: The macro bit. We rotate it to the lowest bit so that we can save some
// space in case the index of the module file is 0.
//
// Specially, if the index of the module file is 0, we allow to encode a
// sequence of locations we store only differences between successive elements.
//
//===----------------------------------------------------------------------===//

Expand All @@ -38,7 +36,6 @@
#define LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H

namespace clang {
class SourceLocationSequence;

/// Serialized encoding of SourceLocations without context.
/// Optimized to have small unsigned values (=> small after VBR encoding).
Expand All @@ -54,119 +51,22 @@ class SourceLocationEncoding {
static UIntTy decodeRaw(UIntTy Raw) {
return (Raw >> 1) | (Raw << (UIntBits - 1));
}
friend SourceLocationSequence;

public:
using RawLocEncoding = uint64_t;

static RawLocEncoding encode(SourceLocation Loc, UIntTy BaseOffset,
unsigned BaseModuleFileIndex,
SourceLocationSequence * = nullptr);
static std::pair<SourceLocation, unsigned>
decode(RawLocEncoding, SourceLocationSequence * = nullptr);
};

/// Serialized encoding of a sequence of SourceLocations.
///
/// Optimized to produce small values when locations with the sequence are
/// similar. Each element can be delta-encoded against the last nonzero element.
///
/// Sequences should be started by creating a SourceLocationSequence::State,
/// and then passed around as SourceLocationSequence*. Example:
///
/// // establishes a sequence
/// void EmitTopLevelThing() {
/// SourceLocationSequence::State Seq;
/// EmitContainedThing(Seq);
/// EmitRecursiveThing(Seq);
/// }
///
/// // optionally part of a sequence
/// void EmitContainedThing(SourceLocationSequence *Seq = nullptr) {
/// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq));
/// }
///
/// // establishes a sequence if there isn't one already
/// void EmitRecursiveThing(SourceLocationSequence *ParentSeq = nullptr) {
/// SourceLocationSequence::State Seq(ParentSeq);
/// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq));
/// EmitRecursiveThing(Seq);
/// }
///
class SourceLocationSequence {
using UIntTy = SourceLocation::UIntTy;
using EncodedTy = uint64_t;
constexpr static auto UIntBits = SourceLocationEncoding::UIntBits;
static_assert(sizeof(EncodedTy) > sizeof(UIntTy), "Need one extra bit!");

// Prev stores the rotated last nonzero location.
UIntTy &Prev;

// Zig-zag encoding turns small signed integers into small unsigned integers.
// 0 => 0, -1 => 1, 1 => 2, -2 => 3, ...
static UIntTy zigZag(UIntTy V) {
UIntTy Sign = (V & (1 << (UIntBits - 1))) ? UIntTy(-1) : UIntTy(0);
return Sign ^ (V << 1);
}
static UIntTy zagZig(UIntTy V) { return (V >> 1) ^ -(V & 1); }

SourceLocationSequence(UIntTy &Prev) : Prev(Prev) {}

EncodedTy encodeRaw(UIntTy Raw) {
if (Raw == 0)
return 0;
UIntTy Rotated = SourceLocationEncoding::encodeRaw(Raw);
if (Prev == 0)
return Prev = Rotated;
UIntTy Delta = Rotated - Prev;
Prev = Rotated;
// Exactly one 33 bit value is possible! (1 << 32).
// This is because we have two representations of zero: trivial & relative.
return 1 + EncodedTy{zigZag(Delta)};
}
UIntTy decodeRaw(EncodedTy Encoded) {
if (Encoded == 0)
return 0;
if (Prev == 0)
return SourceLocationEncoding::decodeRaw(Prev = Encoded);
return SourceLocationEncoding::decodeRaw(Prev += zagZig(Encoded - 1));
}

public:
SourceLocation decode(EncodedTy Encoded) {
return SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
}
EncodedTy encode(SourceLocation Loc) {
return encodeRaw(Loc.getRawEncoding());
}

class State;
};

/// This object establishes a SourceLocationSequence.
class SourceLocationSequence::State {
UIntTy Prev = 0;
SourceLocationSequence Seq;

public:
// If Parent is provided and non-null, then this root becomes part of that
// enclosing sequence instead of establishing a new one.
State(SourceLocationSequence *Parent = nullptr)
: Seq(Parent ? Parent->Prev : Prev) {}

// Implicit conversion for uniform use of roots vs propagated sequences.
operator SourceLocationSequence *() { return &Seq; }
unsigned BaseModuleFileIndex);
static std::pair<SourceLocation, unsigned> decode(RawLocEncoding);
};

inline SourceLocationEncoding::RawLocEncoding
SourceLocationEncoding::encode(SourceLocation Loc, UIntTy BaseOffset,
unsigned BaseModuleFileIndex,
SourceLocationSequence *Seq) {
unsigned BaseModuleFileIndex) {
// A local source location (no base offset) needs no module file index, so
// it is stored as just its rotated raw encoding.
if (!BaseOffset)
return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding());

return encodeRaw(Loc.getRawEncoding());
if (Loc.isInvalid())
return 0;

Expand All @@ -183,13 +83,11 @@ SourceLocationEncoding::encode(SourceLocation Loc, UIntTy BaseOffset,
return Encoded;
}
inline std::pair<SourceLocation, unsigned>
SourceLocationEncoding::decode(RawLocEncoding Encoded,
SourceLocationSequence *Seq) {
SourceLocationEncoding::decode(RawLocEncoding Encoded) {
unsigned ModuleFileIndex = Encoded >> 32;

if (!ModuleFileIndex)
return {Seq ? Seq->decode(Encoded)
: SourceLocation::getFromRawEncoding(decodeRaw(Encoded)),
return {SourceLocation::getFromRawEncoding(decodeRaw(Encoded)),
ModuleFileIndex};

Encoded &= llvm::maskTrailingOnes<RawLocEncoding>(32);
Expand Down
28 changes: 11 additions & 17 deletions clang/lib/Serialization/ASTReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1914,10 +1914,9 @@ bool ASTReader::ReadSLocEntry(int ID) {
}

case SM_SLOC_EXPANSION_ENTRY: {
LocSeq::State Seq;
SourceLocation SpellingLoc = ReadSourceLocation(*F, Record[1], Seq);
SourceLocation ExpansionBegin = ReadSourceLocation(*F, Record[2], Seq);
SourceLocation ExpansionEnd = ReadSourceLocation(*F, Record[3], Seq);
SourceLocation SpellingLoc = ReadSourceLocation(*F, Record[1]);
SourceLocation ExpansionBegin = ReadSourceLocation(*F, Record[2]);
SourceLocation ExpansionEnd = ReadSourceLocation(*F, Record[3]);
SourceMgr.createExpansionLoc(SpellingLoc, ExpansionBegin, ExpansionEnd,
Record[5], Record[4], ID,
BaseOffset + Record[0]);
Expand Down Expand Up @@ -7072,13 +7071,10 @@ QualType ASTReader::readTypeRecord(TypeID ID) {
namespace clang {

class TypeLocReader : public TypeLocVisitor<TypeLocReader> {
using LocSeq = SourceLocationSequence;

ASTRecordReader &Reader;
LocSeq *Seq;

SourceLocation readSourceLocation() { return Reader.readSourceLocation(Seq); }
SourceRange readSourceRange() { return Reader.readSourceRange(Seq); }
SourceLocation readSourceLocation() { return Reader.readSourceLocation(); }
SourceRange readSourceRange() { return Reader.readSourceRange(); }

TypeSourceInfo *GetTypeSourceInfo() {
return Reader.readTypeSourceInfo();
Expand All @@ -7093,8 +7089,7 @@ class TypeLocReader : public TypeLocVisitor<TypeLocReader> {
}

public:
TypeLocReader(ASTRecordReader &Reader, LocSeq *Seq)
: Reader(Reader), Seq(Seq) {}
TypeLocReader(ASTRecordReader &Reader) : Reader(Reader) {}

// We want compile-time assurance that we've enumerated all of
// these, so unfortunately we have to declare them first, then
Expand Down Expand Up @@ -7458,9 +7453,8 @@ void TypeLocReader::VisitDependentBitIntTypeLoc(
TL.setNameLoc(readSourceLocation());
}

void ASTRecordReader::readTypeLoc(TypeLoc TL, LocSeq *ParentSeq) {
LocSeq::State Seq(ParentSeq);
TypeLocReader TLR(*this, Seq);
void ASTRecordReader::readTypeLoc(TypeLoc TL) {
TypeLocReader TLR(*this);
for (; !TL.isNull(); TL = TL.getNextTypeLoc())
TLR.Visit(TL);
}
Expand Down Expand Up @@ -10016,9 +10010,9 @@ ASTRecordReader::readNestedNameSpecifierLoc() {
}

SourceRange ASTReader::ReadSourceRange(ModuleFile &F, const RecordData &Record,
unsigned &Idx, LocSeq *Seq) {
SourceLocation beg = ReadSourceLocation(F, Record, Idx, Seq);
SourceLocation end = ReadSourceLocation(F, Record, Idx, Seq);
unsigned &Idx) {
SourceLocation beg = ReadSourceLocation(F, Record, Idx);
SourceLocation end = ReadSourceLocation(F, Record, Idx);
return SourceRange(beg, end);
}

Expand Down
Loading