Skip to content

Commit 9b95a6e

Browse files
Merge pull request #7952 from rastogishubham/uleboptimize
Optimize ULEB decoding to improve MCCAS replay time
2 parents f981c3b + 8b7559b commit 9b95a6e

File tree

1 file changed

+93
-92
lines changed

1 file changed

+93
-92
lines changed

llvm/lib/MCCAS/MCCASObjectV1.cpp

Lines changed: 93 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -505,12 +505,13 @@ static Error materializeDebugInfoOpt(MCCASReader &Reader,
505505
StringRef FormData, bool) {
506506
if (Form == dwarf::Form::DW_FORM_ref4_cas ||
507507
Form == dwarf::Form::DW_FORM_strp_cas) {
508-
auto Reader = BinaryStreamReader(FormData, endianness::little);
509-
uint64_t Data64;
510-
if (auto Err = Reader.readULEB128(Data64))
511-
handleAllErrors(std::move(Err));
508+
DataExtractor Extractor(FormData, true, 8);
509+
DataExtractor::Cursor Cursor(0);
510+
uint64_t Data64 = Extractor.getULEB128(Cursor);
511+
if (!Cursor)
512+
handleAllErrors(Cursor.takeError());
512513
uint32_t Data32 = Data64;
513-
assert(Data32 == Data64 && Reader.empty());
514+
assert(Data32 == Data64 && Extractor.eof(Cursor));
514515
SectionStream->write(reinterpret_cast<char *>(&Data32), sizeof(Data32));
515516
} else
516517
*SectionStream << FormData;
@@ -1710,13 +1711,14 @@ Error MCCASBuilder::createStringSection(
17101711
/// Reads and returns the length field of a dwarf header contained in
17111712
/// Extractor, assuming Cursor is positioned at the beginning of the header.
17121713
/// The Cursor is advanced to the first byte after the header.
1713-
static Expected<size_t> getSizeFromDwarfHeader(BinaryStreamReader &Reader) {
1714+
static Expected<size_t> getSizeFromDwarfHeader(DataExtractor &Extractor,
1715+
DataExtractor::Cursor &Cursor) {
17141716
// From DWARF 5 section 7.4:
17151717
// In the 32-bit DWARF format, an initial length field [...] is an unsigned
17161718
// 4-byte integer (which must be less than 0xfffffff0);
1717-
uint32_t Word1;
1718-
if (auto E = Reader.readInteger(Word1))
1719-
return std::move(E);
1719+
uint32_t Word1 = Extractor.getU32(Cursor);
1720+
if (!Cursor)
1721+
return Cursor.takeError();
17201722

17211723
// TODO: handle 64-bit DWARF format.
17221724
if (Word1 >= 0xfffffff0)
@@ -1726,17 +1728,6 @@ static Expected<size_t> getSizeFromDwarfHeader(BinaryStreamReader &Reader) {
17261728
return Word1;
17271729
}
17281730

1729-
// TODO: Remove
1730-
Expected<size_t>
1731-
mccasformats::v1::getSizeFromDwarfHeaderAndSkip(BinaryStreamReader &Reader) {
1732-
Expected<size_t> Size = getSizeFromDwarfHeader(Reader);
1733-
if (!Size)
1734-
return Size.takeError();
1735-
if (auto E = Reader.skip(*Size))
1736-
return std::move(E);
1737-
return Size;
1738-
}
1739-
17401731
/// Returns the Abbreviation Offset field of a Dwarf Compilation Unit (CU)
17411732
/// contained in CUData, as well as the total number of bytes taken by the CU.
17421733
/// Note: this is different from the length field of the Dwarf header, which
@@ -1745,43 +1736,44 @@ static Expected<CUInfo>
17451736
getAndSetDebugAbbrevOffsetAndSkip(MutableArrayRef<char> CUData,
17461737
endianness Endian,
17471738
std::optional<uint32_t> NewOffset) {
1748-
BinaryStreamReader Reader(toStringRef(CUData), Endian);
1749-
Expected<size_t> Size = getSizeFromDwarfHeader(Reader);
1739+
DataExtractor Extractor(toStringRef(CUData), Endian == endianness::little, 8);
1740+
DataExtractor::Cursor Cursor(0);
1741+
Expected<size_t> Size = getSizeFromDwarfHeader(Extractor, Cursor);
17501742
if (!Size)
17511743
return Size.takeError();
17521744

1753-
size_t AfterSizeOffset = Reader.getOffset();
1745+
size_t AfterSizeOffset = Cursor.tell();
17541746

17551747
// 2-byte Dwarf version identifier.
1756-
uint16_t DwarfVersion;
1757-
if (auto E = Reader.readInteger(DwarfVersion))
1758-
return std::move(E);
1748+
uint16_t DwarfVersion = Extractor.getU16(Cursor);
1749+
if (!Cursor)
1750+
return Cursor.takeError();
17591751

17601752
if (DwarfVersion >= 5) {
17611753
// From Dwarf 5 Section 7.5.1.1:
17621754
// Compile Unit Header Format is now changed with unit_type and address_size
17631755
// after the version. Parse both values from the header.
1764-
uint8_t UnitType;
1765-
if (auto E = Reader.readInteger(UnitType))
1766-
return std::move(E);
1756+
uint8_t UnitType = Extractor.getU8(Cursor);
1757+
if (!Cursor)
1758+
return Cursor.takeError();
17671759
if (UnitType != dwarf::DW_UT_compile)
17681760
return createStringError(
17691761
inconvertibleErrorCode(),
17701762
"Unit type is not DW_UT_compile, and is incompatible with MCCAS!");
1771-
uint8_t AddressSize;
1772-
if (auto E = Reader.readInteger(AddressSize))
1773-
return std::move(E);
1763+
uint8_t AddressSize = Extractor.getU8(Cursor);
1764+
if (!Cursor)
1765+
return Cursor.takeError();
17741766
if (AddressSize != 8)
17751767
return createStringError(
17761768
inconvertibleErrorCode(),
17771769
"Address size is not 8 bytes, unsupported architecture for MCCAS!");
17781770
}
17791771

17801772
// TODO: Handle Dwarf 64 format, which uses 8 bytes.
1781-
size_t AbbrevPosition = Reader.getOffset();
1782-
uint32_t AbbrevOffset;
1783-
if (auto E = Reader.readInteger(AbbrevOffset))
1784-
return std::move(E);
1773+
size_t AbbrevPosition = Cursor.tell();
1774+
uint32_t AbbrevOffset = Extractor.getU32(Cursor);
1775+
if (!Cursor)
1776+
return Cursor.takeError();
17851777

17861778
if (NewOffset.has_value()) {
17871779
// FIXME: safe but ugly cast. Similar to: llvm::arrayRefFromStringRef.
@@ -1793,11 +1785,8 @@ getAndSetDebugAbbrevOffsetAndSkip(MutableArrayRef<char> CUData,
17931785
return std::move(E);
17941786
}
17951787

1796-
Reader.setOffset(AfterSizeOffset);
1797-
if (auto E = Reader.skip(*Size))
1798-
return std::move(E);
1799-
1800-
return CUInfo{Reader.getOffset(), AbbrevOffset, DwarfVersion};
1788+
Cursor.seek(AfterSizeOffset + *Size);
1789+
return CUInfo{Cursor.tell(), AbbrevOffset, DwarfVersion};
18011790
}
18021791

18031792
/// Given a list of MCFragments, return a vector with the concatenation of their
@@ -3125,14 +3114,15 @@ struct DIEVisitor {
31253114

31263115
Error visitDIERef(DIEDedupeTopLevelRef Ref);
31273116
Error visitDIERef(ArrayRef<DIEDataRef> &DIEChildrenStack);
3128-
Error visitDIEAttrs(BinaryStreamReader &DataReader, StringRef DIEData,
3129-
ArrayRef<AbbrevContent> DIEContents);
3117+
Error visitDIEAttrs(DataExtractor &Extractor, DataExtractor::Cursor &Cursor,
3118+
StringRef DIEData, ArrayRef<AbbrevContent> DIEContents);
31303119
Error materializeAbbrevDIE(unsigned AbbrevIdx);
31313120

31323121
uint16_t DwarfVersion;
31333122
SmallVector<AbbrevEntry> AbbrevEntryCache;
31343123
ArrayRef<StringRef> AbbrevEntries;
3135-
BinaryStreamReader DistinctReader;
3124+
DataExtractor DistinctExtractor;
3125+
DataExtractor::Cursor DistinctCursor;
31363126
StringRef DistinctData;
31373127

31383128
std::function<void(StringRef)> HeaderCallback;
@@ -3143,7 +3133,8 @@ struct DIEVisitor {
31433133
std::function<void(StringRef)> NewBlockCallback;
31443134
};
31453135

3146-
Error DIEVisitor::visitDIEAttrs(BinaryStreamReader &DataReader,
3136+
Error DIEVisitor::visitDIEAttrs(DataExtractor &Extractor,
3137+
DataExtractor::Cursor &Cursor,
31473138
StringRef DIEData,
31483139
ArrayRef<AbbrevContent> DIEContents) {
31493140
constexpr auto IsLittleEndian = true;
@@ -3153,29 +3144,32 @@ Error DIEVisitor::visitDIEAttrs(BinaryStreamReader &DataReader,
31533144

31543145
for (auto Contents : DIEContents) {
31553146
bool DataInDistinct = Contents.FormInDistinctData;
3156-
auto &ReaderForData = DataInDistinct ? DistinctReader : DataReader;
3147+
auto &ExtractorForData = DataInDistinct ? DistinctExtractor : Extractor;
3148+
auto &CursorForData = DataInDistinct ? DistinctCursor : Cursor;
31573149
StringRef DataToUse = DataInDistinct ? DistinctData : DIEData;
31583150
Expected<uint64_t> FormSize =
31593151
Contents.FormSize
31603152
? *Contents.FormSize
31613153
: getFormSize(Contents.Form, FormParams, DataToUse,
3162-
ReaderForData.getOffset(), IsLittleEndian, AddrSize);
3154+
CursorForData.tell(), IsLittleEndian, AddrSize);
31633155
if (!FormSize)
31643156
return FormSize.takeError();
31653157

3166-
ArrayRef<char> RawBytes;
3167-
if (auto E = ReaderForData.readArray(RawBytes, *FormSize))
3168-
return E;
3169-
AttrCallback(Contents.Attr, Contents.Form, toStringRef(RawBytes),
3170-
DataInDistinct);
3158+
StringRef RawBytes;
3159+
if (*FormSize)
3160+
RawBytes = ExtractorForData.getBytes(CursorForData, *FormSize);
3161+
if (!CursorForData)
3162+
return CursorForData.takeError();
3163+
AttrCallback(Contents.Attr, Contents.Form, RawBytes, DataInDistinct);
31713164
}
31723165
return Error::success();
31733166
}
31743167

3175-
static Expected<uint64_t> readAbbrevIdx(BinaryStreamReader &Reader) {
3176-
uint64_t Idx;
3177-
if (auto E = Reader.readULEB128(Idx))
3178-
return std::move(E);
3168+
static Expected<uint64_t> readAbbrevIdx(DataExtractor &Extractor,
3169+
DataExtractor::Cursor &Cursor) {
3170+
uint64_t Idx = Extractor.getULEB128(Cursor);
3171+
if (!Cursor)
3172+
return Cursor.takeError();
31793173
return Idx;
31803174
}
31813175

@@ -3296,12 +3290,13 @@ Error DIEVisitor::materializeAbbrevDIE(unsigned AbbrevIdx) {
32963290
/// implementation of a Depth First Search, and this function is used to
32973291
/// simulate a return from a recursive callback, by restoring the locals to a
32983292
/// previous stack frame.
3299-
static void popStack(BinaryStreamReader &Reader, StringRef &Data,
3293+
static void popStack(DataExtractor &Extractor, DataExtractor::Cursor &Cursor,
3294+
StringRef &Data,
33003295
std::stack<std::pair<StringRef, unsigned>> &StackOfNodes) {
33013296
auto DataAndOffset = StackOfNodes.top();
3302-
Reader = BinaryStreamReader(DataAndOffset.first, llvm::endianness::little);
3297+
Extractor = DataExtractor(DataAndOffset.first, true, 8);
33033298
Data = DataAndOffset.first;
3304-
Reader.setOffset(DataAndOffset.second);
3299+
Cursor.seek(DataAndOffset.second);
33053300
StackOfNodes.pop();
33063301
}
33073302

@@ -3315,11 +3310,13 @@ Error DIEVisitor::visitDIERef(ArrayRef<DIEDataRef> &DIEChildrenStack) {
33153310
std::stack<std::pair<StringRef, unsigned>> StackOfNodes;
33163311
auto Data = DIEChildrenStack.empty() ? StringRef()
33173312
: DIEChildrenStack.front().getData();
3318-
BinaryStreamReader Reader(Data, llvm::endianness::little);
3313+
DataExtractor Extractor(Data, true, 8);
3314+
DataExtractor::Cursor Cursor(0);
33193315

3320-
while (!DistinctReader.empty()) {
3316+
while (!DistinctExtractor.eof(DistinctCursor)) {
33213317

3322-
Expected<uint64_t> MaybeAbbrevIdx = readAbbrevIdx(DistinctReader);
3318+
Expected<uint64_t> MaybeAbbrevIdx =
3319+
readAbbrevIdx(DistinctExtractor, DistinctCursor);
33233320
if (!MaybeAbbrevIdx)
33243321
return MaybeAbbrevIdx.takeError();
33253322
auto AbbrevIdx = *MaybeAbbrevIdx;
@@ -3329,20 +3326,21 @@ Error DIEVisitor::visitDIERef(ArrayRef<DIEDataRef> &DIEChildrenStack) {
33293326
// continue materialization of the parent's siblings that may exist.
33303327
if (AbbrevIdx == getEndOfDIESiblingsMarker()) {
33313328
EndTagCallback(true /*HadChildren*/);
3332-
if (!StackOfNodes.empty() && Reader.empty())
3333-
popStack(Reader, Data, StackOfNodes);
3329+
if (!StackOfNodes.empty() && Extractor.eof(Cursor))
3330+
popStack(Extractor, Cursor, Data, StackOfNodes);
33343331
continue;
33353332
}
33363333

33373334
// If we see a DIEInAnotherBlockMarker, we know that the next DIE is in
33383335
// another CAS Block, so we have to push the current CAS Object on the stack
33393336
// and materialize the next DIE from the DIEChildrenStack.
33403337
if (AbbrevIdx == getDIEInAnotherBlockMarker()) {
3341-
StackOfNodes.push(std::make_pair(Data, Reader.getOffset()));
3338+
StackOfNodes.push(std::make_pair(Data, Cursor.tell()));
33423339
DIEChildrenStack = DIEChildrenStack.drop_front();
33433340
Data = DIEChildrenStack.front().getData();
33443341
NewBlockCallback(DIEChildrenStack.front().getID().toString());
3345-
Reader = BinaryStreamReader(Data, llvm::endianness::little);
3342+
Extractor = DataExtractor(Data, true, 8);
3343+
Cursor.seek(0);
33463344
continue;
33473345
}
33483346

@@ -3351,16 +3349,16 @@ Error DIEVisitor::visitDIERef(ArrayRef<DIEDataRef> &DIEChildrenStack) {
33513349
AbbrevEntryCache[decodeAbbrevIndexAsAbbrevSetIdx(AbbrevIdx)];
33523350
StartTagCallback(AbbrevEntryCacheVal.Tag, AbbrevIdx);
33533351

3354-
if (auto E =
3355-
visitDIEAttrs(Reader, Data, AbbrevEntryCacheVal.AbbrevContents))
3352+
if (auto E = visitDIEAttrs(Extractor, Cursor, Data,
3353+
AbbrevEntryCacheVal.AbbrevContents))
33563354
return E;
33573355

33583356
// If the current DIE doesn't have any children, the current CAS Object will
33593357
// not contain any more data; pop the stack to continue materializing its
33603358
// parent's siblings that may exist.
33613359
if (!AbbrevEntryCacheVal.HasChildren) {
3362-
if (!StackOfNodes.empty() && Reader.empty())
3363-
popStack(Reader, Data, StackOfNodes);
3360+
if (!StackOfNodes.empty() && Extractor.eof(Cursor))
3361+
popStack(Extractor, Cursor, Data, StackOfNodes);
33643362
EndTagCallback(false /*HadChildren*/);
33653363
}
33663364
}
@@ -3369,8 +3367,9 @@ Error DIEVisitor::visitDIERef(ArrayRef<DIEDataRef> &DIEChildrenStack) {
33693367

33703368
Error DIEVisitor::visitDIERef(DIEDedupeTopLevelRef StartDIERef) {
33713369

3372-
auto Offset = DistinctReader.getOffset();
3373-
Expected<uint64_t> MaybeAbbrevIdx = readAbbrevIdx(DistinctReader);
3370+
auto Offset = DistinctCursor.tell();
3371+
Expected<uint64_t> MaybeAbbrevIdx =
3372+
readAbbrevIdx(DistinctExtractor, DistinctCursor);
33743373
if (!MaybeAbbrevIdx)
33753374
return MaybeAbbrevIdx.takeError();
33763375
auto AbbrevIdx = *MaybeAbbrevIdx;
@@ -3379,7 +3378,7 @@ Error DIEVisitor::visitDIERef(DIEDedupeTopLevelRef StartDIERef) {
33793378
assert(AbbrevIdx != getEndOfDIESiblingsMarker() &&
33803379
AbbrevIdx != getDIEInAnotherBlockMarker());
33813380

3382-
DistinctReader.setOffset(Offset);
3381+
DistinctCursor.seek(Offset);
33833382

33843383
NewBlockCallback(StartDIERef.getID().toString());
33853384

@@ -3417,34 +3416,36 @@ Error mccasformats::v1::visitDebugInfo(
34173416
compression::zlib::decompress(BuffRef, OutBuff, UncompressedSize))
34183417
return E;
34193418
DistinctData = toStringRef(OutBuff);
3420-
BinaryStreamReader DistinctReader(DistinctData, endianness::little);
3421-
#else
3422-
BinaryStreamReader DistinctReader(DistinctData, endianness::little);
34233419
#endif
3424-
ArrayRef<char> HeaderData;
3420+
DataExtractor DistinctExtractor(DistinctData, true, 8);
3421+
DataExtractor::Cursor DistinctCursor(0);
34253422

3426-
auto BeginOffset = DistinctReader.getOffset();
3427-
auto Size = getSizeFromDwarfHeader(DistinctReader);
3423+
auto Size = getSizeFromDwarfHeader(DistinctExtractor, DistinctCursor);
34283424
if (!Size)
34293425
return Size.takeError();
34303426

34313427
// 2-byte Dwarf version identifier.
3432-
uint16_t DwarfVersion;
3433-
if (auto E = DistinctReader.readInteger(DwarfVersion))
3434-
return E;
3435-
3436-
DistinctReader.setOffset(BeginOffset);
3428+
uint16_t DwarfVersion = DistinctExtractor.getU16(DistinctCursor);
3429+
DistinctCursor.seek(0);
34373430

3438-
if (auto E = DistinctReader.readArray(
3439-
HeaderData,
3440-
DwarfVersion >= 5 ? Dwarf5HeaderSize32Bit : Dwarf4HeaderSize32Bit))
3441-
return E;
3442-
HeaderCallback(toStringRef(HeaderData));
3431+
StringRef HeaderData = DistinctExtractor.getBytes(
3432+
DistinctCursor,
3433+
DwarfVersion >= 5 ? Dwarf5HeaderSize32Bit : Dwarf4HeaderSize32Bit);
3434+
if (!DistinctCursor)
3435+
return DistinctCursor.takeError();
3436+
HeaderCallback(HeaderData);
34433437

34443438
append_range(TotAbbrevEntries, LoadedTopRef->AbbrevEntries);
3445-
DIEVisitor Visitor{DwarfVersion, {}, TotAbbrevEntries,
3446-
DistinctReader, DistinctData, HeaderCallback,
3447-
StartTagCallback, AttrCallback, EndTagCallback,
3439+
DIEVisitor Visitor{DwarfVersion,
3440+
{},
3441+
TotAbbrevEntries,
3442+
DistinctExtractor,
3443+
DataExtractor::Cursor(DistinctCursor.tell()),
3444+
DistinctData,
3445+
HeaderCallback,
3446+
StartTagCallback,
3447+
AttrCallback,
3448+
EndTagCallback,
34483449
NewBlockCallback};
34493450
return Visitor.visitDIERef(LoadedTopRef->RootDIE);
34503451
}

0 commit comments

Comments
 (0)