Skip to content

Commit 8b7559b

Browse files
Change BinaryStreamReader to DataExtractor
BinaryStreamReader and DataExtractor do similar things, but BinaryStreamReader can also hold a non-contiguous buffer, which comes with some performance penalties. Replacing it with DataExtractor keeps the same functionality while improving MCCAS replay time.
1 parent 0288fe7 commit 8b7559b

File tree

1 file changed

+93
-81
lines changed

1 file changed

+93
-81
lines changed

llvm/lib/MCCAS/MCCASObjectV1.cpp

Lines changed: 93 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -505,12 +505,13 @@ static Error materializeDebugInfoOpt(MCCASReader &Reader,
505505
StringRef FormData, bool) {
506506
if (Form == dwarf::Form::DW_FORM_ref4_cas ||
507507
Form == dwarf::Form::DW_FORM_strp_cas) {
508-
auto Reader = BinaryStreamReader(FormData, endianness::little);
509-
uint64_t Data64;
510-
if (auto Err = Reader.readULEB128(Data64))
511-
handleAllErrors(std::move(Err));
508+
DataExtractor Extractor(FormData, true, 8);
509+
DataExtractor::Cursor Cursor(0);
510+
uint64_t Data64 = Extractor.getULEB128(Cursor);
511+
if (!Cursor)
512+
handleAllErrors(Cursor.takeError());
512513
uint32_t Data32 = Data64;
513-
assert(Data32 == Data64 && Reader.empty());
514+
assert(Data32 == Data64 && Extractor.eof(Cursor));
514515
SectionStream->write(reinterpret_cast<char *>(&Data32), sizeof(Data32));
515516
} else
516517
*SectionStream << FormData;
@@ -1710,13 +1711,14 @@ Error MCCASBuilder::createStringSection(
17101711
/// Reads and returns the length field of a dwarf header contained in Reader,
17111712
/// assuming Reader is positioned at the beginning of the header. The Reader's
17121713
/// state is advanced to the first byte after the header.
1713-
static Expected<size_t> getSizeFromDwarfHeader(BinaryStreamReader &Reader) {
1714+
static Expected<size_t> getSizeFromDwarfHeader(DataExtractor &Extractor,
1715+
DataExtractor::Cursor &Cursor) {
17141716
// From DWARF 5 section 7.4:
17151717
// In the 32-bit DWARF format, an initial length field [...] is an unsigned
17161718
// 4-byte integer (which must be less than 0xfffffff0);
1717-
uint32_t Word1;
1718-
if (auto E = Reader.readInteger(Word1))
1719-
return std::move(E);
1719+
uint32_t Word1 = Extractor.getU32(Cursor);
1720+
if (!Cursor)
1721+
return Cursor.takeError();
17201722

17211723
// TODO: handle 64-bit DWARF format.
17221724
if (Word1 >= 0xfffffff0)
@@ -1734,43 +1736,44 @@ static Expected<CUInfo>
17341736
getAndSetDebugAbbrevOffsetAndSkip(MutableArrayRef<char> CUData,
17351737
endianness Endian,
17361738
std::optional<uint32_t> NewOffset) {
1737-
BinaryStreamReader Reader(toStringRef(CUData), Endian);
1738-
Expected<size_t> Size = getSizeFromDwarfHeader(Reader);
1739+
DataExtractor Extractor(toStringRef(CUData), Endian == endianness::little, 8);
1740+
DataExtractor::Cursor Cursor(0);
1741+
Expected<size_t> Size = getSizeFromDwarfHeader(Extractor, Cursor);
17391742
if (!Size)
17401743
return Size.takeError();
17411744

1742-
size_t AfterSizeOffset = Reader.getOffset();
1745+
size_t AfterSizeOffset = Cursor.tell();
17431746

17441747
// 2-byte Dwarf version identifier.
1745-
uint16_t DwarfVersion;
1746-
if (auto E = Reader.readInteger(DwarfVersion))
1747-
return std::move(E);
1748+
uint16_t DwarfVersion = Extractor.getU16(Cursor);
1749+
if (!Cursor)
1750+
return Cursor.takeError();
17481751

17491752
if (DwarfVersion >= 5) {
17501753
// From Dwarf 5 Section 7.5.1.1:
17511754
// Compile Unit Header Format is now changed with unit_type and address_size
17521755
// after the version. Parse both values from the header.
1753-
uint8_t UnitType;
1754-
if (auto E = Reader.readInteger(UnitType))
1755-
return std::move(E);
1756+
uint8_t UnitType = Extractor.getU8(Cursor);
1757+
if (!Cursor)
1758+
return Cursor.takeError();
17561759
if (UnitType != dwarf::DW_UT_compile)
17571760
return createStringError(
17581761
inconvertibleErrorCode(),
17591762
"Unit type is not DW_UT_compile, and is incompatible with MCCAS!");
1760-
uint8_t AddressSize;
1761-
if (auto E = Reader.readInteger(AddressSize))
1762-
return std::move(E);
1763+
uint8_t AddressSize = Extractor.getU8(Cursor);
1764+
if (!Cursor)
1765+
return Cursor.takeError();
17631766
if (AddressSize != 8)
17641767
return createStringError(
17651768
inconvertibleErrorCode(),
17661769
"Address size is not 8 bytes, unsupported architecture for MCCAS!");
17671770
}
17681771

17691772
// TODO: Handle Dwarf 64 format, which uses 8 bytes.
1770-
size_t AbbrevPosition = Reader.getOffset();
1771-
uint32_t AbbrevOffset;
1772-
if (auto E = Reader.readInteger(AbbrevOffset))
1773-
return std::move(E);
1773+
size_t AbbrevPosition = Cursor.tell();
1774+
uint32_t AbbrevOffset = Extractor.getU32(Cursor);
1775+
if (!Cursor)
1776+
return Cursor.takeError();
17741777

17751778
if (NewOffset.has_value()) {
17761779
// FIXME: safe but ugly cast. Similar to: llvm::arrayRefFromStringRef.
@@ -1782,11 +1785,8 @@ getAndSetDebugAbbrevOffsetAndSkip(MutableArrayRef<char> CUData,
17821785
return std::move(E);
17831786
}
17841787

1785-
Reader.setOffset(AfterSizeOffset);
1786-
if (auto E = Reader.skip(*Size))
1787-
return std::move(E);
1788-
1789-
return CUInfo{Reader.getOffset(), AbbrevOffset, DwarfVersion};
1788+
Cursor.seek(AfterSizeOffset + *Size);
1789+
return CUInfo{Cursor.tell(), AbbrevOffset, DwarfVersion};
17901790
}
17911791

17921792
/// Given a list of MCFragments, return a vector with the concatenation of their
@@ -3114,14 +3114,15 @@ struct DIEVisitor {
31143114

31153115
Error visitDIERef(DIEDedupeTopLevelRef Ref);
31163116
Error visitDIERef(ArrayRef<DIEDataRef> &DIEChildrenStack);
3117-
Error visitDIEAttrs(BinaryStreamReader &DataReader, StringRef DIEData,
3118-
ArrayRef<AbbrevContent> DIEContents);
3117+
Error visitDIEAttrs(DataExtractor &Extractor, DataExtractor::Cursor &Cursor,
3118+
StringRef DIEData, ArrayRef<AbbrevContent> DIEContents);
31193119
Error materializeAbbrevDIE(unsigned AbbrevIdx);
31203120

31213121
uint16_t DwarfVersion;
31223122
SmallVector<AbbrevEntry> AbbrevEntryCache;
31233123
ArrayRef<StringRef> AbbrevEntries;
3124-
BinaryStreamReader DistinctReader;
3124+
DataExtractor DistinctExtractor;
3125+
DataExtractor::Cursor DistinctCursor;
31253126
StringRef DistinctData;
31263127

31273128
std::function<void(StringRef)> HeaderCallback;
@@ -3132,7 +3133,8 @@ struct DIEVisitor {
31323133
std::function<void(StringRef)> NewBlockCallback;
31333134
};
31343135

3135-
Error DIEVisitor::visitDIEAttrs(BinaryStreamReader &DataReader,
3136+
Error DIEVisitor::visitDIEAttrs(DataExtractor &Extractor,
3137+
DataExtractor::Cursor &Cursor,
31363138
StringRef DIEData,
31373139
ArrayRef<AbbrevContent> DIEContents) {
31383140
constexpr auto IsLittleEndian = true;
@@ -3142,29 +3144,32 @@ Error DIEVisitor::visitDIEAttrs(BinaryStreamReader &DataReader,
31423144

31433145
for (auto Contents : DIEContents) {
31443146
bool DataInDistinct = Contents.FormInDistinctData;
3145-
auto &ReaderForData = DataInDistinct ? DistinctReader : DataReader;
3147+
auto &ExtractorForData = DataInDistinct ? DistinctExtractor : Extractor;
3148+
auto &CursorForData = DataInDistinct ? DistinctCursor : Cursor;
31463149
StringRef DataToUse = DataInDistinct ? DistinctData : DIEData;
31473150
Expected<uint64_t> FormSize =
31483151
Contents.FormSize
31493152
? *Contents.FormSize
31503153
: getFormSize(Contents.Form, FormParams, DataToUse,
3151-
ReaderForData.getOffset(), IsLittleEndian, AddrSize);
3154+
CursorForData.tell(), IsLittleEndian, AddrSize);
31523155
if (!FormSize)
31533156
return FormSize.takeError();
31543157

3155-
ArrayRef<char> RawBytes;
3156-
if (auto E = ReaderForData.readArray(RawBytes, *FormSize))
3157-
return E;
3158-
AttrCallback(Contents.Attr, Contents.Form, toStringRef(RawBytes),
3159-
DataInDistinct);
3158+
StringRef RawBytes;
3159+
if (*FormSize)
3160+
RawBytes = ExtractorForData.getBytes(CursorForData, *FormSize);
3161+
if (!CursorForData)
3162+
return CursorForData.takeError();
3163+
AttrCallback(Contents.Attr, Contents.Form, RawBytes, DataInDistinct);
31603164
}
31613165
return Error::success();
31623166
}
31633167

3164-
static Expected<uint64_t> readAbbrevIdx(BinaryStreamReader &Reader) {
3165-
uint64_t Idx;
3166-
if (auto E = Reader.readULEB128(Idx))
3167-
return std::move(E);
3168+
static Expected<uint64_t> readAbbrevIdx(DataExtractor &Extractor,
3169+
DataExtractor::Cursor &Cursor) {
3170+
uint64_t Idx = Extractor.getULEB128(Cursor);
3171+
if (!Cursor)
3172+
return Cursor.takeError();
31683173
return Idx;
31693174
}
31703175

@@ -3285,12 +3290,13 @@ Error DIEVisitor::materializeAbbrevDIE(unsigned AbbrevIdx) {
32853290
/// implementation of a Depth First Search, and this function is used to
32863291
/// simulate a return from a recursive callback, by restoring the locals to a
32873292
/// previous stack frame.
3288-
static void popStack(BinaryStreamReader &Reader, StringRef &Data,
3293+
static void popStack(DataExtractor &Extractor, DataExtractor::Cursor &Cursor,
3294+
StringRef &Data,
32893295
std::stack<std::pair<StringRef, unsigned>> &StackOfNodes) {
32903296
auto DataAndOffset = StackOfNodes.top();
3291-
Reader = BinaryStreamReader(DataAndOffset.first, llvm::endianness::little);
3297+
Extractor = DataExtractor(DataAndOffset.first, true, 8);
32923298
Data = DataAndOffset.first;
3293-
Reader.setOffset(DataAndOffset.second);
3299+
Cursor.seek(DataAndOffset.second);
32943300
StackOfNodes.pop();
32953301
}
32963302

@@ -3304,11 +3310,13 @@ Error DIEVisitor::visitDIERef(ArrayRef<DIEDataRef> &DIEChildrenStack) {
33043310
std::stack<std::pair<StringRef, unsigned>> StackOfNodes;
33053311
auto Data = DIEChildrenStack.empty() ? StringRef()
33063312
: DIEChildrenStack.front().getData();
3307-
BinaryStreamReader Reader(Data, llvm::endianness::little);
3313+
DataExtractor Extractor(Data, true, 8);
3314+
DataExtractor::Cursor Cursor(0);
33083315

3309-
while (!DistinctReader.empty()) {
3316+
while (!DistinctExtractor.eof(DistinctCursor)) {
33103317

3311-
Expected<uint64_t> MaybeAbbrevIdx = readAbbrevIdx(DistinctReader);
3318+
Expected<uint64_t> MaybeAbbrevIdx =
3319+
readAbbrevIdx(DistinctExtractor, DistinctCursor);
33123320
if (!MaybeAbbrevIdx)
33133321
return MaybeAbbrevIdx.takeError();
33143322
auto AbbrevIdx = *MaybeAbbrevIdx;
@@ -3318,20 +3326,21 @@ Error DIEVisitor::visitDIERef(ArrayRef<DIEDataRef> &DIEChildrenStack) {
33183326
// continue materialization of the parent's siblings that may exist.
33193327
if (AbbrevIdx == getEndOfDIESiblingsMarker()) {
33203328
EndTagCallback(true /*HadChildren*/);
3321-
if (!StackOfNodes.empty() && Reader.empty())
3322-
popStack(Reader, Data, StackOfNodes);
3329+
if (!StackOfNodes.empty() && Extractor.eof(Cursor))
3330+
popStack(Extractor, Cursor, Data, StackOfNodes);
33233331
continue;
33243332
}
33253333

33263334
// If we see a DIEInAnotherBlockMarker, we know that the next DIE is in
33273335
// another CAS Block, we have to push the current CAS Object on the stack,
33283336
// and materialize the next DIE from the DIEChildrenStack.
33293337
if (AbbrevIdx == getDIEInAnotherBlockMarker()) {
3330-
StackOfNodes.push(std::make_pair(Data, Reader.getOffset()));
3338+
StackOfNodes.push(std::make_pair(Data, Cursor.tell()));
33313339
DIEChildrenStack = DIEChildrenStack.drop_front();
33323340
Data = DIEChildrenStack.front().getData();
33333341
NewBlockCallback(DIEChildrenStack.front().getID().toString());
3334-
Reader = BinaryStreamReader(Data, llvm::endianness::little);
3342+
Extractor = DataExtractor(Data, true, 8);
3343+
Cursor.seek(0);
33353344
continue;
33363345
}
33373346

@@ -3340,16 +3349,16 @@ Error DIEVisitor::visitDIERef(ArrayRef<DIEDataRef> &DIEChildrenStack) {
33403349
AbbrevEntryCache[decodeAbbrevIndexAsAbbrevSetIdx(AbbrevIdx)];
33413350
StartTagCallback(AbbrevEntryCacheVal.Tag, AbbrevIdx);
33423351

3343-
if (auto E =
3344-
visitDIEAttrs(Reader, Data, AbbrevEntryCacheVal.AbbrevContents))
3352+
if (auto E = visitDIEAttrs(Extractor, Cursor, Data,
3353+
AbbrevEntryCacheVal.AbbrevContents))
33453354
return E;
33463355

33473356
// If the current DIE doesn't have any children, the current CAS Object will
33483357
// not contain any more data, pop the stack to continue materializing its
33493358
// parent's siblings that may exist.
33503359
if (!AbbrevEntryCacheVal.HasChildren) {
3351-
if (!StackOfNodes.empty() && Reader.empty())
3352-
popStack(Reader, Data, StackOfNodes);
3360+
if (!StackOfNodes.empty() && Extractor.eof(Cursor))
3361+
popStack(Extractor, Cursor, Data, StackOfNodes);
33533362
EndTagCallback(false /*HadChildren*/);
33543363
}
33553364
}
@@ -3358,8 +3367,9 @@ Error DIEVisitor::visitDIERef(ArrayRef<DIEDataRef> &DIEChildrenStack) {
33583367

33593368
Error DIEVisitor::visitDIERef(DIEDedupeTopLevelRef StartDIERef) {
33603369

3361-
auto Offset = DistinctReader.getOffset();
3362-
Expected<uint64_t> MaybeAbbrevIdx = readAbbrevIdx(DistinctReader);
3370+
auto Offset = DistinctCursor.tell();
3371+
Expected<uint64_t> MaybeAbbrevIdx =
3372+
readAbbrevIdx(DistinctExtractor, DistinctCursor);
33633373
if (!MaybeAbbrevIdx)
33643374
return MaybeAbbrevIdx.takeError();
33653375
auto AbbrevIdx = *MaybeAbbrevIdx;
@@ -3368,7 +3378,7 @@ Error DIEVisitor::visitDIERef(DIEDedupeTopLevelRef StartDIERef) {
33683378
assert(AbbrevIdx != getEndOfDIESiblingsMarker() &&
33693379
AbbrevIdx != getDIEInAnotherBlockMarker());
33703380

3371-
DistinctReader.setOffset(Offset);
3381+
DistinctCursor.seek(Offset);
33723382

33733383
NewBlockCallback(StartDIERef.getID().toString());
33743384

@@ -3406,34 +3416,36 @@ Error mccasformats::v1::visitDebugInfo(
34063416
compression::zlib::decompress(BuffRef, OutBuff, UncompressedSize))
34073417
return E;
34083418
DistinctData = toStringRef(OutBuff);
3409-
BinaryStreamReader DistinctReader(DistinctData, endianness::little);
3410-
#else
3411-
BinaryStreamReader DistinctReader(DistinctData, endianness::little);
34123419
#endif
3413-
ArrayRef<char> HeaderData;
3420+
DataExtractor DistinctExtractor(DistinctData, true, 8);
3421+
DataExtractor::Cursor DistinctCursor(0);
34143422

3415-
auto BeginOffset = DistinctReader.getOffset();
3416-
auto Size = getSizeFromDwarfHeader(DistinctReader);
3423+
auto Size = getSizeFromDwarfHeader(DistinctExtractor, DistinctCursor);
34173424
if (!Size)
34183425
return Size.takeError();
34193426

34203427
// 2-byte Dwarf version identifier.
3421-
uint16_t DwarfVersion;
3422-
if (auto E = DistinctReader.readInteger(DwarfVersion))
3423-
return E;
3424-
3425-
DistinctReader.setOffset(BeginOffset);
3428+
uint16_t DwarfVersion = DistinctExtractor.getU16(DistinctCursor);
3429+
DistinctCursor.seek(0);
34263430

3427-
if (auto E = DistinctReader.readArray(
3428-
HeaderData,
3429-
DwarfVersion >= 5 ? Dwarf5HeaderSize32Bit : Dwarf4HeaderSize32Bit))
3430-
return E;
3431-
HeaderCallback(toStringRef(HeaderData));
3431+
StringRef HeaderData = DistinctExtractor.getBytes(
3432+
DistinctCursor,
3433+
DwarfVersion >= 5 ? Dwarf5HeaderSize32Bit : Dwarf4HeaderSize32Bit);
3434+
if (!DistinctCursor)
3435+
return DistinctCursor.takeError();
3436+
HeaderCallback(HeaderData);
34323437

34333438
append_range(TotAbbrevEntries, LoadedTopRef->AbbrevEntries);
3434-
DIEVisitor Visitor{DwarfVersion, {}, TotAbbrevEntries,
3435-
DistinctReader, DistinctData, HeaderCallback,
3436-
StartTagCallback, AttrCallback, EndTagCallback,
3439+
DIEVisitor Visitor{DwarfVersion,
3440+
{},
3441+
TotAbbrevEntries,
3442+
DistinctExtractor,
3443+
DataExtractor::Cursor(DistinctCursor.tell()),
3444+
DistinctData,
3445+
HeaderCallback,
3446+
StartTagCallback,
3447+
AttrCallback,
3448+
EndTagCallback,
34373449
NewBlockCallback};
34383450
return Visitor.visitDIERef(LoadedTopRef->RootDIE);
34393451
}

0 commit comments

Comments
 (0)