Skip to content

Commit a88e90c

Browse files
Change BinaryStreamReader to DataExtractor
BinaryStreamReader and DataExtractor do similar things, but BinaryStreamReader can also hold a buffer that is not contiguous which comes with some performance penalties. Replacing it with DataExtractor allows us to keep the same functionality but improve MCCAS replay time. (cherry picked from commit 8b7559b)
1 parent c38083a commit a88e90c

File tree

1 file changed

+94
-81
lines changed

1 file changed

+94
-81
lines changed

llvm/lib/MCCAS/MCCASObjectV1.cpp

Lines changed: 94 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -505,12 +505,13 @@ static Error materializeDebugInfoOpt(MCCASReader &Reader,
505505
StringRef FormData, bool) {
506506
if (Form == dwarf::Form::DW_FORM_ref4_cas ||
507507
Form == dwarf::Form::DW_FORM_strp_cas) {
508-
auto Reader = BinaryStreamReader(FormData, support::endianness::little);
509-
uint64_t Data64;
510-
if (auto Err = Reader.readULEB128(Data64))
511-
handleAllErrors(std::move(Err));
508+
DataExtractor Extractor(FormData, true, 8);
509+
DataExtractor::Cursor Cursor(0);
510+
uint64_t Data64 = Extractor.getULEB128(Cursor);
511+
if (!Cursor)
512+
handleAllErrors(Cursor.takeError());
512513
uint32_t Data32 = Data64;
513-
assert(Data32 == Data64 && Reader.empty());
514+
assert(Data32 == Data64 && Extractor.eof(Cursor));
514515
SectionStream->write(reinterpret_cast<char *>(&Data32), sizeof(Data32));
515516
} else
516517
*SectionStream << FormData;
@@ -1703,13 +1704,14 @@ Error MCCASBuilder::createStringSection(
17031704
/// Reads and returns the length field of a dwarf header contained in Reader,
17041705
/// assuming Reader is positioned at the beginning of the header. The Reader's
17051706
/// state is advanced to the first byte after the header.
1706-
static Expected<size_t> getSizeFromDwarfHeader(BinaryStreamReader &Reader) {
1707+
static Expected<size_t> getSizeFromDwarfHeader(DataExtractor &Extractor,
1708+
DataExtractor::Cursor &Cursor) {
17071709
// From DWARF 5 section 7.4:
17081710
// In the 32-bit DWARF format, an initial length field [...] is an unsigned
17091711
// 4-byte integer (which must be less than 0xfffffff0);
1710-
uint32_t Word1;
1711-
if (auto E = Reader.readInteger(Word1))
1712-
return std::move(E);
1712+
uint32_t Word1 = Extractor.getU32(Cursor);
1713+
if (!Cursor)
1714+
return Cursor.takeError();
17131715

17141716
// TODO: handle 64-bit DWARF format.
17151717
if (Word1 >= 0xfffffff0)
@@ -1727,43 +1729,45 @@ static Expected<CUInfo>
17271729
getAndSetDebugAbbrevOffsetAndSkip(MutableArrayRef<char> CUData,
17281730
support::endianness Endian,
17291731
std::optional<uint32_t> NewOffset) {
1730-
BinaryStreamReader Reader(toStringRef(CUData), Endian);
1731-
Expected<size_t> Size = getSizeFromDwarfHeader(Reader);
1732+
DataExtractor Extractor(toStringRef(CUData),
1733+
Endian == support::endianness::little, 8);
1734+
DataExtractor::Cursor Cursor(0);
1735+
Expected<size_t> Size = getSizeFromDwarfHeader(Extractor, Cursor);
17321736
if (!Size)
17331737
return Size.takeError();
17341738

1735-
size_t AfterSizeOffset = Reader.getOffset();
1739+
size_t AfterSizeOffset = Cursor.tell();
17361740

17371741
// 2-byte Dwarf version identifier.
1738-
uint16_t DwarfVersion;
1739-
if (auto E = Reader.readInteger(DwarfVersion))
1740-
return std::move(E);
1742+
uint16_t DwarfVersion = Extractor.getU16(Cursor);
1743+
if (!Cursor)
1744+
return Cursor.takeError();
17411745

17421746
if (DwarfVersion >= 5) {
17431747
// From Dwarf 5 Section 7.5.1.1:
17441748
// Compile Unit Header Format is now changed with unit_type and address_size
17451749
// after the version. Parse both values from the header.
1746-
uint8_t UnitType;
1747-
if (auto E = Reader.readInteger(UnitType))
1748-
return std::move(E);
1750+
uint8_t UnitType = Extractor.getU8(Cursor);
1751+
if (!Cursor)
1752+
return Cursor.takeError();
17491753
if (UnitType != dwarf::DW_UT_compile)
17501754
return createStringError(
17511755
inconvertibleErrorCode(),
17521756
"Unit type is not DW_UT_compile, and is incompatible with MCCAS!");
1753-
uint8_t AddressSize;
1754-
if (auto E = Reader.readInteger(AddressSize))
1755-
return std::move(E);
1757+
uint8_t AddressSize = Extractor.getU8(Cursor);
1758+
if (!Cursor)
1759+
return Cursor.takeError();
17561760
if (AddressSize != 8)
17571761
return createStringError(
17581762
inconvertibleErrorCode(),
17591763
"Address size is not 8 bytes, unsupported architecture for MCCAS!");
17601764
}
17611765

17621766
// TODO: Handle Dwarf 64 format, which uses 8 bytes.
1763-
size_t AbbrevPosition = Reader.getOffset();
1764-
uint32_t AbbrevOffset;
1765-
if (auto E = Reader.readInteger(AbbrevOffset))
1766-
return std::move(E);
1767+
size_t AbbrevPosition = Cursor.tell();
1768+
uint32_t AbbrevOffset = Extractor.getU32(Cursor);
1769+
if (!Cursor)
1770+
return Cursor.takeError();
17671771

17681772
if (NewOffset.has_value()) {
17691773
// FIXME: safe but ugly cast. Similar to: llvm::arrayRefFromStringRef.
@@ -1775,11 +1779,8 @@ getAndSetDebugAbbrevOffsetAndSkip(MutableArrayRef<char> CUData,
17751779
return std::move(E);
17761780
}
17771781

1778-
Reader.setOffset(AfterSizeOffset);
1779-
if (auto E = Reader.skip(*Size))
1780-
return std::move(E);
1781-
1782-
return CUInfo{Reader.getOffset(), AbbrevOffset, DwarfVersion};
1782+
Cursor.seek(AfterSizeOffset + *Size);
1783+
return CUInfo{Cursor.tell(), AbbrevOffset, DwarfVersion};
17831784
}
17841785

17851786
/// Given a list of MCFragments, return a vector with the concatenation of their
@@ -3102,14 +3103,15 @@ struct DIEVisitor {
31023103

31033104
Error visitDIERef(DIEDedupeTopLevelRef Ref);
31043105
Error visitDIERef(ArrayRef<DIEDataRef> &DIEChildrenStack);
3105-
Error visitDIEAttrs(BinaryStreamReader &DataReader, StringRef DIEData,
3106-
ArrayRef<AbbrevContent> DIEContents);
3106+
Error visitDIEAttrs(DataExtractor &Extractor, DataExtractor::Cursor &Cursor,
3107+
StringRef DIEData, ArrayRef<AbbrevContent> DIEContents);
31073108
Error materializeAbbrevDIE(unsigned AbbrevIdx);
31083109

31093110
uint16_t DwarfVersion;
31103111
SmallVector<AbbrevEntry> AbbrevEntryCache;
31113112
ArrayRef<StringRef> AbbrevEntries;
3112-
BinaryStreamReader DistinctReader;
3113+
DataExtractor DistinctExtractor;
3114+
DataExtractor::Cursor DistinctCursor;
31133115
StringRef DistinctData;
31143116

31153117
std::function<void(StringRef)> HeaderCallback;
@@ -3120,7 +3122,8 @@ struct DIEVisitor {
31203122
std::function<void(StringRef)> NewBlockCallback;
31213123
};
31223124

3123-
Error DIEVisitor::visitDIEAttrs(BinaryStreamReader &DataReader,
3125+
Error DIEVisitor::visitDIEAttrs(DataExtractor &Extractor,
3126+
DataExtractor::Cursor &Cursor,
31243127
StringRef DIEData,
31253128
ArrayRef<AbbrevContent> DIEContents) {
31263129
constexpr auto IsLittleEndian = true;
@@ -3130,29 +3133,32 @@ Error DIEVisitor::visitDIEAttrs(BinaryStreamReader &DataReader,
31303133

31313134
for (auto Contents : DIEContents) {
31323135
bool DataInDistinct = Contents.FormInDistinctData;
3133-
auto &ReaderForData = DataInDistinct ? DistinctReader : DataReader;
3136+
auto &ExtractorForData = DataInDistinct ? DistinctExtractor : Extractor;
3137+
auto &CursorForData = DataInDistinct ? DistinctCursor : Cursor;
31343138
StringRef DataToUse = DataInDistinct ? DistinctData : DIEData;
31353139
Expected<uint64_t> FormSize =
31363140
Contents.FormSize
31373141
? *Contents.FormSize
31383142
: getFormSize(Contents.Form, FormParams, DataToUse,
3139-
ReaderForData.getOffset(), IsLittleEndian, AddrSize);
3143+
CursorForData.tell(), IsLittleEndian, AddrSize);
31403144
if (!FormSize)
31413145
return FormSize.takeError();
31423146

3143-
ArrayRef<char> RawBytes;
3144-
if (auto E = ReaderForData.readArray(RawBytes, *FormSize))
3145-
return E;
3146-
AttrCallback(Contents.Attr, Contents.Form, toStringRef(RawBytes),
3147-
DataInDistinct);
3147+
StringRef RawBytes;
3148+
if (*FormSize)
3149+
RawBytes = ExtractorForData.getBytes(CursorForData, *FormSize);
3150+
if (!CursorForData)
3151+
return CursorForData.takeError();
3152+
AttrCallback(Contents.Attr, Contents.Form, RawBytes, DataInDistinct);
31483153
}
31493154
return Error::success();
31503155
}
31513156

3152-
static Expected<uint64_t> readAbbrevIdx(BinaryStreamReader &Reader) {
3153-
uint64_t Idx;
3154-
if (auto E = Reader.readULEB128(Idx))
3155-
return std::move(E);
3157+
static Expected<uint64_t> readAbbrevIdx(DataExtractor &Extractor,
3158+
DataExtractor::Cursor &Cursor) {
3159+
uint64_t Idx = Extractor.getULEB128(Cursor);
3160+
if (!Cursor)
3161+
return Cursor.takeError();
31563162
return Idx;
31573163
}
31583164

@@ -3273,12 +3279,13 @@ Error DIEVisitor::materializeAbbrevDIE(unsigned AbbrevIdx) {
32733279
/// implementation of a Depth First Search, and this function is used to
32743280
/// simulate a return from a recursive callback, by restoring the locals to a
32753281
/// previous stack frame.
3276-
static void popStack(BinaryStreamReader &Reader, StringRef &Data,
3282+
static void popStack(DataExtractor &Extractor, DataExtractor::Cursor &Cursor,
3283+
StringRef &Data,
32773284
std::stack<std::pair<StringRef, unsigned>> &StackOfNodes) {
32783285
auto DataAndOffset = StackOfNodes.top();
3279-
Reader = BinaryStreamReader(DataAndOffset.first, support::endianness::little);
3286+
Extractor = DataExtractor(DataAndOffset.first, true, 8);
32803287
Data = DataAndOffset.first;
3281-
Reader.setOffset(DataAndOffset.second);
3288+
Cursor.seek(DataAndOffset.second);
32823289
StackOfNodes.pop();
32833290
}
32843291

@@ -3292,11 +3299,13 @@ Error DIEVisitor::visitDIERef(ArrayRef<DIEDataRef> &DIEChildrenStack) {
32923299
std::stack<std::pair<StringRef, unsigned>> StackOfNodes;
32933300
auto Data = DIEChildrenStack.empty() ? StringRef()
32943301
: DIEChildrenStack.front().getData();
3295-
BinaryStreamReader Reader(Data, support::endianness::little);
3302+
DataExtractor Extractor(Data, true, 8);
3303+
DataExtractor::Cursor Cursor(0);
32963304

3297-
while (!DistinctReader.empty()) {
3305+
while (!DistinctExtractor.eof(DistinctCursor)) {
32983306

3299-
Expected<uint64_t> MaybeAbbrevIdx = readAbbrevIdx(DistinctReader);
3307+
Expected<uint64_t> MaybeAbbrevIdx =
3308+
readAbbrevIdx(DistinctExtractor, DistinctCursor);
33003309
if (!MaybeAbbrevIdx)
33013310
return MaybeAbbrevIdx.takeError();
33023311
auto AbbrevIdx = *MaybeAbbrevIdx;
@@ -3306,20 +3315,21 @@ Error DIEVisitor::visitDIERef(ArrayRef<DIEDataRef> &DIEChildrenStack) {
33063315
// continue materialization of the parent's siblings that may exist.
33073316
if (AbbrevIdx == getEndOfDIESiblingsMarker()) {
33083317
EndTagCallback(true /*HadChildren*/);
3309-
if (!StackOfNodes.empty() && Reader.empty())
3310-
popStack(Reader, Data, StackOfNodes);
3318+
if (!StackOfNodes.empty() && Extractor.eof(Cursor))
3319+
popStack(Extractor, Cursor, Data, StackOfNodes);
33113320
continue;
33123321
}
33133322

33143323
// If we see a DIEInAnotherBlockMarker, we know that the next DIE is in
33153324
// another CAS Block, we have to push the current CAS Object on the stack,
33163325
// and materialize the next DIE from the DIEChildrenStack.
33173326
if (AbbrevIdx == getDIEInAnotherBlockMarker()) {
3318-
StackOfNodes.push(std::make_pair(Data, Reader.getOffset()));
3327+
StackOfNodes.push(std::make_pair(Data, Cursor.tell()));
33193328
DIEChildrenStack = DIEChildrenStack.drop_front();
33203329
Data = DIEChildrenStack.front().getData();
33213330
NewBlockCallback(DIEChildrenStack.front().getID().toString());
3322-
Reader = BinaryStreamReader(Data, support::endianness::little);
3331+
Extractor = DataExtractor(Data, true, 8);
3332+
Cursor.seek(0);
33233333
continue;
33243334
}
33253335

@@ -3328,16 +3338,16 @@ Error DIEVisitor::visitDIERef(ArrayRef<DIEDataRef> &DIEChildrenStack) {
33283338
AbbrevEntryCache[decodeAbbrevIndexAsAbbrevSetIdx(AbbrevIdx)];
33293339
StartTagCallback(AbbrevEntryCacheVal.Tag, AbbrevIdx);
33303340

3331-
if (auto E =
3332-
visitDIEAttrs(Reader, Data, AbbrevEntryCacheVal.AbbrevContents))
3341+
if (auto E = visitDIEAttrs(Extractor, Cursor, Data,
3342+
AbbrevEntryCacheVal.AbbrevContents))
33333343
return E;
33343344

33353345
// If the current DIE doesn't have any children, the current CAS Object will
33363346
// not contain any more data, pop the stack to continue materializing its
33373347
// parent's siblings that may exist.
33383348
if (!AbbrevEntryCacheVal.HasChildren) {
3339-
if (!StackOfNodes.empty() && Reader.empty())
3340-
popStack(Reader, Data, StackOfNodes);
3349+
if (!StackOfNodes.empty() && Extractor.eof(Cursor))
3350+
popStack(Extractor, Cursor, Data, StackOfNodes);
33413351
EndTagCallback(false /*HadChildren*/);
33423352
}
33433353
}
@@ -3346,8 +3356,9 @@ Error DIEVisitor::visitDIERef(ArrayRef<DIEDataRef> &DIEChildrenStack) {
33463356

33473357
Error DIEVisitor::visitDIERef(DIEDedupeTopLevelRef StartDIERef) {
33483358

3349-
auto Offset = DistinctReader.getOffset();
3350-
Expected<uint64_t> MaybeAbbrevIdx = readAbbrevIdx(DistinctReader);
3359+
auto Offset = DistinctCursor.tell();
3360+
Expected<uint64_t> MaybeAbbrevIdx =
3361+
readAbbrevIdx(DistinctExtractor, DistinctCursor);
33513362
if (!MaybeAbbrevIdx)
33523363
return MaybeAbbrevIdx.takeError();
33533364
auto AbbrevIdx = *MaybeAbbrevIdx;
@@ -3356,7 +3367,7 @@ Error DIEVisitor::visitDIERef(DIEDedupeTopLevelRef StartDIERef) {
33563367
assert(AbbrevIdx != getEndOfDIESiblingsMarker() &&
33573368
AbbrevIdx != getDIEInAnotherBlockMarker());
33583369

3359-
DistinctReader.setOffset(Offset);
3370+
DistinctCursor.seek(Offset);
33603371

33613372
NewBlockCallback(StartDIERef.getID().toString());
33623373

@@ -3394,34 +3405,36 @@ Error mccasformats::v1::visitDebugInfo(
33943405
compression::zlib::decompress(BuffRef, OutBuff, UncompressedSize))
33953406
return E;
33963407
DistinctData = toStringRef(OutBuff);
3397-
BinaryStreamReader DistinctReader(DistinctData, support::endianness::little);
3398-
#else
3399-
BinaryStreamReader DistinctReader(DistinctData, support::endianness::little);
34003408
#endif
3401-
ArrayRef<char> HeaderData;
3409+
DataExtractor DistinctExtractor(DistinctData, true, 8);
3410+
DataExtractor::Cursor DistinctCursor(0);
34023411

3403-
auto BeginOffset = DistinctReader.getOffset();
3404-
auto Size = getSizeFromDwarfHeader(DistinctReader);
3412+
auto Size = getSizeFromDwarfHeader(DistinctExtractor, DistinctCursor);
34053413
if (!Size)
34063414
return Size.takeError();
34073415

34083416
// 2-byte Dwarf version identifier.
3409-
uint16_t DwarfVersion;
3410-
if (auto E = DistinctReader.readInteger(DwarfVersion))
3411-
return E;
3412-
3413-
DistinctReader.setOffset(BeginOffset);
3417+
uint16_t DwarfVersion = DistinctExtractor.getU16(DistinctCursor);
3418+
DistinctCursor.seek(0);
34143419

3415-
if (auto E = DistinctReader.readArray(
3416-
HeaderData,
3417-
DwarfVersion >= 5 ? Dwarf5HeaderSize32Bit : Dwarf4HeaderSize32Bit))
3418-
return E;
3419-
HeaderCallback(toStringRef(HeaderData));
3420+
StringRef HeaderData = DistinctExtractor.getBytes(
3421+
DistinctCursor,
3422+
DwarfVersion >= 5 ? Dwarf5HeaderSize32Bit : Dwarf4HeaderSize32Bit);
3423+
if (!DistinctCursor)
3424+
return DistinctCursor.takeError();
3425+
HeaderCallback(HeaderData);
34203426

34213427
append_range(TotAbbrevEntries, LoadedTopRef->AbbrevEntries);
3422-
DIEVisitor Visitor{DwarfVersion, {}, TotAbbrevEntries,
3423-
DistinctReader, DistinctData, HeaderCallback,
3424-
StartTagCallback, AttrCallback, EndTagCallback,
3428+
DIEVisitor Visitor{DwarfVersion,
3429+
{},
3430+
TotAbbrevEntries,
3431+
DistinctExtractor,
3432+
DataExtractor::Cursor(DistinctCursor.tell()),
3433+
DistinctData,
3434+
HeaderCallback,
3435+
StartTagCallback,
3436+
AttrCallback,
3437+
EndTagCallback,
34253438
NewBlockCallback};
34263439
return Visitor.visitDIERef(LoadedTopRef->RootDIE);
34273440
}

0 commit comments

Comments
 (0)