Skip to content

Cache Abbreviation entries after materializing them to speedup MCCAS replay #7852

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 19, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
194 changes: 161 additions & 33 deletions llvm/lib/MCCAS/MCCASObjectV1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3098,11 +3098,48 @@ mccasformats::v1::loadDIETopLevel(DIETopLevelRef TopLevelRef) {
}

struct DIEVisitor {

struct AbbrevContent {
dwarf::Attribute Attr;
dwarf::Form Form;
bool FormInDistinctData;
std::optional<uint8_t> FormSize;
};

struct AbbrevEntry {
dwarf::Tag Tag;
bool HasChildren;
SmallVector<AbbrevContent> AbbrevContents;
};

DIEVisitor() = delete;

DIEVisitor(ArrayRef<StringRef> AbbrevEntries,
BinaryStreamReader DistinctReader, StringRef DistinctData,
std::function<void(StringRef)> HeaderCallback,
std::function<void(dwarf::Tag, uint64_t)> StartTagCallback,
std::function<void(dwarf::Attribute, dwarf::Form, StringRef, bool)>
AttrCallback,
std::function<void(bool)> EndTagCallback,
std::function<void(StringRef)> NewBlockCallback)
: AbbrevEntries(AbbrevEntries), DistinctReader(DistinctReader),
DistinctData(DistinctData), HeaderCallback(HeaderCallback),
StartTagCallback(StartTagCallback), AttrCallback(AttrCallback),
EndTagCallback(EndTagCallback), NewBlockCallback(NewBlockCallback) {
AbbrevEntryCache.reserve(AbbrevEntries.size());
for (unsigned I = 0; I < AbbrevEntries.size(); I++) {
if (Error E = materializeAbbrevDIE(encodeAbbrevIndex(I)))
report_fatal_error(std::move(E));

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that we are crashing the compiler on invalid input.
I don't remember how this used to be done in previous iterations of this PR, but we could (maybe in a separate PR) move this out of DIEVisitor entirely; the DIEVisitor could take the already-cached abbreviations

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, once we move to the flat patch, we can do this materialization in visitDIERef

}
}

Error visitDIERef(DIEDedupeTopLevelRef Ref);
Error visitDIERef(ArrayRef<DIEDataRef> &DIEChildrenStack);
Error visitDIEAttrs(AbbrevEntryReader &AbbrevReader,
BinaryStreamReader &Reader, StringRef DIEData);
Error visitDIEAttrs(BinaryStreamReader &DataReader, StringRef DIEData,
ArrayRef<AbbrevContent> DIEContents);
Error materializeAbbrevDIE(unsigned AbbrevIdx);

SmallVector<AbbrevEntry> AbbrevEntryCache;
ArrayRef<StringRef> AbbrevEntries;
BinaryStreamReader DistinctReader;
StringRef DistinctData;
Expand All @@ -3115,38 +3152,31 @@ struct DIEVisitor {
std::function<void(StringRef)> NewBlockCallback;
};

Error DIEVisitor::visitDIEAttrs(AbbrevEntryReader &AbbrevReader,
BinaryStreamReader &DataReader,
StringRef DIEData) {
Error DIEVisitor::visitDIEAttrs(BinaryStreamReader &DataReader,
StringRef DIEData,
ArrayRef<AbbrevContent> DIEContents) {
constexpr auto IsLittleEndian = true;
constexpr auto AddrSize = 8;
constexpr auto FormParams =
dwarf::FormParams{4 /*Version*/, AddrSize, dwarf::DwarfFormat::DWARF32};

while (true) {
Expected<dwarf::Attribute> Attr = AbbrevReader.readAttr();
if (!Attr)
return Attr.takeError();
if (*Attr == getEndOfAttributesMarker())
break;

Expected<dwarf::Form> Form = AbbrevReader.readForm();
if (!Form)
return Form.takeError();

bool DataInDistinct = doesntDedup(*Form, *Attr);
for (auto Contents : DIEContents) {
bool DataInDistinct = Contents.FormInDistinctData;
auto &ReaderForData = DataInDistinct ? DistinctReader : DataReader;
StringRef DataToUse = DataInDistinct ? DistinctData : DIEData;
Expected<uint64_t> FormSize =
getFormSize(*Form, FormParams, DataToUse, ReaderForData.getOffset(),
IsLittleEndian, AddrSize);
Contents.FormSize
? *Contents.FormSize
: getFormSize(Contents.Form, FormParams, DataToUse,
ReaderForData.getOffset(), IsLittleEndian, AddrSize);
if (!FormSize)
return FormSize.takeError();

ArrayRef<char> RawBytes;
if (auto E = ReaderForData.readArray(RawBytes, *FormSize))
return E;
AttrCallback(*Attr, *Form, toStringRef(RawBytes), DataInDistinct);
AttrCallback(Contents.Attr, Contents.Form, toStringRef(RawBytes),
DataInDistinct);
}
return Error::success();
}
Expand All @@ -3165,6 +3195,111 @@ static AbbrevEntryReader getAbbrevEntryReader(ArrayRef<StringRef> AbbrevEntries,
return AbbrevEntryReader(AbbrevData);
}

static std::optional<uint8_t> getNonULEBFormSize(dwarf::Form Form,
dwarf::FormParams FP) {
switch (Form) {
case dwarf::DW_FORM_addr:
return FP.AddrSize;
case dwarf::DW_FORM_ref_addr:
return FP.getRefAddrByteSize();
case dwarf::DW_FORM_exprloc:
case dwarf::DW_FORM_block:
case dwarf::DW_FORM_block1:
case dwarf::DW_FORM_block2:
case dwarf::DW_FORM_block4:
case dwarf::DW_FORM_sdata:
case dwarf::DW_FORM_udata:
case dwarf::DW_FORM_ref_udata:
case dwarf::DW_FORM_ref4_cas:
case dwarf::DW_FORM_strp_cas:
case dwarf::DW_FORM_rnglistx:
case dwarf::DW_FORM_loclistx:
case dwarf::DW_FORM_GNU_addr_index:
case dwarf::DW_FORM_GNU_str_index:
case dwarf::DW_FORM_addrx:
case dwarf::DW_FORM_strx:
case dwarf::DW_FORM_LLVM_addrx_offset:
case dwarf::DW_FORM_string:
case dwarf::DW_FORM_indirect:
return std::nullopt;

case dwarf::DW_FORM_implicit_const:
case dwarf::DW_FORM_flag_present:
return 0;
case dwarf::DW_FORM_data1:
case dwarf::DW_FORM_ref1:
case dwarf::DW_FORM_flag:
case dwarf::DW_FORM_strx1:
case dwarf::DW_FORM_addrx1:
return 1;
case dwarf::DW_FORM_data2:
case dwarf::DW_FORM_ref2:
case dwarf::DW_FORM_strx2:
case dwarf::DW_FORM_addrx2:
return 2;
case dwarf::DW_FORM_strx3:
return 3;
case dwarf::DW_FORM_data4:
case dwarf::DW_FORM_ref4:
case dwarf::DW_FORM_ref_sup4:
case dwarf::DW_FORM_strx4:
case dwarf::DW_FORM_addrx4:
return 4;
case dwarf::DW_FORM_ref_sig8:
case dwarf::DW_FORM_data8:
case dwarf::DW_FORM_ref8:
case dwarf::DW_FORM_ref_sup8:
return 8;
case dwarf::DW_FORM_data16:
return 16;
case dwarf::DW_FORM_strp:
case dwarf::DW_FORM_sec_offset:
case dwarf::DW_FORM_GNU_ref_alt:
case dwarf::DW_FORM_GNU_strp_alt:
case dwarf::DW_FORM_line_strp:
case dwarf::DW_FORM_strp_sup:
return FP.getDwarfOffsetByteSize();
case dwarf::DW_FORM_addrx3:
case dwarf::DW_FORM_lo_user:
llvm_unreachable("usupported form");
break;
}
}

Error DIEVisitor::materializeAbbrevDIE(unsigned AbbrevIdx) {
constexpr auto AddrSize = 8;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where does this come from? Should it be hardcoded?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For all of our purposes the AddressSize is always 8 bytes on MachO, if you see DIEVisitor::visitDIEAttrs, the same is true there

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In fact, if you see getAndSetDebugAbbrevOffsetAndSkip, we have a check to make sure that the address size is 8

if (AddressSize != 8)
      return createStringError(
          inconvertibleErrorCode(),
          "Address size is not 8 bytes, unsupported architecture for MCCAS!");

constexpr auto FormParams =
dwarf::FormParams{4 /*Version*/, AddrSize, dwarf::DwarfFormat::DWARF32};

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is that also the version for DWARF 5?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just realized, what happens if you use MCCAS with a dwarf 2 binary? Is there a graceful degradation path?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess we never really addressed the dwarf version issue.
We need to do that asap but separately

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with Felipe, the DWARF version is a separate issue and I will put up a separate patch for that


AbbrevEntryReader AbbrevReader =
getAbbrevEntryReader(AbbrevEntries, AbbrevIdx);
Expected<dwarf::Tag> MaybeTag = AbbrevReader.readTag();
if (!MaybeTag)
return MaybeTag.takeError();

Expected<bool> MaybeHasChildren = AbbrevReader.readHasChildren();
if (!MaybeHasChildren)
return MaybeHasChildren.takeError();

SmallVector<AbbrevContent> AbbrevVector;
while (true) {
Expected<dwarf::Attribute> Attr = AbbrevReader.readAttr();
if (!Attr)
return Attr.takeError();
if (*Attr == getEndOfAttributesMarker())
break;

Expected<dwarf::Form> Form = AbbrevReader.readForm();
if (!Form)
return Form.takeError();
AbbrevVector.push_back({*Attr, *Form, doesntDedup(*Form, *Attr),
getNonULEBFormSize(*Form, FormParams)});
}
AbbrevEntryCache.push_back(
{*MaybeTag, *MaybeHasChildren, std::move(AbbrevVector)});
return Error::success();
}

/// Restores the state of the \p Reader and \p Data
/// arguments to a previous state. The algorithm in visitDIERefs is an iterative
/// implementation of a Depth First Search, and this function is used to
Expand Down Expand Up @@ -3217,25 +3352,18 @@ Error DIEVisitor::visitDIERef(ArrayRef<DIEDataRef> &DIEChildrenStack) {
}

// If we have a legitimate AbbrevIdx, materialize the current DIE.
AbbrevEntryReader AbbrevReader =
getAbbrevEntryReader(AbbrevEntries, AbbrevIdx);

if (Expected<dwarf::Tag> MaybeTag = AbbrevReader.readTag())
StartTagCallback(*MaybeTag, AbbrevIdx);
else
return MaybeTag.takeError();

Expected<bool> MaybeHasChildren = AbbrevReader.readHasChildren();
if (!MaybeHasChildren)
return MaybeHasChildren.takeError();
auto &AbbrevEntryCacheVal =
AbbrevEntryCache[decodeAbbrevIndexAsAbbrevSetIdx(AbbrevIdx)];
StartTagCallback(AbbrevEntryCacheVal.Tag, AbbrevIdx);

if (auto E = visitDIEAttrs(AbbrevReader, Reader, Data))
if (auto E =
visitDIEAttrs(Reader, Data, AbbrevEntryCacheVal.AbbrevContents))
return E;

// If the current DIE doesn't have any children, the current CAS Object will
// not contain any more data, pop the stack to continue materializing its
// parent's siblings that may exist.
if (!*MaybeHasChildren) {
if (!AbbrevEntryCacheVal.HasChildren) {
if (!StackOfNodes.empty() && Reader.empty())
popStack(Reader, Data, StackOfNodes);
EndTagCallback(false /*HadChildren*/);
Expand Down