-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[llvm-dwarfdump] Make --verify for .debug_names multithreaded. #127281
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-debuginfo @llvm/pr-subscribers-pgo Author: Alexander Yermolovich (ayermolo) ChangesThis PR makes verification of .debug_names acceleration table multithreaded. In local testing it improves verification of clang .debug_names from four minutes to under a minute. Single Thread Patch is 44.92 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/127281.diff 11 Files Affected:
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
index 9d7ac12cefdc8..ccd8ac147a287 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
@@ -770,11 +770,12 @@ class DWARFDebugNames : public DWARFAcceleratorTable {
}
public:
+ using size_type = size_t;
using iterator_category = std::input_iterator_tag;
using value_type = NameTableEntry;
using difference_type = uint32_t;
using pointer = NameTableEntry *;
- using reference = NameTableEntry; // We return entries by value.
+ using reference = NameTableEntry;
/// Creates an iterator whose initial position is name CurrentName in
/// CurrentIndex.
@@ -793,6 +794,14 @@ class DWARFDebugNames : public DWARFAcceleratorTable {
next();
return I;
}
+ reference operator[](size_type idx) {
+ return CurrentIndex->getNameTableEntry(idx + 1);
+ }
+
+ difference_type operator-(const NameIterator &other) const {
+ assert(CurrentIndex == other.CurrentIndex);
+ return this->CurrentName - other.CurrentName;
+ }
friend bool operator==(const NameIterator &A, const NameIterator &B) {
return A.CurrentIndex == B.CurrentIndex && A.CurrentName == B.CurrentName;
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
index 7b51bb63cd15b..27a790ac6bec4 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
@@ -16,6 +16,7 @@
#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
#include <cstdint>
#include <map>
+#include <mutex>
#include <set>
namespace llvm {
@@ -32,7 +33,9 @@ struct DWARFSection;
class OutputCategoryAggregator {
private:
+ std::mutex WriteMutex;
std::map<std::string, unsigned> Aggregation;
+ unsigned NumErrors = 0;
bool IncludeDetail;
public:
@@ -42,6 +45,8 @@ class OutputCategoryAggregator {
size_t GetNumCategories() const { return Aggregation.size(); }
void Report(StringRef s, std::function<void()> detailCallback);
void EnumerateResults(std::function<void(StringRef, unsigned)> handleCounts);
+ /// Return the number of errors that have been reported.
+ unsigned GetNumErrors() const { return NumErrors; }
};
/// A class that verifies DWARF debug information given a DWARF Context.
@@ -104,6 +109,7 @@ class DWARFVerifier {
bool IsObjectFile;
bool IsMachOObject;
using ReferenceMap = std::map<uint64_t, std::set<uint64_t>>;
+ std::mutex AccessMutex;
raw_ostream &error() const;
raw_ostream &warn() const;
@@ -264,21 +270,22 @@ class DWARFVerifier {
/// \param SectionName the name of the table we're verifying
///
/// \returns The number of errors occurred during verification
- unsigned verifyAppleAccelTable(const DWARFSection *AccelSection,
- DataExtractor *StrData,
- const char *SectionName);
-
- unsigned verifyDebugNamesCULists(const DWARFDebugNames &AccelTable);
- unsigned verifyNameIndexBuckets(const DWARFDebugNames::NameIndex &NI,
- const DataExtractor &StrData);
- unsigned verifyNameIndexAbbrevs(const DWARFDebugNames::NameIndex &NI);
- unsigned verifyNameIndexAttribute(const DWARFDebugNames::NameIndex &NI,
- const DWARFDebugNames::Abbrev &Abbr,
- DWARFDebugNames::AttributeEncoding AttrEnc);
- unsigned verifyNameIndexEntries(const DWARFDebugNames::NameIndex &NI,
- const DWARFDebugNames::NameTableEntry &NTE);
- unsigned verifyNameIndexCompleteness(const DWARFDie &Die,
- const DWARFDebugNames::NameIndex &NI);
+ void verifyAppleAccelTable(const DWARFSection *AccelSection,
+ DataExtractor *StrData, const char *SectionName);
+
+ void verifyDebugNamesCULists(const DWARFDebugNames &AccelTable);
+ void verifyNameIndexBuckets(const DWARFDebugNames::NameIndex &NI,
+ const DataExtractor &StrData);
+ void verifyNameIndexAbbrevs(const DWARFDebugNames::NameIndex &NI);
+ void verifyNameIndexAttribute(const DWARFDebugNames::NameIndex &NI,
+ const DWARFDebugNames::Abbrev &Abbr,
+ DWARFDebugNames::AttributeEncoding AttrEnc);
+ void verifyNameIndexEntries(
+ const DWARFDebugNames::NameIndex &NI,
+ const DWARFDebugNames::NameTableEntry &NTE,
+ const DenseMap<uint64_t, DWARFUnit *> &CUOffsetsToDUMap);
+ void verifyNameIndexCompleteness(const DWARFDie &Die,
+ const DWARFDebugNames::NameIndex &NI);
/// Verify that the DWARF v5 accelerator table is valid.
///
@@ -297,8 +304,8 @@ class DWARFVerifier {
/// \param StrData string section
///
/// \returns The number of errors occurred during verification
- unsigned verifyDebugNames(const DWARFSection &AccelSection,
- const DataExtractor &StrData);
+ void verifyDebugNames(const DWARFSection &AccelSection,
+ const DataExtractor &StrData);
public:
DWARFVerifier(raw_ostream &S, DWARFContext &D,
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
index 107e79cc5a05a..db7d159f492fc 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
@@ -33,10 +33,12 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/JSON.h"
+#include "llvm/Support/Parallel.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
#include <set>
+#include <unordered_set>
#include <vector>
using namespace llvm;
@@ -1106,10 +1108,9 @@ bool DWARFVerifier::handleDebugLine() {
return NumDebugLineErrors == 0;
}
-unsigned DWARFVerifier::verifyAppleAccelTable(const DWARFSection *AccelSection,
- DataExtractor *StrData,
- const char *SectionName) {
- unsigned NumErrors = 0;
+void DWARFVerifier::verifyAppleAccelTable(const DWARFSection *AccelSection,
+ DataExtractor *StrData,
+ const char *SectionName) {
DWARFDataExtractor AccelSectionData(DCtx.getDWARFObj(), *AccelSection,
DCtx.isLittleEndian(), 0);
AppleAcceleratorTable AccelTable(AccelSectionData, *StrData);
@@ -1121,7 +1122,7 @@ unsigned DWARFVerifier::verifyAppleAccelTable(const DWARFSection *AccelSection,
ErrorCategory.Report("Section is too small to fit a section header", [&]() {
error() << "Section is too small to fit a section header.\n";
});
- return 1;
+ return;
}
// Verify that the section is not too short.
@@ -1129,7 +1130,7 @@ unsigned DWARFVerifier::verifyAppleAccelTable(const DWARFSection *AccelSection,
std::string Msg = toString(std::move(E));
ErrorCategory.Report("Section is too small to fit a section header",
[&]() { error() << Msg << '\n'; });
- return 1;
+ return;
}
// Verify that all buckets have a valid hash index or are empty.
@@ -1147,7 +1148,6 @@ unsigned DWARFVerifier::verifyAppleAccelTable(const DWARFSection *AccelSection,
error() << format("Bucket[%d] has invalid hash index: %u.\n", BucketIdx,
HashIdx);
});
- ++NumErrors;
}
}
uint32_t NumAtoms = AccelTable.getAtomsDesc().size();
@@ -1155,13 +1155,13 @@ unsigned DWARFVerifier::verifyAppleAccelTable(const DWARFSection *AccelSection,
ErrorCategory.Report("No atoms", [&]() {
error() << "No atoms: failed to read HashData.\n";
});
- return 1;
+ return;
}
if (!AccelTable.validateForms()) {
ErrorCategory.Report("Unsupported form", [&]() {
error() << "Unsupported form: failed to read HashData.\n";
});
- return 1;
+ return;
}
for (uint32_t HashIdx = 0; HashIdx < NumHashes; ++HashIdx) {
@@ -1176,7 +1176,6 @@ unsigned DWARFVerifier::verifyAppleAccelTable(const DWARFSection *AccelSection,
"0x%08" PRIx64 ".\n",
HashIdx, HashDataOffset);
});
- ++NumErrors;
}
uint64_t StrpOffset;
@@ -1207,8 +1206,6 @@ unsigned DWARFVerifier::verifyAppleAccelTable(const DWARFSection *AccelSection,
SectionName, BucketIdx, HashIdx, Hash, StringCount, StrpOffset,
HashDataIdx, Offset, Name);
});
-
- ++NumErrors;
continue;
}
if ((Tag != dwarf::DW_TAG_null) && (Die.getTag() != Tag)) {
@@ -1218,74 +1215,70 @@ unsigned DWARFVerifier::verifyAppleAccelTable(const DWARFSection *AccelSection,
<< dwarf::TagString(Die.getTag()) << " of DIE["
<< HashDataIdx << "].\n";
});
- ++NumErrors;
}
}
- ++StringCount;
}
}
- return NumErrors;
}
-unsigned
-DWARFVerifier::verifyDebugNamesCULists(const DWARFDebugNames &AccelTable) {
+void DWARFVerifier::verifyDebugNamesCULists(const DWARFDebugNames &AccelTable) {
// A map from CU offset to the (first) Name Index offset which claims to index
// this CU.
DenseMap<uint64_t, uint64_t> CUMap;
- const uint64_t NotIndexed = std::numeric_limits<uint64_t>::max();
-
CUMap.reserve(DCtx.getNumCompileUnits());
+
+ std::unordered_set<uint64_t> CUOffsets;
for (const auto &CU : DCtx.compile_units())
- CUMap[CU->getOffset()] = NotIndexed;
+ CUOffsets.insert(CU->getOffset());
- unsigned NumErrors = 0;
- for (const DWARFDebugNames::NameIndex &NI : AccelTable) {
+ parallelForEach(AccelTable, [&](const DWARFDebugNames::NameIndex &NI) {
if (NI.getCUCount() == 0) {
ErrorCategory.Report("Name Index doesn't index any CU", [&]() {
error() << formatv("Name Index @ {0:x} does not index any CU\n",
NI.getUnitOffset());
});
- ++NumErrors;
- continue;
+ return;
}
for (uint32_t CU = 0, End = NI.getCUCount(); CU < End; ++CU) {
uint64_t Offset = NI.getCUOffset(CU);
- auto Iter = CUMap.find(Offset);
-
- if (Iter == CUMap.end()) {
+ if (!CUOffsets.count(Offset)) {
ErrorCategory.Report("Name Index references non-existing CU", [&]() {
error() << formatv(
"Name Index @ {0:x} references a non-existing CU @ {1:x}\n",
NI.getUnitOffset(), Offset);
});
- ++NumErrors;
- continue;
+ return;
}
-
- if (Iter->second != NotIndexed) {
+ uint64_t DuplicateCUOffset = 0;
+ {
+ std::lock_guard<std::mutex> Lock(AccessMutex);
+ auto Iter = CUMap.find(Offset);
+ if (Iter != CUMap.end())
+ DuplicateCUOffset = Iter->second;
+ else
+ CUMap[Offset] = NI.getUnitOffset();
+ }
+ if (DuplicateCUOffset) {
ErrorCategory.Report("Duplicate Name Index", [&]() {
error() << formatv(
"Name Index @ {0:x} references a CU @ {1:x}, but "
"this CU is already indexed by Name Index @ {2:x}\n",
- NI.getUnitOffset(), Offset, Iter->second);
+ NI.getUnitOffset(), Offset, DuplicateCUOffset);
});
- continue;
+ return;
}
- Iter->second = NI.getUnitOffset();
}
- }
+ });
- for (const auto &KV : CUMap) {
- if (KV.second == NotIndexed)
- warn() << formatv("CU @ {0:x} not covered by any Name Index\n", KV.first);
+ for (const auto &CU : DCtx.compile_units()) {
+ if (CUMap.count(CU->getOffset()) == 0)
+ warn() << formatv("CU @ {0:x} not covered by any Name Index\n",
+ CU->getOffset());
}
-
- return NumErrors;
}
-unsigned
-DWARFVerifier::verifyNameIndexBuckets(const DWARFDebugNames::NameIndex &NI,
- const DataExtractor &StrData) {
+void DWARFVerifier::verifyNameIndexBuckets(const DWARFDebugNames::NameIndex &NI,
+ const DataExtractor &StrData) {
struct BucketInfo {
uint32_t Bucket;
uint32_t Index;
@@ -1295,17 +1288,17 @@ DWARFVerifier::verifyNameIndexBuckets(const DWARFDebugNames::NameIndex &NI,
bool operator<(const BucketInfo &RHS) const { return Index < RHS.Index; }
};
- uint32_t NumErrors = 0;
if (NI.getBucketCount() == 0) {
warn() << formatv("Name Index @ {0:x} does not contain a hash table.\n",
NI.getUnitOffset());
- return NumErrors;
+ return;
}
// Build up a list of (Bucket, Index) pairs. We use this later to verify that
// each Name is reachable from the appropriate bucket.
std::vector<BucketInfo> BucketStarts;
BucketStarts.reserve(NI.getBucketCount() + 1);
+ const unsigned OrigNumberOfErrors = ErrorCategory.GetNumErrors();
for (uint32_t Bucket = 0, End = NI.getBucketCount(); Bucket < End; ++Bucket) {
uint32_t Index = NI.getBucketArrayEntry(Bucket);
if (Index > NI.getNameCount()) {
@@ -1315,7 +1308,6 @@ DWARFVerifier::verifyNameIndexBuckets(const DWARFDebugNames::NameIndex &NI,
Bucket, NI.getUnitOffset(), Index,
NI.getNameCount());
});
- ++NumErrors;
continue;
}
if (Index > 0)
@@ -1325,8 +1317,8 @@ DWARFVerifier::verifyNameIndexBuckets(const DWARFDebugNames::NameIndex &NI,
// If there were any buckets with invalid values, skip further checks as they
// will likely produce many errors which will only confuse the actual root
// problem.
- if (NumErrors > 0)
- return NumErrors;
+ if (OrigNumberOfErrors != ErrorCategory.GetNumErrors())
+ return;
// Sort the list in the order of increasing "Index" entries.
array_pod_sort(BucketStarts.begin(), BucketStarts.end());
@@ -1352,7 +1344,6 @@ DWARFVerifier::verifyNameIndexBuckets(const DWARFDebugNames::NameIndex &NI,
"are not covered by the hash table.\n",
NI.getUnitOffset(), NextUncovered, B.Index - 1);
});
- ++NumErrors;
}
uint32_t Idx = B.Index;
@@ -1374,7 +1365,6 @@ DWARFVerifier::verifyNameIndexBuckets(const DWARFDebugNames::NameIndex &NI,
NI.getUnitOffset(), B.Bucket, FirstHash,
FirstHash % NI.getBucketCount());
});
- ++NumErrors;
}
// This find the end of this bucket and also verifies that all the hashes in
@@ -1395,17 +1385,15 @@ DWARFVerifier::verifyNameIndexBuckets(const DWARFDebugNames::NameIndex &NI,
"the Name Index hash is {4:x}\n",
NI.getUnitOffset(), Str, Idx, caseFoldingDjbHash(Str), Hash);
});
- ++NumErrors;
}
-
++Idx;
}
NextUncovered = std::max(NextUncovered, Idx);
}
- return NumErrors;
+ return;
}
-unsigned DWARFVerifier::verifyNameIndexAttribute(
+void DWARFVerifier::verifyNameIndexAttribute(
const DWARFDebugNames::NameIndex &NI, const DWARFDebugNames::Abbrev &Abbr,
DWARFDebugNames::AttributeEncoding AttrEnc) {
StringRef FormName = dwarf::FormEncodingString(AttrEnc.Form);
@@ -1416,7 +1404,7 @@ unsigned DWARFVerifier::verifyNameIndexAttribute(
NI.getUnitOffset(), Abbr.Code, AttrEnc.Index,
AttrEnc.Form);
});
- return 1;
+ return;
}
if (AttrEnc.Index == DW_IDX_type_hash) {
@@ -1427,9 +1415,9 @@ unsigned DWARFVerifier::verifyNameIndexAttribute(
"uses an unexpected form {2} (should be {3}).\n",
NI.getUnitOffset(), Abbr.Code, AttrEnc.Form, dwarf::DW_FORM_data8);
});
- return 1;
+ return;
}
- return 0;
+ return;
}
if (AttrEnc.Index == dwarf::DW_IDX_parent) {
@@ -1443,9 +1431,9 @@ unsigned DWARFVerifier::verifyNameIndexAttribute(
"DW_FORM_ref4 or DW_FORM_flag_present).\n",
NI.getUnitOffset(), Abbr.Code, AttrEnc.Form);
});
- return 1;
+ return;
}
- return 0;
+ return;
}
// A list of known index attributes and their expected form classes.
@@ -1470,7 +1458,7 @@ unsigned DWARFVerifier::verifyNameIndexAttribute(
warn() << formatv("NameIndex @ {0:x}: Abbreviation {1:x} contains an "
"unknown index attribute: {2}.\n",
NI.getUnitOffset(), Abbr.Code, AttrEnc.Index);
- return 0;
+ return;
}
if (!DWARFFormValue(AttrEnc.Form).isFormClass(Iter->Class)) {
@@ -1480,14 +1468,13 @@ unsigned DWARFVerifier::verifyNameIndexAttribute(
NI.getUnitOffset(), Abbr.Code, AttrEnc.Index,
AttrEnc.Form, Iter->ClassName);
});
- return 1;
+ return;
}
- return 0;
+ return;
}
-unsigned
-DWARFVerifier::verifyNameIndexAbbrevs(const DWARFDebugNames::NameIndex &NI) {
- unsigned NumErrors = 0;
+void DWARFVerifier::verifyNameIndexAbbrevs(
+ const DWARFDebugNames::NameIndex &NI) {
for (const auto &Abbrev : NI.getAbbrevs()) {
StringRef TagName = dwarf::TagString(Abbrev.Tag);
if (TagName.empty()) {
@@ -1505,10 +1492,9 @@ DWARFVerifier::verifyNameIndexAbbrevs(const DWARFDebugNames::NameIndex &NI) {
"multiple {2} attributes.\n",
NI.getUnitOffset(), Abbrev.Code, AttrEnc.Index);
});
- ++NumErrors;
continue;
}
- NumErrors += verifyNameIndexAttribute(NI, Abbrev, AttrEnc);
+ verifyNameIndexAttribute(NI, Abbrev, AttrEnc);
}
if (NI.getCUCount() > 1 && !Attributes.count(dwarf::DW_IDX_compile_unit) &&
@@ -1519,7 +1505,6 @@ DWARFVerifier::verifyNameIndexAbbrevs(const DWARFDebugNames::NameIndex &NI) {
"or DW_IDX_type_unit attribute.\n",
NI.getUnitOffset(), Abbrev.Code);
});
- ++NumErrors;
}
if (!Attributes.count(dwarf::DW_IDX_die_offset)) {
ErrorCategory.Report("Abbreviate in NameIndex missing attribute", [&]() {
@@ -1527,10 +1512,8 @@ DWARFVerifier::verifyNameIndexAbbrevs(const DWARFDebugNames::NameIndex &NI) {
"NameIndex @ {0:x}: Abbreviation {1:x} has no {2} attribute.\n",
NI.getUnitOffset(), Abbrev.Code, dwarf::DW_IDX_die_offset);
});
- ++NumErrors;
}
}
- return NumErrors;
}
static SmallVector<std::string, 3> getNames(const DWARFDie &DIE,
@@ -1571,9 +1554,10 @@ static SmallVector<std::string, 3> getNames(const DWARFDie &DIE,
return Result;
}
-unsigned DWARFVerifier::verifyNameIndexEntries(
+void DWARFVerifier::verifyNameIndexEntries(
const DWARFDebugNames::NameIndex &NI,
- const DWARFDebugNames::NameTableEntry &NTE) {
+ const DWARFDebugNames::NameTableEntry &NTE,
+ const DenseMap<uint64_t, DWARFUnit *> &CUOffsetsToDUMap) {
const char *CStr = NTE.getString();
if (!CStr) {
ErrorCategory.Report("Unable to get string associated with name", [&]() {
@@ -1581,11 +1565,9 @@ unsigned DWARFVerifier::verifyNameIndexEntries(
"with name {1}.\n",
NI.getUnitOffset(), NTE.getIndex());
});
- return 1;
+ return;
}
StringRef Str(CStr);
-
- unsigned NumErrors = 0;
unsigned NumEntries = 0;
uint64_t EntryID = NTE.getEntryOffset();
uint64_t NextEntryID = EntryID;
@@ -1601,7 +1583,6 @@ unsigned DWARFVerifier::verifyNameIndexEntries(
"invalid CU index ({2}).\n",
NI.getUnitOffset(), EntryID, *CUIndex);
});
- ++NumErrors;
continue;
}
const uint32_t NumLocalTUs = NI.getLocalTUCount();
@@ -1612,7 +1593,6 @@ unsigned DWARFVerifier::verifyNameIndexEntries(
"invalid TU index ({2}).\n",
NI.getUnitOffset(), EntryID, *TUIndex);
});
- ++NumErrors;
continue;
}
std::optional<uint64_t> UnitOffset;
@@ -1640,7 +1620,6 @@ unsigned DWARFVerifier::verifyNameIndexEntries(
"foreign TU index ({2}) with no CU index.\n",
NI.getUnitOffset(), EntryID, *TUIndex);
...
[truncated]
|
a8c3d75
to
421d4b6
Compare
Does it make the output nondeterministic, or does it remain sorted? |
std::mutex WriteMutex; | ||
std::map<std::string, unsigned> Aggregation; | ||
unsigned NumErrors = 0; | ||
bool IncludeDetail; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@ayermolo can you make sure you have the latest. I actually had just modified the output aggregator to start tracking sub-categories for the JSON. See https://github.com/llvm/llvm-project/pull/125062/files
There also were some tests added for JSON that would be good to run. They might be running on the build jobs assuming it would merge up to latest. Less clear how github works 🤷♂️
static opt<unsigned> VerifyNumThreads( | ||
"verify-num-threads", init(hardware_concurrency().compute_thread_count()), | ||
desc("Number of threads to use for --verify."), cat(DwarfDumpCategory)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do the threads only impact verifying debug names? Should that be made clear for end users? If it does have an effect, even better :D
And this is optional, right? (just checking as my C++ is still a bit rusty). Don't want existing commands to break and I see you setting explicitly to '1' in some tests.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes it is optional. Right now it's only for debug-names, but as other parts of verification become parallel it will apply to them also.
It defaults to max threads hardware supports.
# RUN: llvm-dwarfdump --verify %t1/a.out | FileCheck %s | ||
# RUN: llvm-dwarfdump --verify --verify-num-threads 1 %t1/a.out | FileCheck %s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's good that all these tests show that the output with the change and a single thread is the same, but could we add a test where there are two threads?
As Adrian inquired, is the output deterministic? I assume JSON definitely still is, but what about the raw output? @adrian-prantl , is that what you're asking about?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Raw output is not deterministic.
If determinism is required, I think followup PR would be needed to refactor how error reporting is handled.
This would also allow us to limit errors per category.
There are two components to report:
Category that we output to report with counter. In example it's:
Raw output is not deterministic because it is printed as errors encountered. |
To add.
So that category is an enum. |
@@ -903,6 +908,7 @@ int main(int argc, char **argv) { | |||
|
|||
bool Success = true; | |||
if (Verify) { | |||
parallel::strategy = hardware_concurrency(VerifyNumThreads); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Where is this used?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is used inside parallelForEach. It ends up calling llvm::parallelFor in llvm/lib/Support/Parallel.cpp.
If it's set to 1, it uses a regular loop.
@ayermolo I downloaded this patch to try on a large binary and ended up with a seg fault. I'll reach out internally to make sure this is resolved prior to landing. |
7529acf
to
31c5a8b
Compare
Unfortunately adding lock_guard to DWARFVerifier::getNames kills all the performance. :( |
Didn't we have a "peek DIE name" function to address this issue? |
I believe it's in only LLDB. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Might be good to have to specify using multiple threads when verifying. So maybe opt into multi-threaded output to maintain deterministic output by default and allowing users to opt into non-deterministic output for speed? Something like
--verify-multithreaded
If this is specified, then we can take full advantage of the speed at the cost of non-deterministic output.
I added a flag: |
880510e
to
d820d59
Compare
That will be better to avoid anyone having different or unexpected output. If we change the default it can affect users that rely on this. |
This PR makes verification of .debug_names acceleration table multithreaded. In local testing it improves verification of clang .debug_names from four minutes to under a minute. This PR relies on a current mechanism of extracting DIEs into a vector. Future improvements can include creating API to extract DIE at a time, or grouping Entires into buckets by CUs and extracting before parallel step. Single Thread 4:12.37 real, 246.88 user, 3.54 sys, 0 amem, 10232004 mmem 0:49.40 real, 612.84 user, 515.73 sys, 0 amem, 11226292 mmem
… if die should be in NameIndex. For latter it was hitting pathological case with bolted binaries. Making it 7x slower.
verifyDebugNamesCULists. Latter had a small bug that used return. This meant depending on which NI was processed first, it might not get to all the errors.
77773a2
to
be5fb7b
Compare
This PR makes verification of .debug_names acceleration table multithreaded. In local testing it improves verification of clang .debug_names from four minutes to under a minute.
This PR relies on a current mechanism of extracting DIEs into a vector.
Future improvements can include creating API to extract one DIE at a time, or grouping Entires into buckets by CUs and extracting before parallel step.
Single Thread
4:12.37 real, 246.88 user, 3.54 sys, 0 amem,10232004 mmem
Multi Thread
0:49.40 real, 612.84 user, 515.73 sys, 0 amem, 11226292 mmem