-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[Obj2Yaml] Add support for minidump generation with 64b memory ranges. #101272
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-binary-utilities @llvm/pr-subscribers-lldb Author: Jacob Lalonde (Jlalond) ChangesThis PR adds support for Worth noting
Patch is 21.68 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/101272.diff 11 Files Affected:
diff --git a/lldb/test/Shell/Minidump/Inputs/linux-x86_64.yaml b/lldb/test/Shell/Minidump/Inputs/linux-x86_64.yaml
index d2ae6543141e8..157903f368a08 100644
--- a/lldb/test/Shell/Minidump/Inputs/linux-x86_64.yaml
+++ b/lldb/test/Shell/Minidump/Inputs/linux-x86_64.yaml
@@ -1,7 +1,7 @@
--- !minidump
-Streams:
+Streams:
- Type: ModuleList
- Modules:
+ Modules:
- Base of Image: 0x0000000000400000
Size of Image: 0x00001000
Module Name: '/tmp/test/linux-x86_64'
@@ -13,7 +13,7 @@ Streams:
Number of Processors: 40
Platform ID: Linux
CSD Version: 'Linux 3.13.0-91-generic #138-Ubuntu SMP Fri Jun 24 17:00:34 UTC 2016 x86_64'
- CPU:
+ CPU:
Vendor ID: GenuineIntel
Version Info: 0x00000000
Feature Info: 0x00000000
diff --git a/llvm/include/llvm/BinaryFormat/Minidump.h b/llvm/include/llvm/BinaryFormat/Minidump.h
index 9669303252615..8054e81322a92 100644
--- a/llvm/include/llvm/BinaryFormat/Minidump.h
+++ b/llvm/include/llvm/BinaryFormat/Minidump.h
@@ -74,6 +74,18 @@ struct MemoryDescriptor_64 {
support::ulittle64_t StartOfMemoryRange;
support::ulittle64_t DataSize;
};
+static_assert(sizeof(MemoryDescriptor_64) == 16);
+
+struct MemoryListHeader {
+ support::ulittle32_t NumberOfMemoryRanges;
+};
+static_assert(sizeof(MemoryListHeader) == 4);
+
+struct Memory64ListHeader {
+ support::ulittle64_t NumberOfMemoryRanges;
+ support::ulittle64_t BaseRVA;
+};
+static_assert(sizeof(Memory64ListHeader) == 16);
struct MemoryInfoListHeader {
support::ulittle32_t SizeOfHeader;
diff --git a/llvm/include/llvm/Object/Minidump.h b/llvm/include/llvm/Object/Minidump.h
index e45d4de0090de..eedebc10b3fe7 100644
--- a/llvm/include/llvm/Object/Minidump.h
+++ b/llvm/include/llvm/Object/Minidump.h
@@ -15,6 +15,7 @@
#include "llvm/BinaryFormat/Minidump.h"
#include "llvm/Object/Binary.h"
#include "llvm/Support/Error.h"
+#include <map>
namespace llvm {
namespace object {
@@ -52,6 +53,9 @@ class MinidumpFile : public Binary {
return getDataSlice(getData(), Desc.RVA, Desc.DataSize);
}
+ Expected<ArrayRef<uint8_t>>
+ getRawData(minidump::MemoryDescriptor_64 Desc) const;
+
/// Returns the minidump string at the given offset. An error is returned if
/// we fail to parse the string, or the string is invalid UTF16.
Expected<std::string> getString(size_t Offset) const;
@@ -103,6 +107,15 @@ class MinidumpFile : public Binary {
minidump::StreamType::MemoryList);
}
+ /// Returns the header to the memory 64 list stream. An error is returned if
+ /// the file does not contain this stream.
+ Expected<minidump::Memory64ListHeader> getMemoryList64Header() const {
+ return getStream<minidump::Memory64ListHeader>(
+ minidump::StreamType::Memory64List);
+ }
+
+ Expected<ArrayRef<minidump::MemoryDescriptor_64>> getMemory64List();
+
class MemoryInfoIterator
: public iterator_facade_base<MemoryInfoIterator,
std::forward_iterator_tag,
@@ -152,15 +165,15 @@ class MinidumpFile : public Binary {
}
/// Return a slice of the given data array, with bounds checking.
- static Expected<ArrayRef<uint8_t>> getDataSlice(ArrayRef<uint8_t> Data,
- size_t Offset, size_t Size);
+ static Expected<ArrayRef<uint8_t>>
+ getDataSlice(ArrayRef<uint8_t> Data, uint64_t Offset, uint64_t Size);
/// Return the slice of the given data array as an array of objects of the
/// given type. The function checks that the input array is large enough to
/// contain the correct number of objects of the given type.
template <typename T>
static Expected<ArrayRef<T>> getDataSliceAs(ArrayRef<uint8_t> Data,
- size_t Offset, size_t Count);
+ uint64_t Offset, uint64_t Count);
MinidumpFile(MemoryBufferRef Source, const minidump::Header &Header,
ArrayRef<minidump::Directory> Streams,
@@ -182,9 +195,13 @@ class MinidumpFile : public Binary {
template <typename T>
Expected<ArrayRef<T>> getListStream(minidump::StreamType Stream) const;
+ void cacheMemory64RVAs(uint64_t BaseRVA,
+ ArrayRef<minidump::MemoryDescriptor_64> Descriptors);
+
const minidump::Header &Header;
ArrayRef<minidump::Directory> Streams;
DenseMap<minidump::StreamType, std::size_t> StreamMap;
+ std::unordered_map<uint64_t, uint64_t> Memory64DescriptorToRvaMap;
};
template <typename T>
@@ -208,6 +225,7 @@ Expected<ArrayRef<T>> MinidumpFile::getDataSliceAs(ArrayRef<uint8_t> Data,
getDataSlice(Data, Offset, sizeof(T) * Count);
if (!Slice)
return Slice.takeError();
+
return ArrayRef<T>(reinterpret_cast<const T *>(Slice->data()), Count);
}
diff --git a/llvm/include/llvm/ObjectYAML/MinidumpYAML.h b/llvm/include/llvm/ObjectYAML/MinidumpYAML.h
index b0cee541cef20..7f7924b92029f 100644
--- a/llvm/include/llvm/ObjectYAML/MinidumpYAML.h
+++ b/llvm/include/llvm/ObjectYAML/MinidumpYAML.h
@@ -29,6 +29,7 @@ struct Stream {
Exception,
MemoryInfoList,
MemoryList,
+ Memory64List,
ModuleList,
RawContent,
SystemInfo,
@@ -50,8 +51,7 @@ struct Stream {
/// Create a stream from the given stream directory entry.
static Expected<std::unique_ptr<Stream>>
- create(const minidump::Directory &StreamDesc,
- const object::MinidumpFile &File);
+ create(const minidump::Directory &StreamDesc, object::MinidumpFile &File);
};
namespace detail {
@@ -98,12 +98,30 @@ struct ParsedMemoryDescriptor {
minidump::MemoryDescriptor Entry;
yaml::BinaryRef Content;
};
+
+struct ParsedMemory64Descriptor {
+ static constexpr Stream::StreamKind Kind = Stream::StreamKind::Memory64List;
+ static constexpr minidump::StreamType Type =
+ minidump::StreamType::Memory64List;
+
+ minidump::MemoryDescriptor_64 Entry;
+ yaml::BinaryRef Content;
+};
} // namespace detail
using ModuleListStream = detail::ListStream<detail::ParsedModule>;
using ThreadListStream = detail::ListStream<detail::ParsedThread>;
using MemoryListStream = detail::ListStream<detail::ParsedMemoryDescriptor>;
+struct Memory64ListStream
+ : public detail::ListStream<detail::ParsedMemory64Descriptor> {
+ minidump::Memory64ListHeader Header;
+
+ explicit Memory64ListStream(
+ std::vector<detail::ParsedMemory64Descriptor> Entries = {})
+ : ListStream(Entries){};
+};
+
/// ExceptionStream minidump stream.
struct ExceptionStream : public Stream {
minidump::ExceptionStream MDExceptionStream;
@@ -215,7 +233,7 @@ struct Object {
/// The list of streams in this minidump object.
std::vector<std::unique_ptr<Stream>> Streams;
- static Expected<Object> create(const object::MinidumpFile &File);
+ static Expected<Object> create(object::MinidumpFile &File);
};
} // namespace MinidumpYAML
@@ -244,6 +262,12 @@ template <> struct MappingContextTraits<minidump::MemoryDescriptor, BinaryRef> {
BinaryRef &Content);
};
+template <>
+struct MappingContextTraits<minidump::MemoryDescriptor_64, BinaryRef> {
+ static void mapping(IO &IO, minidump::MemoryDescriptor_64 &Memory,
+ BinaryRef &Content);
+};
+
} // namespace yaml
} // namespace llvm
@@ -269,11 +293,14 @@ LLVM_YAML_DECLARE_MAPPING_TRAITS(
llvm::MinidumpYAML::ModuleListStream::entry_type)
LLVM_YAML_DECLARE_MAPPING_TRAITS(
llvm::MinidumpYAML::ThreadListStream::entry_type)
+LLVM_YAML_DECLARE_MAPPING_TRAITS(
+ llvm::MinidumpYAML::Memory64ListStream::entry_type)
LLVM_YAML_IS_SEQUENCE_VECTOR(std::unique_ptr<llvm::MinidumpYAML::Stream>)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MinidumpYAML::MemoryListStream::entry_type)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MinidumpYAML::ModuleListStream::entry_type)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MinidumpYAML::ThreadListStream::entry_type)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MinidumpYAML::Memory64ListStream::entry_type)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::minidump::MemoryInfo)
LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::MinidumpYAML::Object)
diff --git a/llvm/lib/Object/Minidump.cpp b/llvm/lib/Object/Minidump.cpp
index 6febff89ac519..06bcbe9be195f 100644
--- a/llvm/lib/Object/Minidump.cpp
+++ b/llvm/lib/Object/Minidump.cpp
@@ -99,8 +99,9 @@ template Expected<ArrayRef<Thread>>
template Expected<ArrayRef<MemoryDescriptor>>
MinidumpFile::getListStream(StreamType) const;
-Expected<ArrayRef<uint8_t>>
-MinidumpFile::getDataSlice(ArrayRef<uint8_t> Data, size_t Offset, size_t Size) {
+Expected<ArrayRef<uint8_t>> MinidumpFile::getDataSlice(ArrayRef<uint8_t> Data,
+ uint64_t Offset,
+ uint64_t Size) {
// Check for overflow.
if (Offset + Size < Offset || Offset + Size < Size ||
Offset + Size > Data.size())
@@ -154,3 +155,47 @@ MinidumpFile::create(MemoryBufferRef Source) {
return std::unique_ptr<MinidumpFile>(
new MinidumpFile(Source, Hdr, *ExpectedStreams, std::move(StreamMap)));
}
+
+void MinidumpFile::cacheMemory64RVAs(
+ uint64_t BaseRVA, ArrayRef<minidump::MemoryDescriptor_64> Descriptors) {
+ // Check if we've already cached the RVAs for this list.
+ if (!Memory64DescriptorToRvaMap.empty())
+ return;
+
+ for (const auto &MD : Descriptors) {
+ Memory64DescriptorToRvaMap[MD.StartOfMemoryRange] = BaseRVA;
+ BaseRVA += MD.DataSize;
+ }
+}
+
+Expected<ArrayRef<MemoryDescriptor_64>> MinidumpFile::getMemory64List() {
+ Expected<minidump::Memory64ListHeader> ListHeader = getMemoryList64Header();
+ if (!ListHeader)
+ return ListHeader.takeError();
+
+ std::optional<ArrayRef<uint8_t>> Stream =
+ getRawStream(StreamType::Memory64List);
+ if (!Stream)
+ return createError("No such stream");
+
+ Expected<ArrayRef<minidump::MemoryDescriptor_64>> Descriptors =
+ getDataSliceAs<minidump::MemoryDescriptor_64>(
+ *Stream, sizeof(Memory64ListHeader),
+ ListHeader->NumberOfMemoryRanges);
+
+ if (!Descriptors)
+ return Descriptors.takeError();
+
+ cacheMemory64RVAs(ListHeader->NumberOfMemoryRanges, *Descriptors);
+ return Descriptors;
+}
+
+Expected<ArrayRef<uint8_t>>
+MinidumpFile::getRawData(MemoryDescriptor_64 MD) const {
+ if (Memory64DescriptorToRvaMap.count(MD.StartOfMemoryRange) > 0) {
+ uint64_t RVA = Memory64DescriptorToRvaMap.at(MD.StartOfMemoryRange);
+ return getDataSlice(getData(), RVA, MD.DataSize);
+ }
+
+ return createError("No such Memory Descriptor64.");
+}
diff --git a/llvm/lib/ObjectYAML/MinidumpEmitter.cpp b/llvm/lib/ObjectYAML/MinidumpEmitter.cpp
index 24b521a9925c7..d2f6bcdc20838 100644
--- a/llvm/lib/ObjectYAML/MinidumpEmitter.cpp
+++ b/llvm/lib/ObjectYAML/MinidumpEmitter.cpp
@@ -136,6 +136,22 @@ static size_t layout(BlobAllocator &File, MinidumpYAML::ExceptionStream &S) {
return DataEnd;
}
+static size_t layout(BlobAllocator &File, MinidumpYAML::Memory64ListStream &S) {
+ size_t BaseRVA = File.tell() + sizeof(minidump::Memory64ListHeader);
+ S.Header.BaseRVA = BaseRVA;
+ S.Header.NumberOfMemoryRanges = S.Entries.size();
+ File.allocateObject(S.Header);
+ for (auto &E : S.Entries)
+ File.allocateObject(E.Entry);
+
+ // Save the new offset for the stream size.
+ size_t DataEnd = File.tell();
+ for (auto &E : S.Entries)
+ File.allocateBytes(E.Content);
+
+ return DataEnd;
+}
+
static void layout(BlobAllocator &File, MemoryListStream::entry_type &Range) {
Range.Entry.Memory = layout(File, Range.Content);
}
@@ -190,6 +206,9 @@ static Directory layout(BlobAllocator &File, Stream &S) {
case Stream::StreamKind::MemoryList:
DataEnd = layout(File, cast<MemoryListStream>(S));
break;
+ case Stream::StreamKind::Memory64List:
+ DataEnd = layout(File, cast<Memory64ListStream>(S));
+ break;
case Stream::StreamKind::ModuleList:
DataEnd = layout(File, cast<ModuleListStream>(S));
break;
diff --git a/llvm/lib/ObjectYAML/MinidumpYAML.cpp b/llvm/lib/ObjectYAML/MinidumpYAML.cpp
index fdbd2d8e6dcbc..b318a9e5cc2e5 100644
--- a/llvm/lib/ObjectYAML/MinidumpYAML.cpp
+++ b/llvm/lib/ObjectYAML/MinidumpYAML.cpp
@@ -75,6 +75,8 @@ Stream::StreamKind Stream::getKind(StreamType Type) {
return StreamKind::MemoryInfoList;
case StreamType::MemoryList:
return StreamKind::MemoryList;
+ case StreamType::Memory64List:
+ return StreamKind::Memory64List;
case StreamType::ModuleList:
return StreamKind::ModuleList;
case StreamType::SystemInfo:
@@ -103,6 +105,8 @@ std::unique_ptr<Stream> Stream::create(StreamType Type) {
return std::make_unique<MemoryInfoListStream>();
case StreamKind::MemoryList:
return std::make_unique<MemoryListStream>();
+ case StreamKind::Memory64List:
+ return std::make_unique<Memory64ListStream>();
case StreamKind::ModuleList:
return std::make_unique<ModuleListStream>();
case StreamKind::RawContent:
@@ -256,6 +260,12 @@ void yaml::MappingTraits<MemoryInfo>::mapping(IO &IO, MemoryInfo &Info) {
mapOptionalHex(IO, "Reserved1", Info.Reserved1, 0);
}
+void yaml::MappingTraits<Memory64ListStream::entry_type>::mapping(
+ IO &IO, Memory64ListStream::entry_type &Mem) {
+ MappingContextTraits<MemoryDescriptor_64, yaml::BinaryRef>::mapping(
+ IO, Mem.Entry, Mem.Content);
+}
+
void yaml::MappingTraits<VSFixedFileInfo>::mapping(IO &IO,
VSFixedFileInfo &Info) {
mapOptionalHex(IO, "Signature", Info.Signature, 0);
@@ -312,6 +322,10 @@ static void streamMapping(yaml::IO &IO, MemoryListStream &Stream) {
IO.mapRequired("Memory Ranges", Stream.Entries);
}
+static void streamMapping(yaml::IO &IO, Memory64ListStream &Stream) {
+ IO.mapRequired("Memory Ranges", Stream.Entries);
+}
+
static void streamMapping(yaml::IO &IO, ModuleListStream &Stream) {
IO.mapRequired("Modules", Stream.Entries);
}
@@ -356,6 +370,13 @@ void yaml::MappingContextTraits<MemoryDescriptor, yaml::BinaryRef>::mapping(
IO.mapRequired("Content", Content);
}
+void yaml::MappingContextTraits<MemoryDescriptor_64, yaml::BinaryRef>::mapping(
+ IO &IO, MemoryDescriptor_64 &Memory, BinaryRef &Content) {
+ mapRequiredHex(IO, "Start of Memory Range", Memory.StartOfMemoryRange);
+ mapRequiredHex(IO, "Data Size", Memory.DataSize);
+ IO.mapRequired("Content", Content);
+}
+
void yaml::MappingTraits<ThreadListStream::entry_type>::mapping(
IO &IO, ThreadListStream::entry_type &T) {
mapRequiredHex(IO, "Thread Id", T.Entry.ThreadId);
@@ -416,6 +437,9 @@ void yaml::MappingTraits<std::unique_ptr<Stream>>::mapping(
case MinidumpYAML::Stream::StreamKind::MemoryList:
streamMapping(IO, llvm::cast<MemoryListStream>(*S));
break;
+ case MinidumpYAML::Stream::StreamKind::Memory64List:
+ streamMapping(IO, llvm::cast<Memory64ListStream>(*S));
+ break;
case MinidumpYAML::Stream::StreamKind::ModuleList:
streamMapping(IO, llvm::cast<ModuleListStream>(*S));
break;
@@ -442,6 +466,7 @@ std::string yaml::MappingTraits<std::unique_ptr<Stream>>::validate(
case MinidumpYAML::Stream::StreamKind::Exception:
case MinidumpYAML::Stream::StreamKind::MemoryInfoList:
case MinidumpYAML::Stream::StreamKind::MemoryList:
+ case MinidumpYAML::Stream::StreamKind::Memory64List:
case MinidumpYAML::Stream::StreamKind::ModuleList:
case MinidumpYAML::Stream::StreamKind::SystemInfo:
case MinidumpYAML::Stream::StreamKind::TextContent:
@@ -459,8 +484,8 @@ void yaml::MappingTraits<Object>::mapping(IO &IO, Object &O) {
IO.mapRequired("Streams", O.Streams);
}
-Expected<std::unique_ptr<Stream>>
-Stream::create(const Directory &StreamDesc, const object::MinidumpFile &File) {
+Expected<std::unique_ptr<Stream>> Stream::create(const Directory &StreamDesc,
+ object::MinidumpFile &File) {
StreamKind Kind = getKind(StreamDesc.Type);
switch (Kind) {
case StreamKind::Exception: {
@@ -494,6 +519,19 @@ Stream::create(const Directory &StreamDesc, const object::MinidumpFile &File) {
}
return std::make_unique<MemoryListStream>(std::move(Ranges));
}
+ case StreamKind::Memory64List: {
+ auto ExpectedList = File.getMemory64List();
+ if (!ExpectedList)
+ return ExpectedList.takeError();
+ std::vector<Memory64ListStream::entry_type> Ranges;
+ for (const MemoryDescriptor_64 &MD : *ExpectedList) {
+ auto ExpectedContent = File.getRawData(MD);
+ if (!ExpectedContent)
+ return ExpectedContent.takeError();
+ Ranges.push_back({MD, *ExpectedContent});
+ }
+ return std::make_unique<Memory64ListStream>(std::move(Ranges));
+ }
case StreamKind::ModuleList: {
auto ExpectedList = File.getModuleList();
if (!ExpectedList)
@@ -550,7 +588,7 @@ Stream::create(const Directory &StreamDesc, const object::MinidumpFile &File) {
llvm_unreachable("Unhandled stream kind!");
}
-Expected<Object> Object::create(const object::MinidumpFile &File) {
+Expected<Object> Object::create(object::MinidumpFile &File) {
std::vector<std::unique_ptr<Stream>> Streams;
Streams.reserve(File.streams().size());
for (const Directory &StreamDesc : File.streams()) {
diff --git a/llvm/test/tools/obj2yaml/Minidump/basic.yaml b/llvm/test/tools/obj2yaml/Minidump/basic.yaml
index dfabc132d8e71..9702f82dc5843 100644
--- a/llvm/test/tools/obj2yaml/Minidump/basic.yaml
+++ b/llvm/test/tools/obj2yaml/Minidump/basic.yaml
@@ -67,9 +67,13 @@ Streams:
Parameter 1: 0x24
Thread Context: '8182838485868788'
- Type: MemoryList
- Memory Ranges:
+ Memory Ranges:
- Start of Memory Range: 0x7C7D7E7F80818283
Content: '8485868788'
+ - Type: Memory64List
+ Memory Ranges:
+ - Start of Memory Range: 0x7FFFFFFF0818283
+ Content: '104101108108111'
- Type: MemoryInfoList
Memory Ranges:
- Base Address: 0x0000000000000000
@@ -156,9 +160,13 @@ Streams:
# CHECK-NEXT: Parameter 1: 0x24
# CHECK-NEXT: Thread Context: '8182838485868788'
# CHECK-NEXT: - Type: MemoryList
-# CHECK-NEXT: Memory Ranges:
+# CHECK-NEXT: Memory Ranges:
# CHECK-NEXT: - Start of Memory Range: 0x7C7D7E7F80818283
# CHECK-NEXT: Content: '8485868788'
+# CHECK-NEXT: - Type: Memory64List
+# CHECK-NEXT: Memory Ranges:
+# CHECK-NEXT: - Start of Memory Range: 0x7FFFFFFF0818283
+# CHECK-NEXT: Content: '104101108108111'
# CHECK-NEXT: - Type: MemoryInfoList
# CHECK-NEXT: Memory Ranges:
# CHECK-NEXT: - Base Address: 0x0
diff --git a/llvm/tools/obj2yaml/minidump2yaml.cpp b/llvm/tools/obj2yaml/minidump2yaml.cpp
index 7b25b1869c6b6..53486d57d0932 100644
--- a/llvm/tools/obj2yaml/minidump2yaml.cpp
+++ b/llvm/tools/obj2yaml/minidump2yaml.cpp
@@ -13,7 +13,7 @@
using namespace llvm;
-Error minidump2yaml(raw_ostream &Out, const object::MinidumpFile &Obj) {
+Error minidump2yaml(raw_ostream &Out, object::MinidumpFile &Obj) {
auto ExpectedObject = MinidumpYAML::Object::create(Obj);
if (!ExpectedObject)
return ExpectedObject.takeError();
diff --git a/llvm/tools/obj2yaml/obj2yaml.h b/llvm/tools/obj2yaml/obj2yaml.h
index 03d7db5317acd..96256bc1307d6 100644
--- a/llvm/tools/obj2yaml/obj2yaml.h
+++ b/llvm/tools/obj2yaml/obj2yaml.h
@@ -28,7 +28,7 @@ llvm::Error elf2yaml(llvm::raw_ostream &Out,
llvm::Error macho2yaml(llvm::raw_ostream &Out, const llvm::object::Binary &Obj,
unsigned RawSegments);
llvm::Error minidump2yaml(llvm::raw_ostream &Out,
- const llvm::object::MinidumpFile &Obj);
+ llvm::object::MinidumpFile &Obj);
llvm::Error xcoff2yaml(llvm::raw_ostream &Out,
const llvm::object::XCOFFObjectFile &Obj);
std::error_code wasm2yaml(llvm::raw_ostream &Out,
diff --git a/llvm/unittests/ObjectYAML/MinidumpYAMLTest.cpp b/llvm/unittests/ObjectYAML/MinidumpYAMLTest.cpp
index 0297f62d10699..79fb903bcfd10 100644
--- a/llvm/unittests/ObjectYAML/MinidumpYAMLTest.cpp
+++ b/llvm/unittests/ObjectYAML/MinidumpYAMLTest.cpp
@@ -336,3 +336,51 @@ TEST(MinidumpYAML, ExceptionStream_ExtraParameter) {
0xab, 0xad, 0xca, 0xfe}),
*ExpectedContext);
...
[truncated]
|
llvm/lib/ObjectYAML/MinidumpYAML.cpp
Outdated
void yaml::MappingContextTraits<MemoryDescriptor_64, yaml::BinaryRef>::mapping( | ||
IO &IO, MemoryDescriptor_64 &Memory, BinaryRef &Content) { | ||
mapRequiredHex(IO, "Start of Memory Range", Memory.StartOfMemoryRange); | ||
mapRequiredHex(IO, "Data Size", Memory.DataSize); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would like feedback from reviewers, should we require DataSize here or should we just take the content size?
llvm/lib/ObjectYAML/MinidumpYAML.cpp
Outdated
@@ -550,7 +588,7 @@ Stream::create(const Directory &StreamDesc, const object::MinidumpFile &File) { | |||
llvm_unreachable("Unhandled stream kind!"); | |||
} | |||
|
|||
Expected<Object> Object::create(const object::MinidumpFile &File) { | |||
Expected<Object> Object::create(object::MinidumpFile &File) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@labath I had to drop the const modifier here in order to cache the MemoryDescriptor_64 offsets. I thought this would be appropriate given repetitive linear search in the Stream::create
for Memory64. I'm not sure if removing the const modifier is the correct decision and would appreciate your input.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, this doesn't seem completely ideal. Looking at the code, it looks like the only place where you're accessing these, is when you're iterating over them anyway, so maybe some sort of an iterator would be in order?
It could store the current offset in its state without modifying the underlying container. And perhaps its value_type could be std::pair<MemoryDescriptor_64, ArrayRef<uint8_t>>
? It might also need to be an llvm::fallible_iterator
so it can report errors it encounters mid-flight.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fallible_iterator
is really cool! I'll refactor everything over
✅ With the latest revision this PR passed the C/C++ code formatter. |
const std::optional<ArrayRef<uint8_t>> ExpectedContent = | ||
File.getRawStream(StreamType::Memory64List); | ||
ASSERT_TRUE(ExpectedContent); | ||
const ArrayRef<uint8_t> Content = *ExpectedContent; | ||
const size_t offset = | ||
sizeof(Memory64ListHeader) + (sizeof(MemoryDescriptor_64) * 2); | ||
const uint64_t *Array = | ||
reinterpret_cast<const uint64_t *>(Content.slice(offset, 16).data(), 16); | ||
ASSERT_EQ(0x7000000000002Au, Array[0]); | ||
ASSERT_EQ(0x7000A00000000Au, Array[1]); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is this trying to test? You need to verify the data, which starts at the BaseRVA
is "hello" and "world" right? Not sure what this is trying to test.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed this, please let me know if this makes sense to you now @clayborg
Does look like this is working correctly:
|
So the data for MemoryRanges[1] will be off since it will think it starts at:
And that points to the last 2 bytes of the file. |
So the BaseRVA does look good, the memory range is being set to 8 bytes instead of 5 for each data. |
llvm/lib/ObjectYAML/MinidumpYAML.cpp
Outdated
@@ -550,7 +588,7 @@ Stream::create(const Directory &StreamDesc, const object::MinidumpFile &File) { | |||
llvm_unreachable("Unhandled stream kind!"); | |||
} | |||
|
|||
Expected<Object> Object::create(const object::MinidumpFile &File) { | |||
Expected<Object> Object::create(object::MinidumpFile &File) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, this doesn't seem completely ideal. Looking at the code, it looks like the only place where you're accessing these, is when you're iterating over them anyway, so maybe some sort of an iterator would be in order?
It could store the current offset in its state without modifying the underlying container. And perhaps its value_type could be std::pair<MemoryDescriptor_64, ArrayRef<uint8_t>>
? It might also need to be an llvm::fallible_iterator
so it can report errors it encounters mid-flight.
llvm/lib/ObjectYAML/MinidumpYAML.cpp
Outdated
@@ -373,7 +373,6 @@ void yaml::MappingContextTraits<MemoryDescriptor, yaml::BinaryRef>::mapping( | |||
void yaml::MappingContextTraits<MemoryDescriptor_64, yaml::BinaryRef>::mapping( | |||
IO &IO, MemoryDescriptor_64 &Memory, BinaryRef &Content) { | |||
mapRequiredHex(IO, "Start of Memory Range", Memory.StartOfMemoryRange); | |||
mapRequiredHex(IO, "Data Size", Memory.DataSize); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I actually think we should provide a way to override the DataSize field, as the main purpose of the yaml is to test the parser with possibly invalid input. It doesn't have to be present by default though, which I think you can achieve by using IO.mapOptional("Data Size", Memory.DataSize, Content.binary_size());
-- and putting it after the "Content" line.
(I'm not using hex, because the RawContentStream is also printed in decimal, but I could be convinced to change both).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I gave this a try, but it appears we'll need to do more than set the backup to Contnet.binary_size()
in the mapping. Content isn't initialized when the mapping is being configured, so binary_size()
always resolves to 0.
We could do something like in the yaml/minidump emitter if DataSize is 0 we override it, but currently my patch sticks to setting datasize to the content bytesize when it's being emitted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Content isn't initialized when the mapping is being configured, so binary_size() always resolves to 0.
Are you sure about that? Did you put this line after the line that initializes the Content
field?
This pattern really should work, as that's the same one used for RawContentStream. And now that I've looked into it more closely, I think it'd be best to do exactly the same thing as RawContentStream does, where a smaller DataSize field is an error, and a larger value just pads the output with zeroes (so if you just want a field of a certain size, and don't care about the contents, you can just put DataSize: 47
and omit the Content entirely:
$ yaml2obj <<EOF | obj2yaml
--- !minidump
Streams:
- Type: LinuxAuxv
Size: 100
Content: DEADBEEFBAADF00D
EOF
--- !minidump
Streams:
- Type: LinuxAuxv
Content: DEADBEEFBAADF00D0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
...
$ yaml2obj <<EOF
--- !minidump
Streams:
- Type: LinuxAuxv
Size: 7
Content: DEADBEEFBAADF00D
EOF
YAML:3:5: error: Stream size must be greater or equal to the content size
- Type: LinuxAuxv
^
yaml2obj: error: failed to parse YAML input: Invalid argument
$ yaml2obj <<EOF | obj2yaml
--- !minidump
Streams:
- Type: LinuxAuxv
Size: 10
EOF
--- !minidump
Streams:
- Type: LinuxAuxv
Content: '00000000000000000000'
...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are you sure about that?
No 😆, I trust your knowledge of this much more than my inability to understand. I will try again and use RawContnetStream as a reference this time.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Code looks fine to me now. I will let Pavel give the final OK as I don't know much about the YAML layering in LLVM.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Better, but I still have some comments on the implementation. I see you ended up not using the fallible_iterator thing in the end. I'm sort of ok with that, though I think it'd be better to do it that way, as we wouldn't need the upfront array bounds check and we could return partial data where it made sense,
llvm/include/llvm/Object/Minidump.h
Outdated
|
||
public: | ||
Memory64ListFacade(ArrayRef<uint8_t> Storage, | ||
std::vector<minidump::MemoryDescriptor_64> Descriptors, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think this needs to be a vector. The memory for these is held by the MinidumpFile object, right?
llvm/include/llvm/Object/Minidump.h
Outdated
@@ -132,6 +140,71 @@ class MinidumpFile : public Binary { | |||
size_t Stride; | |||
}; | |||
|
|||
class Memory64ListFacade { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think you need this facade class (particularly if we can avoid storing the of the vector<MemoryDescriptor_64> thing). llvm APIs of this kind will typically return iterator_range<YourIterator>
and put all of the logic into the iterator class.
llvm/include/llvm/Object/Minidump.h
Outdated
assert(Descriptors.size() > Iterator->Count); | ||
minidump::MemoryDescriptor_64 Descriptor = Descriptors[Iterator->Count]; | ||
ArrayRef<uint8_t> Content = | ||
Storage.slice(Iterator->BaseRVA, Descriptor.DataSize); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it's worth dropping a short comment saying that the bounds of this array were checked when this iterator was constructed.
llvm/include/llvm/Object/Minidump.h
Outdated
|
||
MinidumpFile(MemoryBufferRef Source, const minidump::Header &Header, | ||
ArrayRef<minidump::Directory> Streams, | ||
DenseMap<minidump::StreamType, std::size_t> StreamMap) | ||
: Binary(ID_Minidump, Source), Header(Header), Streams(Streams), | ||
StreamMap(std::move(StreamMap)) {} | ||
StreamMap(std::move(StreamMap)) {}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
superfluous semicolon
llvm/lib/ObjectYAML/MinidumpYAML.cpp
Outdated
@@ -373,7 +373,6 @@ void yaml::MappingContextTraits<MemoryDescriptor, yaml::BinaryRef>::mapping( | |||
void yaml::MappingContextTraits<MemoryDescriptor_64, yaml::BinaryRef>::mapping( | |||
IO &IO, MemoryDescriptor_64 &Memory, BinaryRef &Content) { | |||
mapRequiredHex(IO, "Start of Memory Range", Memory.StartOfMemoryRange); | |||
mapRequiredHex(IO, "Data Size", Memory.DataSize); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Content isn't initialized when the mapping is being configured, so binary_size() always resolves to 0.
Are you sure about that? Did you put this line after the line that initializes the Content
field?
This pattern really should work, as that's the same one used for RawContentStream. And now that I've looked into it more closely, I think it'd be best to do exactly the same thing as RawContentStream does, where a smaller DataSize field is an error, and a larger value just pads the output with zeroes (so if you just want a field of a certain size, and don't care about the contents, you can just put DataSize: 47
and omit the Content entirely:
$ yaml2obj <<EOF | obj2yaml
--- !minidump
Streams:
- Type: LinuxAuxv
Size: 100
Content: DEADBEEFBAADF00D
EOF
--- !minidump
Streams:
- Type: LinuxAuxv
Content: DEADBEEFBAADF00D0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
...
$ yaml2obj <<EOF
--- !minidump
Streams:
- Type: LinuxAuxv
Size: 7
Content: DEADBEEFBAADF00D
EOF
YAML:3:5: error: Stream size must be greater or equal to the content size
- Type: LinuxAuxv
^
yaml2obj: error: failed to parse YAML input: Invalid argument
$ yaml2obj <<EOF | obj2yaml
--- !minidump
Streams:
- Type: LinuxAuxv
Size: 10
EOF
--- !minidump
Streams:
- Type: LinuxAuxv
Content: '00000000000000000000'
...
@labath for this I need to lean on your expertise. When writing this up it seemed more logical to me to verify the array first when constructing the iterator facade. I do see value in being able to return the partial 'good' data, while simultaneously being unsure if returning some data if any of the descriptors point out of bounds. This seems like a correctness problem to me for the MemoryList, but I don't want to decrease the testability of this class, so I'll make another pass at failable_iterator |
… we can cache the memory descriptors instead of doing N^2 lookups. Run Formatters and cleanup debugging code.
… for hello and world
…and add unhappy testcase
b9b62ca
to
2bf7371
Compare
…rmal iterator behavior, and fixes filecheck
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can see you struggled to get this to work, and I appreciate the effort you put in. I think some of the inline comments could make this flow better.
llvm/include/llvm/Object/Minidump.h
Outdated
if (Index >= Descriptors.size()) | ||
return createError("Can't read past of Memory64List Stream"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should probably be an assertion. The caller shouldn't be incrementing an end()
iterator, right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Dropped entirely now that we're using drop_front
on ArrayRef.
Content: '776f726c64' | ||
)"); | ||
|
||
ASSERT_THAT_EXPECTED(ExpectedFile, Failed()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is this checking? Are you sure this isn't failing due to a yaml syntax error (Data Size: 4 1
). Could we assert a specific error message?
If not, maybe you could make that a FileCheck test instead (I suspect that's why test/tools/yaml2obj/Minidump/raw-stream-small-size.yaml
is not a unit test)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We added a check that we would fail if datasize was less than the content size, so I added a small unit test to verify we can't create a yaml with a inferior datasize. The 4 left in was an oversight on my part.
With that in mind, do you still want me to move this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The thing I'd like to be sure is that this code fails for the "right" reason. I.e., due to the small size, and not -- like it did before -- due to a syntax error. I see you have fixed the error now, but this is still a fragile pattern, so some sort of a positive test would be better. If there's some indication of the error in the error string, then we could check that with FailedWithMessage("expected error string")
here. Otherwise, I think it be better to do it with a lit test.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm going to drop this for now and extend basic.yaml in the positive case where datasize appends 0's
…yrefs, remove expected method and use range loop
@labath Implemented almost all of your feedback, once again I appreciate the in depth review. The only functional thing I did not change was the assertion in the iterator constructor that the first element is readable. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We're pretty close, but I still some comments and question. None of them should be major, I hope.
llvm/include/llvm/Object/Minidump.h
Outdated
} | ||
|
||
Error inc() { | ||
if (Storage.size() == 0 || Descriptors.size() == 0) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if (Storage.size() == 0 || Descriptors.size() == 0) { | |
if (Descriptors.empty()) { |
(storage size is checked on line 179)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Github says this has been applied, but I don't see that in the code. Did you maybe forget to upload of force-overwrite something by mistake?
llvm/include/llvm/Object/Minidump.h
Outdated
if (Descriptors.empty()) | ||
IsEnd = true; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This doesn't look correct. Since you're holding the "current" object in a separate member, the emptyness of this does not indicate that you've reached the end (well, you sort of did, but the end iterator should point past the end of the array). I would expect that this should be caught the next time someone calls this function (attempts to increment the iterator).
llvm/lib/ObjectYAML/MinidumpYAML.cpp
Outdated
auto Memory64List = File.getMemory64List(Err); | ||
if (Err) | ||
return Err; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
auto Memory64List = File.getMemory64List(Err); | |
if (Err) | |
return Err; | |
auto Memory64List = File.getMemory64List(Err); |
I think we can delete this check, as the error will be checked below, and we will get an empty iterator range in case of early fatal errors.
llvm/lib/Object/Minidump.cpp
Outdated
return make_range( | ||
llvm::object::MinidumpFile::FallibleMemory64Iterator::itr( | ||
llvm::object::MinidumpFile::Memory64Iterator::end(), Err), | ||
llvm::object::MinidumpFile::FallibleMemory64Iterator::end( | ||
llvm::object::MinidumpFile::Memory64Iterator::end())); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
return make_range( | |
llvm::object::MinidumpFile::FallibleMemory64Iterator::itr( | |
llvm::object::MinidumpFile::Memory64Iterator::end(), Err), | |
llvm::object::MinidumpFile::FallibleMemory64Iterator::end( | |
llvm::object::MinidumpFile::Memory64Iterator::end())); | |
return make_fallible_range( | |
Memory64Iterator::end(), | |
Memory64Iterator::end(), Err); |
.. although the way I'd probably do it is to delete this function, put an auto end = make_fallible_end(Memory64Iterator::end())
at the beginning of the getMemory64List
function, and then do a return make_range(end, end)
on the error paths.
llvm/lib/ObjectYAML/MinidumpYAML.cpp
Outdated
// If we don't have an error, or if any of the reads succeed, return ranges | ||
// this would also work if we have no descriptors. | ||
if (!Err || Ranges.size() > 0) | ||
return std::make_unique<Memory64ListStream>(std::move(Ranges)); | ||
|
||
return Err; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we encounter an error after reading at least one element, this will assert due to not consuming the error. If this is the behavior you want, you should thrown in an extra consumeError(std::move(Err))
here.
However, I'm not sure this is the best behavior, as we will swallow the error and the caller of this function has no way of knowing that he has given an incomplete range. I think it'd be better to just pass the error instead of an incomplete range. I.e., this:
// If we don't have an error, or if any of the reads succeed, return ranges | |
// this would also work if we have no descriptors. | |
if (!Err || Ranges.size() > 0) | |
return std::make_unique<Memory64ListStream>(std::move(Ranges)); | |
return Err; | |
if (Err) return Err; | |
return std::make_unique<Memory64ListStream>(std::move(Ranges)); |
I realize this may seem to contradict what I said earlier about the getMemory64List
, but there is a logic to that: getMemory64List
can (thanks to the iterator pattern) return both a partial result and and error, and it is this flexibility which allows the callers (such as this function) to choose what they want to do. It's just that for this particular caller, I don't think that a partial result is the most useful mode of operation. Things would be different e.g. for a function that was looking for a particular piece of memory, in which case it could return early (and successfully) even if the rest of the stream was corrupted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You know it's been a fun PR when we've gone full circle! I've edited the code as you suggest.
// Explicit Err check | ||
ASSERT_FALSE(Err); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This shouldn't be necessary. Could it have something to do with the fact that you're not using the ErrorAsOutParameter
object inside getMemory64List
? (That object allows you to assign to an error object without first checking it).
Expected<iterator_range<object::MinidumpFile::FallibleMemory64Iterator>> | ||
ExpectedMemoryList = File.getMemory64List(Err); | ||
|
||
ASSERT_THAT_EXPECTED(ExpectedMemoryList, Succeeded()); | ||
|
||
iterator_range<object::MinidumpFile::FallibleMemory64Iterator> MemoryList = | ||
*ExpectedMemoryList; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Expected<iterator_range<object::MinidumpFile::FallibleMemory64Iterator>> | |
ExpectedMemoryList = File.getMemory64List(Err); | |
ASSERT_THAT_EXPECTED(ExpectedMemoryList, Succeeded()); | |
iterator_range<object::MinidumpFile::FallibleMemory64Iterator> MemoryList = | |
*ExpectedMemoryList; | |
iterator_range<object::MinidumpFile::FallibleMemory64Iterator> = File.getMemory64List(Err); | |
ASSERT_THAT_ERROR(Err, Succeeded()); |
… to pad 0's if datasize exceeds content
f47e36a
to
72d6a16
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks fine now, just please check what happened with one of the suggestions I highlighted. Thanks for your patience.
llvm/include/llvm/Object/Minidump.h
Outdated
} | ||
|
||
Error inc() { | ||
if (Storage.size() == 0 || Descriptors.size() == 0) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Github says this has been applied, but I don't see that in the code. Did you maybe forget to upload of force-overwrite something by mistake?
@labath those did get omitted due to github marking them as outdated due to moving the code around, I mixed that. I appreciate your long review on this one, I learned a lot from your feedback. I'm going to let CI run and merge when it's completed |
This PR adds support for
obj2yaml
andyaml2obj
to generate minidumps that have a Memory64List stream. This is a prerequisite to #101086.Worth noting
const dropped on minidumps so we could cache a MemoryDescriptor_64 to it's actual offset, preventing the need to loop multiple timesListStream
code in some places, because the Memory64List has a different width size field (unsigned 64), and a larger header than all the other streams. I determined refactoring the existing code to support Mem64 would be worse than supporting the special case.