-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[lldb] Add 'FindInMemory()' overload for PostMortemProcess. #102536
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-lldb Author: Miro Bucko (mbucko) ChangesSummary: Test Plan: Patch is 601.72 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/102536.diff 21 Files Affected:
diff --git a/lldb/include/lldb/Symbol/ObjectFile.h b/lldb/include/lldb/Symbol/ObjectFile.h
index d89314d44bf671..fac35167a7c764 100644
--- a/lldb/include/lldb/Symbol/ObjectFile.h
+++ b/lldb/include/lldb/Symbol/ObjectFile.h
@@ -669,6 +669,13 @@ class ObjectFile : public std::enable_shared_from_this<ObjectFile>,
// transparent decompression of section contents.
size_t CopyData(lldb::offset_t offset, size_t length, void *dst) const;
+ // Returns a pointer to the data at the specified offset. If the offset is
+ // invalid, this function will return nullptr. The 'available_bytes' argument
+ // will be set to the number of bytes available at the given offset, which
+ // will be at most 'length' bytes.
+ const uint8_t *PeekData(lldb::addr_t offset, size_t length,
+ size_t &available_bytes) const;
+
// This function will transparently decompress section data if the section if
// compressed.
virtual size_t ReadSectionData(Section *section,
diff --git a/lldb/include/lldb/Target/PostMortemProcess.h b/lldb/include/lldb/Target/PostMortemProcess.h
index 9c9cd7fa599bc3..a589adda93fe91 100644
--- a/lldb/include/lldb/Target/PostMortemProcess.h
+++ b/lldb/include/lldb/Target/PostMortemProcess.h
@@ -10,6 +10,7 @@
#define LLDB_TARGET_POSTMORTEMPROCESS_H
#include "lldb/Target/Process.h"
+#include "lldb/Utility/RangeMap.h"
namespace lldb_private {
@@ -33,6 +34,23 @@ class PostMortemProcess : public Process {
FileSpec GetCoreFile() const override { return m_core_file; }
protected:
+ typedef lldb_private::Range<lldb::addr_t, lldb::addr_t> FileRange;
+ typedef lldb_private::RangeDataVector<lldb::addr_t, lldb::addr_t, FileRange>
+ VMRangeToFileOffset;
+ typedef lldb_private::RangeDataVector<lldb::addr_t, lldb::addr_t, uint32_t>
+ VMRangeToPermissions;
+
+ virtual const uint8_t *PeekMemory(lldb::addr_t low, lldb::addr_t high,
+ size_t &size) = 0;
+
+ lldb::addr_t FindInMemory(lldb::addr_t low, lldb::addr_t high,
+ const uint8_t *buf, size_t size) override;
+
+ const uint8_t *DoPeekMemory(lldb::ModuleSP &core_module_sp,
+ VMRangeToFileOffset &core_aranges,
+ lldb::addr_t low, lldb::addr_t high,
+ size_t &available_bytes);
+
FileSpec m_core_file;
};
diff --git a/lldb/include/lldb/Target/Process.h b/lldb/include/lldb/Target/Process.h
index cf16fbc812aa48..51dba5b36a21c0 100644
--- a/lldb/include/lldb/Target/Process.h
+++ b/lldb/include/lldb/Target/Process.h
@@ -2703,8 +2703,8 @@ void PruneThreadPlans();
///
/// \return The address where the pattern was found or LLDB_INVALID_ADDRESS if
/// not found.
- lldb::addr_t FindInMemory(lldb::addr_t low, lldb::addr_t high,
- const uint8_t *buf, size_t size);
+ virtual lldb::addr_t FindInMemory(lldb::addr_t low, lldb::addr_t high,
+ const uint8_t *buf, size_t size);
AddressRanges FindRangesInMemory(const uint8_t *buf, uint64_t size,
const AddressRanges &ranges,
@@ -2835,6 +2835,34 @@ void PruneThreadPlans();
AddressRanges &matches, size_t alignment,
size_t max_matches);
+ template <typename IT>
+ lldb::addr_t FindInMemoryGeneric(IT &&iterator, lldb::addr_t low,
+ lldb::addr_t high, const uint8_t *buf,
+ size_t size) {
+ const size_t region_size = high - low;
+
+ if (region_size < size)
+ return LLDB_INVALID_ADDRESS;
+
+ std::vector<size_t> bad_char_heuristic(256, size);
+
+ for (size_t idx = 0; idx < size - 1; idx++) {
+ decltype(bad_char_heuristic)::size_type bcu_idx = buf[idx];
+ bad_char_heuristic[bcu_idx] = size - idx - 1;
+ }
+ for (size_t s = 0; s <= (region_size - size);) {
+ int64_t j = size - 1;
+ while (j >= 0 && buf[j] == iterator[s + j])
+ j--;
+ if (j < 0)
+ return low + s;
+ else
+ s += bad_char_heuristic[iterator[s + size - 1]];
+ }
+
+ return LLDB_INVALID_ADDRESS;
+ }
+
/// DoGetMemoryRegionInfo is called by GetMemoryRegionInfo after it has
/// removed non address bits from load_addr. Override this method in
/// subclasses of Process.
diff --git a/lldb/include/lldb/Target/ProcessTrace.h b/lldb/include/lldb/Target/ProcessTrace.h
index 7a025100f68032..0fc687fe6b9c62 100644
--- a/lldb/include/lldb/Target/ProcessTrace.h
+++ b/lldb/include/lldb/Target/ProcessTrace.h
@@ -72,6 +72,9 @@ class ProcessTrace : public PostMortemProcess {
bool DoUpdateThreadList(ThreadList &old_thread_list,
ThreadList &new_thread_list) override;
+ const uint8_t *PeekMemory(lldb::addr_t low, lldb::addr_t high,
+ size_t &available_bytes) override;
+
private:
static lldb::ProcessSP CreateInstance(lldb::TargetSP target_sp,
lldb::ListenerSP listener_sp,
diff --git a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp
index 30af9345999c41..2c04cc8905f051 100644
--- a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp
+++ b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp
@@ -1095,3 +1095,9 @@ bool ProcessElfCore::GetProcessInfo(ProcessInstanceInfo &info) {
}
return true;
}
+
+const uint8_t *ProcessElfCore::PeekMemory(lldb::addr_t low, lldb::addr_t high,
+ size_t &available_bytes) {
+ return DoPeekMemory(m_core_module_sp, m_core_aranges, low, high,
+ available_bytes);
+}
diff --git a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h
index 668a7c48467475..56afe08f6f3bbe 100644
--- a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h
+++ b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h
@@ -111,6 +111,9 @@ class ProcessElfCore : public lldb_private::PostMortemProcess {
bool SupportsMemoryTagging() override { return !m_core_tag_ranges.IsEmpty(); }
+ const uint8_t *PeekMemory(lldb::addr_t low, lldb::addr_t high,
+ size_t &available_bytes) override;
+
private:
struct NT_FILE_Entry {
lldb::addr_t start;
@@ -123,13 +126,6 @@ class ProcessElfCore : public lldb_private::PostMortemProcess {
lldb_private::UUID uuid;
};
- // For ProcessElfCore only
- typedef lldb_private::Range<lldb::addr_t, lldb::addr_t> FileRange;
- typedef lldb_private::RangeDataVector<lldb::addr_t, lldb::addr_t, FileRange>
- VMRangeToFileOffset;
- typedef lldb_private::RangeDataVector<lldb::addr_t, lldb::addr_t, uint32_t>
- VMRangeToPermissions;
-
lldb::ModuleSP m_core_module_sp;
std::string m_dyld_plugin_name;
diff --git a/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp b/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp
index 1da7696c9a352a..696de37f78e496 100644
--- a/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp
+++ b/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp
@@ -776,3 +776,9 @@ addr_t ProcessMachCore::GetImageInfoAddress() {
lldb_private::ObjectFile *ProcessMachCore::GetCoreObjectFile() {
return m_core_module_sp->GetObjectFile();
}
+
+const uint8_t *ProcessMachCore::PeekMemory(lldb::addr_t low, lldb::addr_t high,
+ size_t &available_bytes) {
+ return DoPeekMemory(m_core_module_sp, m_core_aranges, low, high,
+ available_bytes);
+}
diff --git a/lldb/source/Plugins/Process/mach-core/ProcessMachCore.h b/lldb/source/Plugins/Process/mach-core/ProcessMachCore.h
index 8996ae116614bf..1ec7eaa2bf1558 100644
--- a/lldb/source/Plugins/Process/mach-core/ProcessMachCore.h
+++ b/lldb/source/Plugins/Process/mach-core/ProcessMachCore.h
@@ -84,6 +84,9 @@ class ProcessMachCore : public lldb_private::PostMortemProcess {
DoGetMemoryRegionInfo(lldb::addr_t load_addr,
lldb_private::MemoryRegionInfo ®ion_info) override;
+ const uint8_t *PeekMemory(lldb::addr_t low, lldb::addr_t high,
+ size_t &available_bytes) override;
+
private:
void CreateMemoryRegions();
@@ -120,13 +123,6 @@ class ProcessMachCore : public lldb_private::PostMortemProcess {
return eKernelCorefile;
}
- // For ProcessMachCore only
- typedef lldb_private::Range<lldb::addr_t, lldb::addr_t> FileRange;
- typedef lldb_private::RangeDataVector<lldb::addr_t, lldb::addr_t, FileRange>
- VMRangeToFileOffset;
- typedef lldb_private::RangeDataVector<lldb::addr_t, lldb::addr_t, uint32_t>
- VMRangeToPermissions;
-
VMRangeToFileOffset m_core_aranges;
VMRangeToPermissions m_core_range_infos;
lldb::ModuleSP m_core_module_sp;
diff --git a/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp b/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp
index 13599f4a1553fd..ca8f1e46b02731 100644
--- a/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp
+++ b/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp
@@ -564,6 +564,17 @@ JITLoaderList &ProcessMinidump::GetJITLoaders() {
#define APPEND_OPT(VAR) \
m_option_group.Append(&VAR, LLDB_OPT_SET_ALL, LLDB_OPT_SET_1)
+const uint8_t *ProcessMinidump::PeekMemory(lldb::addr_t low, lldb::addr_t high,
+ size_t &available_bytes) {
+ llvm::ArrayRef<uint8_t> mem = m_minidump_parser->GetMemory(low, high - low);
+ if (mem.empty()) {
+ available_bytes = 0;
+ return nullptr;
+ }
+ available_bytes = mem.size();
+ return mem.data();
+}
+
class CommandObjectProcessMinidumpDump : public CommandObjectParsed {
private:
OptionGroupOptions m_option_group;
diff --git a/lldb/source/Plugins/Process/minidump/ProcessMinidump.h b/lldb/source/Plugins/Process/minidump/ProcessMinidump.h
index 3f3123a0a8b5d3..0eabe22d1d860d 100644
--- a/lldb/source/Plugins/Process/minidump/ProcessMinidump.h
+++ b/lldb/source/Plugins/Process/minidump/ProcessMinidump.h
@@ -106,6 +106,9 @@ class ProcessMinidump : public PostMortemProcess {
JITLoaderList &GetJITLoaders() override;
+ const uint8_t *PeekMemory(lldb::addr_t low, lldb::addr_t high,
+ size_t &available_bytes) override;
+
private:
lldb::DataBufferSP m_core_data;
llvm::ArrayRef<minidump::Thread> m_thread_list;
diff --git a/lldb/source/Symbol/ObjectFile.cpp b/lldb/source/Symbol/ObjectFile.cpp
index 35317d209de1f9..e55795307a91ec 100644
--- a/lldb/source/Symbol/ObjectFile.cpp
+++ b/lldb/source/Symbol/ObjectFile.cpp
@@ -483,6 +483,18 @@ size_t ObjectFile::CopyData(lldb::offset_t offset, size_t length,
return m_data.CopyData(offset, length, dst);
}
+const uint8_t *ObjectFile::PeekData(lldb::addr_t offset, size_t length,
+ size_t &available_bytes) const {
+ const uint8_t *data = m_data.PeekData(offset, length);
+ if (data == nullptr) {
+ available_bytes = 0;
+ return nullptr;
+ }
+
+ available_bytes = length;
+ return data;
+}
+
size_t ObjectFile::ReadSectionData(Section *section,
lldb::offset_t section_offset, void *dst,
size_t dst_len) {
diff --git a/lldb/source/Target/CMakeLists.txt b/lldb/source/Target/CMakeLists.txt
index a42c44b761dc56..20a200fda57633 100644
--- a/lldb/source/Target/CMakeLists.txt
+++ b/lldb/source/Target/CMakeLists.txt
@@ -26,6 +26,7 @@ add_lldb_library(lldbTarget
OperatingSystem.cpp
PathMappingList.cpp
Platform.cpp
+ PostMortemProcess.cpp
Process.cpp
ProcessTrace.cpp
Queue.cpp
diff --git a/lldb/source/Target/PostMortemProcess.cpp b/lldb/source/Target/PostMortemProcess.cpp
new file mode 100644
index 00000000000000..fb2b8b8c6f9180
--- /dev/null
+++ b/lldb/source/Target/PostMortemProcess.cpp
@@ -0,0 +1,80 @@
+//===-- PostMortemProcess.cpp -----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "lldb/Target/PostMortemProcess.h"
+
+#include "lldb/Core/Module.h"
+#include "lldb/Utility/LLDBLog.h"
+#include "lldb/lldb-forward.h"
+
+using namespace lldb;
+using namespace lldb_private;
+
+lldb::addr_t PostMortemProcess::FindInMemory(lldb::addr_t low,
+ lldb::addr_t high,
+ const uint8_t *buf, size_t size) {
+ const size_t region_size = high - low;
+ if (region_size < size)
+ return LLDB_INVALID_ADDRESS;
+
+ size_t mem_size = 0;
+ const uint8_t *data = PeekMemory(low, high, mem_size);
+ if (data == nullptr || mem_size != region_size) {
+ LLDB_LOG(GetLog(LLDBLog::Process),
+ "Failed to get contiguous memory region for search. low: 0x{}, "
+ "high: 0x{}. Failling back to Process::FindInMemory",
+ low, high);
+ // In an edge case when the search has to happen across non-contiguous
+ // memory, we will have to fall back on the Process::FindInMemory.
+ return Process::FindInMemory(low, high, buf, size);
+ }
+
+ return Process::FindInMemoryGeneric(data, low, high, buf, size);
+}
+
+const uint8_t *PostMortemProcess::DoPeekMemory(
+ lldb::ModuleSP &core_module_sp, VMRangeToFileOffset &core_aranges,
+ lldb::addr_t low, lldb::addr_t high, size_t &available_bytes) {
+
+ ObjectFile *core_objfile = core_module_sp->GetObjectFile();
+
+ if (core_objfile == nullptr) {
+ available_bytes = 0;
+ return nullptr;
+ }
+
+ const VMRangeToFileOffset::Entry *core_memory_entry =
+ core_aranges.FindEntryThatContains(low);
+ if (core_memory_entry == nullptr || core_memory_entry->GetRangeEnd() < low) {
+ available_bytes = 0;
+ return nullptr;
+ }
+ const lldb::addr_t offset = low - core_memory_entry->GetRangeBase();
+ const lldb::addr_t file_start = core_memory_entry->data.GetRangeBase();
+ const lldb::addr_t file_end = core_memory_entry->data.GetRangeEnd();
+
+ if (file_start == file_end) {
+ available_bytes = 0;
+ return nullptr;
+ }
+ size_t bytes_available = 0;
+ if (file_end > file_start + offset)
+ bytes_available = file_end - (file_start + offset);
+
+ size_t bytes_to_read = high - low;
+ bytes_to_read = std::min(bytes_to_read, bytes_available);
+ if (bytes_to_read == 0) {
+ available_bytes = 0;
+ return nullptr;
+ }
+
+ const uint8_t *ret =
+ core_objfile->PeekData(core_memory_entry->data.GetRangeBase() + offset,
+ bytes_to_read, available_bytes);
+ return ret;
+}
diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp
index e3c4f2ee398cc4..1c7e284bf09dc6 100644
--- a/lldb/source/Target/Process.cpp
+++ b/lldb/source/Target/Process.cpp
@@ -2126,9 +2126,8 @@ lldb::addr_t Process::FindInMemory(const uint8_t *buf, uint64_t size,
error.SetErrorString("range load address is invalid");
return LLDB_INVALID_ADDRESS;
}
- const lldb::addr_t end_addr = start_addr + range.GetByteSize();
-
AddressRanges matches;
+ const lldb::addr_t end_addr = start_addr + range.GetByteSize();
DoFindInMemory(start_addr, end_addr, buf, size, matches, alignment, 1);
if (matches.empty())
return LLDB_INVALID_ADDRESS;
@@ -3362,29 +3361,8 @@ Status Process::Halt(bool clear_thread_plans, bool use_run_lock) {
lldb::addr_t Process::FindInMemory(lldb::addr_t low, lldb::addr_t high,
const uint8_t *buf, size_t size) {
- const size_t region_size = high - low;
-
- if (region_size < size)
- return LLDB_INVALID_ADDRESS;
-
- std::vector<size_t> bad_char_heuristic(256, size);
- ProcessMemoryIterator iterator(*this, low);
-
- for (size_t idx = 0; idx < size - 1; idx++) {
- decltype(bad_char_heuristic)::size_type bcu_idx = buf[idx];
- bad_char_heuristic[bcu_idx] = size - idx - 1;
- }
- for (size_t s = 0; s <= (region_size - size);) {
- int64_t j = size - 1;
- while (j >= 0 && buf[j] == iterator[s + j])
- j--;
- if (j < 0)
- return low + s;
- else
- s += bad_char_heuristic[iterator[s + size - 1]];
- }
-
- return LLDB_INVALID_ADDRESS;
+ return FindInMemoryGeneric(ProcessMemoryIterator(*this, low), low, high, buf,
+ size);
}
Status Process::StopForDestroyOrDetach(lldb::EventSP &exit_event_sp) {
diff --git a/lldb/source/Target/ProcessTrace.cpp b/lldb/source/Target/ProcessTrace.cpp
index 4718a7ca50a7cb..96769079e61b32 100644
--- a/lldb/source/Target/ProcessTrace.cpp
+++ b/lldb/source/Target/ProcessTrace.cpp
@@ -94,6 +94,12 @@ size_t ProcessTrace::ReadMemory(addr_t addr, void *buf, size_t size,
void ProcessTrace::Clear() { m_thread_list.Clear(); }
+const uint8_t *ProcessTrace::PeekMemory(lldb::addr_t low, lldb::addr_t high,
+ size_t &available_bytes) {
+ available_bytes = 0;
+ return nullptr;
+}
+
void ProcessTrace::Initialize() {
static llvm::once_flag g_once_flag;
diff --git a/lldb/test/API/python_api/find_in_memory/TestFindInMemory.py b/lldb/test/API/python_api/find_in_memory/TestFindInMemory.py
index 9ab4619b1f8f4f..5661e9f1a4c9d1 100644
--- a/lldb/test/API/python_api/find_in_memory/TestFindInMemory.py
+++ b/lldb/test/API/python_api/find_in_memory/TestFindInMemory.py
@@ -13,49 +13,49 @@ class FindInMemoryTestCase(TestBase):
def setUp(self):
TestBase.setUp(self)
+ live_pi = ProcessInfo()
self.build()
(
- self.target,
- self.process,
- self.thread,
- self.bp,
+ live_pi.target,
+ live_pi.process,
+ live_pi.thread,
+ live_pi.bp,
) = lldbutil.run_to_source_breakpoint(
self,
"break here",
lldb.SBFileSpec("main.cpp"),
)
- self.assertTrue(self.bp.IsValid())
+ live_pi.frame = live_pi.thread.GetFrameAtIndex(0)
+ self.assertTrue(live_pi.bp.IsValid())
+ self.assertTrue(live_pi.process, PROCESS_IS_VALID)
+ self.assertState(live_pi.process.GetState(), lldb.eStateStopped, PROCESS_STOPPED)
+
+ self.live_pi = live_pi
def test_check_stack_pointer(self):
"""Make sure the 'stack_pointer' variable lives on the stack"""
- self.assertTrue(self.process, PROCESS_IS_VALID)
- self.assertState(self.process.GetState(), lldb.eStateStopped, PROCESS_STOPPED)
-
- frame = self.thread.GetSelectedFrame()
- ex = frame.EvaluateExpression("&stack_pointer")
+ ex = self.live_pi.frame.EvaluateExpression("&stack_pointer")
variable_region = lldb.SBMemoryRegionInfo()
self.assertTrue(
- self.process.GetMemoryRegionInfo(
+ self.live_pi.process.GetMemoryRegionInfo(
ex.GetValueAsUnsigned(), variable_region
).Success(),
)
stack_region = lldb.SBMemoryRegionInfo()
self.assertTrue(
- self.process.GetMemoryRegionInfo(frame.GetSP(), stack_region).Success(),
+ self.live_pi.process.GetMemoryRegionInfo(self.live_pi.frame.GetSP(), stack_region).Success(),
)
self.assertEqual(variable_region, stack_region)
def test_find_in_memory_ok(self):
"""Make sure a match exists in the heap memory and the right address ranges are provided"""
- self.assertTrue(self.process, PROCESS_IS_VALID)
- self.assertState(self.process.GetState(), lldb.eStateStopped, PROCESS_STOPPED)
error = lldb.SBError()
- addr = self.process.FindInMemory(
+ addr = self.live_pi.process.FindInMemory(
SINGLE_INSTANCE_PATTERN_STACK,
- GetStackRange(self),
+ GetStackRange(self, self.live_pi),
1,
error,
)
@@ -65,12 +65,10 @@ def test_find_in_memory_ok(self):
def test_find_in_memory_double_instance_ok(self):
"""Make sure a match exists in the heap memory and the right address ranges are provided"""
- self.assertTrue(self.process, PROCESS_IS_VALID)
- self.assertState(self.process.GetState(), lldb.e...
[truncated]
|
✅ With the latest revision this PR passed the Python code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thinking about this some more here are my thoughts: both ObjectFile::PeekData
and PostMoretemProcess::PeekMemory
should have a way to say "I am not implemented". Using either a llvm::Expected or std::optional. If ObjectFile::PeekData
returns unimplemented, we need to fall back to ObjectFile::CopyData
. If PostMoretemProcess::PeekMemory
returns unimplemented, we need to fall back to standard PostMoretemProcess::ReadMemory
. The main reason being: some core files are too large to mmap into memory and at some point we need to be able to not load the entire core file at once. If we don't load the entire core file, then we need to allow PostMoretemProcess::PeekMemory
to succeed for areas it does have mapped into the process, and fail for areas where it can't. If we build in the fallback stuff as mentioned, then this all continues to work.
So the main idea is: if either ObjectFile or PostMortemProcess can return a reference to the data in the Peek calls, then should as long as they have this data mapped into the LLDB address space and can hand out a pointer to the data. If they can't, we should fall back to something that can copy the data into a buffer as mentioned with CopyData
or ReadMemory
. Core files or object files might compress up their data, in which case we wouldn't be able to hande out a buffer from calls to Peek, but we would need to unzip the buffer into the buffer supplied by CopyData and ReadMemory on the fly.
This is already the behavior, if ObjectFile::PeekData returns anything other than an array with expected size, we fall back to the default implementation which uses the original ReadMemory() method. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks pretty close to me. Lets give some time for others to review
It just so happens that I got a Now, this patch fixes both of those problems for core files (which implement the required APIs), but it still leaves the operation for live processes in its current, broken, state. This got me thinking whether it would be possible to implement something like this PR in a way that benefits all implementations -- and I think the answer to that question is "yes". The part where you skip over the holes in memory is implementable using the existing GetMemoryRegion API which all processes implement (or should implement). And we can certainly do something to avoid copying in the ReadMemory calls. Probably the best think to do would be to have a I think that would give us most of the performance of the current implementation for core files, and also significantly improve the live process situation. (And we'd also have a single implementation). What to you (all) think? Note that this isn't something I expect you must do on your own. Since this is something that I now need to do on my own anyway, I'd be happy to do some or all of this work myself. |
BTW, we have another bug where the person wants to use lldb to open Anyway, I'm not going to work on this any time soon, just thought you might want to hear about another use case where its not possible to return a reference to existing data. |
This would be quite the rework of the FindInMemory() API. Would it be worth creating an Issue and move the discussion there to unblock this PR? |
I think the two are related because if we (I?) do something like I proposed, then this patch becomes unnecessary (or even gets in the way). I also don't think that would be as much work as you think. In fact, if we look at the number of new APIs added, I'm certain that implementation would end up being simpler. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I vote to leave this PR as is because VMRangeToFileOffset
is a list of VMRanges that map to file offsets and this list combines entries with consecutive address ranges whose data is consecutive in the core file. And VMRangeToPermissions
does not break things up. So not sure how we could use memory regions with this. It would also make the patch really large to have Process::ReadMemory
return a DataExtractor
and change all code over to using that. I do like this idea and think that we should work on a patch that can do this because PeekMemory
could easily set the data pointers within a DataExtractor
without copying the data.
That being said, if there is something we can do to this PR to make it easy for you (Pavel) to implement this feature, we can make those changes. Like I think it would be fair to change all This:
Could become:
And we can adjust all users of |
I'm not sure which feature are you referring to now. I'm not particularly worried about the The thing I am looking into right now is the ability to make |
The purpose of this it to make the |
Summary: The current implementation of 'Process::FindInMemory()' utilizes a slow ReadMemory() API to search for 1 byte at a time in memory. This new overload takes advantage of the fact that the process core is already loaded into memory, allowing for a direct and more efficient search. Test Plan: llvm-lit ./llvm-project/lldb/test/API/python_api/find_in_memory/TestFindInMemoryCore.py ./llvm-project/lldb/test/API/python_api/find_in_memory/TestFindRangesInMemory.py ./llvm-project/lldb/test/API/python_api/find_in_memory/TestFindInMemory.py Reviewers: Subscribers: Tasks: Tags:
Yes, this is primarily for performance of C++ coroutine detection where we must search memory for magic values. |
I see. And do you have an idea of how load-bearing that number is? For example, would you settle for a 18x speedup? That's what I got from #104193, with the advantage that it also works for live processes (with a ~6x speedup) and requires no new APIs. With the addition of a copy-free ReadMemory API it could be made even faster (probably up to your 100x). |
I don't want to derail the conversation about this PR, but I've been wanting to overhaul our Target::ReadMemory and Process::ReadMemory API in lldb for months now, and haven't put together a coherent proposal for it yet. I need to clear a few things off my task list and get started on this for real. My main complaint about the existing API is that many people don't understand the difference between a Target memory read (may read from live memory, may read from the file as an optimization, or if the memory is inaccessible) versus a Process memory read (only read from live memory), and the fact that Process has several convenient access methods like ReadCStringFromMemory which Target doesn't have means people chose based on that. I haven't thought about all the different parameters we need to allow people to specify yet, and how it's going to look in an API, but I think we need an API that doesn't depend on the Target/Process distinction to implicitly get one behavior over another. I hope to think on this enough to get a proposal on a Discourse soon. (and so making changes to how the ReadMemory API return their data would make sense, since everything will need to be updated anyway.) |
Thanks for chiming in Jason. I'm glad this is getting attention, as I think that our memory reading APIs are in need of an overhaul -- and it gives me a chance to share my idea. ;) I don't think that we need to change all of the APIs to return their memory. Sometimes reading the memory into a caller-provided buffer makes perfect sense. What I would like to do is to regularize all the APIs, and avoid the need to implement the logic of e.g. "reading a c string twice". The approach I'd like is to define a Another nice part about this is that if we have some code, which only needs to read memory, then it can take a MemoryReader reference (instead of Process or whatever), and then it can be unit tested by just passing it a mock memory reader. |
Let me know if you guys don't want this patch in. I will closed it and apply it to our local branch. @jasonmolenda @labath |
My comment about hoping to crack open the Target/Process ReadMemory API shouldn't have anything to do with this patch, I don't think. I saw the discussion going in to areas of "the ReadMemory API could behave differently", and I wanted to note that I'm hoping to start a proposal on changing these API, and that would be a good opportunity to make changes like those because I'll probably need to update all the callers across the codebase if we find a new API design we can all agree on. |
@labath just a friendly ping |
Without claiming official authority to make this decision, I'm going to say that I don't think this is a good idea, because the patch is very (and IMO, unnecessarily) specific to post mortem processes. My (currently WIP) patch (#104193) gets you most of the speed benefits, works with live processes as well and it does that without introducing any new APIs. What you do downstream is up to you, but I would encourage you to wait for me to finish up that patch, as this doesn't look like something that would be a good idea to carry downstream (I certainly wouldn't want to do it). |
Closing this PR in favor of #104193 |
Summary:
The current implementation of 'Process::FindInMemory()' utilizes a slow ReadMemory() API to search for 1 byte at a time in memory. This new overload takes advantage of the fact that the process core is already loaded into memory, allowing for a direct and more efficient search.
Test Plan:
llvm-lit ./llvm-project/lldb/test/API/python_api/find_in_memory/TestFindInMemoryCore.py ./llvm-project/lldb/test/API/python_api/find_in_memory/TestFindRangesInMemory.py ./llvm-project/lldb/test/API/python_api/find_in_memory/TestFindInMemory.py