Skip to content

Commit 7b8b8b6

Browse files
committed
[lldb] Fix and speed up the `memory find` command
This patch fixes an issue where the `memory find` command would effectively stop searching after encountering a memory read error (which could happen due to unreadable memory), without giving any indication that it had done so (it would just print that it could not find the pattern). To make matters worse, it would not terminate after encountering this error, but would rather proceed to slowly increment the address pointer, which meant that searching a large region could take a very long time (and give the appearance that lldb is actually searching for the thing). The patch fixes the first problem (*) by detecting read errors, skipping over the unreadable parts of memory (using GetMemoryRegionInfo), and resuming the search after them. It also reads the memory in bulk (up to 1MB), which speeds up the search significantly (up to 6x for live processes, 18x for core files). (*) The fix does not work on Windows yet, because the ReadMemory API does not return partial results there (like it does on other systems). I'm preparing a separate patch to deal with that.
1 parent cb9267f commit 7b8b8b6

File tree

3 files changed

+136
-45
lines changed

3 files changed

+136
-45
lines changed

lldb/source/Target/Process.cpp

Lines changed: 34 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -114,33 +114,6 @@ class ProcessOptionValueProperties
114114
}
115115
};
116116

117-
// Byte-at-a-time accessor over a process's memory, addressed as an offset
// from a fixed base address. After the first failed read the object marks
// itself invalid and every later access yields 0 without touching the process.
class ProcessMemoryIterator {
118-
public:
119-
// Binds the iterator to `process`; offsets passed to operator[] are added to
// `base`.
ProcessMemoryIterator(Process &process, lldb::addr_t base)
120-
: m_process(process), m_base_addr(base) {}
121-
122-
// Returns false once any read through operator[] has returned zero bytes.
bool IsValid() { return m_is_valid; }
123-
124-
// Reads the single byte at `m_base_addr + offset`.
// Returns 0 both for a genuine zero byte and for a failed read, so callers
// have to consult IsValid() to tell the two apart.
uint8_t operator[](lldb::addr_t offset) {
125-
if (!IsValid())
126-
return 0;
127-
128-
uint8_t retval = 0;
129-
Status error;
130-
// One ReadMemory call per byte; a result of 0 bytes read is treated as
// failure and is sticky (the `error` object itself is not inspected).
if (0 == m_process.ReadMemory(m_base_addr + offset, &retval, 1, error)) {
131-
m_is_valid = false;
132-
return 0;
133-
}
134-
135-
return retval;
136-
}
137-
138-
private:
139-
Process &m_process;
140-
const lldb::addr_t m_base_addr;
141-
// Cleared by operator[] on the first failed read and never set again.
bool m_is_valid = true;
142-
};
143-
144117
static constexpr OptionEnumValueElement g_follow_fork_mode_values[] = {
145118
{
146119
eFollowParent,
@@ -3367,21 +3340,48 @@ lldb::addr_t Process::FindInMemory(lldb::addr_t low, lldb::addr_t high,
33673340
if (region_size < size)
33683341
return LLDB_INVALID_ADDRESS;
33693342

3343+
// See "Boyer-Moore string search algorithm".
33703344
std::vector<size_t> bad_char_heuristic(256, size);
3371-
ProcessMemoryIterator iterator(*this, low);
3372-
33733345
for (size_t idx = 0; idx < size - 1; idx++) {
33743346
decltype(bad_char_heuristic)::size_type bcu_idx = buf[idx];
33753347
bad_char_heuristic[bcu_idx] = size - idx - 1;
33763348
}
3377-
for (size_t s = 0; s <= (region_size - size);) {
3349+
3350+
// Memory we're currently searching through.
3351+
llvm::SmallVector<uint8_t, 0> mem;
3352+
// Position of the memory buffer.
3353+
addr_t mem_pos = low;
3354+
// Maximum number of bytes read (and buffered). We need to read at least
3355+
// `size` bytes for a successful match.
3356+
const size_t max_read_size = std::max<size_t>(size, 0x10000);
3357+
3358+
for (addr_t s = low; s <= (high - size);) {
3359+
if (s + size > mem.size() + mem_pos) {
3360+
// We need to read more data. We don't attempt to reuse the data we've
3361+
// already read (up to `size-1` bytes from `s` to `mem_pos+mem.size()`).
3362+
// This is fine for patterns much smaller than max_read_size. For very
3363+
// long patterns we may need to do something more elaborate.
3364+
mem.resize_for_overwrite(max_read_size);
3365+
Status error;
3366+
mem.resize(
3367+
ReadMemory(s, mem.data(), std::min(mem.size(), high - s), error));
3368+
mem_pos = s;
3369+
if (error.Fail() || size > mem.size()) {
3370+
// We didn't read enough data. Skip to the next memory region.
3371+
MemoryRegionInfo info;
3372+
error = GetMemoryRegionInfo(mem_pos + mem.size(), info);
3373+
if (error.Fail())
3374+
break;
3375+
s = info.GetRange().GetRangeEnd();
3376+
continue;
3377+
}
3378+
}
33783379
int64_t j = size - 1;
3379-
while (j >= 0 && buf[j] == iterator[s + j])
3380+
while (j >= 0 && buf[j] == mem[s + j - mem_pos])
33803381
j--;
33813382
if (j < 0)
3382-
return low + s;
3383-
else
3384-
s += bad_char_heuristic[iterator[s + size - 1]];
3383+
return s; // We have a match.
3384+
s += bad_char_heuristic[mem[s + size - 1 - mem_pos]];
33853385
}
33863386

33873387
return LLDB_INVALID_ADDRESS;

lldb/test/API/functionalities/memory/find/TestMemoryFind.py

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,16 @@
1010

1111

1212
class MemoryFindTestCase(TestBase):
13+
14+
NO_DEBUG_INFO_TESTCASE = True
15+
1316
# Per-test fixture: runs the base-class setup and records, in self.line, the
# line of main.cpp that carries the breakpoint marker comment.
def setUp(self):
1417
# Call super's setUp().
1518
TestBase.setUp(self)
1619
# Find the line number to break inside main().
1720
self.line = line_number("main.cpp", "// break here")
1821

19-
def test_memory_find(self):
20-
"""Test the 'memory find' command."""
22+
def _prepare_inferior(self):
2123
self.build()
2224
exe = self.getBuildArtifact("a.out")
2325
self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET)
@@ -39,7 +41,10 @@ def test_memory_find(self):
3941
# The breakpoint should have a hit count of 1.
4042
lldbutil.check_breakpoint(self, bpno=1, expected_hit_count=1)
4143

42-
# Test the memory find commands.
44+
def test_memory_find(self):
45+
"""Test the 'memory find' command."""
46+
47+
self._prepare_inferior()
4348

4449
# Empty search string should be handled.
4550
self.expect(
@@ -79,3 +84,22 @@ def test_memory_find(self):
7984
'memory find -s "nothere" `stringdata` `stringdata+10`',
8085
substrs=["data not found within the range."],
8186
)
87+
88+
# Checks that 'memory find' reports every "needle" in a five-page range even
# though that range contains holes. Marked as an expected failure on Windows.
@expectedFailureWindows
89+
def test_memory_find_with_holes(self):
90+
self._prepare_inferior()
91+
92+
# NOTE(review): `pagesize` and `mem_with_holes` are read here but not
# referenced again in this method; the command string below names the
# inferior's variables directly inside backticks.
pagesize = self.frame().FindVariable("pagesize").GetValueAsUnsigned()
93+
mem_with_holes = (
94+
self.frame().FindVariable("mem_with_holes").GetValueAsUnsigned()
95+
)
96+
# The inferior's `matches` array is expected to have exactly four entries.
matches_var = self.frame().FindVariable("matches")
97+
self.assertEqual(matches_var.GetNumChildren(), 4)
# Build one expected output substring per entry, with the entry's value
# formatted as 0x-prefixed hex.
98+
matches = [
99+
f"data found at location: {matches_var.GetChildAtIndex(i).GetValueAsUnsigned():#x}"
100+
for i in range(4)
101+
]
# Ask for up to 5 matches (-c 5) so that, after the four expected ones, the
# command also prints the "no more matches" message checked below.
102+
self.expect(
103+
'memory find -c 5 -s "needle" `mem_with_holes` `mem_with_holes+5*pagesize`',
104+
substrs=matches + ["no more matches within the range"],
105+
)
Lines changed: 75 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,76 @@
1-
#include <stdio.h>
2-
#include <stdint.h>
3-
4-
int main (int argc, char const *argv[])
5-
{
6-
const char* stringdata = "hello world; I like to write text in const char pointers";
7-
uint8_t bytedata[] = {0xAA,0xBB,0xCC,0xDD,0xEE,0xFF,0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77,0x88,0x99};
8-
return 0; // break here
1+
#include <cstdint>
2+
#include <cstdio>
3+
#include <cstdlib>
4+
#include <cstring>
5+
#include <initializer_list>
6+
#include <iostream>
7+
8+
#ifdef _WIN32
9+
#include "Windows.h"
10+
11+
// Windows-only stand-in for getpagesize(): returns the dwPageSize field that
// GetSystemInfo() fills in. The DWORD value is converted to int on return.
int getpagesize() {
12+
SYSTEM_INFO system_info;
13+
GetSystemInfo(&system_info);
14+
return system_info.dwPageSize;
15+
}
16+
17+
// Windows variant: reserves a five-page address range with no access, then
// commits pages 0, 2 and 4 as read/write. Pages 1 and 3 are never committed,
// which leaves two holes inside the range.
// Returns a pointer to the start of the range. On any VirtualAlloc failure it
// prints the message for GetLastError() to std::cerr and calls exit(1).
char *allocate_memory_with_holes() {
18+
int pagesize = getpagesize();
19+
// Reserve address space only (MEM_RESERVE); nothing is committed yet.
void *mem = VirtualAlloc(nullptr, 5 * pagesize, MEM_RESERVE, PAGE_NOACCESS);
20+
if (!mem) {
21+
// NOTE(review): std::system_category is declared in <system_error>, which
// is not among the includes visible in this file -- presumably it arrives
// transitively; confirm.
std::cerr << std::system_category().message(GetLastError()) << std::endl;
22+
exit(1);
23+
}
24+
char *bytes = static_cast<char *>(mem);
25+
// Commit every other page, starting with the first one.
for (int page : {0, 2, 4}) {
26+
if (!VirtualAlloc(bytes + page * pagesize, pagesize, MEM_COMMIT,
27+
PAGE_READWRITE)) {
28+
std::cerr << std::system_category().message(GetLastError()) << std::endl;
29+
exit(1);
30+
}
31+
}
32+
return bytes;
33+
}
34+
#else
35+
#include "sys/mman.h"
36+
#include "unistd.h"
37+
38+
// Non-Windows variant: maps five contiguous anonymous, private, read/write
// pages and then unmaps pages 1 and 3, which leaves two holes inside the
// range.
// Returns a pointer to the start of the range. If mmap or munmap fails it
// reports the error with perror() and calls exit(1).
char *allocate_memory_with_holes() {
39+
int pagesize = getpagesize();
40+
void *mem = mmap(nullptr, 5 * pagesize, PROT_READ | PROT_WRITE,
41+
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
42+
if (mem == MAP_FAILED) {
43+
perror("mmap");
44+
exit(1);
45+
}
46+
char *bytes = static_cast<char *>(mem);
47+
// Punch out the second and fourth page; pages 0, 2 and 4 stay mapped.
for (int page : {1, 3}) {
48+
if (munmap(bytes + page * pagesize, pagesize) != 0) {
49+
perror("munmap");
50+
exit(1);
51+
}
52+
}
53+
return bytes;
54+
}
55+
#endif
56+
57+
// Test inferior: sets up a string, a byte array, and a five-page buffer with
// holes, then writes "needle" at four addresses inside that buffer before
// reaching the marked return statement.
int main(int argc, char const *argv[]) {
58+
const char *stringdata =
59+
"hello world; I like to write text in const char pointers";
60+
uint8_t bytedata[] = {0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x00, 0x11,
61+
0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99};
62+
63+
char *mem_with_holes = allocate_memory_with_holes();
64+
int pagesize = getpagesize();
// Every address below falls in page 0, 2 or 4 of the buffer.
65+
char *matches[] = {
66+
mem_with_holes, // Beginning of memory
67+
mem_with_holes + 2 * pagesize, // After a hole
68+
mem_with_holes + 2 * pagesize +
69+
pagesize / 2, // Middle of a block, after an existing match.
// The offset of 7 is the size of "needle" plus its terminating NUL, so the
// last copy ends exactly at the end of the fifth page.
70+
mem_with_holes + 5 * pagesize - 7, // End of memory
71+
};
72+
for (char *m : matches)
73+
strcpy(m, "needle");
74+
75+
return 0; // break here
976
}

0 commit comments

Comments
 (0)