Skip to content

Commit d30797b

Browse files
[lldb] Minidump: check for .text hash match with directory
When opening a minidump, we might discover that it reports a UUID for a module that doesn't match the build ID, but rather a hash of the .text section (according to either of two different hash functions, used by breakpad and Facebook respectively). The current logic searches for a module by filename only to check the hash; this change updates it to first search by directory+filename. This is important when the directory specified in the minidump must be interpreted relative to a user-provided sysroot, as the leaf directory won't be in the search path in that case. Also add a regression test; without this change, module validation fails because we have just the placeholder module which reports as its path the platform path in the minidump. Reviewed By: clayborg Differential Revision: https://reviews.llvm.org/D89155
1 parent daae4a8 commit d30797b

File tree

4 files changed

+133
-49
lines changed

4 files changed

+133
-49
lines changed

lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp

Lines changed: 62 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -484,6 +484,53 @@ bool ProcessMinidump::UpdateThreadList(ThreadList &old_thread_list,
484484
return new_thread_list.GetSize(false) > 0;
485485
}
486486

487+
// Look up (or create) the module described by module_spec in the target and
// verify it against the UUID recorded in the minidump.
//
// minidump_uuid: the UUID the minidump reports for the module.
// name:          module name, used only in the log messages below.
// module_spec:   the spec handed to Target::GetOrCreateModule.
//
// Returns the module if the target could find one and it is accepted by one
// of the checks below (empty/prefix UUID match, Breakpad .text hash, or
// Facebook .text hash). Returns an empty ModuleSP if no module was found, or
// if the one found was rejected; a rejected module is also removed from the
// target's image list.
ModuleSP ProcessMinidump::GetOrCreateModule(UUID minidump_uuid,
488+
llvm::StringRef name,
489+
ModuleSpec module_spec) {
490+
Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_DYNAMIC_LOADER));
491+
Status error;
492+
493+
ModuleSP module_sp =
494+
GetTarget().GetOrCreateModule(module_spec, true /* notify */, &error);
495+
if (!module_sp)
496+
return module_sp;
497+
// We consider the module to be a match if the minidump UUID is a
498+
// prefix of the actual UUID, or if either of the UUIDs is empty.
499+
const auto dmp_bytes = minidump_uuid.GetBytes();
500+
const auto mod_bytes = module_sp->GetUUID().GetBytes();
501+
const bool match = dmp_bytes.empty() || mod_bytes.empty() ||
502+
mod_bytes.take_front(dmp_bytes.size()) == dmp_bytes;
503+
if (match) {
504+
LLDB_LOG(log, "Partial uuid match for {0}.", name);
505+
return module_sp;
506+
}
507+
508+
// Breakpad generates minidump files, and if there is no GNU build
509+
// ID in the binary, it will calculate a UUID by hashing the first 4096
510+
// bytes of the .text section and using that as the UUID for a module
511+
// in the minidump. Facebook uses a modified breakpad client that
512+
// uses a slightly modified version of this hash to avoid collisions.
513+
// Check for UUIDs from the minidump that match these cases and accept
514+
// the module we find if they do match.
515+
std::vector<uint8_t> breakpad_uuid;
516+
std::vector<uint8_t> facebook_uuid;
517+
HashElfTextSection(module_sp, breakpad_uuid, facebook_uuid);
518+
if (dmp_bytes == llvm::ArrayRef<uint8_t>(breakpad_uuid)) {
519+
LLDB_LOG(log, "Breakpad .text hash match for {0}.", name);
520+
return module_sp;
521+
}
522+
if (dmp_bytes == llvm::ArrayRef<uint8_t>(facebook_uuid)) {
523+
LLDB_LOG(log, "Facebook .text hash match for {0}.", name);
524+
return module_sp;
525+
}
526+
// The UUID wasn't a partial match and didn't match the .text hash
527+
// so remove the module from the target, we will need to create a
528+
// placeholder object file.
529+
GetTarget().GetImages().Remove(module_sp);
530+
module_sp.reset();
531+
return module_sp;
532+
}
533+
487534
void ProcessMinidump::ReadModuleList() {
488535
std::vector<const minidump::Module *> filtered_modules =
489536
m_minidump_parser->GetFilteredModuleList();
@@ -513,54 +560,22 @@ void ProcessMinidump::ReadModuleList() {
513560
// add the module to the target if it finds one.
514561
lldb::ModuleSP module_sp = GetTarget().GetOrCreateModule(module_spec,
515562
true /* notify */, &error);
516-
if (!module_sp) {
517-
// Try and find a module without specifying the UUID and only looking for
518-
// the file given a basename. We then will look for a partial UUID match
519-
// if we find any matches. This function will add the module to the
520-
// target if it finds one, so we need to remove the module from the target
521-
// if the UUID doesn't match during our manual UUID verification. This
522-
// allows the "target.exec-search-paths" setting to specify one or more
523-
// directories that contain executables that can be searched for matches.
524-
ModuleSpec basename_module_spec(module_spec);
525-
basename_module_spec.GetUUID().Clear();
526-
basename_module_spec.GetFileSpec().GetDirectory().Clear();
527-
module_sp = GetTarget().GetOrCreateModule(basename_module_spec,
528-
true /* notify */, &error);
529-
if (module_sp) {
530-
// We consider the module to be a match if the minidump UUID is a
531-
// prefix of the actual UUID, or if either of the UUIDs are empty.
532-
const auto dmp_bytes = uuid.GetBytes();
533-
const auto mod_bytes = module_sp->GetUUID().GetBytes();
534-
const bool match = dmp_bytes.empty() || mod_bytes.empty() ||
535-
mod_bytes.take_front(dmp_bytes.size()) == dmp_bytes;
536-
if (!match) {
537-
// Breakpad generates minindump files, and if there is no GNU build
538-
// ID in the binary, it will calculate a UUID by hashing first 4096
539-
// bytes of the .text section and using that as the UUID for a module
540-
// in the minidump. Facebook uses a modified breakpad client that
541-
// uses a slightly modified this hash to avoid collisions. Check for
542-
// UUIDs from the minindump that match these cases and accept the
543-
// module we find if they do match.
544-
std::vector<uint8_t> breakpad_uuid;
545-
std::vector<uint8_t> facebook_uuid;
546-
HashElfTextSection(module_sp, breakpad_uuid, facebook_uuid);
547-
if (dmp_bytes == llvm::ArrayRef<uint8_t>(breakpad_uuid)) {
548-
LLDB_LOG(log, "Breakpad .text hash match for {0}.", name);
549-
} else if (dmp_bytes == llvm::ArrayRef<uint8_t>(facebook_uuid)) {
550-
LLDB_LOG(log, "Facebook .text hash match for {0}.", name);
551-
} else {
552-
// The UUID wasn't a partial match and didn't match the .text hash
553-
// so remove the module from the target, we will need to create a
554-
// placeholder object file.
555-
GetTarget().GetImages().Remove(module_sp);
556-
module_sp.reset();
557-
}
558-
} else {
559-
LLDB_LOG(log, "Partial uuid match for {0}.", name);
560-
}
561-
}
562-
} else {
563+
if (module_sp) {
563564
LLDB_LOG(log, "Full uuid match for {0}.", name);
565+
} else {
566+
// We couldn't find a module with an exactly-matching UUID. Sometimes
567+
// a minidump UUID is only a partial match or is a hash. So try again
568+
// without specifying the UUID, then again without specifying the
569+
// directory if that fails. This will allow us to find modules with
570+
// partial matches or hash UUIDs in user-provided sysroots or search
571+
// directories (target.exec-search-paths).
572+
ModuleSpec partial_module_spec = module_spec;
573+
partial_module_spec.GetUUID().Clear();
574+
module_sp = GetOrCreateModule(uuid, name, partial_module_spec);
575+
if (!module_sp) {
576+
partial_module_spec.GetFileSpec().GetDirectory().Clear();
577+
module_sp = GetOrCreateModule(uuid, name, partial_module_spec);
578+
}
564579
}
565580
if (module_sp) {
566581
// Watch out for place holder modules that have different paths, but the

lldb/source/Plugins/Process/minidump/ProcessMinidump.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,10 @@ class ProcessMinidump : public Process {
102102

103103
void ReadModuleList();
104104

105+
lldb::ModuleSP GetOrCreateModule(lldb_private::UUID minidump_uuid,
106+
llvm::StringRef name,
107+
lldb_private::ModuleSpec module_spec);
108+
105109
JITLoaderList &GetJITLoaders() override;
106110

107111
private:

lldb/test/API/functionalities/postmortem/minidump-new/TestMiniDumpUUID.py

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,14 @@ class MiniDumpUUIDTestCase(TestBase):
2222
def verify_module(self, module, verify_path, verify_uuid):
2323
# Compare the filename and the directory separately. We are avoiding
2424
# SBFileSpec.fullpath because it causes a slash/backslash confusion
25-
# on Windows.
25+
# on Windows. Similarly, we compare the directories using normcase
26+
# because they may contain a Linux-style relative path from the
27+
# minidump appended to a Windows-style root path from the host.
2628
self.assertEqual(
2729
os.path.basename(verify_path), module.GetFileSpec().basename)
2830
self.assertEqual(
29-
os.path.dirname(verify_path), module.GetFileSpec().dirname or "")
31+
os.path.normcase(os.path.dirname(verify_path)),
32+
os.path.normcase(module.GetFileSpec().dirname or ""))
3033
self.assertEqual(verify_uuid, module.GetUUIDString())
3134

3235
def get_minidump_modules(self, yaml_file):
@@ -201,6 +204,50 @@ def test_breakpad_hash_match(self):
201204
# will check that this matches.
202205
self.verify_module(modules[0], so_path, "D9C480E8")
203206

207+
def test_breakpad_hash_match_sysroot(self):
208+
"""
209+
Check that we can match the breakpad .text section hash when the
210+
module is located under a user-provided sysroot.
211+
"""
212+
sysroot_path = os.path.join(self.getBuildDir(), "mock_sysroot")
213+
# Create the directory under the sysroot where the minidump reports
214+
# the module.
215+
so_dir = os.path.join(sysroot_path, "invalid", "path", "on", "current", "system")
216+
so_path = os.path.join(so_dir, "libbreakpad.so")
217+
lldbutil.mkdir_p(so_dir)
218+
self.yaml2obj("libbreakpad.yaml", so_path)
# Select the remote-linux platform with the mock sysroot so that the
# module directory from the minidump is looked up underneath it.
219+
self.runCmd("platform select remote-linux --sysroot '%s'" % sysroot_path)
220+
modules = self.get_minidump_modules("linux-arm-breakpad-uuid-match.yaml")
221+
self.assertEqual(1, len(modules))
222+
# LLDB makes up its own UUID as well when there is no build ID so we
223+
# will check that this matches.
224+
self.verify_module(modules[0], so_path, "D9C480E8")
225+
226+
def test_breakpad_hash_match_sysroot_decoy(self):
227+
"""
228+
Check that we can match the breakpad .text section hash when there is
229+
a module with the right name but wrong contents under a user-provided
230+
sysroot, and the right module is at the given search path.
231+
"""
232+
sysroot_path = os.path.join(self.getBuildDir(), "mock_sysroot")
233+
# Create the directory under the sysroot where the minidump reports
234+
# the module, and place the decoy library (different .text) there.
235+
decoy_dir = os.path.join(sysroot_path, "invalid", "path", "on", "current", "system")
236+
decoy_path = os.path.join(decoy_dir, "libbreakpad.so")
237+
lldbutil.mkdir_p(decoy_dir)
238+
self.yaml2obj("libbreakpad-decoy.yaml", decoy_path)
239+
self.runCmd("platform select remote-linux --sysroot '%s'" % sysroot_path)
# The library with the expected contents lives outside the sysroot and
# is exposed to LLDB through target.exec-search-paths.
240+
so_dir = os.path.join(self.getBuildDir(), "searchpath_dir")
241+
so_path = os.path.join(so_dir, "libbreakpad.so")
242+
lldbutil.mkdir_p(so_dir)
243+
self.yaml2obj("libbreakpad.yaml", so_path)
244+
self.runCmd('settings set target.exec-search-paths "%s"' % so_dir)
245+
modules = self.get_minidump_modules("linux-arm-breakpad-uuid-match.yaml")
246+
self.assertEqual(1, len(modules))
247+
# LLDB makes up its own UUID as well when there is no build ID so we
248+
# will check that this matches.
249+
self.verify_module(modules[0], so_path, "D9C480E8")
250+
204251
def test_breakpad_overflow_hash_match(self):
205252
"""
206253
This is a similar to test_breakpad_hash_match, but it verifies that
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# This has different .text contents than libbreakpad.yaml,
2+
# to simulate having different versions of the module (to
3+
# test that we pick the one matching the minidump UUID).
4+
--- !ELF
5+
FileHeader:
6+
Class: ELFCLASS32
7+
Data: ELFDATA2LSB
8+
Type: ET_DYN
9+
Machine: EM_ARM
10+
Flags: [ EF_ARM_SOFT_FLOAT, EF_ARM_EABI_VER5 ]
11+
Sections:
12+
Sections:
13+
- Name: .text
14+
Type: SHT_PROGBITS
15+
Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
16+
Address: 0x0000000000010000
17+
AddressAlign: 0x0000000000000004
18+
Content: 040000001400000003000000474E5500CC

0 commit comments

Comments
 (0)