Skip to content

Commit 5e139f4

Browse files
author
git apple-llvm automerger
committed
Merge commit 'b906ddc4d41f' from apple/stable/20200714 into swift/main
2 parents 437a4b7 + b906ddc commit 5e139f4

File tree

10 files changed

+367
-30
lines changed

10 files changed

+367
-30
lines changed

lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp

Lines changed: 128 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,72 @@ class PlaceholderObjectFile : public ObjectFile {
121121
lldb::addr_t m_base;
122122
lldb::addr_t m_size;
123123
};
124+
125+
/// Duplicate the HashElfTextSection() from the breakpad sources.
126+
///
127+
/// Breakpad, a Google crash log reporting tool suite, creates minidump files
128+
/// for many different architectures. When using Breakpad to create ELF
129+
/// minidumps, it will check for a GNU build ID when creating a minidump file
130+
/// and if one doesn't exist in the file, it will say the UUID of the file is a
131+
/// checksum of up to the first 4096 bytes of the .text section. Facebook also
132+
/// uses breakpad and modified this hash to avoid collisions so we can
133+
/// calculate and check for this as well.
134+
///
135+
/// The breakpad code might end up hashing up to 15 bytes that immediately
136+
/// follow the .text section in the file, so this code must do exactly what it
137+
/// does so we can get an exact match for the UUID.
138+
///
139+
/// \param[in] module_sp The module to grab the .text section from.
140+
///
141+
/// \param[in/out] breakpad_uuid A vector that will receive the calculated
142+
/// breakpad .text hash.
143+
///
144+
/// \param[in/out] facebook_uuid A vector that will receive the calculated
145+
/// facebook .text hash.
146+
///
147+
void HashElfTextSection(ModuleSP module_sp, std::vector<uint8_t> &breakpad_uuid,
148+
std::vector<uint8_t> &facebook_uuid) {
149+
SectionList *sect_list = module_sp->GetSectionList();
150+
if (sect_list == nullptr)
151+
return;
152+
SectionSP sect_sp = sect_list->FindSectionByName(ConstString(".text"));
153+
if (!sect_sp)
154+
return;
155+
constexpr size_t kMDGUIDSize = 16;
156+
constexpr size_t kBreakpadPageSize = 4096;
157+
// The breakpad code has a bug where it might access beyond the end of a
158+
// .text section by up to 15 bytes, so we must ensure we round up to the
159+
// next kMDGUIDSize byte boundary.
160+
DataExtractor data;
161+
const size_t text_size = sect_sp->GetFileSize();
162+
const size_t read_size = std::min<size_t>(
163+
llvm::alignTo(text_size, kMDGUIDSize), kBreakpadPageSize);
164+
sect_sp->GetObjectFile()->GetData(sect_sp->GetFileOffset(), read_size, data);
165+
166+
breakpad_uuid.assign(kMDGUIDSize, 0);
167+
facebook_uuid.assign(kMDGUIDSize, 0);
168+
169+
// The only difference between the breakpad hash and the facebook hash is the
170+
// hashing of the text section size into the hash prior to hashing the .text
171+
// contents.
172+
for (size_t i = 0; i < kMDGUIDSize; i++)
173+
facebook_uuid[i] ^= text_size % 255;
174+
175+
// This code carefully duplicates how the hash was created in Breakpad
176+
// sources, including the error where it might has an extra 15 bytes past the
177+
// end of the .text section if the .text section is less than a page size in
178+
// length.
179+
const uint8_t *ptr = data.GetDataStart();
180+
const uint8_t *ptr_end = data.GetDataEnd();
181+
while (ptr < ptr_end) {
182+
for (unsigned i = 0; i < kMDGUIDSize; i++) {
183+
breakpad_uuid[i] ^= ptr[i];
184+
facebook_uuid[i] ^= ptr[i];
185+
}
186+
ptr += kMDGUIDSize;
187+
}
188+
}
189+
124190
} // namespace
125191

126192
ConstString ProcessMinidump::GetPluginNameStatic() {
@@ -444,6 +510,53 @@ bool ProcessMinidump::UpdateThreadList(ThreadList &old_thread_list,
444510
return new_thread_list.GetSize(false) > 0;
445511
}
446512

513+
ModuleSP ProcessMinidump::GetOrCreateModule(UUID minidump_uuid,
                                            llvm::StringRef name,
                                            ModuleSpec module_spec) {
  Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_DYNAMIC_LOADER));
  Status error;

  // Ask the target for a module matching the spec; nothing to verify if it
  // could not find or create one.
  ModuleSP candidate_sp =
      GetTarget().GetOrCreateModule(module_spec, true /* notify */, &error);
  if (!candidate_sp)
    return candidate_sp;

  // Accept the candidate when the minidump UUID is a prefix of the module's
  // actual UUID, or when either UUID is empty.
  const auto wanted_bytes = minidump_uuid.GetBytes();
  const auto actual_bytes = candidate_sp->GetUUID().GetBytes();
  if (wanted_bytes.empty() || actual_bytes.empty() ||
      actual_bytes.take_front(wanted_bytes.size()) == wanted_bytes) {
    LLDB_LOG(log, "Partial uuid match for {0}.", name);
    return candidate_sp;
  }

  // Breakpad generates minidump files, and if there is no GNU build ID in
  // the binary, it calculates a UUID by hashing the first 4096 bytes of the
  // .text section and uses that as the UUID for the module in the minidump.
  // Facebook uses a modified breakpad client with a slightly modified
  // version of this hash to avoid collisions. Accept the candidate if the
  // minidump UUID matches either hash.
  std::vector<uint8_t> breakpad_hash;
  std::vector<uint8_t> facebook_hash;
  HashElfTextSection(candidate_sp, breakpad_hash, facebook_hash);
  if (wanted_bytes == llvm::ArrayRef<uint8_t>(breakpad_hash)) {
    LLDB_LOG(log, "Breakpad .text hash match for {0}.", name);
    return candidate_sp;
  }
  if (wanted_bytes == llvm::ArrayRef<uint8_t>(facebook_hash)) {
    LLDB_LOG(log, "Facebook .text hash match for {0}.", name);
    return candidate_sp;
  }

  // Neither a partial UUID match nor a .text hash match: take the module
  // back out of the target and return an empty pointer so the caller can
  // fall back to a placeholder object file.
  GetTarget().GetImages().Remove(candidate_sp);
  return ModuleSP();
}
559+
447560
void ProcessMinidump::ReadModuleList() {
448561
std::vector<const minidump::Module *> filtered_modules =
449562
m_minidump_parser->GetFilteredModuleList();
@@ -473,30 +586,21 @@ void ProcessMinidump::ReadModuleList() {
473586
// add the module to the target if it finds one.
474587
lldb::ModuleSP module_sp = GetTarget().GetOrCreateModule(module_spec,
475588
true /* notify */, &error);
476-
if (!module_sp) {
477-
// Try and find a module without specifying the UUID and only looking for
478-
// the file given a basename. We then will look for a partial UUID match
479-
// if we find any matches. This function will add the module to the
480-
// target if it finds one, so we need to remove the module from the target
481-
// if the UUID doesn't match during our manual UUID verification. This
482-
// allows the "target.exec-search-paths" setting to specify one or more
483-
// directories that contain executables that can be searched for matches.
484-
ModuleSpec basename_module_spec(module_spec);
485-
basename_module_spec.GetUUID().Clear();
486-
basename_module_spec.GetFileSpec().GetDirectory().Clear();
487-
module_sp = GetTarget().GetOrCreateModule(basename_module_spec,
488-
true /* notify */, &error);
489-
if (module_sp) {
490-
// We consider the module to be a match if the minidump UUID is a
491-
// prefix of the actual UUID, or if either of the UUIDs are empty.
492-
const auto dmp_bytes = uuid.GetBytes();
493-
const auto mod_bytes = module_sp->GetUUID().GetBytes();
494-
const bool match = dmp_bytes.empty() || mod_bytes.empty() ||
495-
mod_bytes.take_front(dmp_bytes.size()) == dmp_bytes;
496-
if (!match) {
497-
GetTarget().GetImages().Remove(module_sp);
498-
module_sp.reset();
499-
}
589+
if (module_sp) {
590+
LLDB_LOG(log, "Full uuid match for {0}.", name);
591+
} else {
592+
// We couldn't find a module with an exactly-matching UUID. Sometimes
593+
// a minidump UUID is only a partial match or is a hash. So try again
594+
// without specifying the UUID, then again without specifying the
595+
// directory if that fails. This will allow us to find modules with
596+
// partial matches or hash UUIDs in user-provided sysroots or search
597+
// directories (target.exec-search-paths).
598+
ModuleSpec partial_module_spec = module_spec;
599+
partial_module_spec.GetUUID().Clear();
600+
module_sp = GetOrCreateModule(uuid, name, partial_module_spec);
601+
if (!module_sp) {
602+
partial_module_spec.GetFileSpec().GetDirectory().Clear();
603+
module_sp = GetOrCreateModule(uuid, name, partial_module_spec);
500604
}
501605
}
502606
if (module_sp) {

lldb/source/Plugins/Process/minidump/ProcessMinidump.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,10 @@ class ProcessMinidump : public Process {
102102

103103
void ReadModuleList();
104104

105+
lldb::ModuleSP GetOrCreateModule(lldb_private::UUID minidump_uuid,
106+
llvm::StringRef name,
107+
lldb_private::ModuleSpec module_spec);
108+
105109
JITLoaderList &GetJITLoaders() override;
106110

107111
private:

lldb/source/Target/Platform.cpp

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1580,21 +1580,29 @@ Status Platform::GetRemoteSharedModule(const ModuleSpec &module_spec,
15801580
if (error.Success() && module_sp)
15811581
break;
15821582
}
1583-
if (module_sp)
1583+
if (module_sp) {
1584+
resolved_module_spec = arch_module_spec;
15841585
got_module_spec = true;
1586+
}
15851587
}
15861588

15871589
if (!got_module_spec) {
15881590
// Get module information from a target.
1589-
if (!GetModuleSpec(module_spec.GetFileSpec(), module_spec.GetArchitecture(),
1590-
resolved_module_spec)) {
1591+
if (GetModuleSpec(module_spec.GetFileSpec(), module_spec.GetArchitecture(),
1592+
resolved_module_spec)) {
15911593
if (!module_spec.GetUUID().IsValid() ||
15921594
module_spec.GetUUID() == resolved_module_spec.GetUUID()) {
1593-
return module_resolver(module_spec);
1595+
got_module_spec = true;
15941596
}
15951597
}
15961598
}
15971599

1600+
if (!got_module_spec) {
1601+
// Fall back to the given module resolver, which may have its own
1602+
// search logic.
1603+
return module_resolver(module_spec);
1604+
}
1605+
15981606
// If we are looking for a specific UUID, make sure resolved_module_spec has
15991607
// the same one before we search.
16001608
if (module_spec.GetUUID().IsValid()) {

lldb/test/API/functionalities/postmortem/minidump-new/TestMiniDumpNew.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -455,3 +455,30 @@ def check_region(index, start, end, read, write, execute, mapped, name):
455455
check_region(17, 0x40169000, 0x4016b000, True, True, False, True, d)
456456
check_region(18, 0x4016b000, 0x40176000, True, True, False, True, n)
457457
check_region(-1, 0x40176000, max_int, False, False, False, False, n)
458+
459+
@skipIfLLVMTargetMissing("X86")
460+
def test_minidump_sysroot(self):
461+
"""Test that lldb can find a module referenced in an i386 linux minidump using the sysroot."""
462+
463+
# Copy linux-x86_64 executable to tmp_sysroot/tmp/test/ (since it was compiled as
464+
# /tmp/test/linux-x86_64)
465+
tmp_sysroot = os.path.join(
466+
self.getBuildDir(), "lldb_i386_mock_sysroot")
467+
executable = os.path.join(
468+
tmp_sysroot, "tmp", "test", "linux-x86_64")
469+
exe_dir = os.path.dirname(executable)
470+
lldbutil.mkdir_p(exe_dir)
471+
shutil.copyfile("linux-x86_64", executable)
472+
473+
# Set sysroot and load core
474+
self.runCmd("platform select remote-linux --sysroot '%s'" %
475+
tmp_sysroot)
476+
self.process_from_yaml("linux-x86_64.yaml")
477+
self.check_state()
478+
479+
# Check that we loaded the module from the sysroot
480+
self.assertEqual(self.target.GetNumModules(), 1)
481+
module = self.target.GetModuleAtIndex(0)
482+
spec_dir_norm = os.path.normcase(module.GetFileSpec().GetDirectory())
483+
exe_dir_norm = os.path.normcase(exe_dir)
484+
self.assertEqual(spec_dir_norm, exe_dir_norm)

lldb/test/API/functionalities/postmortem/minidump-new/TestMiniDumpUUID.py

Lines changed: 112 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,14 @@ class MiniDumpUUIDTestCase(TestBase):
2121
def verify_module(self, module, verify_path, verify_uuid):
2222
# Compare the filename and the directory separately. We are avoiding
2323
# SBFileSpec.fullpath because it causes a slash/backslash confusion
24-
# on Windows.
24+
# on Windows. Similarly, we compare the directories using normcase
25+
# because they may contain a Linux-style relative path from the
26+
# minidump appended to a Windows-style root path from the host.
2527
self.assertEqual(
2628
os.path.basename(verify_path), module.GetFileSpec().basename)
2729
self.assertEqual(
28-
os.path.dirname(verify_path), module.GetFileSpec().dirname or "")
30+
os.path.normcase(os.path.dirname(verify_path)),
31+
os.path.normcase(module.GetFileSpec().dirname or ""))
2932
self.assertEqual(verify_uuid, module.GetUUIDString())
3033

3134
def get_minidump_modules(self, yaml_file):
@@ -179,6 +182,113 @@ def test_partial_uuid_mismatch(self):
179182
"/invalid/path/on/current/system/libuuidmismatch.so",
180183
"7295E17C-6668-9E05-CBB5-DEE5003865D5")
181184

185+
def test_breakpad_hash_match(self):
186+
"""
187+
Breakpad creates minidump files using CvRecord in each module whose
188+
signature is set to PDB70 where the UUID is a hash generated by
189+
breakpad of the .text section. This is only done when the
190+
executable has no ELF build ID.
191+
192+
This test verifies that if we have a minidump with a 16 byte UUID,
193+
that we are able to associate a symbol file with no ELF build ID
194+
and match it up by hashing the .text section.
195+
"""
196+
so_path = self.getBuildArtifact("libbreakpad.so")
197+
self.yaml2obj("libbreakpad.yaml", so_path)
198+
cmd = 'settings set target.exec-search-paths "%s"' % (os.path.dirname(so_path))
199+
self.dbg.HandleCommand(cmd)
200+
modules = self.get_minidump_modules("linux-arm-breakpad-uuid-match.yaml")
201+
self.assertEqual(1, len(modules))
202+
# LLDB makes up its own UUID as well when there is no build ID so we
203+
# will check that this matches.
204+
self.verify_module(modules[0], so_path, "D9C480E8")
205+
206+
def test_breakpad_hash_match_sysroot(self):
207+
"""
208+
Check that we can match the breakpad .text section hash when the
209+
module is located under a user-provided sysroot.
210+
"""
211+
sysroot_path = os.path.join(self.getBuildDir(), "mock_sysroot")
212+
# Create the directory under the sysroot where the minidump reports
213+
# the module.
214+
so_dir = os.path.join(sysroot_path, "invalid", "path", "on", "current", "system")
215+
so_path = os.path.join(so_dir, "libbreakpad.so")
216+
lldbutil.mkdir_p(so_dir)
217+
self.yaml2obj("libbreakpad.yaml", so_path)
218+
self.runCmd("platform select remote-linux --sysroot '%s'" % sysroot_path)
219+
modules = self.get_minidump_modules("linux-arm-breakpad-uuid-match.yaml")
220+
self.assertEqual(1, len(modules))
221+
# LLDB makes up its own UUID as well when there is no build ID so we
222+
# will check that this matches.
223+
self.verify_module(modules[0], so_path, "D9C480E8")
224+
225+
def test_breakpad_hash_match_sysroot_decoy(self):
226+
"""
227+
Check that we can match the breakpad .text section hash when there is
228+
a module with the right name but wrong contents under a user-provided
229+
sysroot, and the right module is at the given search path.
230+
"""
231+
sysroot_path = os.path.join(self.getBuildDir(), "mock_sysroot")
232+
# Create the directory under the sysroot where the minidump reports
233+
# the module.
234+
decoy_dir = os.path.join(sysroot_path, "invalid", "path", "on", "current", "system")
235+
decoy_path = os.path.join(decoy_dir, "libbreakpad.so")
236+
lldbutil.mkdir_p(decoy_dir)
237+
self.yaml2obj("libbreakpad-decoy.yaml", decoy_path)
238+
self.runCmd("platform select remote-linux --sysroot '%s'" % sysroot_path)
239+
so_dir = os.path.join(self.getBuildDir(), "searchpath_dir")
240+
so_path = os.path.join(so_dir, "libbreakpad.so")
241+
lldbutil.mkdir_p(so_dir)
242+
self.yaml2obj("libbreakpad.yaml", so_path)
243+
self.runCmd('settings set target.exec-search-paths "%s"' % so_dir)
244+
modules = self.get_minidump_modules("linux-arm-breakpad-uuid-match.yaml")
245+
self.assertEqual(1, len(modules))
246+
# LLDB makes up its own UUID as well when there is no build ID so we
247+
# will check that this matches.
248+
self.verify_module(modules[0], so_path, "D9C480E8")
249+
250+
def test_breakpad_overflow_hash_match(self):
251+
"""
252+
This is similar to test_breakpad_hash_match, but it verifies that
253+
if the .text section does not end on a 16 byte boundary, then it
254+
will overflow into the next section's data by up to 15 bytes. This
255+
verifies that we are able to match what breakpad does as it will do
256+
this.
257+
"""
258+
so_path = self.getBuildArtifact("libbreakpad.so")
259+
self.yaml2obj("libbreakpad-overflow.yaml", so_path)
260+
cmd = 'settings set target.exec-search-paths "%s"' % (os.path.dirname(so_path))
261+
self.dbg.HandleCommand(cmd)
262+
modules = self.get_minidump_modules("linux-arm-breakpad-uuid-match.yaml")
263+
self.assertEqual(1, len(modules))
264+
# LLDB makes up its own UUID as well when there is no build ID so we
265+
# will check that this matches.
266+
self.verify_module(modules[0], so_path, "48EB9FD7")
267+
268+
269+
def test_facebook_hash_match(self):
270+
"""
271+
Breakpad creates minidump files using CvRecord in each module whose
272+
signature is set to PDB70 where the UUID is a hash generated by
273+
breakpad of the .text section and Facebook modified this hash to
274+
avoid collisions. This is only done when the executable has no ELF
275+
build ID.
276+
277+
This test verifies that if we have a minidump with a 16 byte UUID,
278+
that we are able to associate a symbol file with no ELF build ID
279+
and match it up by hashing the .text section like Facebook does.
280+
"""
281+
so_path = self.getBuildArtifact("libbreakpad.so")
282+
self.yaml2obj("libbreakpad.yaml", so_path)
283+
cmd = 'settings set target.exec-search-paths "%s"' % (os.path.dirname(so_path))
284+
self.dbg.HandleCommand(cmd)
285+
modules = self.get_minidump_modules("linux-arm-facebook-uuid-match.yaml")
286+
self.assertEqual(1, len(modules))
287+
# LLDB makes up its own UUID as well when there is no build ID so we
288+
# will check that this matches.
289+
self.verify_module(modules[0], so_path, "D9C480E8")
290+
291+
182292
def test_relative_module_name(self):
183293
old_cwd = os.getcwd()
184294
self.addTearDownHook(lambda: os.chdir(old_cwd))
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# This has different .text contents than libbreakpad.yaml,
2+
# to simulate having different versions of the module (to
3+
# test that we pick the one matching the minidump UUID).
4+
--- !ELF
5+
FileHeader:
6+
Class: ELFCLASS32
7+
Data: ELFDATA2LSB
8+
Type: ET_DYN
9+
Machine: EM_ARM
10+
Flags: [ EF_ARM_SOFT_FLOAT, EF_ARM_EABI_VER5 ]
11+
Sections:
12+
Sections:
13+
- Name: .text
14+
Type: SHT_PROGBITS
15+
Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
16+
Address: 0x0000000000010000
17+
AddressAlign: 0x0000000000000004
18+
Content: 040000001400000003000000474E5500CC

0 commit comments

Comments
 (0)