-
Notifications
You must be signed in to change notification settings - Fork 14.3k
Add support for reading the dynamic symbol table from PT_DYNAMIC #112596
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -44,6 +44,7 @@ | |
#include "llvm/Support/MathExtras.h" | ||
#include "llvm/Support/MemoryBuffer.h" | ||
#include "llvm/Support/MipsABIFlags.h" | ||
#include "lldb/Target/Process.h" | ||
|
||
#define CASE_AND_STREAM(s, def, width) \ | ||
case def: \ | ||
|
@@ -2990,9 +2991,10 @@ void ObjectFileELF::ParseSymtab(Symtab &lldb_symtab) { | |
// section, no matter whether .symtab was already parsed or not. This is because | ||
// minidebuginfo normally removes the .symtab symbols which have their | ||
// matching .dynsym counterparts. | ||
Section *dynsym = nullptr; | ||
if (!symtab || | ||
GetSectionList()->FindSectionByName(ConstString(".gnu_debugdata"))) { | ||
Section *dynsym = | ||
dynsym = | ||
section_list->FindSectionByType(eSectionTypeELFDynamicSymbols, true) | ||
.get(); | ||
if (dynsym) { | ||
|
@@ -3002,6 +3004,20 @@ void ObjectFileELF::ParseSymtab(Symtab &lldb_symtab) { | |
m_address_class_map.merge(address_class_map); | ||
} | ||
} | ||
if (!dynsym) { | ||
// Try and read the dynamic symbol table from the .dynamic section. | ||
uint32_t num_symbols = 0; | ||
std::optional<DataExtractor> symtab_data = | ||
GetDynsymDataFromDynamic(num_symbols); | ||
std::optional<DataExtractor> strtab_data = GetDynstrData(); | ||
if (symtab_data && strtab_data) { | ||
auto [num_symbols_parsed, address_class_map] = | ||
ParseSymbols(&lldb_symtab, symbol_id, section_list, num_symbols, | ||
symtab_data.value(), strtab_data.value()); | ||
symbol_id += num_symbols_parsed; | ||
m_address_class_map.merge(address_class_map); | ||
} | ||
} | ||
|
||
// DT_JMPREL | ||
// If present, this entry's d_ptr member holds the address of | ||
|
@@ -3811,6 +3827,33 @@ ObjectFileELF::MapFileDataWritable(const FileSpec &file, uint64_t Size, | |
Offset); | ||
} | ||
|
||
std::optional<DataExtractor> | ||
ObjectFileELF::ReadDataFromDynamic(const ELFDynamic *dyn, uint64_t length, | ||
uint64_t offset) { | ||
// ELFDynamic values contain a "d_ptr" member that will be a load address if | ||
// we have an ELF file read from memory, or it will be a file address if it | ||
// was read from a ELF file. This function will correctly fetch data pointed | ||
// to by the ELFDynamic::d_ptr, or return std::nullopt if the data isn't | ||
// available. | ||
const lldb::addr_t d_ptr_addr = dyn->d_ptr + offset; | ||
if (ProcessSP process_sp = m_process_wp.lock()) { | ||
if (DataBufferSP data_sp = ReadMemory(process_sp, d_ptr_addr, length)) | ||
return DataExtractor(data_sp, GetByteOrder(), GetAddressByteSize()); | ||
} else { | ||
// We have an ELF file with no section headers or we didn't find the | ||
// .dynamic section. Try and find the .dynstr section. | ||
Address addr; | ||
if (!addr.ResolveAddressUsingFileSections(d_ptr_addr, GetSectionList())) | ||
return std::nullopt; | ||
DataExtractor data; | ||
addr.GetSection()->GetSectionData(data); | ||
return DataExtractor(data, | ||
d_ptr_addr - addr.GetSection()->GetFileAddress(), | ||
length); | ||
} | ||
return std::nullopt; | ||
} | ||
|
||
std::optional<DataExtractor> ObjectFileELF::GetDynstrData() { | ||
if (SectionList *section_list = GetSectionList()) { | ||
// Find the SHT_DYNAMIC section. | ||
|
@@ -3838,31 +3881,15 @@ std::optional<DataExtractor> ObjectFileELF::GetDynstrData() { | |
// and represent the dynamic symbol tables's string table. These are needed | ||
// by the dynamic loader and we can read them from a process' address space. | ||
// | ||
// When loading and ELF file from memory, only the program headers end up | ||
// being mapped into memory, and we can find these values in the PT_DYNAMIC | ||
// segment. | ||
// When loading an ELF file from memory, only the program headers are | ||
// guaranteed to end up being mapped into memory, and we can find these values | ||
// in the PT_DYNAMIC segment. | ||
const ELFDynamic *strtab = FindDynamicSymbol(DT_STRTAB); | ||
const ELFDynamic *strsz = FindDynamicSymbol(DT_STRSZ); | ||
if (strtab == nullptr || strsz == nullptr) | ||
return std::nullopt; | ||
|
||
if (ProcessSP process_sp = m_process_wp.lock()) { | ||
if (DataBufferSP data_sp = | ||
ReadMemory(process_sp, strtab->d_ptr, strsz->d_val)) | ||
return DataExtractor(data_sp, GetByteOrder(), GetAddressByteSize()); | ||
} else { | ||
// We have an ELF file with no section headers or we didn't find the | ||
// .dynamic section. Try and find the .dynstr section. | ||
Address addr; | ||
if (addr.ResolveAddressUsingFileSections(strtab->d_ptr, GetSectionList())) { | ||
DataExtractor data; | ||
addr.GetSection()->GetSectionData(data); | ||
return DataExtractor(data, | ||
strtab->d_ptr - addr.GetSection()->GetFileAddress(), | ||
strsz->d_val); | ||
} | ||
} | ||
return std::nullopt; | ||
return ReadDataFromDynamic(strtab, strsz->d_val, /*offset=*/0); | ||
} | ||
|
||
std::optional<lldb_private::DataExtractor> ObjectFileELF::GetDynamicData() { | ||
|
@@ -3895,3 +3922,116 @@ std::optional<lldb_private::DataExtractor> ObjectFileELF::GetDynamicData() { | |
} | ||
return std::nullopt; | ||
} | ||
|
||
std::optional<uint32_t> ObjectFileELF::GetNumSymbolsFromDynamicHash() { | ||
const ELFDynamic *hash = FindDynamicSymbol(DT_HASH); | ||
if (hash == nullptr) | ||
return std::nullopt; | ||
|
||
// The DT_HASH header looks like this: | ||
struct DtHashHeader { | ||
uint32_t nbucket; | ||
uint32_t nchain; | ||
}; | ||
if (auto data = ReadDataFromDynamic(hash, 8)) { | ||
// We don't need the number of buckets value "nbucket", we just need the | ||
// "nchain" value which contains the number of symbols. | ||
offset_t offset = offsetof(DtHashHeader, nchain); | ||
return data->GetU32(&offset); | ||
} | ||
|
||
return std::nullopt; | ||
} | ||
|
||
std::optional<uint32_t> ObjectFileELF::GetNumSymbolsFromDynamicGnuHash() { | ||
const ELFDynamic *gnu_hash = FindDynamicSymbol(DT_GNU_HASH); | ||
if (gnu_hash == nullptr) | ||
return std::nullopt; | ||
|
||
// Create a DT_GNU_HASH header | ||
// https://flapenguin.me/elf-dt-gnu-hash | ||
struct DtGnuHashHeader { | ||
uint32_t nbuckets = 0; | ||
uint32_t symoffset = 0; | ||
uint32_t bloom_size = 0; | ||
uint32_t bloom_shift = 0; | ||
}; | ||
uint32_t num_symbols = 0; | ||
// Read enogh data for the DT_GNU_HASH header so we can extract the values. | ||
if (auto data = ReadDataFromDynamic(gnu_hash, sizeof(DtGnuHashHeader))) { | ||
offset_t offset = 0; | ||
DtGnuHashHeader header; | ||
header.nbuckets = data->GetU32(&offset); | ||
header.symoffset = data->GetU32(&offset); | ||
header.bloom_size = data->GetU32(&offset); | ||
header.bloom_shift = data->GetU32(&offset); | ||
const size_t addr_size = GetAddressByteSize(); | ||
const addr_t buckets_offset = | ||
sizeof(DtGnuHashHeader) + addr_size * header.bloom_size; | ||
std::vector<uint32_t> buckets; | ||
if (auto bucket_data = ReadDataFromDynamic(gnu_hash, header.nbuckets * 4, buckets_offset)) { | ||
offset = 0; | ||
for (uint32_t i = 0; i < header.nbuckets; ++i) | ||
buckets.push_back(bucket_data->GetU32(&offset)); | ||
// Locate the chain that handles the largest index bucket. | ||
uint32_t last_symbol = 0; | ||
for (uint32_t bucket_value : buckets) | ||
last_symbol = std::max(bucket_value, last_symbol); | ||
if (last_symbol < header.symoffset) { | ||
num_symbols = header.symoffset; | ||
} else { | ||
// Walk the bucket's chain to add the chain length to the total. | ||
const addr_t chains_base_offset = buckets_offset + header.nbuckets * 4; | ||
for (;;) { | ||
if (auto chain_entry_data = ReadDataFromDynamic(gnu_hash, 4, chains_base_offset + (last_symbol - header.symoffset) * 4)) { | ||
offset = 0; | ||
uint32_t chain_entry = chain_entry_data->GetU32(&offset); | ||
++last_symbol; | ||
// If the low bit is set, this entry is the end of the chain. | ||
if (chain_entry & 1) | ||
break; | ||
} else { | ||
break; | ||
} | ||
} | ||
num_symbols = last_symbol; | ||
} | ||
} | ||
} | ||
if (num_symbols > 0) | ||
return num_symbols; | ||
|
||
return std::nullopt; | ||
} | ||
|
||
std::optional<DataExtractor> | ||
ObjectFileELF::GetDynsymDataFromDynamic(uint32_t &num_symbols) { | ||
// Every ELF file which represents an executable or shared library has | ||
// mandatory .dynamic entries. The DT_SYMTAB value contains a pointer to the | ||
// symbol table, and DT_SYMENT contains the size of a symbol table entry. | ||
// We then can use either the DT_HASH or DT_GNU_HASH to find the number of | ||
// symbols in the symbol table as the symbol count is not stored in the | ||
// .dynamic section as a key/value pair. | ||
// | ||
// When loading and ELF file from memory, only the program headers end up | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Great comment, but I would rephrase that only the program headers are guaranteed to be mapped |
||
// being mapped into memory, and we can find these values in the PT_DYNAMIC | ||
// segment. | ||
num_symbols = 0; | ||
// Get the process in case this is an in memory ELF file. | ||
ProcessSP process_sp(m_process_wp.lock()); | ||
const ELFDynamic *symtab = FindDynamicSymbol(DT_SYMTAB); | ||
const ELFDynamic *syment = FindDynamicSymbol(DT_SYMENT); | ||
// DT_SYMTAB and DT_SYMENT are mandatory. | ||
if (symtab == nullptr || syment == nullptr) | ||
return std::nullopt; | ||
|
||
if (std::optional<uint32_t> syms = GetNumSymbolsFromDynamicHash()) | ||
num_symbols = *syms; | ||
else if (std::optional<uint32_t> syms = GetNumSymbolsFromDynamicGnuHash()) | ||
num_symbols = *syms; | ||
else | ||
return std::nullopt; | ||
if (num_symbols == 0) | ||
return std::nullopt; | ||
return ReadDataFromDynamic(symtab, syment->d_val * num_symbols); | ||
} |
Original file line number | Diff line number | Diff line change | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,42 @@ | ||||||||||||
// This test verifies that loading an ELF file that has no section headers can | ||||||||||||
// load the dynamic symbol table using the DT_SYMTAB, DT_SYMENT, DT_HASH or | ||||||||||||
// the DT_GNU_HASH .dynamic key/value pairs that are loaded via the PT_DYNAMIC | ||||||||||||
// segment. | ||||||||||||
|
||||||||||||
// RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj \ | ||||||||||||
// RUN: -o - - <<<".globl defined, undefined; defined:" | \ | ||||||||||||
// RUN: ld.lld /dev/stdin -o - --hash-style=gnu -export-dynamic -shared \ | ||||||||||||
Comment on lines
+6
to
+8
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
.. and put the asm input into this file. I don't think I would also recommend more unique names and thinking about whether there are any corner cases that are worth explicitly testing (e.g. deliberately creating some hash collisions?). |
||||||||||||
// RUN: -z nosectionheader -o %t.gnu | ||||||||||||
// RUN: %lldb %t.gnu -b \ | ||||||||||||
// RUN: -o "image dump objfile" \ | ||||||||||||
// RUN: | FileCheck %s --dump-input=always --check-prefix=GNU | ||||||||||||
// GNU: (lldb) image dump objfile | ||||||||||||
// GNU: Dumping headers for 1 module(s). | ||||||||||||
// GNU: ObjectFileELF, file = | ||||||||||||
// GNU: ELF Header | ||||||||||||
// GNU: e_type = 0x0003 ET_DYN | ||||||||||||
// Make sure there are no section headers | ||||||||||||
// GNU: e_shnum = 0x00000000 | ||||||||||||
// Make sure we were able to load the symbols | ||||||||||||
// GNU: Symtab, file = {{.*}}elf-dynsym.test.tmp.gnu, num_symbols = 2: | ||||||||||||
// GNU-DAG: undefined | ||||||||||||
// GNU-DAG: defined | ||||||||||||
|
||||||||||||
// RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj \ | ||||||||||||
// RUN: -o - - <<<".globl defined, undefined; defined:" | \ | ||||||||||||
// RUN: ld.lld /dev/stdin -o - --hash-style=sysv -export-dynamic -shared \ | ||||||||||||
Comment on lines
+25
to
+27
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
(we can reuse the object file) |
||||||||||||
// RUN: -z nosectionheader -o %t.sysv | ||||||||||||
// RUN: %lldb %t.sysv -b \ | ||||||||||||
// RUN: -o "image dump objfile" \ | ||||||||||||
// RUN: | FileCheck %s --dump-input=always --check-prefix=HASH | ||||||||||||
// HASH: (lldb) image dump objfile | ||||||||||||
// HASH: Dumping headers for 1 module(s). | ||||||||||||
// HASH: ObjectFileELF, file = | ||||||||||||
// HASH: ELF Header | ||||||||||||
// HASH: e_type = 0x0003 ET_DYN | ||||||||||||
// Make sure there are no section headers | ||||||||||||
// HASH: e_shnum = 0x00000000 | ||||||||||||
// Make sure we were able to load the symbols | ||||||||||||
// HASH: Symtab, file = {{.*}}elf-dynsym.test.tmp.sysv, num_symbols = 2: | ||||||||||||
// HASH-DAG: undefined | ||||||||||||
// HASH-DAG: defined | ||||||||||||
Comment on lines
+32
to
+42
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In cases like this where the two outputs are (nearly) identical, I think it's better to have one check prefix for the common parts of the output -- it's less code and it better emphasizes the difference. This can be achieved by passing different sets of prefixes to the FileCheck command:
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: empty line
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
btw, this grouping of include blocks with empty lines is an lldb-ism (other llvm subprojects don't have that), and one not very strictly adhered to (it looks like this file does not put lldb includes in a separate block). One problem with adding empty lines like that is that it prevents clang-format from sorting the includes. Right now clang-format (correctly) wants to put this include next to the other lldb includes. If you added an empty line, it would leave it alone, thinking that is intentional (which I don't think it is).