Skip to content

Commit 0ff6ba4

Browse files
committed
Add support for reading the dynamic symbol table from program headers and PT_DYNAMIC.
Allow LLDB to parse the dynamic symbol table from an ELF file, or from an in-memory ELF image, that has no section headers. This patch uses the ability to parse the PT_DYNAMIC segment and find the DT_SYMTAB, DT_SYMENT, DT_HASH or DT_GNU_HASH entries to locate and parse the dynamic symbol table if the section headers are not present. It also adds a helper function to read the data referenced by a .dynamic key/value pair entry correctly from the file or from memory.
1 parent 0f8dbb2 commit 0ff6ba4

File tree

3 files changed

+811
-17
lines changed

3 files changed

+811
-17
lines changed

lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp

Lines changed: 146 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@
4444
#include "llvm/Support/MathExtras.h"
4545
#include "llvm/Support/MemoryBuffer.h"
4646
#include "llvm/Support/MipsABIFlags.h"
47+
#include "lldb/Target/Process.h"
48+
4749

4850
#define CASE_AND_STREAM(s, def, width) \
4951
case def: \
@@ -2990,18 +2992,34 @@ void ObjectFileELF::ParseSymtab(Symtab &lldb_symtab) {
29902992
// section, no matter if .symtab was already parsed or not. This is because
29912993
// minidebuginfo normally removes the .symtab symbols which have their
29922994
// matching .dynsym counterparts.
2995+
bool found_dynsym = false;
29932996
if (!symtab ||
29942997
GetSectionList()->FindSectionByName(ConstString(".gnu_debugdata"))) {
29952998
Section *dynsym =
29962999
section_list->FindSectionByType(eSectionTypeELFDynamicSymbols, true)
29973000
.get();
29983001
if (dynsym) {
3002+
found_dynsym = true;
29993003
auto [num_symbols, address_class_map] =
30003004
ParseSymbolTable(&lldb_symtab, symbol_id, dynsym);
30013005
symbol_id += num_symbols;
30023006
m_address_class_map.merge(address_class_map);
30033007
}
30043008
}
3009+
if (!found_dynsym) {
3010+
// Try and read the dynamic symbol table from the .dynamic section.
3011+
uint32_t num_symbols = 0;
3012+
std::optional<DataExtractor> symtab_data =
3013+
GetDynsymDataFromDynamic(num_symbols);
3014+
std::optional<DataExtractor> strtab_data = GetDynstrData();
3015+
if (symtab_data && strtab_data) {
3016+
auto [num_symbols_parsed, address_class_map] =
3017+
ParseSymbols(&lldb_symtab, symbol_id, section_list, num_symbols,
3018+
symtab_data.value(), strtab_data.value());
3019+
symbol_id += num_symbols_parsed;
3020+
m_address_class_map.merge(address_class_map);
3021+
}
3022+
}
30053023

30063024
// DT_JMPREL
30073025
// If present, this entry's d_ptr member holds the address of
@@ -3811,6 +3829,33 @@ ObjectFileELF::MapFileDataWritable(const FileSpec &file, uint64_t Size,
38113829
Offset);
38123830
}
38133831

3832+
std::optional<DataExtractor>
3833+
ObjectFileELF::ReadDataFromDynamic(const ELFDynamic *dyn, uint64_t length,
3834+
uint64_t offset) {
3835+
// ELFDynamic values contain a "d_ptr" member that will be a load address if
3836+
// we have an ELF file read from memory, or it will be a file address if it
3837+
// was read from a ELF file. This function will correctly fetch data pointed
3838+
// to by the ELFDynamic::d_ptr, or return std::nullopt if the data isn't
3839+
// available.
3840+
const lldb::addr_t d_ptr_addr = dyn->d_ptr + offset;
3841+
if (ProcessSP process_sp = m_process_wp.lock()) {
3842+
if (DataBufferSP data_sp = ReadMemory(process_sp, d_ptr_addr, length))
3843+
return DataExtractor(data_sp, GetByteOrder(), GetAddressByteSize());
3844+
} else {
3845+
// We have an ELF file with no section headers or we didn't find the
3846+
// .dynamic section. Try and find the .dynstr section.
3847+
Address addr;
3848+
if (!addr.ResolveAddressUsingFileSections(d_ptr_addr, GetSectionList()))
3849+
return std::nullopt;
3850+
DataExtractor data;
3851+
addr.GetSection()->GetSectionData(data);
3852+
return DataExtractor(data,
3853+
d_ptr_addr - addr.GetSection()->GetFileAddress(),
3854+
length);
3855+
}
3856+
return std::nullopt;
3857+
}
3858+
38143859
std::optional<DataExtractor> ObjectFileELF::GetDynstrData() {
38153860
if (SectionList *section_list = GetSectionList()) {
38163861
// Find the SHT_DYNAMIC section.
@@ -3846,23 +3891,7 @@ std::optional<DataExtractor> ObjectFileELF::GetDynstrData() {
38463891
if (strtab == nullptr || strsz == nullptr)
38473892
return std::nullopt;
38483893

3849-
if (ProcessSP process_sp = m_process_wp.lock()) {
3850-
if (DataBufferSP data_sp =
3851-
ReadMemory(process_sp, strtab->d_ptr, strsz->d_val))
3852-
return DataExtractor(data_sp, GetByteOrder(), GetAddressByteSize());
3853-
} else {
3854-
// We have an ELF file with no section headers or we didn't find the
3855-
// .dynamic section. Try and find the .dynstr section.
3856-
Address addr;
3857-
if (addr.ResolveAddressUsingFileSections(strtab->d_ptr, GetSectionList())) {
3858-
DataExtractor data;
3859-
addr.GetSection()->GetSectionData(data);
3860-
return DataExtractor(data,
3861-
strtab->d_ptr - addr.GetSection()->GetFileAddress(),
3862-
strsz->d_val);
3863-
}
3864-
}
3865-
return std::nullopt;
3894+
return ReadDataFromDynamic(strtab, strsz->d_val, /*offset=*/0);
38663895
}
38673896

38683897
std::optional<lldb_private::DataExtractor> ObjectFileELF::GetDynamicData() {
@@ -3895,3 +3924,103 @@ std::optional<lldb_private::DataExtractor> ObjectFileELF::GetDynamicData() {
38953924
}
38963925
return std::nullopt;
38973926
}
3927+
3928+
3929+
std::optional<DataExtractor>
3930+
ObjectFileELF::GetDynsymDataFromDynamic(uint32_t &num_symbols) {
3931+
// Every ELF file which represents an executable or shared library has
3932+
// mandatory .dynamic entries. The DT_SYMTAB value contains a pointer to the
3933+
// symbol table, and DT_SYMENT contains the size of a symbol table entry.
3934+
// We then can use either the DT_HASH or DT_GNU_HASH to find the number of
3935+
// symbols in the symbol table as the symbol count is not stored in the
3936+
// .dynamic section as a key/value pair.
3937+
//
3938+
// When loading and ELF file from memory, only the program headers end up
3939+
// being mapped into memory, and we can find these values in the PT_DYNAMIC
3940+
// segment.
3941+
num_symbols = 0;
3942+
// Get the process in case this is an in memory ELF file.
3943+
ProcessSP process_sp(m_process_wp.lock());
3944+
const ELFDynamic *symtab = FindDynamicSymbol(DT_SYMTAB);
3945+
const ELFDynamic *syment = FindDynamicSymbol(DT_SYMENT);
3946+
const ELFDynamic *hash = FindDynamicSymbol(DT_HASH);
3947+
const ELFDynamic *gnu_hash = FindDynamicSymbol(DT_GNU_HASH);
3948+
// DT_SYMTAB and DT_SYMENT are mandatory.
3949+
if (symtab == nullptr || syment == nullptr)
3950+
return std::nullopt;
3951+
// We must have either a DT_HASH or a DT_GNU_HASH.
3952+
if (hash == nullptr && gnu_hash == nullptr)
3953+
return std::nullopt;
3954+
// The number of symbols in the symbol table is the number of entries in the
3955+
// symbol table divided by the size of each symbol table entry.
3956+
// We must figure out the number of symbols in the symbol table using the
3957+
// DT_HASH or the DT_GNU_HASH as the number of symbols isn't stored anywhere
3958+
// in the .dynamic section.
3959+
3960+
lldb::offset_t offset;
3961+
if (hash) {
3962+
// The DT_HASH header contains the number of symbols in the "nchain"
3963+
// member. The header looks like this:
3964+
// struct DT_HASH_HEADER {
3965+
// uint32_t nbucket;
3966+
// uint32_t nchain;
3967+
// };
3968+
if (auto data = ReadDataFromDynamic(hash, 8)) {
3969+
offset = 4;
3970+
num_symbols = data->GetU32(&offset);
3971+
}
3972+
}
3973+
if (num_symbols == 0 && gnu_hash) {
3974+
struct DT_GNU_HASH_HEADER {
3975+
uint32_t nbuckets = 0;
3976+
uint32_t symoffset = 0;
3977+
uint32_t bloom_size = 0;
3978+
uint32_t bloom_shift = 0;
3979+
};
3980+
if (auto data = ReadDataFromDynamic(gnu_hash, sizeof(DT_GNU_HASH_HEADER))) {
3981+
offset = 0;
3982+
DT_GNU_HASH_HEADER header;
3983+
header.nbuckets = data->GetU32(&offset);
3984+
header.symoffset = data->GetU32(&offset);
3985+
header.bloom_size = data->GetU32(&offset);
3986+
header.bloom_shift = data->GetU32(&offset);
3987+
const size_t addr_size = GetAddressByteSize();
3988+
const addr_t buckets_offset =
3989+
sizeof(DT_GNU_HASH_HEADER) + addr_size * header.bloom_size;
3990+
std::vector<uint32_t> buckets;
3991+
if (auto bucket_data = ReadDataFromDynamic(gnu_hash, header.nbuckets * 4, buckets_offset)) {
3992+
offset = 0;
3993+
for (uint32_t i = 0; i < header.nbuckets; ++i)
3994+
buckets.push_back(bucket_data->GetU32(&offset));
3995+
// Locate the chain that handles the largest index bucket.
3996+
uint32_t last_symbol = 0;
3997+
for (uint32_t bucket_value : buckets)
3998+
last_symbol = std::max(bucket_value, last_symbol);
3999+
if (last_symbol < header.symoffset) {
4000+
num_symbols = header.symoffset;
4001+
} else {
4002+
// Walk the bucket's chain to add the chain length to the total.
4003+
const addr_t chains_base_offset = buckets_offset + header.nbuckets * 4;
4004+
for (;;) {
4005+
if (auto chain_entry_data = ReadDataFromDynamic(gnu_hash, 4, chains_base_offset + (last_symbol - header.symoffset) * 4)) {
4006+
offset = 0;
4007+
uint32_t chain_entry = chain_entry_data->GetU32(&offset);
4008+
++last_symbol;
4009+
// If the low bit is set, this entry is the end of the chain.
4010+
if (chain_entry & 1)
4011+
break;
4012+
} else {
4013+
break;
4014+
}
4015+
}
4016+
num_symbols = last_symbol;
4017+
}
4018+
}
4019+
}
4020+
if (num_symbols > 0)
4021+
++num_symbols; // First symbol is always all zeros
4022+
}
4023+
if (num_symbols == 0)
4024+
return std::nullopt;
4025+
return ReadDataFromDynamic(symtab, syment->d_val * num_symbols);
4026+
}

lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,40 @@ class ObjectFileELF : public lldb_private::ObjectFile {
435435
/// \return The bytes that represent the string table data or \c std::nullopt
436436
/// if an error occurred.
437437
std::optional<lldb_private::DataExtractor> GetDynstrData();
438+
439+
/// Read the bytes pointed to by the \a dyn dynamic entry.
440+
///
441+
/// ELFDynamic::d_ptr values contain file addresses if we load the ELF file
442+
/// from a file on disk, or they contain load addresses if they were read
443+
/// from memory. This function will correctly extract the data in both cases
444+
/// if it is available.
445+
///
446+
/// \param[in] dyn The dynamic entry to use to fetch the data from.
447+
///
448+
/// \param[in] length The number of bytes to read.
449+
///
450+
/// \param[in] offset The number of bytes to skip after the d_ptr value
451+
/// before reading data.
452+
///
453+
/// \return The bytes that represent the dynamic entry's data or
454+
/// \c std::nullopt if an error occurred or the data is not available.
455+
std::optional<lldb_private::DataExtractor>
456+
ReadDataFromDynamic(const elf::ELFDynamic *dyn, uint64_t length,
457+
uint64_t offset = 0);
458+
459+
/// Get the bytes that represent the dynamic symbol table from the .dynamic
460+
/// section from process memory.
461+
///
462+
/// This function uses the DT_SYMTAB value from the .dynamic section to read
463+
/// the symbol table data from process memory. The number of symbols in the
464+
/// symbol table is calculated by looking at the DT_HASH or DT_GNU_HASH
465+
/// values as the symbol count isn't stored in the .dynamic section.
466+
///
467+
/// \return The bytes that represent the symbol table data from the .dynamic
468+
/// section or section headers or \c std::nullopt if an error
469+
/// occurred or if there is no dynamic symbol data available.
470+
std::optional<lldb_private::DataExtractor>
471+
GetDynsymDataFromDynamic(uint32_t &num_symbols);
438472
};
439473

440474
#endif // LLDB_SOURCE_PLUGINS_OBJECTFILE_ELF_OBJECTFILEELF_H

0 commit comments

Comments
 (0)