Skip to content

Read and store gnu build id from loaded core file #92078

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions lldb/include/lldb/Target/Process.h
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,36 @@ class Process : public std::enable_shared_from_this<Process>,
lldb::StateType state);
} Notifications;

/// Byte-wise, random-access view of a process's memory, addressed by offset
/// from a fixed base address.
///
/// Every access performs a one-byte Process::ReadMemory call. After the first
/// failed read the iterator latches into an invalid state: each later access
/// returns 0 without reading, so callers must check IsValid() to tell a real
/// zero byte apart from a failed read.
class ProcessMemoryIterator {
public:
  /// \param[in] process_sp The process whose memory is read. Expected to be
  ///     non-null; a null process yields an iterator that is invalid from the
  ///     start.
  ///
  /// \param[in] base The address that offset 0 refers to.
  ProcessMemoryIterator(lldb::ProcessSP process_sp, lldb::addr_t base)
      : m_process_sp(process_sp), m_base_addr(base) {
    lldbassert(process_sp.get() != nullptr);
    // lldbassert is not guaranteed to abort (it is non-fatal when assertions
    // are disabled), so never let operator[] dereference a null process.
    if (!m_process_sp)
      m_is_valid = false;
  }

  /// \return false once any read has failed (or if no process was supplied).
  bool IsValid() const { return m_is_valid; }

  /// Read the byte at \a offset from the base address.
  ///
  /// \return The byte read, or 0 if the iterator is invalid or this read
  ///     fails (in which case the iterator becomes invalid).
  uint8_t operator[](lldb::addr_t offset) {
    if (!IsValid())
      return 0;

    uint8_t retval = 0;
    Status error;
    if (0 ==
        m_process_sp->ReadMemory(m_base_addr + offset, &retval, 1, error)) {
      m_is_valid = false;
      return 0;
    }

    return retval;
  }

private:
  lldb::ProcessSP m_process_sp;
  lldb::addr_t m_base_addr;
  bool m_is_valid = true;
};

class ProcessEventData : public EventData {
friend class Process;

Expand Down Expand Up @@ -1649,6 +1679,26 @@ class Process : public std::enable_shared_from_this<Process>,

lldb::addr_t ReadPointerFromMemory(lldb::addr_t vm_addr, Status &error);

/// Find a byte sequence within a memory region.
///
/// This function searches this process's memory for the first occurrence of
/// the bytes in the provided buffer within the half-open address range
/// [low, high). A match must lie entirely inside that range. It uses a bad
/// character heuristic to skip ahead after a mismatch instead of testing
/// every address.
///
/// \param[in] low The starting address of the memory region to be searched.
///
/// \param[in] high The ending address of the memory region to be searched;
/// the byte at this address is not part of the region.
///
/// \param[in] buffer A pointer to the buffer containing the bytes to be
/// searched for.
///
/// \param[in] buffer_size The size of the buffer in bytes.
///
/// \return The address where the bytes were found or LLDB_INVALID_ADDRESS if
/// not found, including when the region is smaller than buffer_size.
lldb::addr_t FindInMemory(lldb::addr_t low, lldb::addr_t high,
uint8_t *buffer, size_t buffer_size);

bool WritePointerToMemory(lldb::addr_t vm_addr, lldb::addr_t ptr_value,
Status &error);

Expand Down
61 changes: 2 additions & 59 deletions lldb/source/Commands/CommandObjectMemory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -977,35 +977,6 @@ class CommandObjectMemoryFind : public CommandObjectParsed {
Options *GetOptions() override { return &m_option_group; }

protected:
class ProcessMemoryIterator {
public:
ProcessMemoryIterator(ProcessSP process_sp, lldb::addr_t base)
: m_process_sp(process_sp), m_base_addr(base) {
lldbassert(process_sp.get() != nullptr);
}

bool IsValid() { return m_is_valid; }

uint8_t operator[](lldb::addr_t offset) {
if (!IsValid())
return 0;

uint8_t retval = 0;
Status error;
if (0 ==
m_process_sp->ReadMemory(m_base_addr + offset, &retval, 1, error)) {
m_is_valid = false;
return 0;
Comment on lines -995 to -998
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is efficient only because our process caches memory on its own... Otherwise reading memory 1 byte at a time would be more expensive. Not sure if we can get any speed up by reading more than one byte at a time and then using our own internal buffer.

}

return retval;
}

private:
ProcessSP m_process_sp;
lldb::addr_t m_base_addr;
bool m_is_valid = true;
};
void DoExecute(Args &command, CommandReturnObject &result) override {
// No need to check "process" for validity as eCommandRequiresProcess
// ensures it is valid
Expand Down Expand Up @@ -1106,8 +1077,8 @@ class CommandObjectMemoryFind : public CommandObjectParsed {
found_location = low_addr;
bool ever_found = false;
while (count) {
found_location = FastSearch(found_location, high_addr, buffer.GetBytes(),
buffer.GetByteSize());
found_location = process->FindInMemory(
found_location, high_addr, buffer.GetBytes(), buffer.GetByteSize());
if (found_location == LLDB_INVALID_ADDRESS) {
if (!ever_found) {
result.AppendMessage("data not found within the range.\n");
Expand Down Expand Up @@ -1144,34 +1115,6 @@ class CommandObjectMemoryFind : public CommandObjectParsed {
result.SetStatus(lldb::eReturnStatusSuccessFinishResult);
}

/// Find the first occurrence of a byte pattern in the process's memory.
///
/// Searches the half-open range [low, high) using a bad-character shift
/// table; memory is read one byte at a time through ProcessMemoryIterator.
///
/// \param[in] low First address of the region to search.
/// \param[in] high Address one past the last byte of the region.
/// \param[in] buffer The bytes to look for.
/// \param[in] buffer_size Number of bytes in \a buffer.
///
/// \return The address of the first match, or LLDB_INVALID_ADDRESS if there is
///     none, the pattern is empty, the range is inverted or too small, or a
///     memory read fails before a match is found.
lldb::addr_t FastSearch(lldb::addr_t low, lldb::addr_t high, uint8_t *buffer,
                        size_t buffer_size) {
  // An empty pattern would make `buffer_size - 1` wrap around below, and an
  // inverted range would make `high - low` wrap; neither can produce a match.
  if (buffer == nullptr || buffer_size == 0 || high < low)
    return LLDB_INVALID_ADDRESS;

  const size_t region_size = high - low;
  if (region_size < buffer_size)
    return LLDB_INVALID_ADDRESS;

  // For each byte value, how far the window may slide when that byte is the
  // last byte of the current window. Bytes absent from the pattern allow a
  // full pattern-length shift.
  std::vector<size_t> bad_char_heuristic(256, buffer_size);
  for (size_t idx = 0; idx < buffer_size - 1; idx++) {
    decltype(bad_char_heuristic)::size_type bcu_idx = buffer[idx];
    bad_char_heuristic[bcu_idx] = buffer_size - idx - 1;
  }

  ProcessSP process_sp = m_exe_ctx.GetProcessSP();
  ProcessMemoryIterator iterator(process_sp, low);

  for (size_t s = 0; s <= (region_size - buffer_size);) {
    // Compare the window against the pattern from its last byte backwards.
    int64_t j = buffer_size - 1;
    while (j >= 0 && buffer[j] == iterator[s + j])
      j--;

    // A failed read is reported as a 0 byte and every later read returns 0 as
    // well, so continuing could only yield a bogus match on zero bytes.
    if (!iterator.IsValid())
      return LLDB_INVALID_ADDRESS;

    if (j < 0)
      return low + s;

    s += bad_char_heuristic[iterator[s + buffer_size - 1]];
  }

  return LLDB_INVALID_ADDRESS;
}

OptionGroupOptions m_option_group;
OptionGroupFindMemory m_memory_options;
OptionGroupMemoryTag m_memory_tag_options;
Expand Down
50 changes: 50 additions & 0 deletions lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@
//
//===----------------------------------------------------------------------===//

#include <cstddef>
#include <cstdlib>

#include <memory>
#include <mutex>
#include <tuple>

#include "lldb/Core/Module.h"
#include "lldb/Core/ModuleSpec.h"
Expand Down Expand Up @@ -210,6 +212,9 @@ Status ProcessElfCore::DoLoadCore() {
}
}

// We need to update uuid after address range is populated.
UpdateBuildIdForNTFileEntries();

if (!ranges_are_sorted) {
m_core_aranges.Sort();
m_core_range_infos.Sort();
Expand Down Expand Up @@ -258,6 +263,7 @@ Status ProcessElfCore::DoLoadCore() {
if (!m_nt_file_entries.empty()) {
ModuleSpec exe_module_spec;
exe_module_spec.GetArchitecture() = arch;
exe_module_spec.GetUUID() = m_nt_file_entries[0].uuid;
exe_module_spec.GetFileSpec().SetFile(m_nt_file_entries[0].path,
FileSpec::Style::native);
if (exe_module_spec.GetFileSpec()) {
Expand All @@ -271,6 +277,16 @@ Status ProcessElfCore::DoLoadCore() {
return error;
}

void ProcessElfCore::UpdateBuildIdForNTFileEntries() {
if (!m_nt_file_entries.empty()) {
for (NT_FILE_Entry &entry : m_nt_file_entries) {
std::optional<UUID> uuid = FindBuildId(entry);
if (uuid)
entry.uuid = uuid.value();
}
}
}

lldb_private::DynamicLoader *ProcessElfCore::GetDynamicLoader() {
if (m_dyld_up.get() == nullptr)
m_dyld_up.reset(DynamicLoader::FindPlugin(
Expand Down Expand Up @@ -983,6 +999,40 @@ llvm::Error ProcessElfCore::ParseThreadContextsFromNoteSegment(
}
}

bool ProcessElfCore::IsElf(const NT_FILE_Entry entry) {
size_t size = strlen(llvm::ELF::ElfMagic);
uint8_t buf[size];
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

15: error: variable length arrays in C++ are a Clang extension [-Werror,-Wvla-cxx-extension]
 1004 |   uint8_t buf[size];

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can allocate a buffer of 4 and read it.
Can ELFHeader::MagicBytesMatch be used here?

Status error;
size_t byte_read = ReadMemory(entry.start, buf, size, error);
if (byte_read == size)
return memcmp(llvm::ELF::ElfMagic, buf, size) == 0;
else
return false;
}

std::optional<UUID> ProcessElfCore::FindBuildId(const NT_FILE_Entry entry) {
if (!IsElf(entry))
return std::nullopt;
// Build ID is stored in the ELF file as a section named ".note.gnu.build-id"
uint8_t gnu_build_id_bytes[8] = {0x03, 0x00, 0x00, 0x00,
0x47, 0x4e, 0x55, 0x00};
lldb::addr_t gnu_build_id_addr =
FindInMemory(entry.start, entry.end, gnu_build_id_bytes, 8);
if (gnu_build_id_addr == LLDB_INVALID_ADDRESS)
return std::nullopt;
uint8_t buf[36];
Status error;
size_t byte_read = ReadMemory(gnu_build_id_addr - 8, buf, 36, error);
// .note.gnu.build-id starts with 04 00 00 00 {id_byte_size} 00 00 00 03 00 00
// 00 47 4e 55 00
Comment on lines +1026 to +1027
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This might be better said as:

// .note.gnu.build-id format is:
// uint32_t namesz; // Length of the name 
// uint32_t descsz; // Length of the UUID
// uint32_t type; // Set to NT_GNU_BUILD_ID (0x00000003)
// char name[namesz]; // Set to "GNU\0"

if (byte_read == 36) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should not assume there are always 36 bytes here. We should read the first 8 bytes to get the namesz and descsz entries using a DataExtractor. Then do the right thing when decoding the UUID bytes by using the namesz value + 12 bytes for the namesz, descsz and type uint32_t fields. We should also make sure our length isn't out of control and too big:

DataExtractor data(buf, sizeof(buf), GetByteOrder(), GetAddressByteSize());
lldb::offset_t offset = 0; 
const uint32_t namesz = data.GetU32(&offset); // Decode the length of the note name
const uint32_t descsz = data.GetU32(&offset); // Decode the UUID length
if (0 < uuid_length && uuid_length <= 20)
  offset = 16; // Set the position to point to the UUID bytes
  return UUID(llvm::ArrayRef<uint8_t>(data.GetData(12 + namesz), descsz);
}

if (buf[0] == 0x04) {
return UUID(llvm::ArrayRef<uint8_t>(buf + 16, buf[4] /*byte size*/));
}
}
Comment on lines +1028 to +1032
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove braces per llvm coding guidelines for single statement if

return std::nullopt;
}

uint32_t ProcessElfCore::GetNumThreadContexts() {
if (!m_thread_data_valid)
DoLoadCore();
Expand Down
11 changes: 11 additions & 0 deletions lldb/source/Plugins/Process/elf-core/ProcessElfCore.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ class ProcessElfCore : public lldb_private::PostMortemProcess {
lldb::addr_t end;
lldb::addr_t file_ofs;
std::string path;
lldb_private::UUID
uuid; // extracted from .note.gnu.build-id section from core file
Comment on lines +120 to +121
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe we should use std::optional<lldb_private::UUID> here

};

// For ProcessElfCore only
Expand Down Expand Up @@ -158,6 +160,15 @@ class ProcessElfCore : public lldb_private::PostMortemProcess {
// Returns number of thread contexts stored in the core file
uint32_t GetNumThreadContexts();

// Populate the uuid field of each entry in m_nt_file_entries for which
// FindBuildId() returns a value; all other entries are left untouched.
void UpdateBuildIdForNTFileEntries();

// Returns the GNU build id found by searching the core's memory in the range
// [entry.start, entry.end) of the given NT_FILE entry. Returns std::nullopt
// when that range does not begin with the ELF magic, when no build-id note
// is located, or when the note cannot be read and decoded.
std::optional<lldb_private::UUID> FindBuildId(const NT_FILE_Entry entry);

// Returns true if the memory at entry.start of the given NT_FILE entry can be
// read and begins with the ELF magic bytes.
bool IsElf(const NT_FILE_Entry entry);

// Parse a contiguous address range of the process from LOAD segment
lldb::addr_t
AddAddressRangeFromLoadSegment(const elf::ELFProgramHeader &header);
Expand Down
27 changes: 27 additions & 0 deletions lldb/source/Target/Process.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3191,6 +3191,33 @@ Status Process::Halt(bool clear_thread_plans, bool use_run_lock) {
return Status();
}

// Find the first occurrence of the `buffer_size` bytes at `buffer` inside the
// half-open address range [low, high) of this process's memory.
//
// The search slides a pattern-sized window over the region and, after each
// mismatch, advances it by a precomputed bad-character shift keyed on the last
// byte of the window. Memory is read one byte at a time through
// ProcessMemoryIterator.
//
// Returns the address of the first match, or LLDB_INVALID_ADDRESS when there
// is none, the pattern is empty, the range is inverted or smaller than the
// pattern, or a memory read fails before a match is found.
lldb::addr_t Process::FindInMemory(lldb::addr_t low, lldb::addr_t high,
                                   uint8_t *buffer, size_t buffer_size) {
  // An empty pattern would make `buffer_size - 1` wrap around below, and an
  // inverted range would make `high - low` wrap; neither can produce a match.
  if (buffer == nullptr || buffer_size == 0 || high < low)
    return LLDB_INVALID_ADDRESS;

  const size_t region_size = high - low;
  if (region_size < buffer_size)
    return LLDB_INVALID_ADDRESS;

  // For each byte value, how far the window may slide when that byte is the
  // last byte of the current window. Bytes absent from the pattern allow a
  // full pattern-length shift.
  std::vector<size_t> bad_char_heuristic(256, buffer_size);
  for (size_t idx = 0; idx < buffer_size - 1; idx++) {
    decltype(bad_char_heuristic)::size_type bcu_idx = buffer[idx];
    bad_char_heuristic[bcu_idx] = buffer_size - idx - 1;
  }

  ProcessMemoryIterator iterator(shared_from_this(), low);

  for (size_t s = 0; s <= (region_size - buffer_size);) {
    // Compare the window against the pattern from its last byte backwards.
    int64_t j = buffer_size - 1;
    while (j >= 0 && buffer[j] == iterator[s + j])
      j--;

    // A failed read is reported as a 0 byte and every later read returns 0 as
    // well, so continuing could only yield a bogus match on zero bytes.
    if (!iterator.IsValid())
      return LLDB_INVALID_ADDRESS;

    if (j < 0)
      return low + s;

    s += bad_char_heuristic[iterator[s + buffer_size - 1]];
  }

  return LLDB_INVALID_ADDRESS;
}

Status Process::StopForDestroyOrDetach(lldb::EventSP &exit_event_sp) {
Status error;

Expand Down
Loading