Skip to content

[GSYM] Add support for querying merged functions in llvm-gsymutil #120991

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,13 +187,17 @@ struct FunctionInfo {
///
/// \param Addr The address to lookup.
///
/// \param MergedFuncsData A pointer to an optional DataExtractor that, if
/// non-null, will be set to the raw data of the MergedFunctionsInfo, if
/// present.
///
/// \returns A LookupResult or an error describing the issue that was
/// encountered during decoding. An error should only be returned if the
/// address is not contained in the FunctionInfo or if the data is corrupted.
static llvm::Expected<LookupResult> lookup(DataExtractor &Data,
const GsymReader &GR,
uint64_t FuncAddr,
uint64_t Addr);
static llvm::Expected<LookupResult>
lookup(DataExtractor &Data, const GsymReader &GR, uint64_t FuncAddr,
uint64_t Addr,
std::optional<DataExtractor> *MergedFuncsData = nullptr);

uint64_t startAddress() const { return Range.start(); }
uint64_t endAddress() const { return Range.end(); }
Expand Down
21 changes: 20 additions & 1 deletion llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,29 @@ class GsymReader {
/// is much faster for lookups.
///
/// \param Addr A virtual address from the original object file to lookup.
///
/// \param MergedFuncsData A pointer to an optional DataExtractor that, if
/// non-null, will be set to the raw data of the MergedFunctionsInfo, if
/// present.
///
/// \returns An expected LookupResult that contains only the information
/// needed for the current address, or an error object that indicates reason
/// for failing to lookup the address.
llvm::Expected<LookupResult> lookup(uint64_t Addr) const;
llvm::Expected<LookupResult>
lookup(uint64_t Addr,
std::optional<DataExtractor> *MergedFuncsData = nullptr) const;

/// Lookup all merged functions for a given address.
///
/// This function performs a lookup for the specified address and then
/// retrieves additional LookupResults from any merged functions associated
/// with the primary LookupResult.
///
/// \param Addr The address to lookup.
///
/// \returns A vector of LookupResult objects, where the first element is the
/// primary result, followed by results for any merged functions
llvm::Expected<std::vector<LookupResult>> lookupAll(uint64_t Addr) const;

/// Get a string from the string table.
///
Expand Down
12 changes: 12 additions & 0 deletions llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,18 @@ struct MergedFunctionsInfo {
/// \returns A boolean indicating if this FunctionInfo is valid.
bool isValid() { return !MergedFunctions.empty(); }

/// Get a vector of DataExtractor objects for the functions in this
/// MergedFunctionsInfo object.
///
/// \param Data The binary stream to read the data from. This object must have
/// the data for the MergedFunctionsInfo object starting at offset zero. The
/// data can contain more data than needed.
///
/// \returns An llvm::Expected containing a vector of DataExtractor objects on
/// success, or an error object if parsing fails.
static llvm::Expected<std::vector<DataExtractor>>
getFuncsDataExtractors(DataExtractor &Data);

/// Decode a MergedFunctionsInfo object from a binary data stream.
///
/// \param Data The binary stream to read the data from. This object must have
Expand Down
14 changes: 10 additions & 4 deletions llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -235,10 +235,10 @@ llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &Out,
return FuncInfoOffset;
}

llvm::Expected<LookupResult> FunctionInfo::lookup(DataExtractor &Data,
const GsymReader &GR,
uint64_t FuncAddr,
uint64_t Addr) {
llvm::Expected<LookupResult>
FunctionInfo::lookup(DataExtractor &Data, const GsymReader &GR,
uint64_t FuncAddr, uint64_t Addr,
std::optional<DataExtractor> *MergedFuncsData) {
LookupResult LR;
LR.LookupAddr = Addr;
uint64_t Offset = 0;
Expand Down Expand Up @@ -289,6 +289,12 @@ llvm::Expected<LookupResult> FunctionInfo::lookup(DataExtractor &Data,
return ExpectedLE.takeError();
break;

case InfoType::MergedFunctionsInfo:
// Store the merged functions data for later parsing, if needed.
if (MergedFuncsData)
*MergedFuncsData = InfoData;
break;

case InfoType::InlineInfo:
// We will parse the inline info after our line table, but only if
// we have a line entry.
Expand Down
42 changes: 40 additions & 2 deletions llvm/lib/DebugInfo/GSYM/GsymReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -334,14 +334,52 @@ GsymReader::getFunctionInfoAtIndex(uint64_t Idx) const {
return ExpectedData.takeError();
}

llvm::Expected<LookupResult> GsymReader::lookup(uint64_t Addr) const {
llvm::Expected<LookupResult>
GsymReader::lookup(uint64_t Addr,
std::optional<DataExtractor> *MergedFunctionsData) const {
uint64_t FuncStartAddr = 0;
if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr))
return FunctionInfo::lookup(*ExpectedData, *this, FuncStartAddr, Addr);
return FunctionInfo::lookup(*ExpectedData, *this, FuncStartAddr, Addr,
MergedFunctionsData);
else
return ExpectedData.takeError();
}

/// Look up \p Addr and collect the primary result together with one result
/// for every merged function recorded for the matching function.
///
/// The primary lookup is asked to hand back the raw MergedFunctionsInfo
/// payload (if the function has one). That payload is then split into one
/// DataExtractor per merged function and each of them is looked up at the
/// same address.
///
/// \param Addr The address to lookup.
///
/// \returns A vector whose first element is the primary LookupResult,
/// followed by the results of the merged functions in the order they are
/// stored, or the first error encountered.
llvm::Expected<std::vector<LookupResult>>
GsymReader::lookupAll(uint64_t Addr) const {
  std::vector<LookupResult> Results;
  std::optional<DataExtractor> MergedFunctionsData;

  // First perform a lookup to get the primary function info result.
  auto MainResult = lookup(Addr, &MergedFunctionsData);
  if (!MainResult)
    return MainResult.takeError();

  // Capture the function start address before the result is moved into the
  // vector, so the loop below never reads from a moved-from object.
  const uint64_t FuncStartAddr = MainResult->FuncRange.start();

  // Add the main result as the first entry.
  Results.push_back(std::move(*MainResult));

  // Nothing more to do when the function carries no merged functions data.
  if (!MergedFunctionsData)
    return Results;

  // Get data extractors for each merged function.
  auto ExpectedMergedFuncExtractors =
      MergedFunctionsInfo::getFuncsDataExtractors(*MergedFunctionsData);
  if (!ExpectedMergedFuncExtractors)
    return ExpectedMergedFuncExtractors.takeError();

  // The extractor count has already been validated against the available
  // data, so it is safe to size the result vector from it.
  Results.reserve(Results.size() + ExpectedMergedFuncExtractors->size());

  // Process each merged function data.
  for (DataExtractor &MergedData : *ExpectedMergedFuncExtractors) {
    auto FI = FunctionInfo::lookup(MergedData, *this, FuncStartAddr, Addr);
    if (!FI)
      return FI.takeError();
    Results.push_back(std::move(*FI));
  }

  return Results;
}

void GsymReader::dump(raw_ostream &OS) {
const auto &Header = getHeader();
// Dump the GSYM header.
Expand Down
53 changes: 45 additions & 8 deletions llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,22 +35,59 @@ llvm::Error MergedFunctionsInfo::encode(FileWriter &Out) const {
/// Decode a MergedFunctionsInfo from \p Data.
///
/// The stream is first split into one DataExtractor per merged function and
/// each of those is then decoded as a FunctionInfo using \p BaseAddr.
///
/// \returns The decoded MergedFunctionsInfo, or the first error reported
/// while splitting the stream or decoding one of the functions.
llvm::Expected<MergedFunctionsInfo>
MergedFunctionsInfo::decode(DataExtractor &Data, uint64_t BaseAddr) {
  llvm::Expected<std::vector<DataExtractor>> Extractors =
      getFuncsDataExtractors(Data);
  if (!Extractors)
    return Extractors.takeError();

  MergedFunctionsInfo Decoded;
  for (DataExtractor &Payload : *Extractors) {
    llvm::Expected<FunctionInfo> Func = FunctionInfo::decode(Payload, BaseAddr);
    if (Func) {
      Decoded.MergedFunctions.push_back(std::move(*Func));
      continue;
    }
    // Stop at the first function that fails to decode.
    return Func.takeError();
  }

  return Decoded;
}

llvm::Expected<std::vector<DataExtractor>>
MergedFunctionsInfo::getFuncsDataExtractors(DataExtractor &Data) {
std::vector<DataExtractor> Results;
uint64_t Offset = 0;

// Ensure there is enough data to read the function count.
if (!Data.isValidOffsetForDataOfSize(Offset, 4))
return createStringError(
std::errc::io_error,
"unable to read the function count at offset 0x%8.8" PRIx64, Offset);

uint32_t Count = Data.getU32(&Offset);

for (uint32_t i = 0; i < Count; ++i) {
// Ensure there is enough data to read the function size.
if (!Data.isValidOffsetForDataOfSize(Offset, 4))
return createStringError(
std::errc::io_error,
"unable to read size of function %u at offset 0x%8.8" PRIx64, i,
Offset);

uint32_t FnSize = Data.getU32(&Offset);
DataExtractor FnData(Data.getData().substr(Offset, FnSize),

// Ensure there is enough data for the function content.
if (!Data.isValidOffsetForDataOfSize(Offset, FnSize))
return createStringError(
std::errc::io_error,
"function data is truncated for function %u at offset 0x%8.8" PRIx64
", expected size %u",
i, Offset, FnSize);

// Extract the function data.
Results.emplace_back(Data.getData().substr(Offset, FnSize),
Data.isLittleEndian(), Data.getAddressSize());
llvm::Expected<FunctionInfo> FI =
FunctionInfo::decode(FnData, BaseAddr + Offset);
if (!FI)
return FI.takeError();
MFI.MergedFunctions.push_back(std::move(*FI));

Offset += FnSize;
}

return MFI;
return Results;
}

bool operator==(const MergedFunctionsInfo &LHS,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,16 @@
# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:10
# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:6

## Test the lookup functionality for merged functions:
# RUN: llvm-gsymutil --verify %t.keep.gSYM --address 0x248 --merged-functions | FileCheck --check-prefix=CHECK-MERGED-LOOKUP %s
# RUN: llvm-gsymutil --verify %t.keep.gSYM --address 0x248 | FileCheck --check-prefix=CHECK-NORMAL-LOOKUP %s

# CHECK-MERGED-LOOKUP: Found 3 functions at address 0x0000000000000248:
# CHECK-MERGED-LOOKUP-NEXT: 0x0000000000000248: my_func_02 @ /tmp/test_gsym_yaml/out/file_02.cpp:5
# CHECK-MERGED-LOOKUP-NEXT-NEXT: 0x0000000000000248: my_func_01 @ /tmp/test_gsym_yaml/out/file_01.cpp:5
# CHECK-MERGED-LOOKUP-NEXT-NEXT: 0x0000000000000248: my_func_03 @ /tmp/test_gsym_yaml/out/file_03.cpp:5

# CHECK-NORMAL-LOOKUP: 0x0000000000000248: my_func_01 @ /tmp/test_gsym_yaml/out/file_01.cpp:5


--- !mach-o
Expand Down
5 changes: 4 additions & 1 deletion llvm/tools/llvm-gsymutil/Opts.td
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@ defm convert :
Eq<"convert",
"Convert the specified file to the GSYM format.\nSupported files include ELF and mach-o files that will have their debug info (DWARF) and symbol table converted">;
def merged_functions :
FF<"merged-functions", "Encode merged function information for functions in debug info that have matching address ranges.\nWithout this option one function per unique address range will be emitted.">;
FF<"merged-functions", "When used with --convert, encodes merged function information for functions in debug info that have matching address ranges.\n"
"Without this option one function per unique address range will be emitted.\n"
"When used with --address/--addresses-from-stdin, all merged functions for a particular address will be displayed.\n"
"Without this option only one function will be displayed.">;
def dwarf_callsites : FF<"dwarf-callsites", "Load call site info from DWARF, if available">;
defm callsites_yaml_file :
Eq<"callsites-yaml-file", "Load call site info from YAML file. Useful for testing.">, Flags<[HelpHidden]>;
Expand Down
49 changes: 32 additions & 17 deletions llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ static uint64_t SegmentSize;
static bool Quiet;
static std::vector<uint64_t> LookupAddresses;
static bool LookupAddressesFromStdin;
static bool StoreMergedFunctionInfo = false;
static bool UseMergedFunctions = false;
static bool LoadDwarfCallSites = false;
static std::string CallSiteYamlPath;

Expand Down Expand Up @@ -181,7 +181,7 @@ static void parseArgs(int argc, char **argv) {
}

LookupAddressesFromStdin = Args.hasArg(OPT_addresses_from_stdin);
StoreMergedFunctionInfo = Args.hasArg(OPT_merged_functions);
UseMergedFunctions = Args.hasArg(OPT_merged_functions);

if (Args.hasArg(OPT_callsites_yaml_file_EQ)) {
CallSiteYamlPath = Args.getLastArgValue(OPT_callsites_yaml_file_EQ);
Expand Down Expand Up @@ -380,7 +380,7 @@ static llvm::Error handleObjectFile(ObjectFile &Obj, const std::string &OutFile,
// functions in the first FunctionInfo with that address range. Do this right
// after loading the DWARF data so we don't have to deal with functions from
// the symbol table.
if (StoreMergedFunctionInfo)
if (UseMergedFunctions)
Gsym.prepareMergedFunctions(Out);

// Get the UUID and convert symbol table to GSYM.
Expand Down Expand Up @@ -508,24 +508,39 @@ static llvm::Error convertFileToGSYM(OutputAggregator &Out) {
}

static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) {
if (auto Result = Gsym.lookup(Addr)) {
// If verbose is enabled dump the full function info for the address.
if (Verbose) {
if (auto FI = Gsym.getFunctionInfo(Addr)) {
OS << "FunctionInfo for " << HEX64(Addr) << ":\n";
Gsym.dump(OS, *FI);
OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
auto logError = [Addr, &OS](Error E) {
OS << HEX64(Addr) << ": ";
logAllUnhandledErrors(std::move(E), OS, "error: ");
};

if (UseMergedFunctions) {
if (auto Results = Gsym.lookupAll(Addr)) {
OS << "Found " << Results->size() << " functions at address "
<< HEX64(Addr) << ":\n";
for (size_t i = 0; i < Results->size(); ++i) {
OS << " " << Results->at(i);

if (i != Results->size() - 1)
OS << "\n";
}
}
OS << Result.get();
} else {
if (Verbose)
OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
OS << HEX64(Addr) << ": ";
logAllUnhandledErrors(Result.takeError(), OS, "error: ");
} else { /* UseMergedFunctions == false */
if (auto Result = Gsym.lookup(Addr)) {
OS << Result.get();
} else {
logError(Result.takeError());
return;
}
}
if (Verbose)

if (Verbose) {
if (auto FI = Gsym.getFunctionInfo(Addr)) {
OS << "FunctionInfo for " << HEX64(Addr) << ":\n";
Gsym.dump(OS, *FI);
OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
}
OS << "\n";
}
}

int llvm_gsymutil_main(int argc, char **argv, const llvm::ToolContext &) {
Expand Down
Loading