Skip to content

Commit 4bf853e

Browse files
author
Alex B
committed
[GSYM] Add support for querying merged functions in llvm-gsymutil
Adds the ability to look up and display all merged functions for an address in llvm-gsymutil. Now, when `--merged-functions` is used in combination with `--address/--addresses-from-stdin`, lookup results will contain information about merged functions, if available. To support printing merged function information when using the `--verbose` option, the `LookupResult` data structure also had to be extended with pointers to the raw function data and raw merged function data. This is because merged functions share the same address range, so it's not easy to look up the raw merged function data for a particular `LookupResult` that is based on a merged function.
1 parent 8a62104 commit 4bf853e

File tree

9 files changed

+174
-12
lines changed

9 files changed

+174
-12
lines changed

llvm/include/llvm/DebugInfo/GSYM/GsymReader.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,17 @@ class GsymReader {
132132
/// for failing to lookup the address.
133133
llvm::Expected<LookupResult> lookup(uint64_t Addr) const;
134134

135+
/// Lookup all merged functions for a given address.
136+
///
137+
/// This function performs a lookup for the specified address and then
138+
/// retrieves additional LookupResults from any merged functions associated
139+
/// with the primary LookupResult.
140+
///
141+
/// \param Addr The address to lookup.
142+
/// \returns A vector of LookupResult objects, where the first element is the
143+
/// primary result, followed by results for any merged functions.
144+
llvm::Expected<std::vector<LookupResult>> lookupAll(uint64_t Addr) const;
145+
135146
/// Get a string from the string table.
136147
///
137148
/// \param Offset The string table offset for the string to retrieve.

llvm/include/llvm/DebugInfo/GSYM/LookupResult.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
#include "llvm/ADT/AddressRanges.h"
1313
#include "llvm/ADT/StringRef.h"
14+
#include "llvm/Support/DataExtractor.h"
1415
#include <inttypes.h>
1516
#include <vector>
1617

@@ -50,6 +51,17 @@ struct LookupResult {
5051
/// array, and the concrete function will appear at the end of the array.
5152
SourceLocations Locations;
5253
std::string getSourceFile(uint32_t Index) const;
54+
55+
/// Optional DataExtractor containing the merged functions data.
56+
/// This is only populated during lookups if merged function information
57+
/// was present. This is an optimization to avoid parsing the
58+
/// MergedFunctionsInfo data unless needed.
59+
std::optional<DataExtractor> MergedFunctionsData;
60+
61+
/// The binary data used to decode the FunctionInfo from which this
62+
/// LookupResult was created. This can be used to re-decode the entire
63+
/// FunctionInfo if desired.
64+
std::optional<DataExtractor> FunctionInfoData;
5365
};
5466

5567
inline bool operator==(const LookupResult &LHS, const LookupResult &RHS) {

llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,18 @@ struct MergedFunctionsInfo {
3131
/// \returns A boolean indicating if this MergedFunctionsInfo is valid.
3232
bool isValid() { return !MergedFunctions.empty(); }
3333

34+
/// Get a vector of DataExtractor objects for the functions in this
35+
/// MergedFunctionsInfo object.
36+
///
37+
/// \param Data The binary stream to read the data from. This object must have
38+
/// the data for the MergedFunctionsInfo object starting at offset zero. The
39+
/// data can contain more data than needed.
40+
///
41+
/// \returns An llvm::Expected containing a vector of DataExtractor objects on
42+
/// success, or an error object if parsing fails.
43+
static llvm::Expected<std::vector<DataExtractor>>
44+
getFuncsDataExtractors(DataExtractor &Data);
45+
3446
/// Decode a MergedFunctionsInfo object from a binary data stream.
3547
///
3648
/// \param Data The binary stream to read the data from. This object must have

llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,7 @@ llvm::Expected<LookupResult> FunctionInfo::lookup(DataExtractor &Data,
241241
uint64_t Addr) {
242242
LookupResult LR;
243243
LR.LookupAddr = Addr;
244+
LR.FunctionInfoData = Data;
244245
uint64_t Offset = 0;
245246
LR.FuncRange = {FuncAddr, FuncAddr + Data.getU32(&Offset)};
246247
uint32_t NameOffset = Data.getU32(&Offset);
@@ -289,6 +290,11 @@ llvm::Expected<LookupResult> FunctionInfo::lookup(DataExtractor &Data,
289290
return ExpectedLE.takeError();
290291
break;
291292

293+
case InfoType::MergedFunctionsInfo:
294+
// Store the merged functions data for later parsing, if needed.
295+
LR.MergedFunctionsData = InfoData;
296+
break;
297+
292298
case InfoType::InlineInfo:
293299
// We will parse the inline info after our line table, but only if
294300
// we have a line entry.

llvm/lib/DebugInfo/GSYM/GsymReader.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,41 @@ llvm::Expected<LookupResult> GsymReader::lookup(uint64_t Addr) const {
342342
return ExpectedData.takeError();
343343
}
344344

345+
/// Look up \p Addr and return the primary LookupResult followed by one
/// LookupResult for every merged function recorded for that function.
///
/// \param Addr The address to look up.
/// \returns The results with the primary result first, or the first error
/// produced by the primary lookup, by splitting the merged functions data, or
/// by looking up any individual merged function.
llvm::Expected<std::vector<LookupResult>>
GsymReader::lookupAll(uint64_t Addr) const {
  std::vector<LookupResult> Results;

  // First perform a lookup to get the primary function info result.
  auto MainResult = lookup(Addr);
  if (!MainResult)
    return MainResult.takeError();

  // Capture everything still needed from the primary result *before* it is
  // moved into Results, so the moved-from object is never read afterwards.
  const uint64_t FuncStartAddr = MainResult->FuncRange.start();
  auto MergedFuncsData = MainResult->MergedFunctionsData;

  // Add the main result as the first entry.
  Results.push_back(std::move(*MainResult));

  // No merged functions were recorded during the lookup: the primary result
  // is the only one.
  if (!MergedFuncsData)
    return Results;

  // Get a data extractor for each merged function.
  auto ExpectedMergedFuncExtractors =
      MergedFunctionsInfo::getFuncsDataExtractors(*MergedFuncsData);
  if (!ExpectedMergedFuncExtractors)
    return ExpectedMergedFuncExtractors.takeError();

  // Look up the address in each merged function, using the primary function's
  // start address as the function address.
  for (DataExtractor &MergedData : *ExpectedMergedFuncExtractors) {
    auto FI = FunctionInfo::lookup(MergedData, *this, FuncStartAddr, Addr);
    if (!FI)
      return FI.takeError();
    Results.push_back(std::move(*FI));
  }

  return Results;
}
379+
345380
void GsymReader::dump(raw_ostream &OS) {
346381
const auto &Header = getHeader();
347382
// Dump the GSYM header.

llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp

Lines changed: 44 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,22 +32,59 @@ llvm::Error MergedFunctionsInfo::encode(FileWriter &Out) const {
3232
return Error::success();
3333
}
3434

35-
llvm::Expected<MergedFunctionsInfo>
36-
MergedFunctionsInfo::decode(DataExtractor &Data, uint64_t BaseAddr) {
37-
MergedFunctionsInfo MFI;
35+
llvm::Expected<std::vector<DataExtractor>>
36+
MergedFunctionsInfo::getFuncsDataExtractors(DataExtractor &Data) {
37+
std::vector<DataExtractor> Results;
3838
uint64_t Offset = 0;
39+
40+
// Ensure there is enough data to read the function count.
41+
if (!Data.isValidOffsetForDataOfSize(Offset, 4))
42+
return createStringError(
43+
std::errc::io_error,
44+
"unable to read the function count at offset 0x%8.8" PRIx64, Offset);
45+
3946
uint32_t Count = Data.getU32(&Offset);
4047

4148
for (uint32_t i = 0; i < Count; ++i) {
49+
// Ensure there is enough data to read the function size.
50+
if (!Data.isValidOffsetForDataOfSize(Offset, 4))
51+
return createStringError(
52+
std::errc::io_error,
53+
"unable to read size of function %u at offset 0x%8.8" PRIx64, i,
54+
Offset);
55+
4256
uint32_t FnSize = Data.getU32(&Offset);
43-
DataExtractor FnData(Data.getData().substr(Offset, FnSize),
57+
58+
// Ensure there is enough data for the function content.
59+
if (!Data.isValidOffsetForDataOfSize(Offset, FnSize))
60+
return createStringError(
61+
std::errc::io_error,
62+
"function data is truncated for function %u at offset 0x%8.8" PRIx64
63+
", expected size %u",
64+
i, Offset, FnSize);
65+
66+
// Extract the function data.
67+
Results.emplace_back(Data.getData().substr(Offset, FnSize),
4468
Data.isLittleEndian(), Data.getAddressSize());
45-
llvm::Expected<FunctionInfo> FI =
46-
FunctionInfo::decode(FnData, BaseAddr + Offset);
69+
70+
Offset += FnSize;
71+
}
72+
return Results;
73+
}
74+
75+
llvm::Expected<MergedFunctionsInfo>
76+
MergedFunctionsInfo::decode(DataExtractor &Data, uint64_t BaseAddr) {
77+
MergedFunctionsInfo MFI;
78+
auto FuncExtractorsOrError = MFI.getFuncsDataExtractors(Data);
79+
80+
if (!FuncExtractorsOrError)
81+
return FuncExtractorsOrError.takeError();
82+
83+
for (DataExtractor &FuncData : *FuncExtractorsOrError) {
84+
llvm::Expected<FunctionInfo> FI = FunctionInfo::decode(FuncData, BaseAddr);
4785
if (!FI)
4886
return FI.takeError();
4987
MFI.MergedFunctions.push_back(std::move(*FI));
50-
Offset += FnSize;
5188
}
5289

5390
return MFI;

llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,16 @@
6464
# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:10
6565
# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:6
6666

67+
## Test the lookup functionality for merged functions:
68+
# RUN: llvm-gsymutil --verify %t.keep.gSYM --address 0x248 --merged-functions | FileCheck --check-prefix=CHECK-MERGED-LOOKUP %s
69+
# RUN: llvm-gsymutil --verify %t.keep.gSYM --address 0x248 | FileCheck --check-prefix=CHECK-NORMAL-LOOKUP %s
70+
71+
# CHECK-MERGED-LOOKUP: Found 3 functions at address 0x0000000000000248:
72+
# CHECK-MERGED-LOOKUP: 0x0000000000000248: my_func_01 @ /tmp/test_gsym_yaml/out/file_01.cpp:5
73+
# CHECK-MERGED-LOOKUP: 0x0000000000000248: my_func_02 @ /tmp/test_gsym_yaml/out/file_02.cpp:5
74+
# CHECK-MERGED-LOOKUP: 0x0000000000000248: my_func_03 @ /tmp/test_gsym_yaml/out/file_03.cpp:5
75+
76+
# CHECK-NORMAL-LOOKUP: 0x0000000000000248: my_func_01 @ /tmp/test_gsym_yaml/out/file_01.cpp:5
6777

6878

6979
--- !mach-o

llvm/tools/llvm-gsymutil/Opts.td

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,10 @@ defm convert :
1717
Eq<"convert",
1818
"Convert the specified file to the GSYM format.\nSupported files include ELF and mach-o files that will have their debug info (DWARF) and symbol table converted">;
1919
def merged_functions :
20-
FF<"merged-functions", "Encode merged function information for functions in debug info that have matching address ranges.\nWithout this option one function per unique address range will be emitted.">;
20+
FF<"merged-functions", "When used with --convert, encodes merged function information for functions in debug info that have matching address ranges.\n"
21+
"Without this option one function per unique address range will be emitted.\n"
22+
"When used with --address/--addresses-from-stdin, all merged functions for a particular address will be displayed.\n"
23+
"Without this option only one function will be displayed.">;
2124
def dwarf_callsites : FF<"dwarf-callsites", "Load call site info from DWARF, if available">;
2225
defm callsites_yaml_file :
2326
Eq<"callsites-yaml-file", "Load call site info from YAML file. Useful for testing.">, Flags<[HelpHidden]>;

llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ static uint64_t SegmentSize;
9898
static bool Quiet;
9999
static std::vector<uint64_t> LookupAddresses;
100100
static bool LookupAddressesFromStdin;
101-
static bool StoreMergedFunctionInfo = false;
101+
static bool UseMergedFunctions = false;
102102
static bool LoadDwarfCallSites = false;
103103
static std::string CallSiteYamlPath;
104104

@@ -181,7 +181,7 @@ static void parseArgs(int argc, char **argv) {
181181
}
182182

183183
LookupAddressesFromStdin = Args.hasArg(OPT_addresses_from_stdin);
184-
StoreMergedFunctionInfo = Args.hasArg(OPT_merged_functions);
184+
UseMergedFunctions = Args.hasArg(OPT_merged_functions);
185185

186186
if (Args.hasArg(OPT_callsites_yaml_file_EQ)) {
187187
CallSiteYamlPath = Args.getLastArgValue(OPT_callsites_yaml_file_EQ);
@@ -380,7 +380,7 @@ static llvm::Error handleObjectFile(ObjectFile &Obj, const std::string &OutFile,
380380
// functions in the first FunctionInfo with that address range. Do this right
381381
// after loading the DWARF data so we don't have to deal with functions from
382382
// the symbol table.
383-
if (StoreMergedFunctionInfo)
383+
if (UseMergedFunctions)
384384
Gsym.prepareMergedFunctions(Out);
385385

386386
// Get the UUID and convert symbol table to GSYM.
@@ -507,9 +507,45 @@ static llvm::Error convertFileToGSYM(OutputAggregator &Out) {
507507
return Error::success();
508508
}
509509

510+
/// Look up \p Addr with GsymReader::lookupAll() and print every returned
/// LookupResult to \p OS, preceded by a "Found N functions" header.
///
/// When Verbose is set, the FunctionInfo for each result is re-decoded from
/// that result's FunctionInfoData and dumped ahead of the result. If the
/// lookup itself fails, the error is printed to \p OS instead.
static void doLookupMergedFunctions(GsymReader &Gsym, uint64_t Addr,
                                    raw_ostream &OS) {
  if (auto Results = Gsym.lookupAll(Addr)) {
    const size_t NumResults = Results->size();
    OS << "Found " << NumResults << " functions at address " << HEX64(Addr)
       << ":\n";
    for (size_t i = 0; i < NumResults; ++i) {
      auto &LR = (*Results)[i];

      // FunctionInfoData is optional, so only attempt the verbose re-decode
      // when it is actually populated.
      if (Verbose && LR.FunctionInfoData) {
        if (auto FI = FunctionInfo::decode(*LR.FunctionInfoData,
                                           LR.FuncRange.start())) {
          OS << "FunctionInfo for " << HEX64(Addr) << ":\n";
          Gsym.dump(OS, *FI);
          OS << "\nLookupResults for " << HEX64(Addr) << ":\n";
        } else {
          // The error held by a failed Expected must be taken; report it
          // rather than dropping it, then still print the lookup result.
          logAllUnhandledErrors(FI.takeError(), OS, "error: ");
        }
      }

      // Print this function's lookup result.
      OS << " " << LR;

      // Separate results with a newline, but not after the last one.
      if (i + 1 != NumResults)
        OS << "\n";
    }
  } else {
    if (Verbose)
      OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
    OS << HEX64(Addr) << ": ";
    logAllUnhandledErrors(Results.takeError(), OS, "error: ");
  }
  if (Verbose)
    OS << "\n";
}
540+
510541
static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) {
542+
if (UseMergedFunctions) {
543+
doLookupMergedFunctions(Gsym, Addr, OS);
544+
return;
545+
}
546+
511547
if (auto Result = Gsym.lookup(Addr)) {
512-
// If verbose is enabled dump the full function info for the address.
548+
// If verbose is enabled, dump the full function info for the address.
513549
if (Verbose) {
514550
if (auto FI = Gsym.getFunctionInfo(Addr)) {
515551
OS << "FunctionInfo for " << HEX64(Addr) << ":\n";

0 commit comments

Comments
 (0)