Skip to content

LLVM symbolizer gsym support #134847

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
May 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion llvm/include/llvm/DebugInfo/DIContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ struct DIDumpOptions {

class DIContext {
public:
enum DIContextKind { CK_DWARF, CK_PDB, CK_BTF };
enum DIContextKind { CK_DWARF, CK_PDB, CK_BTF, CK_GSYM };

DIContext(DIContextKind K) : Kind(K) {}
virtual ~DIContext() = default;
Expand Down
66 changes: 66 additions & 0 deletions llvm/include/llvm/DebugInfo/GSYM/GsymDIContext.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
//===-- GsymDIContext.h --------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_DEBUGINFO_GSYM_GSYMDICONTEXT_H
#define LLVM_DEBUGINFO_GSYM_GSYMDICONTEXT_H

#include "llvm/DebugInfo/DIContext.h"
#include <cstdint>
#include <memory>
#include <string>

namespace llvm {

namespace gsym {

class GsymReader;

/// A DIContext implementation backed by a GSYM file.
///
/// This is the top level entity that deals with GSYM symbolication. It exists
/// only to provide a transparent interface across the different symbolication
/// formats (e.g. GSYM, PDB and DWARF). More control and power over the debug
/// information access can be had by using the GSYM interfaces directly.
class GsymDIContext : public DIContext {
public:
  /// Creates a context that takes ownership of \p Reader.
  GsymDIContext(std::unique_ptr<GsymReader> Reader);

  // The context is the sole owner of its reader, so it is not copyable. The
  // deleted operations take a const reference so that they are the canonical
  // copy constructor / copy assignment operator.
  GsymDIContext(const GsymDIContext &) = delete;
  GsymDIContext &operator=(const GsymDIContext &) = delete;

  /// LLVM-style RTTI support: true when \p DICtx was constructed with the
  /// CK_GSYM kind.
  static bool classof(const DIContext *DICtx) {
    return DICtx->getKind() == CK_GSYM;
  }

  /// Dumps the context to \p OS according to \p DIDumpOpts.
  void dump(raw_ostream &OS, DIDumpOptions DIDumpOpts) override;

  /// Looks up the line information for the code address \p Address, filling
  /// in the fields requested by \p Specifier.
  std::optional<DILineInfo> getLineInfoForAddress(
      object::SectionedAddress Address,
      DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override;

  /// Looks up the line information for the data address \p Address.
  std::optional<DILineInfo>
  getLineInfoForDataAddress(object::SectionedAddress Address) override;

  /// Looks up the line information for the \p Size bytes starting at
  /// \p Address.
  DILineInfoTable getLineInfoForAddressRange(
      object::SectionedAddress Address, uint64_t Size,
      DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override;

  /// Looks up the chain of frames (inlined and concrete) covering \p Address.
  DIInliningInfo getInliningInfoForAddress(
      object::SectionedAddress Address,
      DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override;

  /// Looks up the local variables of the function containing \p Address.
  std::vector<DILocal>
  getLocalsForAddress(object::SectionedAddress Address) override;

private:
  /// The GSYM reader answering every query; owned for the lifetime of the
  /// context and never reseated.
  const std::unique_ptr<GsymReader> Reader;
};

} // end namespace gsym

} // end namespace llvm

#endif // LLVM_DEBUGINFO_GSYM_GSYMDICONTEXT_H
3 changes: 3 additions & 0 deletions llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,13 @@ class LLVMSymbolizer {
bool RelativeAddresses = false;
bool UntagAddresses = false;
bool UseDIA = false;
bool DisableGsym = false;
std::string DefaultArch;
std::vector<std::string> DsymHints;
std::string FallbackDebugPath;
std::string DWPName;
std::vector<std::string> DebugFileDirectory;
std::vector<std::string> GsymFileDirectory;
size_t MaxCacheSize =
sizeof(size_t) == 4
? 512 * 1024 * 1024 /* 512 MiB */
Expand Down Expand Up @@ -177,6 +179,7 @@ class LLVMSymbolizer {
ObjectFile *lookUpBuildIDObject(const std::string &Path,
const ELFObjectFileBase *Obj,
const std::string &ArchName);
std::string lookUpGsymFile(const std::string &Path);

bool findDebugBinary(const std::string &OrigPath,
const std::string &DebuglinkName, uint32_t CRCHash,
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/DebugInfo/GSYM/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ add_llvm_component_library(LLVMDebugInfoGSYM
FileWriter.cpp
FunctionInfo.cpp
GsymCreator.cpp
GsymDIContext.cpp
GsymReader.cpp
InlineInfo.cpp
LineTable.cpp
Expand Down
166 changes: 166 additions & 0 deletions llvm/lib/DebugInfo/GSYM/GsymDIContext.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
//===-- GsymDIContext.cpp ------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/DebugInfo/GSYM/GsymDIContext.h"

#include "llvm/DebugInfo/GSYM/GsymReader.h"
#include "llvm/Support/Path.h"

using namespace llvm;
using namespace llvm::gsym;

GsymDIContext::GsymDIContext(std::unique_ptr<GsymReader> Reader)
: DIContext(CK_GSYM), Reader(std::move(Reader)) {}

// Dumping is not implemented for GSYM contexts: this override intentionally
// writes nothing to OS and ignores DumpOpts.
void GsymDIContext::dump(raw_ostream &OS, DIDumpOptions DumpOpts) {}

/// Copies the parts of \p Location requested by \p Specifier into
/// \p LineInfo.
///
/// The function name is filled in unless Specifier.FNKind is
/// DINameKind::None. The file name and line are filled in unless
/// Specifier.FLIKind is FileLineInfoKind::None, in which case they are left
/// untouched. A missing file name is reported as DILineInfo::BadString for
/// every path style.
///
/// \returns true on success. Every FileLineInfoKind enumerator handled below
/// succeeds; the bool result is kept so that callers need not change.
static bool fillLineInfoFromLocation(const SourceLocation &Location,
                                     DILineInfoSpecifier Specifier,
                                     DILineInfo &LineInfo) {
  // FIXME Demangle in case of DINameKind::ShortName
  if (Specifier.FNKind != DINameKind::None)
    LineInfo.FunctionName = Location.Name.str();

  switch (Specifier.FLIKind) {
  case DILineInfoSpecifier::FileLineInfoKind::None:
    // No file/line information was requested. This is not a failure:
    // reporting one would make callers throw away the function name as well.
    return true;

  case DILineInfoSpecifier::FileLineInfoKind::RelativeFilePath:
    // We have no information to determine the relative path, so we fall back
    // to returning the absolute path.
  case DILineInfoSpecifier::FileLineInfoKind::RawValue:
  case DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath:
    if (!Location.Dir.empty()) {
      SmallString<128> Path(Location.Dir);
      sys::path::append(Path, Location.Base);
      LineInfo.FileName = static_cast<std::string>(Path);
    } else if (!Location.Base.empty()) {
      LineInfo.FileName = Location.Base.str();
    } else {
      LineInfo.FileName = DILineInfo::BadString;
    }
    break;

  case DILineInfoSpecifier::FileLineInfoKind::BaseNameOnly:
    // Report a missing base name the same way as the full-path styles above
    // instead of storing an empty string.
    if (Location.Base.empty())
      LineInfo.FileName = DILineInfo::BadString;
    else
      LineInfo.FileName = Location.Base.str();
    break;
  }
  LineInfo.Line = Location.Line;

  // We don't have information in GSYM to fill any of the Source, Column,
  // StartFileName or StartLine attributes.

  return true;
}

// Resolves a code address to a single DILineInfo using the GSYM reader.
//
// Returns std::nullopt when the address carries a section index, when the
// reader lookup fails (the error is consumed), or when the first location
// cannot be converted for the requested specifier.
std::optional<DILineInfo>
GsymDIContext::getLineInfoForAddress(object::SectionedAddress Address,
                                     DILineInfoSpecifier Specifier) {
  // Only addresses without a section index are handled.
  if (Address.SectionIndex != object::SectionedAddress::UndefSection)
    return std::nullopt;

  auto LookupOrErr = Reader->lookup(Address.Address);
  if (!LookupOrErr) {
    consumeError(LookupOrErr.takeError());
    return std::nullopt;
  }
  const auto &Lookup = *LookupOrErr;

  DILineInfo Info;
  const bool HasLocations = !Lookup.Locations.empty();
  if (HasLocations) {
    // The first location entry describes the frame reported for the address.
    if (!fillLineInfoFromLocation(Lookup.Locations.front(), Specifier, Info))
      return std::nullopt;
  } else if (Specifier.FNKind != DINameKind::None) {
    // No debug info for this, we just had a symbol from the symbol table.
    // FIXME Demangle in case of DINameKind::ShortName
    Info.FunctionName = Lookup.FuncName.str();
  }

  Info.StartAddress = Lookup.FuncRange.start();
  return Info;
}

// Data addresses cannot be resolved: there's no such information in the GSYM
// file, so every query reports "no line info".
std::optional<DILineInfo>
GsymDIContext::getLineInfoForDataAddress(object::SectionedAddress Address) {
  return std::nullopt;
}

// Looks up the line table rows covering [Address, Address + Size).
//
// NOTE: the loop that should convert matching rows is still a stub, so the
// returned table is always empty. An empty table is also returned for a zero
// Size, for an address that carries a section index, and when the reader
// cannot find function info for the address (that error is consumed).
DILineInfoTable
GsymDIContext::getLineInfoForAddressRange(object::SectionedAddress Address,
                                          uint64_t Size,
                                          DILineInfoSpecifier Specifier) {
  const bool HasSectionIndex =
      Address.SectionIndex != llvm::object::SectionedAddress::UndefSection;
  if (Size == 0 || HasSectionIndex)
    return DILineInfoTable();

  auto FuncInfoOrErr = Reader->getFunctionInfo(Address.Address);
  if (!FuncInfoOrErr) {
    consumeError(FuncInfoOrErr.takeError());
    return DILineInfoTable();
  }

  DILineInfoTable Rows;
  if (!FuncInfoOrErr->OptLineTable)
    return Rows;

  const gsym::LineTable &Lines = *FuncInfoOrErr->OptLineTable;
  const uint64_t RangeBegin = Address.Address;
  const uint64_t RangeEnd = RangeBegin + Size;
  for (const auto &Row : Lines) {
    const bool InRange = RangeBegin <= Row.Addr && Row.Addr < RangeEnd;
    if (!InRange)
      continue;
    // Use Row.Addr, Row.File (which is a file index into the files tables
    // from the GsymReader), and Row.Line (source line number) to add stuff to
    // the DILineInfoTable
  }
  return Rows;
}

// Builds the chain of frames covering Address, one frame per location entry
// returned by the reader, in the order the reader reports them.
//
// Returns an empty DIInliningInfo when the lookup fails or when any location
// cannot be converted for the requested specifier.
DIInliningInfo
GsymDIContext::getInliningInfoForAddress(object::SectionedAddress Address,
                                         DILineInfoSpecifier Specifier) {
  auto ResultOrErr = Reader->lookup(Address.Address);

  if (!ResultOrErr) {
    // An Expected<> holding an error must have that error handled before it
    // is destroyed; otherwise builds with unchecked-error detection abort.
    consumeError(ResultOrErr.takeError());
    return {};
  }

  const auto &Result = *ResultOrErr;

  DIInliningInfo InlineInfo;

  for (const auto &Location : Result.Locations) {
    DILineInfo LineInfo;

    if (!fillLineInfoFromLocation(Location, Specifier, LineInfo))
      return {};

    // Hm, that's probably something that should only be filled in the first or
    // last frame?
    LineInfo.StartAddress = Result.FuncRange.start();

    InlineInfo.addFrame(LineInfo);
  }

  return InlineInfo;
}

// Local variables cannot be reported: there's no such information in the GSYM
// file, so the result is always an empty list.
std::vector<DILocal>
GsymDIContext::getLocalsForAddress(object::SectionedAddress Address) {
  return std::vector<DILocal>();
}
1 change: 1 addition & 0 deletions llvm/lib/DebugInfo/Symbolize/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ add_llvm_component_library(LLVMSymbolize

LINK_COMPONENTS
DebugInfoDWARF
DebugInfoGSYM
DebugInfoPDB
DebugInfoBTF
Object
Expand Down
94 changes: 71 additions & 23 deletions llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/DebugInfo/BTF/BTFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/GSYM/GsymDIContext.h"
#include "llvm/DebugInfo/GSYM/GsymReader.h"
#include "llvm/DebugInfo/PDB/PDB.h"
#include "llvm/DebugInfo/PDB/PDBContext.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
Expand Down Expand Up @@ -498,6 +500,34 @@ bool LLVMSymbolizer::getOrFindDebugBinary(const ArrayRef<uint8_t> BuildID,
return false;
}

std::string LLVMSymbolizer::lookUpGsymFile(const std::string &Path) {
if (Opts.DisableGsym)
return {};

auto CheckGsymFile = [](const llvm::StringRef &GsymPath) {
sys::fs::file_status Status;
std::error_code EC = llvm::sys::fs::status(GsymPath, Status);
return !EC && !llvm::sys::fs::is_directory(Status);
};

// First, look beside the binary file
if (const auto GsymPath = Path + ".gsym"; CheckGsymFile(GsymPath))
return GsymPath;

// Then, look in the directories specified by GsymFileDirectory

for (const auto &Directory : Opts.GsymFileDirectory) {
SmallString<16> GsymPath = llvm::StringRef{Directory};
llvm::sys::path::append(GsymPath,
llvm::sys::path::filename(Path) + ".gsym");

if (CheckGsymFile(GsymPath))
return static_cast<std::string>(GsymPath);
}

return {};
}

Expected<LLVMSymbolizer::ObjectPair>
LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path,
const std::string &ArchName) {
Expand Down Expand Up @@ -634,30 +664,48 @@ LLVMSymbolizer::getOrCreateModuleInfo(StringRef ModuleName) {
std::unique_ptr<DIContext> Context;
// If this is a COFF object containing PDB info and not containing DWARF
// section, use a PDBContext to symbolize. Otherwise, use DWARF.
if (auto CoffObject = dyn_cast<COFFObjectFile>(Objects.first)) {
const codeview::DebugInfo *DebugInfo;
StringRef PDBFileName;
auto EC = CoffObject->getDebugPDBInfo(DebugInfo, PDBFileName);
// Use DWARF if there're DWARF sections.
bool HasDwarf =
llvm::any_of(Objects.first->sections(), [](SectionRef Section) -> bool {
if (Expected<StringRef> SectionName = Section.getName())
return SectionName.get() == ".debug_info";
return false;
});
if (!EC && !HasDwarf && DebugInfo != nullptr && !PDBFileName.empty()) {
using namespace pdb;
std::unique_ptr<IPDBSession> Session;

PDB_ReaderType ReaderType =
Opts.UseDIA ? PDB_ReaderType::DIA : PDB_ReaderType::Native;
if (auto Err = loadDataForEXE(ReaderType, Objects.first->getFileName(),
Session)) {
Modules.emplace(ModuleName, std::unique_ptr<SymbolizableModule>());
// Return along the PDB filename to provide more context
return createFileError(PDBFileName, std::move(Err));
// Create a DIContext to symbolize as follows:
// - If there is a GSYM file, create a GsymDIContext.
// - Otherwise, if this is a COFF object containing PDB info, create a
// PDBContext.
// - Otherwise, create a DWARFContext.
const auto GsymFile = lookUpGsymFile(BinaryName.str());
if (!GsymFile.empty()) {
auto ReaderOrErr = gsym::GsymReader::openFile(GsymFile);

if (ReaderOrErr) {
std::unique_ptr<gsym::GsymReader> Reader =
std::make_unique<gsym::GsymReader>(std::move(*ReaderOrErr));

Context = std::make_unique<gsym::GsymDIContext>(std::move(Reader));
}
}
if (!Context) {
if (auto CoffObject = dyn_cast<COFFObjectFile>(Objects.first)) {
const codeview::DebugInfo *DebugInfo;
StringRef PDBFileName;
auto EC = CoffObject->getDebugPDBInfo(DebugInfo, PDBFileName);
// Use DWARF if there're DWARF sections.
bool HasDwarf = llvm::any_of(
Objects.first->sections(), [](SectionRef Section) -> bool {
if (Expected<StringRef> SectionName = Section.getName())
return SectionName.get() == ".debug_info";
return false;
});
if (!EC && !HasDwarf && DebugInfo != nullptr && !PDBFileName.empty()) {
using namespace pdb;
std::unique_ptr<IPDBSession> Session;

PDB_ReaderType ReaderType =
Opts.UseDIA ? PDB_ReaderType::DIA : PDB_ReaderType::Native;
if (auto Err = loadDataForEXE(ReaderType, Objects.first->getFileName(),
Session)) {
Modules.emplace(ModuleName, std::unique_ptr<SymbolizableModule>());
// Return along the PDB filename to provide more context
return createFileError(PDBFileName, std::move(Err));
}
Context.reset(new PDBContext(*CoffObject, std::move(Session)));
}
Context.reset(new PDBContext(*CoffObject, std::move(Session)));
}
}
if (!Context)
Expand Down
Binary file not shown.
Binary file not shown.
Loading