Skip to content

[RemoveDIs][DebugInfo][IR] Add parsing for non-intrinsic debug values #79818

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Mar 7, 2024
Merged
4 changes: 4 additions & 0 deletions llvm/include/llvm/AsmParser/LLParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,9 @@ namespace llvm {
/// UpgradeDebuginfo so it can generate broken bitcode.
bool UpgradeDebugInfo;

bool SeenNewDbgInfoFormat = false;
bool SeenOldDbgInfoFormat = false;

std::string SourceFileName;

public:
Expand Down Expand Up @@ -573,6 +576,7 @@ namespace llvm {
bool parseMDNodeTail(MDNode *&N);
bool parseMDNodeVector(SmallVectorImpl<Metadata *> &Elts);
bool parseMetadataAttachment(unsigned &Kind, MDNode *&MD);
bool parseDebugRecord(DbgRecord *&DR, PerFunctionState &PFS);
bool parseInstructionMetadata(Instruction &Inst);
bool parseGlobalObjectMetadataAttachment(GlobalObject &GO);
bool parseOptionalFunctionMetadata(Function &F);
Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/AsmParser/LLToken.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ enum Kind {
exclaim, // !
bar, // |
colon, // :
hash, // #

kw_vscale,
kw_x,
Expand Down Expand Up @@ -479,6 +480,7 @@ enum Kind {
DISPFlag, // DISPFlagFoo
DwarfMacinfo, // DW_MACINFO_foo
ChecksumKind, // CSK_foo
DbgRecordType, // dbg_foo

// Type valued tokens (TyVal).
Type,
Expand Down
33 changes: 33 additions & 0 deletions llvm/include/llvm/IR/DebugProgramInstruction.h
Original file line number Diff line number Diff line change
Expand Up @@ -223,9 +223,19 @@ inline raw_ostream &operator<<(raw_ostream &OS, const DbgRecord &R) {
class DPLabel : public DbgRecord {
DbgRecordParamRef<DILabel> Label;

/// This constructor intentionally left private, so that it is only called via
/// "createUnresolvedDPLabel", which clearly expresses that it is for parsing
/// only.
DPLabel(MDNode *Label, MDNode *DL);

public:
DPLabel(DILabel *Label, DebugLoc DL);

/// For use during parsing; creates a DPLabel from as-of-yet unresolved
/// MDNodes. Trying to access the resulting DPLabel's fields before they are
/// resolved, or if they resolve to the wrong type, will result in a crash.
static DPLabel *createUnresolvedDPLabel(MDNode *Label, MDNode *DL);

DPLabel *clone() const;
void print(raw_ostream &O, bool IsForDebug = false) const;
void print(raw_ostream &ROS, ModuleSlotTracker &MST, bool IsForDebug) const;
Expand Down Expand Up @@ -286,6 +296,29 @@ class DPValue : public DbgRecord, protected DebugValueUser {
DIAssignID *AssignID, Metadata *Address,
DIExpression *AddressExpression, const DILocation *DI);

private:
/// Private constructor for creating new instances during parsing only. Only
/// called through `createUnresolvedDPValue` below, which makes clear that
/// this is used for parsing only, and will later return a subclass depending
/// on which Type is passed.
DPValue(LocationType Type, Metadata *Val, MDNode *Variable,
MDNode *Expression, MDNode *AssignID, Metadata *Address,
MDNode *AddressExpression, MDNode *DI);

public:
/// Used to create DPValues during parsing, where some metadata references may
/// still be unresolved. Although for some fields a generic `Metadata*`
/// argument is accepted for forward type-references, the verifier and
/// accessors will reject incorrect types later on. The function is used for
/// all types of DPValues for simplicity while parsing, but asserts if any
/// necessary fields are empty or unused fields are not empty, i.e. if the
/// #dbg_assign fields are used for a non-dbg-assign type.
static DPValue *createUnresolvedDPValue(LocationType Type, Metadata *Val,
MDNode *Variable, MDNode *Expression,
MDNode *AssignID, Metadata *Address,
MDNode *AddressExpression,
MDNode *DI);

static DPValue *createDPVAssign(Value *Val, DILocalVariable *Variable,
DIExpression *Expression,
DIAssignID *AssignID, Value *Address,
Expand Down
20 changes: 19 additions & 1 deletion llvm/lib/AsmParser/LLLexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -438,9 +438,12 @@ lltok::Kind LLLexer::LexCaret() {

/// Lex all tokens that start with a # character.
/// AttrGrpID ::= #[0-9]+
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we be updating these BNF descriptions too? (probably just a case of adding '#'?

/// Hash ::= #
lltok::Kind LLLexer::LexHash() {
// Handle AttrGrpID: #[0-9]+
return LexUIntID(lltok::AttrGrpID);
if (isdigit(static_cast<unsigned char>(CurPtr[0])))
return LexUIntID(lltok::AttrGrpID);
return lltok::hash;
}

/// Lex a label, integer type, keyword, or hexadecimal integer constant.
Expand Down Expand Up @@ -923,6 +926,21 @@ lltok::Kind LLLexer::LexIdentifier() {

#undef DWKEYWORD

// Keywords for debug record types.
#define DBGRECORDTYPEKEYWORD(STR) \
do { \
if (Keyword == "dbg_" #STR) { \
StrVal = #STR; \
return lltok::DbgRecordType; \
} \
} while (false)

DBGRECORDTYPEKEYWORD(value);
DBGRECORDTYPEKEYWORD(declare);
DBGRECORDTYPEKEYWORD(assign);
DBGRECORDTYPEKEYWORD(label);
#undef DBGRECORDTYPEKEYWORD

if (Keyword.starts_with("DIFlag")) {
StrVal.assign(Keyword.begin(), Keyword.end());
return lltok::DIFlag;
Expand Down
168 changes: 166 additions & 2 deletions llvm/lib/AsmParser/LLParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,24 @@ static cl::opt<bool> AllowIncompleteIR(
"Allow incomplete IR on a best effort basis (references to unknown "
"metadata will be dropped)"));

extern llvm::cl::opt<bool> UseNewDbgInfoFormat;

static std::string getTypeString(Type *T) {
std::string Result;
raw_string_ostream Tmp(Result);
Tmp << *T;
return Tmp.str();
}

// Currently, we should always process modules in the old debug info format by
// default regardless of the module's format in IR; convert it to the old format
// here.
bool finalizeDebugInfoFormat(Module *M) {
if (M)
M->setIsNewDbgInfoFormat(false);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why can't it use UseNewDbgInfoFormat value (and why isn't it used, if it's declared)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just due to an awkward combination of factors at the moment: not all LLVM features internally support the new debug info format yet, so we use UseNewDbgInfoFormat immediately before running optimization passes (the only part that fully supports the new format right now) and convert back afterwards. Right now UseNewDbgInfoFormat defaults to true, so if we use it here to conditionally use the new format then we'll break the tools that use the parser but don't support the new format. We might be able to change that so that UseNewDbgInfoFormat is only true when we use a tool that fully supports it, but that probably involves a separate patch.

In short, the declaration is the mistake here - I'll remove it.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok!

return false;
}

/// Run: module ::= toplevelentity*
bool LLParser::Run(bool UpgradeDebugInfo,
DataLayoutCallbackTy DataLayoutCallback) {
Expand All @@ -86,7 +97,7 @@ bool LLParser::Run(bool UpgradeDebugInfo,
}

return parseTopLevelEntities() || validateEndOfModule(UpgradeDebugInfo) ||
validateEndOfIndex();
validateEndOfIndex() || finalizeDebugInfoFormat(M);
}

bool LLParser::parseStandaloneConstantValue(Constant *&C,
Expand Down Expand Up @@ -6041,6 +6052,17 @@ bool LLParser::parseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
return false;
}

bool isOldDbgFormatIntrinsic(StringRef Name) {
// Exit early for the common (non-debug-intrinsic) case.
// We can make this the only check when we begin supporting all "llvm.dbg"
// intrinsics in the new debug info format.
if (!Name.starts_with("llvm.dbg."))
return false;
Intrinsic::ID FnID = Function::lookupIntrinsicID(Name);
return FnID == Intrinsic::dbg_declare || FnID == Intrinsic::dbg_value ||
FnID == Intrinsic::dbg_assign;
}

/// FunctionHeader
/// ::= OptionalLinkage OptionalPreemptionSpecifier OptionalVisibility
/// OptionalCallingConv OptRetAttrs OptUnnamedAddr Type GlobalName
Expand Down Expand Up @@ -6390,9 +6412,31 @@ bool LLParser::parseBasicBlock(PerFunctionState &PFS) {

std::string NameStr;

// parse the instructions in this block until we get a terminator.
// Parse the instructions and debug values in this block until we get a
// terminator.
Instruction *Inst;
auto DeleteDbgRecord = [](DbgRecord *DR) { DR->deleteRecord(); };
using DbgRecordPtr = std::unique_ptr<DbgRecord, decltype(DeleteDbgRecord)>;
SmallVector<DbgRecordPtr> TrailingDbgRecord;
do {
// Handle debug records first - there should always be an instruction
// following the debug records, i.e. they cannot appear after the block
// terminator.
while (Lex.getKind() == lltok::hash) {
if (SeenOldDbgInfoFormat)
return error(Lex.getLoc(), "debug record should not appear in a module "
"containing debug info intrinsics");
SeenNewDbgInfoFormat = true;
Lex.Lex();
if (!M->IsNewDbgInfoFormat)
M->convertToNewDbgValues();

DbgRecord *DR;
if (parseDebugRecord(DR, PFS))
return true;
TrailingDbgRecord.emplace_back(DR, DeleteDbgRecord);
}

// This instruction may have three possibilities for a name: a) none
// specified, b) name specified "%foo =", c) number specified: "%4 =".
LocTy NameLoc = Lex.getLoc();
Expand Down Expand Up @@ -6437,11 +6481,121 @@ bool LLParser::parseBasicBlock(PerFunctionState &PFS) {
// Set the name on the instruction.
if (PFS.setInstName(NameID, NameStr, NameLoc, Inst))
return true;

// Attach any preceding debug values to this instruction.
for (DbgRecordPtr &DR : TrailingDbgRecord)
BB->insertDPValueBefore(DR.release(), Inst->getIterator());
TrailingDbgRecord.clear();
} while (!Inst->isTerminator());

assert(TrailingDbgRecord.empty() &&
"All debug values should have been attached to an instruction.");

return false;
}

/// parseDebugRecord
/// ::= #dbg_label '(' MDNode ')'
/// ::= #dbg_type '(' Metadata ',' MDNode ',' Metadata ','
/// (MDNode ',' Metadata ',' Metadata ',')? MDNode ')'
bool LLParser::parseDebugRecord(DbgRecord *&DR, PerFunctionState &PFS) {
using RecordKind = DbgRecord::Kind;
using LocType = DPValue::LocationType;
LocTy DPVLoc = Lex.getLoc();
if (Lex.getKind() != lltok::DbgRecordType)
return error(DPVLoc, "expected debug record type here");
RecordKind RecordType = StringSwitch<RecordKind>(Lex.getStrVal())
.Case("declare", RecordKind::ValueKind)
.Case("value", RecordKind::ValueKind)
.Case("assign", RecordKind::ValueKind)
.Case("label", RecordKind::LabelKind);

// Parsing labels is trivial; parse here and early exit, otherwise go into the
// full DPValue processing stage.
if (RecordType == RecordKind::LabelKind) {
Lex.Lex();
if (parseToken(lltok::lparen, "Expected '(' here"))
return true;
MDNode *Label;
if (parseMDNode(Label))
return true;
if (parseToken(lltok::comma, "Expected ',' here"))
return true;
MDNode *DbgLoc;
if (parseMDNode(DbgLoc))
return true;
if (parseToken(lltok::rparen, "Expected ')' here"))
return true;
DR = DPLabel::createUnresolvedDPLabel(Label, DbgLoc);
return false;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: braces


LocType ValueType = StringSwitch<LocType>(Lex.getStrVal())
.Case("declare", LocType::Declare)
.Case("value", LocType::Value)
.Case("assign", LocType::Assign);

Lex.Lex();
if (parseToken(lltok::lparen, "Expected '(' here"))
return true;

// Parse Value field.
Metadata *ValLocMD;
if (parseMetadata(ValLocMD, &PFS))
return true;
if (parseToken(lltok::comma, "Expected ',' here"))
return true;

// Parse Variable field.
MDNode *Variable;
if (parseMDNode(Variable))
return true;
if (parseToken(lltok::comma, "Expected ',' here"))
return true;

// Parse Expression field.
MDNode *Expression;
if (parseMDNode(Expression))
return true;
if (parseToken(lltok::comma, "Expected ',' here"))
return true;

// Parse additional fields for #dbg_assign.
MDNode *AssignID = nullptr;
Metadata *AddressLocation = nullptr;
MDNode *AddressExpression = nullptr;
if (ValueType == LocType::Assign) {
// Parse DIAssignID.
if (parseMDNode(AssignID))
return true;
if (parseToken(lltok::comma, "Expected ',' here"))
return true;

// Parse address ValueAsMetadata.
if (parseMetadata(AddressLocation, &PFS))
return true;
if (parseToken(lltok::comma, "Expected ',' here"))
return true;

// Parse address DIExpression.
if (parseMDNode(AddressExpression))
return true;
if (parseToken(lltok::comma, "Expected ',' here"))
return true;
}

/// Parse DILocation.
MDNode *DebugLoc;
if (parseMDNode(DebugLoc))
return true;

if (parseToken(lltok::rparen, "Expected ')' here"))
return true;
DR = DPValue::createUnresolvedDPValue(ValueType, ValLocMD, Variable,
Expression, AssignID, AddressLocation,
AddressExpression, DebugLoc);
return false;
}
//===----------------------------------------------------------------------===//
// Instruction Parsing.
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -7669,6 +7823,16 @@ bool LLParser::parseCall(Instruction *&Inst, PerFunctionState &PFS,
}
CI->setFastMathFlags(FMF);
}

if (CalleeID.Kind == ValID::t_GlobalName &&
isOldDbgFormatIntrinsic(CalleeID.StrVal)) {
if (SeenNewDbgInfoFormat) {
CI->deleteValue();
return error(CallLoc, "llvm.dbg intrinsic should not appear in a module "
"using non-intrinsic debug info");
}
SeenOldDbgInfoFormat = true;
}
CI->setAttributes(PAL);
ForwardRefAttrGroups[CI] = FwdRefAttrGrps;
Inst = CI;
Expand Down
27 changes: 27 additions & 0 deletions llvm/lib/IR/DebugProgramInstruction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,11 +138,38 @@ DbgRecord::createDebugIntrinsic(Module *M, Instruction *InsertBefore) const {
llvm_unreachable("unsupported DbgRecord kind");
}

DPLabel::DPLabel(MDNode *Label, MDNode *DL)
: DbgRecord(LabelKind, DebugLoc(DL)), Label(Label) {
assert(Label && "Unexpected nullptr");
assert((isa<DILabel>(Label) || Label->isTemporary()) &&
"Label type must be or resolve to a DILabel");
}
DPLabel::DPLabel(DILabel *Label, DebugLoc DL)
: DbgRecord(LabelKind, DL), Label(Label) {
assert(Label && "Unexpected nullptr");
}

DPLabel *DPLabel::createUnresolvedDPLabel(MDNode *Label, MDNode *DL) {
return new DPLabel(Label, DL);
}

DPValue::DPValue(DPValue::LocationType Type, Metadata *Val, MDNode *Variable,
MDNode *Expression, MDNode *AssignID, Metadata *Address,
MDNode *AddressExpression, MDNode *DI)
: DbgRecord(ValueKind, DebugLoc(DI)),
DebugValueUser({Val, Address, AssignID}), Type(Type), Variable(Variable),
Expression(Expression), AddressExpression(AddressExpression) {}

DPValue *DPValue::createUnresolvedDPValue(DPValue::LocationType Type,
Metadata *Val, MDNode *Variable,
MDNode *Expression, MDNode *AssignID,
Metadata *Address,
MDNode *AddressExpression,
MDNode *DI) {
return new DPValue(Type, Val, Variable, Expression, AssignID, Address,
AddressExpression, DI);
}

DPValue *DPValue::createDPValue(Value *Location, DILocalVariable *DV,
DIExpression *Expr, const DILocation *DI) {
return new DPValue(ValueAsMetadata::get(Location), DV, Expr, DI,
Expand Down
Loading