Skip to content

[RemoveDIs] Read/write DbgRecords directly from/to bitcode #83251

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions llvm/include/llvm/Bitcode/LLVMBitCodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,17 @@ enum FunctionCodes {
// operation, align, vol,
// ordering, synchscope]
FUNC_CODE_BLOCKADDR_USERS = 60, // BLOCKADDR_USERS: [value...]

FUNC_CODE_DEBUG_RECORD_VALUE =
61, // [DILocation, DILocalVariable, DIExpression, ValueAsMetadata]
FUNC_CODE_DEBUG_RECORD_DECLARE =
62, // [DILocation, DILocalVariable, DIExpression, ValueAsMetadata]
FUNC_CODE_DEBUG_RECORD_ASSIGN =
63, // [DILocation, DILocalVariable, DIExpression, ValueAsMetadata,
// DIAssignID, DIExpression (addr), ValueAsMetadata (addr)]
FUNC_CODE_DEBUG_RECORD_VALUE_SIMPLE =
64, // [DILocation, DILocalVariable, DIExpression, Value]
FUNC_CODE_DEBUG_RECORD_LABEL = 65, // [DILocation, DILabel]
};

enum UseListCodes {
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,11 @@ GetCodeName(unsigned CodeID, unsigned BlockID,
STRINGIFY_CODE(FUNC_CODE, INST_CMPXCHG)
STRINGIFY_CODE(FUNC_CODE, INST_CALLBR)
STRINGIFY_CODE(FUNC_CODE, BLOCKADDR_USERS)
STRINGIFY_CODE(FUNC_CODE, DEBUG_RECORD_DECLARE)
STRINGIFY_CODE(FUNC_CODE, DEBUG_RECORD_VALUE)
STRINGIFY_CODE(FUNC_CODE, DEBUG_RECORD_ASSIGN)
STRINGIFY_CODE(FUNC_CODE, DEBUG_RECORD_VALUE_SIMPLE)
STRINGIFY_CODE(FUNC_CODE, DEBUG_RECORD_LABEL)
}
case bitc::VALUE_SYMTAB_BLOCK_ID:
switch (CodeID) {
Expand Down
101 changes: 98 additions & 3 deletions llvm/lib/Bitcode/Reader/BitcodeReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,6 @@ static cl::opt<bool> ExpandConstantExprs(
cl::desc(
"Expand constant expressions to instructions for testing purposes"));

// Declare external flag for whether we're using the new debug-info format.
extern llvm::cl::opt<bool> UseNewDbgInfoFormat;

namespace {

enum {
Expand Down Expand Up @@ -4279,6 +4276,10 @@ Error BitcodeReader::parseGlobalIndirectSymbolRecord(
Error BitcodeReader::parseModule(uint64_t ResumeBit,
bool ShouldLazyLoadMetadata,
ParserCallbacks Callbacks) {
// Force the debug-info mode into the old format for now.
// FIXME: Remove this once all tools support RemoveDIs.
TheModule->IsNewDbgInfoFormat = false;

this->ValueTypeCallback = std::move(Callbacks.ValueType);
if (ResumeBit) {
if (Error JumpFailed = Stream.JumpToBit(ResumeBit))
Expand Down Expand Up @@ -6398,6 +6399,89 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
InstructionList.push_back(I);
break;
}
case bitc::FUNC_CODE_DEBUG_RECORD_LABEL: {
// DPLabels are placed after the Instructions that they are attached to.
Instruction *Inst = getLastInstruction();
if (!Inst)
return error("Invalid dbg record: missing instruction");
DILocation *DIL = cast<DILocation>(getFnMetadataByID(Record[0]));
DILabel *Label = cast<DILabel>(getFnMetadataByID(Record[1]));
Inst->getParent()->insertDbgRecordBefore(
new DPLabel(Label, DebugLoc(DIL)), Inst->getIterator());
continue; // This isn't an instruction.
}
case bitc::FUNC_CODE_DEBUG_RECORD_VALUE_SIMPLE:
case bitc::FUNC_CODE_DEBUG_RECORD_VALUE:
case bitc::FUNC_CODE_DEBUG_RECORD_DECLARE:
case bitc::FUNC_CODE_DEBUG_RECORD_ASSIGN: {
// DPValues are placed after the Instructions that they are attached to.
Instruction *Inst = getLastInstruction();
if (!Inst)
return error("Invalid dbg record: missing instruction");

// First 3 fields are common to all kinds:
// DILocation, DILocalVariable, DIExpression
// dbg_value (FUNC_CODE_DEBUG_RECORD_VALUE)
// ..., LocationMetadata
// dbg_value (FUNC_CODE_DEBUG_RECORD_VALUE_SIMPLE - abbrev'd)
// ..., Value
// dbg_declare (FUNC_CODE_DEBUG_RECORD_DECLARE)
// ..., LocationMetadata
// dbg_assign (FUNC_CODE_DEBUG_RECORD_ASSIGN)
// ..., LocationMetadata, DIAssignID, DIExpression, LocationMetadata
unsigned Slot = 0;
// Common fields (0-2).
DILocation *DIL = cast<DILocation>(getFnMetadataByID(Record[Slot++]));
DILocalVariable *Var =
cast<DILocalVariable>(getFnMetadataByID(Record[Slot++]));
DIExpression *Expr =
cast<DIExpression>(getFnMetadataByID(Record[Slot++]));

// Union field (3: LocationMetadata | Value).
Metadata *RawLocation = nullptr;
if (BitCode == bitc::FUNC_CODE_DEBUG_RECORD_VALUE_SIMPLE) {
Value *V = nullptr;
unsigned TyID = 0;
// We never expect to see a fwd reference value here because
// use-before-defs are encoded with the standard non-abbrev record
// type (they'd require encoding the type too, and they're rare). As a
// result, getValueTypePair only ever increments Slot by one here (once
// for the value, never twice for value and type).
unsigned SlotBefore = Slot;
if (getValueTypePair(Record, Slot, NextValueNo, V, TyID, CurBB))
return error("Invalid dbg record: invalid value");
(void)SlotBefore;
assert((SlotBefore == Slot - 1) && "unexpected fwd ref");
RawLocation = ValueAsMetadata::get(V);
} else {
RawLocation = getFnMetadataByID(Record[Slot++]);
}

DPValue *DPV = nullptr;
switch (BitCode) {
case bitc::FUNC_CODE_DEBUG_RECORD_VALUE:
case bitc::FUNC_CODE_DEBUG_RECORD_VALUE_SIMPLE:
DPV = new DPValue(RawLocation, Var, Expr, DIL,
DPValue::LocationType::Value);
break;
case bitc::FUNC_CODE_DEBUG_RECORD_DECLARE:
DPV = new DPValue(RawLocation, Var, Expr, DIL,
DPValue::LocationType::Declare);
break;
case bitc::FUNC_CODE_DEBUG_RECORD_ASSIGN: {
DIAssignID *ID = cast<DIAssignID>(getFnMetadataByID(Record[Slot++]));
DIExpression *AddrExpr =
cast<DIExpression>(getFnMetadataByID(Record[Slot++]));
Metadata *Addr = getFnMetadataByID(Record[Slot++]);
DPV = new DPValue(RawLocation, Var, Expr, ID, Addr, AddrExpr, DIL);
break;
}
default:
llvm_unreachable("Unknown DPValue bitcode");
}
Inst->getParent()->insertDbgRecordBefore(DPV, Inst->getIterator());
continue; // This isn't an instruction.
}
case bitc::FUNC_CODE_INST_CALL: {
// CALL: [paramattrs, cc, fmf, fnty, fnid, arg0, arg1...]
if (Record.size() < 3)
Expand Down Expand Up @@ -6677,10 +6761,21 @@ Error BitcodeReader::materialize(GlobalValue *GV) {
// Move the bit stream to the saved position of the deferred function body.
if (Error JumpFailed = Stream.JumpToBit(DFII->second))
return JumpFailed;

// Set the debug info mode to "new", forcing a mismatch between
// module and function debug modes. This is okay because we'll convert
// everything back to the old mode after parsing.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is it alright to feed intrinsic-mode code into the convert-from-removedis-mode utilities? I can see how it'd work.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, convertFromNewDbgValues doesn't do any checking. It just converts DbgRecords to intrinsics. It doesn't do anything / doesn't care if it sees existing intrinsics.

// FIXME: Remove this once all tools support RemoveDIs.
F->IsNewDbgInfoFormat = true;

if (Error Err = parseFunctionBody(F))
return Err;
F->setIsMaterializable(false);

// Convert new debug info records into intrinsics.
// FIXME: Remove this once all tools support RemoveDIs.
F->convertFromNewDbgValues();

if (StripDebugInfo)
stripDebugInfo(*F);

Expand Down
120 changes: 102 additions & 18 deletions llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,9 @@ namespace llvm {
extern FunctionSummary::ForceSummaryHotnessType ForceSummaryEdgesCold;
}

extern bool WriteNewDbgInfoFormatToBitcode;
extern llvm::cl::opt<bool> UseNewDbgInfoFormat;

namespace {

/// These are manifest constants used by the bitcode writer. They do not need to
Expand Down Expand Up @@ -128,6 +131,7 @@ enum {
FUNCTION_INST_RET_VAL_ABBREV,
FUNCTION_INST_UNREACHABLE_ABBREV,
FUNCTION_INST_GEP_ABBREV,
FUNCTION_DEBUG_RECORD_VALUE_ABBREV,
};

/// Abstract class to manage the bitcode writing, subclassed for each bitcode
Expand Down Expand Up @@ -3512,25 +3516,95 @@ void ModuleBitcodeWriter::writeFunction(
NeedsMetadataAttachment |= I.hasMetadataOtherThanDebugLoc();

// If the instruction has a debug location, emit it.
DILocation *DL = I.getDebugLoc();
if (!DL)
continue;

if (DL == LastDL) {
// Just repeat the same debug loc as last time.
Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC_AGAIN, Vals);
continue;
if (DILocation *DL = I.getDebugLoc()) {
if (DL == LastDL) {
// Just repeat the same debug loc as last time.
Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC_AGAIN, Vals);
} else {
Vals.push_back(DL->getLine());
Vals.push_back(DL->getColumn());
Vals.push_back(VE.getMetadataOrNullID(DL->getScope()));
Vals.push_back(VE.getMetadataOrNullID(DL->getInlinedAt()));
Vals.push_back(DL->isImplicitCode());
Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC, Vals);
Vals.clear();
LastDL = DL;
}
}

Vals.push_back(DL->getLine());
Vals.push_back(DL->getColumn());
Vals.push_back(VE.getMetadataOrNullID(DL->getScope()));
Vals.push_back(VE.getMetadataOrNullID(DL->getInlinedAt()));
Vals.push_back(DL->isImplicitCode());
Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC, Vals);
Vals.clear();

LastDL = DL;
// If the instruction has DbgRecords attached to it, emit them. Note that
// they come after the instruction so that it's easy to attach them again
// when reading the bitcode, even though conceptually the debug locations
// start "before" the instruction.
if (I.hasDbgRecords() && WriteNewDbgInfoFormatToBitcode) {
/// Try to push the value only (unwrapped), otherwise push the
/// metadata wrapped value. Returns true if the value was pushed
/// without the ValueAsMetadata wrapper.
auto PushValueOrMetadata = [&Vals, InstID,
this](Metadata *RawLocation) {
assert(RawLocation && "RawLocation unexpectedly null in DPValue");
if (ValueAsMetadata *VAM = dyn_cast<ValueAsMetadata>(RawLocation)) {
SmallVector<unsigned, 2> ValAndType;
// If the value is a fwd-ref the type is also pushed. We don't
// want the type, so fwd-refs are kept wrapped (pushValueAndType
// returns false if the value is pushed without type).
if (!pushValueAndType(VAM->getValue(), InstID, ValAndType)) {
Comment on lines +3548 to +3551
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I'm reading this right, we'll only encode a Value reference if it wouldn't have the type, and so isn't a fwd-dec? Seems alright to me; IMO we have the ability to choose how these things get encoded.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah that's right. It saves us needing to encode the fwd-ref type, which would result in an extra field in the abbrev that is nonzero for < 1% of records.

Vals.push_back(ValAndType[0]);
return true;
}
}
// The metadata is a DIArgList, or ValueAsMetadata wrapping a
// fwd-ref. Push the metadata ID.
Vals.push_back(VE.getMetadataID(RawLocation));
return false;
};

// Write out non-instruction debug information attached to this
// instruction. Write it after the instruction so that it's easy to
// re-attach to the instruction reading the records in.
for (DbgRecord &DR : I.DbgMarker->getDbgRecordRange()) {
if (DPLabel *DPL = dyn_cast<DPLabel>(&DR)) {
Vals.push_back(VE.getMetadataID(&*DPL->getDebugLoc()));
Vals.push_back(VE.getMetadataID(DPL->getLabel()));
Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_RECORD_LABEL, Vals);
Vals.clear();
continue;
}

// First 3 fields are common to all kinds:
// DILocation, DILocalVariable, DIExpression
// dbg_value (FUNC_CODE_DEBUG_RECORD_VALUE)
// ..., LocationMetadata
// dbg_value (FUNC_CODE_DEBUG_RECORD_VALUE_SIMPLE - abbrev'd)
// ..., Value
// dbg_declare (FUNC_CODE_DEBUG_RECORD_DECLARE)
// ..., LocationMetadata
// dbg_assign (FUNC_CODE_DEBUG_RECORD_ASSIGN)
// ..., LocationMetadata, DIAssignID, DIExpression, LocationMetadata
DPValue &DPV = cast<DPValue>(DR);
Vals.push_back(VE.getMetadataID(&*DPV.getDebugLoc()));
Vals.push_back(VE.getMetadataID(DPV.getVariable()));
Vals.push_back(VE.getMetadataID(DPV.getExpression()));
if (DPV.isDbgValue()) {
if (PushValueOrMetadata(DPV.getRawLocation()))
Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_RECORD_VALUE_SIMPLE, Vals,
FUNCTION_DEBUG_RECORD_VALUE_ABBREV);
else
Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_RECORD_VALUE, Vals);
} else if (DPV.isDbgDeclare()) {
Vals.push_back(VE.getMetadataID(DPV.getRawLocation()));
Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_RECORD_DECLARE, Vals);
} else {
assert(DPV.isDbgAssign() && "Unexpected DbgRecord kind");
Vals.push_back(VE.getMetadataID(DPV.getRawLocation()));
Vals.push_back(VE.getMetadataID(DPV.getAssignID()));
Vals.push_back(VE.getMetadataID(DPV.getAddressExpression()));
Vals.push_back(VE.getMetadataID(DPV.getRawAddress()));
Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_RECORD_ASSIGN, Vals);
}
Vals.clear();
}
}
}

if (BlockAddress *BA = BlockAddress::lookup(&BB)) {
Expand Down Expand Up @@ -3771,7 +3845,17 @@ void ModuleBitcodeWriter::writeBlockInfo() {
FUNCTION_INST_GEP_ABBREV)
llvm_unreachable("Unexpected abbrev ordering!");
}

{
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_DEBUG_RECORD_VALUE_SIMPLE));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 7)); // dbgloc
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 7)); // var
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 7)); // expr
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // val
if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) !=
FUNCTION_DEBUG_RECORD_VALUE_ABBREV)
llvm_unreachable("Unexpected abbrev ordering! 1");
}
Stream.ExitBlock();
}

Expand Down
20 changes: 10 additions & 10 deletions llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,20 @@
#include "llvm/Pass.h"
using namespace llvm;

extern bool WriteNewDbgInfoFormatToBitcode;

PreservedAnalyses BitcodeWriterPass::run(Module &M, ModuleAnalysisManager &AM) {
// RemoveDIs: there's no bitcode representation of the DbgRecord debug-info,
// convert to dbg.values before writing out.
bool IsNewDbgInfoFormat = M.IsNewDbgInfoFormat;
if (IsNewDbgInfoFormat)
bool ConvertToOldDbgFormatForWrite =
M.IsNewDbgInfoFormat && !WriteNewDbgInfoFormatToBitcode;
if (ConvertToOldDbgFormatForWrite)
M.convertFromNewDbgValues();

const ModuleSummaryIndex *Index =
EmitSummaryIndex ? &(AM.getResult<ModuleSummaryIndexAnalysis>(M))
: nullptr;
WriteBitcodeToFile(M, OS, ShouldPreserveUseListOrder, Index, EmitModuleHash);

if (IsNewDbgInfoFormat)
if (ConvertToOldDbgFormatForWrite)
M.convertToNewDbgValues();

return PreservedAnalyses::all();
Expand All @@ -56,16 +57,15 @@ namespace {
StringRef getPassName() const override { return "Bitcode Writer"; }

bool runOnModule(Module &M) override {
// RemoveDIs: there's no bitcode representation of the DbgRecord
// debug-info, convert to dbg.values before writing out.
bool IsNewDbgInfoFormat = M.IsNewDbgInfoFormat;
if (IsNewDbgInfoFormat)
bool ConvertToOldDbgFormatForWrite =
M.IsNewDbgInfoFormat && !WriteNewDbgInfoFormatToBitcode;
if (ConvertToOldDbgFormatForWrite)
M.convertFromNewDbgValues();

WriteBitcodeToFile(M, OS, ShouldPreserveUseListOrder, /*Index=*/nullptr,
/*EmitModuleHash=*/false);

if (IsNewDbgInfoFormat)
if (ConvertToOldDbgFormatForWrite)
M.convertToNewDbgValues();
return false;
}
Expand Down
Loading