Skip to content

[DebugInfo][RemoveDIs] Add a DPValue implementation for instcombine sinking #77930

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jan 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions llvm/include/llvm/IR/DebugProgramInstruction.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ class DPValue : public ilist_node<DPValue>, private DebugValueUser {
public:
void deleteInstr();

const Instruction *getInstruction() const;
const BasicBlock *getParent() const;
BasicBlock *getParent();
void dump() const;
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/IR/DebugProgramInstruction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,10 @@ bool DPValue::isKillAddress() const {
return !Addr || isa<UndefValue>(Addr);
}

const Instruction *DPValue::getInstruction() const {
return Marker->MarkedInstr;
}

const BasicBlock *DPValue::getParent() const {
return Marker->MarkedInstr->getParent();
}
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Transforms/InstCombine/InstCombineInternal.h
Original file line number Diff line number Diff line change
Expand Up @@ -736,6 +736,13 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
Value *EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned);

bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock);
void tryToSinkInstructionDbgValues(
Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
BasicBlock *DestBlock, SmallVectorImpl<DbgVariableIntrinsic *> &DbgUsers);
void tryToSinkInstructionDPValues(Instruction *I,
BasicBlock::iterator InsertPos,
BasicBlock *SrcBlock, BasicBlock *DestBlock,
SmallVectorImpl<DPValue *> &DPUsers);

bool removeInstructionsBeforeUnreachable(Instruction &I);
void addDeadEdge(BasicBlock *From, BasicBlock *To,
Expand Down
154 changes: 147 additions & 7 deletions llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4313,12 +4313,31 @@ bool InstCombinerImpl::tryToSinkInstruction(Instruction *I,
// mark the location undef: we know it was supposed to receive a new location
// here, but that computation has been sunk.
SmallVector<DbgVariableIntrinsic *, 2> DbgUsers;
findDbgUsers(DbgUsers, I);
SmallVector<DPValue *, 2> DPValues;
findDbgUsers(DbgUsers, I, &DPValues);
if (!DbgUsers.empty())
tryToSinkInstructionDbgValues(I, InsertPos, SrcBlock, DestBlock, DbgUsers);
if (!DPValues.empty())
tryToSinkInstructionDPValues(I, InsertPos, SrcBlock, DestBlock, DPValues);

// PS: there are numerous flaws with this behaviour, not least that right now
// assignments can be re-ordered past other assignments to the same variable
// if they use different Values. Creating more undef assignements can never be
// undone. And salvaging all users outside of this block can un-necessarily
// alter the lifetime of the live-value that the variable refers to.
// Some of these things can be resolved by tolerating debug use-before-defs in
// LLVM-IR, however it depends on the instruction-referencing CodeGen backend
// being used for more architectures.

return true;
}

void InstCombinerImpl::tryToSinkInstructionDbgValues(
Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
BasicBlock *DestBlock, SmallVectorImpl<DbgVariableIntrinsic *> &DbgUsers) {
// For all debug values in the destination block, the sunk instruction
// will still be available, so they do not need to be dropped.
SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSalvage;
SmallVector<DPValue *, 2> DPValuesToSalvage;
for (auto &DbgUser : DbgUsers)
if (DbgUser->getParent() != DestBlock)
DbgUsersToSalvage.push_back(DbgUser);
Expand Down Expand Up @@ -4362,19 +4381,140 @@ bool InstCombinerImpl::tryToSinkInstruction(Instruction *I,

// Perform salvaging without the clones, then sink the clones.
if (!DIIClones.empty()) {
// RemoveDIs: pass in empty vector of DPValues until we get to instrumenting
// this pass.
SmallVector<DPValue *, 1> DummyDPValues;
salvageDebugInfoForDbgValues(*I, DbgUsersToSalvage, DummyDPValues);
salvageDebugInfoForDbgValues(*I, DbgUsersToSalvage, {});
// The clones are in reverse order of original appearance, reverse again to
// maintain the original order.
for (auto &DIIClone : llvm::reverse(DIIClones)) {
DIIClone->insertBefore(&*InsertPos);
LLVM_DEBUG(dbgs() << "SINK: " << *DIIClone << '\n');
}
}
}

return true;
void InstCombinerImpl::tryToSinkInstructionDPValues(
Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
BasicBlock *DestBlock, SmallVectorImpl<DPValue *> &DPValues) {
// Implementation of tryToSinkInstructionDbgValues, but for the DPValue of
// variable assignments rather than dbg.values.

// Fetch all DPValues not already in the destination.
SmallVector<DPValue *, 2> DPValuesToSalvage;
for (auto &DPV : DPValues)
if (DPV->getParent() != DestBlock)
DPValuesToSalvage.push_back(DPV);

// Fetch a second collection, of DPValues in the source block that we're going
// to sink.
SmallVector<DPValue *> DPValuesToSink;
for (DPValue *DPV : DPValuesToSalvage)
if (DPV->getParent() == SrcBlock)
DPValuesToSink.push_back(DPV);

// Sort DPValues according to their position in the block. This is a partial
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have you tested the performance of biting the O(n) bullet and inspecting the ordering of DPValues attached to a single instruction directly as part of the Order lambda? Worst case it'll be worse than what's here, but I'm curious how the overhead of using several map/set structures (even Small ones) compare in the typical case.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Numbers: http://llvm-compile-time-tracker.com/compare.php?from=90525125421300d9d1b6bf55288bd1871855d35d&to=5c1b0a6c7d28637f3e0ac059e5af561a96396365&stat=instructions%3Au

No regression in stage1-ReleaseLTO-g, there are some differences in the non-debuginfo modes, I'm not sure why that is really. Something else to keep our eyes on I guess.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I actually thinking about the performance of removing all the filtermap/duplicate detection logic and just iterating over getDbgValueRange() to determine whether A or B comes first when they're attached to the same instruction - I submitted a quick test to the compile time tracker[0] though and it looks like it's neutral for most cases and a slowdown in the bad case (testing specifically with RemoveDIs enabled), so I'm happy with this approach.
[0] http://llvm-compile-time-tracker.com/compare.php?from=319280746b199b35c49798383b6dd4c01286c5ff&to=c29a7ff328a4148aabd7e147f2bfc04e9801fcb6&stat=instructions:u

// order: DPValues attached to different instructions will be ordered by the
// instruction order, but DPValues attached to the same instruction won't
// have an order.
auto Order = [](DPValue *A, DPValue *B) -> bool {
return B->getInstruction()->comesBefore(A->getInstruction());
};
llvm::stable_sort(DPValuesToSink, Order);

// If there are two assignments to the same variable attached to the same
// instruction, the ordering between the two assignments is important. Scan
// for this (rare) case and establish which is the last assignment.
using InstVarPair = std::pair<const Instruction *, DebugVariable>;
SmallDenseMap<InstVarPair, DPValue *> FilterOutMap;
if (DPValuesToSink.size() > 1) {
SmallDenseMap<InstVarPair, unsigned> CountMap;
// Count how many assignments to each variable there is per instruction.
for (DPValue *DPV : DPValuesToSink) {
DebugVariable DbgUserVariable =
DebugVariable(DPV->getVariable(), DPV->getExpression(),
DPV->getDebugLoc()->getInlinedAt());
CountMap[std::make_pair(DPV->getInstruction(), DbgUserVariable)] += 1;
}

// If there are any instructions with two assignments, add them to the
// FilterOutMap to record that they need extra filtering.
SmallPtrSet<const Instruction *, 4> DupSet;
for (auto It : CountMap) {
if (It.second > 1) {
FilterOutMap[It.first] = nullptr;
DupSet.insert(It.first.first);
}
}

// For all instruction/variable pairs needing extra filtering, find the
// latest assignment.
for (const Instruction *Inst : DupSet) {
for (DPValue &DPV : llvm::reverse(Inst->getDbgValueRange())) {
DebugVariable DbgUserVariable =
DebugVariable(DPV.getVariable(), DPV.getExpression(),
DPV.getDebugLoc()->getInlinedAt());
auto FilterIt =
FilterOutMap.find(std::make_pair(Inst, DbgUserVariable));
if (FilterIt == FilterOutMap.end())
continue;
if (FilterIt->second != nullptr)
continue;
FilterIt->second = &DPV;
}
}
}

// Perform cloning of the DPValues that we plan on sinking, filter out any
// duplicate assignments identified above.
SmallVector<DPValue *, 2> DPVClones;
SmallSet<DebugVariable, 4> SunkVariables;
for (DPValue *DPV : DPValuesToSink) {
if (DPV->Type == DPValue::LocationType::Declare)
continue;

DebugVariable DbgUserVariable =
DebugVariable(DPV->getVariable(), DPV->getExpression(),
DPV->getDebugLoc()->getInlinedAt());

// For any variable where there were multiple assignments in the same place,
// ignore all but the last assignment.
if (!FilterOutMap.empty()) {
InstVarPair IVP = std::make_pair(DPV->getInstruction(), DbgUserVariable);
auto It = FilterOutMap.find(IVP);

// Filter out.
if (It != FilterOutMap.end() && It->second != DPV)
continue;
}

if (!SunkVariables.insert(DbgUserVariable).second)
continue;

if (DPV->isDbgAssign())
continue;

DPVClones.emplace_back(DPV->clone());
LLVM_DEBUG(dbgs() << "CLONE: " << *DPVClones.back() << '\n');
}

// Perform salvaging without the clones, then sink the clones.
if (DPVClones.empty())
return;

salvageDebugInfoForDbgValues(*I, {}, DPValuesToSalvage);

// The clones are in reverse order of original appearance. Assert that the
// head bit is set on the iterator as we _should_ have received it via
// getFirstInsertionPt. Inserting like this will reverse the clone order as
// we'll repeatedly insert at the head, such as:
// DPV-3 (third insertion goes here)
// DPV-2 (second insertion goes here)
// DPV-1 (first insertion goes here)
// Any-Prior-DPVs
// InsertPtInst
assert(InsertPos.getHeadBit());
for (DPValue *DPVClone : DPVClones) {
InsertPos->getParent()->insertDPValueBefore(DPVClone, InsertPos);
LLVM_DEBUG(dbgs() << "SINK: " << *DPVClone << '\n');
}
}

bool InstCombinerImpl::run() {
Expand Down
74 changes: 74 additions & 0 deletions llvm/test/DebugInfo/instcombine-sink-latest-assignment.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
; RUN: opt %s -o - -S --passes=instcombine | FileCheck %s
; RUN: opt %s -o - -S --passes=instcombine --try-experimental-debuginfo-iterators | FileCheck %s
;
; CHECK-LABEL: for.body:
; CHECK-NEXT: %sub.ptr.rhs.cast.i.i = ptrtoint ptr %call2.i.i to i64,
; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i64 %sub.ptr.rhs.cast.i.i, metadata !{{[0-9]*}}, metadata !DIExpression(DW_OP_LLVM_convert, 64, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_constu, 1, DW_OP_minus, DW_OP_stack_value)
;
;; The code below is representative of a common situation: where we've had a
;; loop be completely optimised out, leaving dbg.values representing the
;; assignments for it, including a rotated assignment to the loop counter.
;; Below, it's the two dbg.values in the entry block, assigning first the
;; value of %conv.i, then the value of %conv.i minus one.
;;
;; When instcombine sinks %conv.i, it's critical that if it sinks a dbg.value
;; with it, it sinks the most recent assignment. Otherwise it will re-order the
;; assignments below, and a fall counter assignment will continue on from the
;; end of the optimised-out loop.
;;
;; The check lines test that when the trunc sinks (along with the ptrtoint),
;; we get the dbg.value with a DW_OP_minus in it.

; ModuleID = 'tmp.ll'
source_filename = "tmp.ll"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

declare void @llvm.dbg.value(metadata, metadata, metadata)
declare void @llvm.dbg.declare(metadata, metadata, metadata)

define void @_ZN4llvm16MCObjectStreamer15EmitInstructionERKNS_6MCInstE(i1 %tobool.not) local_unnamed_addr {
entry:
%call2.i.i = load volatile ptr, ptr null, align 8, !dbg !4
%sub.ptr.rhs.cast.i.i = ptrtoint ptr %call2.i.i to i64, !dbg !4
%conv.i = trunc i64 %sub.ptr.rhs.cast.i.i to i32, !dbg !4
tail call void @llvm.dbg.value(metadata i32 %conv.i, metadata !16, metadata !DIExpression()), !dbg !18
tail call void @llvm.dbg.value(metadata i32 %conv.i, metadata !16, metadata !DIExpression(DW_OP_constu, 1, DW_OP_minus, DW_OP_stack_value)), !dbg !18
br i1 %tobool.not, label %common.ret, label %for.body

common.ret: ; preds = %for.body, %entry
ret void

for.body: ; preds = %entry
%call2 = call ptr @_ZNK4llvm6MCInst10getOperandEj(i32 %conv.i)
br label %common.ret
}

declare i32 @_ZNK4llvm6MCInst14getNumOperandsEv()

declare ptr @_ZNK4llvm6MCInst10getOperandEj(i32) local_unnamed_addr

declare i64 @_ZNK4llvm25SmallVectorTemplateCommonINS_9MCOperandEvE4sizeEv()

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3}

!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2, globals: !2, imports: !2, splitDebugInlining: false, nameTableKind: None)
!1 = !DIFile(filename: "foo.cpp", directory: ".")
!2 = !{}
!3 = !{i32 2, !"Debug Info Version", i32 3}
!4 = !DILocation(line: 197, column: 26, scope: !5)
!5 = distinct !DILexicalBlock(scope: !7, file: !6, line: 197, column: 3)
!6 = !DIFile(filename: "foo.cpp", directory: ".")
!7 = distinct !DISubprogram(name: "EmitInstruction", linkageName: "_ZN4llvm16MCObjectStreamer15EmitInstructionERKNS_6MCInstE", scope: !8, file: !6, line: 195, type: !13, scopeLine: 195, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, declaration: !15, retainedNodes: !2)
!8 = distinct !DICompositeType(tag: DW_TAG_class_type, name: "MCObjectStreamer", scope: !10, file: !9, line: 33, size: 2432, flags: DIFlagTypePassByReference | DIFlagNonTrivial, elements: !2, vtableHolder: !11)
!9 = !DIFile(filename: "bar.h", directory: ".")
!10 = !DINamespace(name: "llvm", scope: null)
!11 = distinct !DICompositeType(tag: DW_TAG_class_type, name: "MCStreamer", scope: !10, file: !12, line: 108, size: 2240, flags: DIFlagFwdDecl | DIFlagNonTrivial, identifier: "_ZTSN4llvm10MCStreamerE")
!12 = !DIFile(filename: "baz.h", directory: ".")
!13 = distinct !DISubroutineType(types: !14)
!14 = !{null}
!15 = !DISubprogram(name: "EmitInstruction", linkageName: "_ZN4llvm16MCObjectStreamer15EmitInstructionERKNS_6MCInstE", scope: !8, file: !9, line: 88, type: !13, scopeLine: 88, containingType: !8, virtualIndex: 86, flags: DIFlagPublic | DIFlagPrototyped, spFlags: DISPFlagVirtual | DISPFlagOptimized)
!16 = !DILocalVariable(name: "i", scope: !5, file: !6, line: 197, type: !17)
!17 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned)
!18 = !DILocation(line: 0, scope: !5)