AMDGPU: Fix trying to query end iterator for DebugLoc #129886

Merged
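
This patch fixes an invalid end-iterator query in SIFixSGPRCopies: the pass took the DebugLoc for newly built instructions from its insertion-point iterator via PointToInsertTo->getDebugLoc(), but that iterator can be end() (for PHI inputs the insertion point is getFirstInstrTerminator(), which is the end iterator in a block with no terminator), and dereferencing the end iterator is invalid and can assert or crash. The fix threads the DebugLoc of the instruction being rewritten through tryMoveVGPRConstToSGPR instead. A minimal sketch of the hazard and the safe pattern, with emitCopyAt as a hypothetical helper (not part of the pass or the LLVM API):

// Sketch only; assumes the usual LLVM CodeGen headers:
//   llvm/CodeGen/MachineBasicBlock.h, llvm/CodeGen/MachineInstrBuilder.h,
//   llvm/CodeGen/TargetInstrInfo.h
// emitCopyAt is a hypothetical helper used purely for illustration.
void emitCopyAt(llvm::MachineBasicBlock &MBB,
                llvm::MachineBasicBlock::iterator InsertPt,
                llvm::MachineInstr &RewrittenMI,
                const llvm::TargetInstrInfo *TII,
                llvm::Register Dst, llvm::Register Src) {
  using namespace llvm;
  // Hazard: InsertPt may equal MBB.end(), e.g. when it came from
  // getFirstInstrTerminator() in a terminator-less block, so
  //   BuildMI(MBB, InsertPt, InsertPt->getDebugLoc(), ...)
  // dereferences the end iterator.

  // Safe: take the DebugLoc from an instruction known to exist, here the
  // instruction whose operand is being rewritten, and pass it explicitly.
  const DebugLoc &DL = RewrittenMI.getDebugLoc();
  BuildMI(MBB, InsertPt, DL, TII->get(TargetOpcode::COPY), Dst).addReg(Src);
}
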
16 changes: 8 additions & 8 deletions llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -155,7 +155,8 @@ class SIFixSGPRCopies {
   // have any other uses.
   bool tryMoveVGPRConstToSGPR(MachineOperand &MO, Register NewDst,
                               MachineBasicBlock *BlockToInsertTo,
-                              MachineBasicBlock::iterator PointToInsertTo);
+                              MachineBasicBlock::iterator PointToInsertTo,
+                              const DebugLoc &DL);
 };
 
 class SIFixSGPRCopiesLegacy : public MachineFunctionPass {
@@ -682,11 +683,11 @@ bool SIFixSGPRCopies::run(MachineFunction &MF) {
           MachineBasicBlock::iterator PointToInsertCopy =
               MI.isPHI() ? BlockToInsertCopy->getFirstInstrTerminator() : I;
 
+          const DebugLoc &DL = MI.getDebugLoc();
           if (!tryMoveVGPRConstToSGPR(MO, NewDst, BlockToInsertCopy,
-                                      PointToInsertCopy)) {
+                                      PointToInsertCopy, DL)) {
             MachineInstr *NewCopy =
-                BuildMI(*BlockToInsertCopy, PointToInsertCopy,
-                        PointToInsertCopy->getDebugLoc(),
+                BuildMI(*BlockToInsertCopy, PointToInsertCopy, DL,
                         TII->get(AMDGPU::COPY), NewDst)
                     .addReg(MO.getReg());
             MO.setReg(NewDst);
@@ -855,7 +856,7 @@ void SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
 bool SIFixSGPRCopies::tryMoveVGPRConstToSGPR(
     MachineOperand &MaybeVGPRConstMO, Register DstReg,
     MachineBasicBlock *BlockToInsertTo,
-    MachineBasicBlock::iterator PointToInsertTo) {
+    MachineBasicBlock::iterator PointToInsertTo, const DebugLoc &DL) {
 
   MachineInstr *DefMI = MRI->getVRegDef(MaybeVGPRConstMO.getReg());
   if (!DefMI || !DefMI->isMoveImmediate())
@@ -869,8 +870,7 @@ bool SIFixSGPRCopies::tryMoveVGPRConstToSGPR(
         MRI->getRegClass(MaybeVGPRConstMO.getReg());
     unsigned MoveSize = TRI->getRegSizeInBits(*SrcRC);
     unsigned MoveOp = MoveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
-    BuildMI(*BlockToInsertTo, PointToInsertTo, PointToInsertTo->getDebugLoc(),
-            TII->get(MoveOp), DstReg)
+    BuildMI(*BlockToInsertTo, PointToInsertTo, DL, TII->get(MoveOp), DstReg)
         .add(*SrcConst);
     if (MRI->hasOneUse(MaybeVGPRConstMO.getReg()))
       DefMI->eraseFromParent();
@@ -896,7 +896,7 @@ bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &MI,
         .add(MI.getOperand(1));
     MI.getOperand(1).setReg(TmpReg);
   } else if (tryMoveVGPRConstToSGPR(MI.getOperand(1), DstReg, MI.getParent(),
-                                    MI)) {
+                                    MI, MI.getDebugLoc())) {
     I = std::next(I);
     MI.eraseFromParent();
   }
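
Threading the location as a const DebugLoc & matches the parameter BuildMI itself takes, and it attributes the inserted COPY or S_MOV to the instruction being rewritten rather than to whatever instruction happens to follow the insertion point, which seems like the more meaningful source location in either case.
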
@@ -0,0 +1,49 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s

define i32 @rocrand_regression(ptr addrspace(1) %arg, i32 %arg0, i1 %cmp7) {
; CHECK-LABEL: rocrand_regression:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_and_b32_e32 v0, 1, v3
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; CHECK-NEXT: s_xor_b64 s[4:5], vcc, -1
; CHECK-NEXT: s_mov_b32 s8, 0
; CHECK-NEXT: .LBB0_1: ; %do.body
; CHECK-NEXT: ; =>This Loop Header: Depth=1
; CHECK-NEXT: ; Child Loop BB0_2 Depth 2
; CHECK-NEXT: s_mov_b64 s[6:7], 0
; CHECK-NEXT: .LBB0_2: ; %while.cond
; CHECK-NEXT: ; Parent Loop BB0_1 Depth=1
; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
; CHECK-NEXT: s_and_b64 s[10:11], exec, s[4:5]
; CHECK-NEXT: s_or_b64 s[6:7], s[10:11], s[6:7]
; CHECK-NEXT: s_andn2_b64 exec, exec, s[6:7]
; CHECK-NEXT: s_cbranch_execnz .LBB0_2
; CHECK-NEXT: ; %bb.3: ; %do.cond
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
; CHECK-NEXT: s_or_b32 s8, s8, 1
; CHECK-NEXT: s_cbranch_execnz .LBB0_1
; CHECK-NEXT: ; %bb.4: ; %DummyReturnBlock
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
br label %do.body

do.body: ; preds = %do.cond, %entry
%phi.0 = phi i32 [ %arg0, %do.cond ], [ 0, %entry ]
%phi.1 = phi i32 [ %add6, %do.cond ], [ 0, %entry ]
%add6 = or i32 %phi.1, 1
store i32 %phi.1, ptr addrspace(1) %arg, align 4
br label %while.cond

while.cond: ; preds = %while.cond, %do.body
%phi.2 = phi i32 [ %phi.0, %do.body ], [ 0, %while.cond ]
br i1 %cmp7, label %while.cond, label %do.cond

do.cond: ; preds = %while.cond
br i1 true, label %do.body, label %do.end

do.end: ; preds = %do.cond
ret i32 %phi.2
}
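
Judging from the pass change, this IR reproducer exercises the PHI path: its nested-loop structure makes SIFixSGPRCopies insert copies for PHI inputs at getFirstInstrTerminator() of an incoming block, which is the situation where the old code could query a DebugLoc through the end iterator.
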
46 changes: 46 additions & 0 deletions llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir
@@ -89,3 +89,49 @@ body: |
S_ENDPGM 0, implicit %1

...

# GCN-LABEL: name: find_debug_loc_end_iterator_regression
# GCN: %6:vreg_1 = COPY %4
# GCN: %14:sgpr_32 = S_MOV_B32 0

# GCN: %7:vgpr_32 = PHI %5, %bb.0, %1, %bb.3
# GCN: %8:sreg_32 = PHI %14, %bb.0, %9, %bb.3

# GCN: %11:sreg_64 = PHI %10, %bb.1, %12, %bb.2
# GCN: %13:sreg_64 = COPY %6
---
name: find_debug_loc_end_iterator_regression
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr2, $vgpr3

%0:vgpr_32 = COPY $vgpr3
%1:vgpr_32 = COPY $vgpr2
%2:sreg_64 = V_CMP_EQ_U32_e64 killed %0, 1, implicit $exec
%3:sreg_64 = S_MOV_B64 -1
%4:sreg_64 = S_XOR_B64 killed %2, killed %3, implicit-def dead $scc
%5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%6:vreg_1 = COPY %4

bb.1:
%7:vgpr_32 = PHI %5, %bb.0, %1, %bb.3
%8:sreg_32 = PHI %5, %bb.0, %9, %bb.3
%10:sreg_64 = S_MOV_B64 0

bb.2:
%11:sreg_64 = PHI %10, %bb.1, %12, %bb.2
%13:sreg_64 = COPY %6
%12:sreg_64 = SI_IF_BREAK %13, %11, implicit-def dead $scc
SI_LOOP %12, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
S_BRANCH %bb.3

bb.3:
%9:sreg_32 = S_OR_B32 %8, 1, implicit-def dead $scc
S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
S_BRANCH %bb.4

bb.4:
SI_RETURN

...
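
The RUN line of si-fix-sgpr-copies.mir sits above this hunk and is not shown; based on the GCN check prefix used above, it is presumably something like:

# RUN: llc -mtriple=amdgcn -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s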