Skip to content

Commit 20ded73

Browse files
committed
[AMDGPU] Prevent hang in SIFoldOperands
SI folding of a REG_SEQUENCE can hang if RSUse is used in multiple MIs. Added a repro MIR test case.
1 parent 2dfa30d commit 20ded73

File tree

2 files changed

+22
-1
lines changed

2 files changed

+22
-1
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
1414
#include "SIMachineFunctionInfo.h"
1515
#include "llvm/ADT/DepthFirstIterator.h"
16+
#include "llvm/ADT/DenseSet.h"
1617
#include "llvm/CodeGen/MachineFunctionPass.h"
1718
#include "llvm/CodeGen/MachineOperand.h"
1819

@@ -74,6 +75,7 @@ class SIFoldOperands : public MachineFunctionPass {
7475
const SIRegisterInfo *TRI;
7576
const GCNSubtarget *ST;
7677
const SIMachineFunctionInfo *MFI;
78+
mutable DenseSet<MachineInstr *> SeenMI;
7779

7880
bool frameIndexMayFold(const MachineInstr &UseMI, int OpNo,
7981
const MachineOperand &OpToFold) const;
@@ -772,7 +774,6 @@ void SIFoldOperands::foldOperand(
772774
if (UseMI->isRegSequence()) {
773775
Register RegSeqDstReg = UseMI->getOperand(0).getReg();
774776
unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
775-
776777
for (auto &RSUse : make_early_inc_range(MRI->use_nodbg_operands(RegSeqDstReg))) {
777778
MachineInstr *RSUseMI = RSUse.getParent();
778779

@@ -783,6 +784,10 @@ void SIFoldOperands::foldOperand(
783784
if (RSUse.getSubReg() != RegSeqDstSubReg)
784785
continue;
785786

787+
if (SeenMI.contains(RSUseMI))
788+
continue;
789+
SeenMI.insert(RSUseMI);
790+
786791
foldOperand(OpToFold, RSUseMI, RSUseMI->getOperandNo(&RSUse), FoldList,
787792
CopiesToReplace);
788793
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=si-fold-operands -verify-machineinstrs -o - %s
2+
3+
---
4+
name: fold_reg_sequence
5+
body: |
6+
bb.0:
7+
liveins: $vgpr0_vgpr1, $vgpr2
8+
%33:sreg_32 = S_MOV_B32 0
9+
%34:sreg_32 = S_MOV_B32 429
10+
%35:sreg_64 = REG_SEQUENCE killed %34, %subreg.sub0, %33, %subreg.sub1
11+
%49:vgpr_32 = V_MUL_HI_U32_e64 $vgpr2, %35.sub0, implicit $exec
12+
%75:vgpr_32 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s32), addrspace 1)
13+
%77:vgpr_32 = V_MUL_HI_U32_e64 %75, %35.sub0, implicit $exec
14+
S_ENDPGM 0
15+
...
16+

0 commit comments

Comments
 (0)