Skip to content

Commit 02bd766

Browse files
choikwa, bcahoon
authored and committed
[AMDGPU] Prevent hang in SIFoldOperands by caching uses (llvm#82099)
foldOperand() for REG_SEQUENCE is recursive, and that recursion can trigger an infinite loop because the method can modify the operand order, which breaks the range-based for loop over the register's uses. This patch fixes the issue by caching the use operands beforehand and then iterating over the cache rather than using the use-operand iterator directly. Change-Id: Iac081f4e363984cfd9917672e7d93107c51c97ac
1 parent 03d8fe5 commit 02bd766

File tree

2 files changed

+27
-6
lines changed

2 files changed

+27
-6
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -773,20 +773,23 @@ void SIFoldOperands::foldOperand(
773773
Register RegSeqDstReg = UseMI->getOperand(0).getReg();
774774
unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
775775

776-
for (auto &RSUse : make_early_inc_range(MRI->use_nodbg_operands(RegSeqDstReg))) {
777-
MachineInstr *RSUseMI = RSUse.getParent();
776+
// Grab the use operands first
777+
SmallVector<MachineOperand *, 4> UsesToProcess;
778+
for (auto &Use : MRI->use_nodbg_operands(RegSeqDstReg))
779+
UsesToProcess.push_back(&Use);
780+
for (auto *RSUse : UsesToProcess) {
781+
MachineInstr *RSUseMI = RSUse->getParent();
778782

779783
if (tryToFoldACImm(UseMI->getOperand(0), RSUseMI,
780-
RSUseMI->getOperandNo(&RSUse), FoldList))
784+
RSUseMI->getOperandNo(RSUse), FoldList))
781785
continue;
782786

783-
if (RSUse.getSubReg() != RegSeqDstSubReg)
787+
if (RSUse->getSubReg() != RegSeqDstSubReg)
784788
continue;
785789

786-
foldOperand(OpToFold, RSUseMI, RSUseMI->getOperandNo(&RSUse), FoldList,
790+
foldOperand(OpToFold, RSUseMI, RSUseMI->getOperandNo(RSUse), FoldList,
787791
CopiesToReplace);
788792
}
789-
790793
return;
791794
}
792795

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=si-fold-operands -verify-machineinstrs -o - %s
2+
3+
---
4+
name: fold_reg_sequence
5+
body: |
6+
bb.0:
7+
liveins: $vgpr0_vgpr1, $vgpr2
8+
9+
%0:sreg_32 = S_MOV_B32 0
10+
%1:sreg_32 = S_MOV_B32 429
11+
%2:sreg_64 = REG_SEQUENCE killed %1, %subreg.sub0, %0, %subreg.sub1
12+
%3:vgpr_32 = V_MUL_HI_U32_e64 $vgpr2, %2.sub0, implicit $exec
13+
%4:vgpr_32 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s32), addrspace 1)
14+
%5:vgpr_32 = V_MUL_HI_U32_e64 %4, %2.sub0, implicit $exec
15+
S_ENDPGM 0
16+
17+
...
18+

0 commit comments

Comments
 (0)