Skip to content

Commit 13eb08b

Browse files
committed
[AMDGPU] Prevent hang in SIFoldOperands by caching Uses
The REG_SEQUENCE handling in foldOperand() recurses, and the recursive call can modify the order of the use operands, which invalidates the range-based for loop over those uses and can trigger an infinite loop. Cache the use operands before processing them so that the iteration is not disturbed. Added a repro MIR test case.
1 parent 08eced5 commit 13eb08b

File tree

2 files changed

+25
-6
lines changed

2 files changed

+25
-6
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -775,20 +775,23 @@ void SIFoldOperands::foldOperand(
775775
Register RegSeqDstReg = UseMI->getOperand(0).getReg();
776776
unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
777777

778-
for (auto &RSUse : make_early_inc_range(MRI->use_nodbg_operands(RegSeqDstReg))) {
779-
MachineInstr *RSUseMI = RSUse.getParent();
778+
// Grab the use operands first
779+
SmallVector<MachineOperand *, 4> UsesToProcess;
780+
for (auto &Use : MRI->use_nodbg_operands(RegSeqDstReg))
781+
UsesToProcess.push_back(&Use);
782+
for (auto *RSUse : UsesToProcess) {
783+
MachineInstr *RSUseMI = RSUse->getParent();
780784

781785
if (tryToFoldACImm(UseMI->getOperand(0), RSUseMI,
782-
RSUseMI->getOperandNo(&RSUse), FoldList))
786+
RSUseMI->getOperandNo(RSUse), FoldList))
783787
continue;
784788

785-
if (RSUse.getSubReg() != RegSeqDstSubReg)
789+
if (RSUse->getSubReg() != RegSeqDstSubReg)
786790
continue;
787791

788-
foldOperand(OpToFold, RSUseMI, RSUseMI->getOperandNo(&RSUse), FoldList,
792+
foldOperand(OpToFold, RSUseMI, RSUseMI->getOperandNo(RSUse), FoldList,
789793
CopiesToReplace);
790794
}
791-
792795
return;
793796
}
794797

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=si-fold-operands -verify-machineinstrs -o - %s
2+
3+
---
4+
name: fold_reg_sequence
5+
body: |
6+
bb.0:
7+
liveins: $vgpr0_vgpr1, $vgpr2
8+
%33:sreg_32 = S_MOV_B32 0
9+
%34:sreg_32 = S_MOV_B32 429
10+
%35:sreg_64 = REG_SEQUENCE killed %34, %subreg.sub0, %33, %subreg.sub1
11+
%49:vgpr_32 = V_MUL_HI_U32_e64 $vgpr2, %35.sub0, implicit $exec
12+
%75:vgpr_32 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s32), addrspace 1)
13+
%77:vgpr_32 = V_MUL_HI_U32_e64 %75, %35.sub0, implicit $exec
14+
S_ENDPGM 0
15+
...
16+

0 commit comments

Comments
 (0)