Skip to content

Commit 36c55f4

Browse files
committed
[AMDGPU] Prevent hang in SIFoldOperands by caching Uses
The REG_SEQUENCE handling in foldOperand() is recursive, and the recursive call can modify the order of the register's use operands. That corrupts the range-based for loop iterating over those uses and can trigger an infinite loop. Cache the uses before processing them so that the iteration is not affected by such modifications. Added a repro MIR testcase.
1 parent ddc0f1d commit 36c55f4

File tree

2 files changed

+27
-7
lines changed

2 files changed

+27
-7
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -774,21 +774,25 @@ void SIFoldOperands::foldOperand(
774774
if (UseMI->isRegSequence()) {
775775
Register RegSeqDstReg = UseMI->getOperand(0).getReg();
776776
unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
777-
778-
for (auto &RSUse : make_early_inc_range(MRI->use_nodbg_operands(RegSeqDstReg))) {
779-
MachineInstr *RSUseMI = RSUse.getParent();
777+
778+
// Grab the use operands first
779+
SmallVector<MachineOperand *, 4> UsesToProcess;
780+
for (auto &Use : MRI->use_nodbg_operands(RegSeqDstReg)) {
781+
UsesToProcess.push_back(&Use);
782+
}
783+
for (auto *RSUse : UsesToProcess) {
784+
MachineInstr *RSUseMI = RSUse->getParent();
780785

781786
if (tryToFoldACImm(UseMI->getOperand(0), RSUseMI,
782-
RSUseMI->getOperandNo(&RSUse), FoldList))
787+
RSUseMI->getOperandNo(RSUse), FoldList))
783788
continue;
784789

785-
if (RSUse.getSubReg() != RegSeqDstSubReg)
790+
if (RSUse->getSubReg() != RegSeqDstSubReg)
786791
continue;
787792

788-
foldOperand(OpToFold, RSUseMI, RSUseMI->getOperandNo(&RSUse), FoldList,
793+
foldOperand(OpToFold, RSUseMI, RSUseMI->getOperandNo(RSUse), FoldList,
789794
CopiesToReplace);
790795
}
791-
792796
return;
793797
}
794798

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=si-fold-operands -verify-machineinstrs -o - %s
2+
3+
---
4+
name: fold_reg_sequence
5+
body: |
6+
bb.0:
7+
liveins: $vgpr0_vgpr1, $vgpr2
8+
%33:sreg_32 = S_MOV_B32 0
9+
%34:sreg_32 = S_MOV_B32 429
10+
%35:sreg_64 = REG_SEQUENCE killed %34, %subreg.sub0, %33, %subreg.sub1
11+
%49:vgpr_32 = V_MUL_HI_U32_e64 $vgpr2, %35.sub0, implicit $exec
12+
%75:vgpr_32 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s32), addrspace 1)
13+
%77:vgpr_32 = V_MUL_HI_U32_e64 %75, %35.sub0, implicit $exec
14+
S_ENDPGM 0
15+
...
16+

0 commit comments

Comments
 (0)