Skip to content

Commit a8bba69

Browse files
committed
[AMDGPU] Prevent hang in SIFoldOperands by caching Uses
foldOperand() for REG_SEQUENCE recurses, and this can trigger an infinite loop: the recursive call can modify the order of the use operands, which breaks the range-based for loop that is iterating over them. Cache the uses to process beforehand so that the iteration is not affected by those modifications. Added a MIR test case that reproduces the hang.
1 parent b2ebd8b commit a8bba69

File tree

2 files changed

+27
-6
lines changed

2 files changed

+27
-6
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -775,20 +775,23 @@ void SIFoldOperands::foldOperand(
775775
Register RegSeqDstReg = UseMI->getOperand(0).getReg();
776776
unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
777777

778-
for (auto &RSUse : make_early_inc_range(MRI->use_nodbg_operands(RegSeqDstReg))) {
779-
MachineInstr *RSUseMI = RSUse.getParent();
778+
// Grab the use operands first
779+
SmallVector<MachineOperand *, 4> UsesToProcess;
780+
for (auto &Use : MRI->use_nodbg_operands(RegSeqDstReg))
781+
UsesToProcess.push_back(&Use);
782+
for (auto *RSUse : UsesToProcess) {
783+
MachineInstr *RSUseMI = RSUse->getParent();
780784

781785
if (tryToFoldACImm(UseMI->getOperand(0), RSUseMI,
782-
RSUseMI->getOperandNo(&RSUse), FoldList))
786+
RSUseMI->getOperandNo(RSUse), FoldList))
783787
continue;
784788

785-
if (RSUse.getSubReg() != RegSeqDstSubReg)
789+
if (RSUse->getSubReg() != RegSeqDstSubReg)
786790
continue;
787791

788-
foldOperand(OpToFold, RSUseMI, RSUseMI->getOperandNo(&RSUse), FoldList,
792+
foldOperand(OpToFold, RSUseMI, RSUseMI->getOperandNo(RSUse), FoldList,
789793
CopiesToReplace);
790794
}
791-
792795
return;
793796
}
794797

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=si-fold-operands -verify-machineinstrs -o - %s
2+
3+
---
4+
name: fold_reg_sequence
5+
body: |
6+
bb.0:
7+
liveins: $vgpr0_vgpr1, $vgpr2
8+
9+
%0:sreg_32 = S_MOV_B32 0
10+
%1:sreg_32 = S_MOV_B32 429
11+
%2:sreg_64 = REG_SEQUENCE killed %1, %subreg.sub0, %0, %subreg.sub1
12+
%3:vgpr_32 = V_MUL_HI_U32_e64 $vgpr2, %2.sub0, implicit $exec
13+
%4:vgpr_32 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s32), addrspace 1)
14+
%5:vgpr_32 = V_MUL_HI_U32_e64 %4, %2.sub0, implicit $exec
15+
S_ENDPGM 0
16+
17+
...
18+

0 commit comments

Comments
 (0)