Skip to content

Commit 5164ef4

Browse files
committed
[AMDGPU] Prevent hang in SIFoldOperands by caching Uses
foldOperand() recurses when handling a REG_SEQUENCE use, and that recursion can trigger an infinite loop: the method can modify the order of the use operands, which corrupts the range-based for loop that is iterating over them. Cache the use operands before processing them so that the iteration is not affected by those modifications. Added a repro MIR test case.
1 parent d4fd202 commit 5164ef4

File tree

2 files changed

+26
-9
lines changed

2 files changed

+26
-9
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -219,10 +219,8 @@ bool SIFoldOperands::canUseImmWithOpSel(FoldCandidate &Fold) const {
219219
default:
220220
return false;
221221
case AMDGPU::OPERAND_REG_IMM_V2FP16:
222-
case AMDGPU::OPERAND_REG_IMM_V2BF16:
223222
case AMDGPU::OPERAND_REG_IMM_V2INT16:
224223
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
225-
case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
226224
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
227225
break;
228226
}
@@ -774,21 +772,24 @@ void SIFoldOperands::foldOperand(
774772
if (UseMI->isRegSequence()) {
775773
Register RegSeqDstReg = UseMI->getOperand(0).getReg();
776774
unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
777-
778-
for (auto &RSUse : make_early_inc_range(MRI->use_nodbg_operands(RegSeqDstReg))) {
779-
MachineInstr *RSUseMI = RSUse.getParent();
775+
776+
// Grab the use operands first
777+
SmallVector<MachineOperand *, 4> UsesToProcess;
778+
for (auto &Use : MRI->use_nodbg_operands(RegSeqDstReg))
779+
UsesToProcess.push_back(&Use);
780+
for (auto *RSUse : UsesToProcess) {
781+
MachineInstr *RSUseMI = RSUse->getParent();
780782

781783
if (tryToFoldACImm(UseMI->getOperand(0), RSUseMI,
782-
RSUseMI->getOperandNo(&RSUse), FoldList))
784+
RSUseMI->getOperandNo(RSUse), FoldList))
783785
continue;
784786

785-
if (RSUse.getSubReg() != RegSeqDstSubReg)
787+
if (RSUse->getSubReg() != RegSeqDstSubReg)
786788
continue;
787789

788-
foldOperand(OpToFold, RSUseMI, RSUseMI->getOperandNo(&RSUse), FoldList,
790+
foldOperand(OpToFold, RSUseMI, RSUseMI->getOperandNo(RSUse), FoldList,
789791
CopiesToReplace);
790792
}
791-
792793
return;
793794
}
794795

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=si-fold-operands -verify-machineinstrs -o - %s
2+
3+
---
4+
name: fold_reg_sequence
5+
body: |
6+
bb.0:
7+
liveins: $vgpr0_vgpr1, $vgpr2
8+
%33:sreg_32 = S_MOV_B32 0
9+
%34:sreg_32 = S_MOV_B32 429
10+
%35:sreg_64 = REG_SEQUENCE killed %34, %subreg.sub0, %33, %subreg.sub1
11+
%49:vgpr_32 = V_MUL_HI_U32_e64 $vgpr2, %35.sub0, implicit $exec
12+
%75:vgpr_32 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s32), addrspace 1)
13+
%77:vgpr_32 = V_MUL_HI_U32_e64 %75, %35.sub0, implicit $exec
14+
S_ENDPGM 0
15+
...
16+

0 commit comments

Comments
 (0)