Skip to content

Commit 505906b

Browse files
authored
[AMDGPU][True16][CodeGen] do not legalize t16 operand during user scan (#145450)
The legalize-t16-operand function can insert a REG_SEQUENCE, which modifies the user list of the targeted register, so it should not be called in the middle of a user-list iteration.
1 parent c85466d commit 505906b

File tree

2 files changed

+32
-13
lines changed

2 files changed

+32
-13
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8716,9 +8716,8 @@ void SIInstrInfo::splitScalar64BitCountOp(SIInstrWorklist &Worklist,
87168716
void SIInstrInfo::addUsersToMoveToVALUWorklist(
87178717
Register DstReg, MachineRegisterInfo &MRI,
87188718
SIInstrWorklist &Worklist) const {
8719-
for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg),
8720-
E = MRI.use_end(); I != E;) {
8721-
MachineInstr &UseMI = *I->getParent();
8719+
for (MachineOperand &MO : make_early_inc_range(MRI.use_operands(DstReg))) {
8720+
MachineInstr &UseMI = *MO.getParent();
87228721

87238722
unsigned OpNo = 0;
87248723

@@ -8733,21 +8732,15 @@ void SIInstrInfo::addUsersToMoveToVALUWorklist(
87338732
case AMDGPU::INSERT_SUBREG:
87348733
break;
87358734
default:
8736-
OpNo = I.getOperandNo();
8735+
OpNo = MO.getOperandNo();
87378736
break;
87388737
}
87398738

8740-
if (!RI.hasVectorRegisters(getOpRegClass(UseMI, OpNo))) {
8739+
if (!RI.hasVectorRegisters(getOpRegClass(UseMI, OpNo)))
87418740
Worklist.insert(&UseMI);
8742-
8743-
do {
8744-
++I;
8745-
} while (I != E && I->getParent() == &UseMI);
8746-
} else {
8741+
else
8742+
// Legalization could change user list.
87478743
legalizeOperandsVALUt16(UseMI, OpNo, MRI);
8748-
8749-
++I;
8750-
}
87518744
}
87528745
}
87538746

llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-true16.mir

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,32 @@ body: |
108108
%4:sreg_32 = S_FMAC_F16 %3:sreg_32, %3:sreg_32, %2:sreg_32, implicit $mode
109109
...
110110

111+
---
112+
name: legalize_with_multi_user
113+
body: |
114+
bb.0:
115+
; GCN-LABEL: name: legalize_with_multi_user
116+
; GCN: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
117+
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
118+
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[DEF]], %subreg.lo16, [[DEF1]], %subreg.hi16
119+
; GCN-NEXT: [[V_ADD_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_ADD_F16_t16_e64 0, [[REG_SEQUENCE]].lo16, 0, 1, 0, 0, 0, implicit $mode, implicit $exec
120+
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
121+
; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
122+
; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
123+
; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_ADD_F16_t16_e64_]], %subreg.lo16, [[DEF3]], %subreg.hi16
124+
; GCN-NEXT: [[V_PK_FMA_F16_:%[0-9]+]]:vgpr_32 = V_PK_FMA_F16 11, [[S_MOV_B32_]], 0, [[REG_SEQUENCE1]], 8, [[DEF2]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
125+
; GCN-NEXT: [[DEF4:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
126+
; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_ADD_F16_t16_e64_]], %subreg.lo16, [[DEF4]], %subreg.hi16
127+
; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[REG_SEQUENCE2]], [[S_MOV_B32_]], implicit $exec
128+
%0:vgpr_16 = IMPLICIT_DEF
129+
%1:sreg_32 = COPY %0:vgpr_16
130+
%2:sreg_32 = S_ADD_F16 %1:sreg_32, 1, implicit $mode
131+
%3:sreg_32 = S_MOV_B32 32768
132+
%4:vgpr_32 = IMPLICIT_DEF
133+
%5:vgpr_32 = V_PK_FMA_F16 11, %3:sreg_32, 0, %2:sreg_32, 8, %4:vgpr_32, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
134+
%6:sreg_32 = S_XOR_B32 %2:sreg_32, %3:sreg_32, implicit-def dead $scc
135+
...
136+
111137
---
112138
name: vgpr16_to_spgr32
113139
body: |

0 commit comments

Comments
 (0)