Skip to content

Commit f903e3e

Browse files
committed
[AMDGPU] Reset kill flags for multiple uses of SDWAInst Ops
Change-Id: I8b56d86a55c397623567945a87ad2f55749680bc
1 parent a8e1c3e commit f903e3e

File tree

2 files changed

+42
-2
lines changed

2 files changed

+42
-2
lines changed

llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1184,8 +1184,15 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
11841184
if (PotentialMatches.count(Operand->getParentInst()) == 0)
11851185
Converted |= Operand->convertToSDWA(*SDWAInst, TII);
11861186
}
1187+
11871188
if (Converted) {
11881189
ConvertedInstructions.push_back(SDWAInst);
1190+
for (MachineOperand &MO : SDWAInst->uses()) {
1191+
if (!MO.isReg())
1192+
continue;
1193+
1194+
MRI->clearKillFlags(MO.getReg());
1195+
}
11891196
} else {
11901197
SDWAInst->eraseFromParent();
11911198
return false;

llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ body: |
3636
; SDWA-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[FLAT_LOAD_DWORD]], 8, 8, implicit $exec
3737
; SDWA-NEXT: [[V_LSHRREV_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 24, [[FLAT_LOAD_DWORD1]], implicit $exec
3838
; SDWA-NEXT: [[V_MUL_F32_sdwa:%[0-9]+]]:vgpr_32 = V_MUL_F32_sdwa 0, [[FLAT_LOAD_DWORD]], 0, [[FLAT_LOAD_DWORD1]], 0, 0, 5, 0, 1, 3, implicit $mode, implicit $exec
39-
; SDWA-NEXT: [[V_ADD_F16_sdwa:%[0-9]+]]:vgpr_32 = V_ADD_F16_sdwa 0, [[FLAT_LOAD_DWORD]], 0, [[FLAT_LOAD_DWORD1]], 0, 0, 1, 2, 4, 5, implicit $mode, implicit $exec, implicit killed [[V_MUL_F32_sdwa]](tied-def 0)
39+
; SDWA-NEXT: [[V_ADD_F16_sdwa:%[0-9]+]]:vgpr_32 = V_ADD_F16_sdwa 0, [[FLAT_LOAD_DWORD]], 0, [[FLAT_LOAD_DWORD1]], 0, 0, 1, 2, 4, 5, implicit $mode, implicit $exec, implicit [[V_MUL_F32_sdwa]](tied-def 0)
4040
; SDWA-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_ADD_F16_sdwa]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
4141
; SDWA-NEXT: $sgpr30_sgpr31 = COPY [[COPY]]
4242
; SDWA-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
@@ -185,7 +185,7 @@ body: |
185185
; SDWA-NEXT: [[V_MUL_F32_sdwa:%[0-9]+]]:vgpr_32 = V_MUL_F32_sdwa 0, [[FLAT_LOAD_DWORD]], 0, [[FLAT_LOAD_DWORD1]], 0, 0, 5, 0, 1, 3, implicit $mode, implicit $exec
186186
; SDWA-NEXT: {{ $}}
187187
; SDWA-NEXT: bb.2:
188-
; SDWA-NEXT: [[V_ADD_F16_sdwa:%[0-9]+]]:vgpr_32 = V_ADD_F16_sdwa 0, [[FLAT_LOAD_DWORD]], 0, [[FLAT_LOAD_DWORD1]], 0, 0, 1, 2, 4, 5, implicit $mode, implicit $exec, implicit killed [[V_MUL_F32_sdwa]](tied-def 0)
188+
; SDWA-NEXT: [[V_ADD_F16_sdwa:%[0-9]+]]:vgpr_32 = V_ADD_F16_sdwa 0, [[FLAT_LOAD_DWORD]], 0, [[FLAT_LOAD_DWORD1]], 0, 0, 1, 2, 4, 5, implicit $mode, implicit $exec, implicit [[V_MUL_F32_sdwa]](tied-def 0)
189189
; SDWA-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_ADD_F16_sdwa]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
190190
; SDWA-NEXT: $sgpr30_sgpr31 = COPY [[COPY]]
191191
; SDWA-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
@@ -217,3 +217,36 @@ body: |
217217
$sgpr30_sgpr31 = COPY %2
218218
S_SETPC_B64_return $sgpr30_sgpr31
219219
...
220+
221+
# Registers reused as operands of converted SDWA instructions must not keep a kill flag (here %1 feeds V_AND_B32 and both V_OR_B32_sdwa).
222+
223+
---
224+
name: multiuse_kill
225+
tracksRegLiveness: true
226+
227+
body: |
228+
bb.0:
229+
; SDWA-LABEL: name: multiuse_kill
230+
; SDWA: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
231+
; SDWA-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
232+
; SDWA-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
233+
; SDWA-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, killed [[DEF]], implicit $exec
234+
; SDWA-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
235+
; SDWA-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 killed [[S_MOV_B32_]], [[DEF1]], implicit $exec
236+
; SDWA-NEXT: [[V_OR_B32_sdwa:%[0-9]+]]:vgpr_32 = V_OR_B32_sdwa 0, [[DEF1]], 0, [[V_LSHLREV_B32_e64_]], 0, 6, 0, 4, 6, implicit $exec
237+
; SDWA-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, killed [[DEF2]], implicit $exec
238+
; SDWA-NEXT: [[V_OR_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_OR_B32_sdwa 0, [[DEF1]], 0, [[V_LSHLREV_B32_e64_1]], 0, 6, 0, 4, 6, implicit $exec
239+
; SDWA-NEXT: S_ENDPGM 0
240+
%0:vgpr_32 = IMPLICIT_DEF
241+
%1:vgpr_32 = IMPLICIT_DEF
242+
%2:vgpr_32 = IMPLICIT_DEF
243+
%3:vgpr_32 = V_LSHLREV_B32_e64 16, killed %0, implicit $exec
244+
%4:sreg_32 = S_MOV_B32 65535
245+
%5:vgpr_32 = V_AND_B32_e64 killed %4, killed %1, implicit $exec
246+
%6:vgpr_32 = V_OR_B32_e64 %5, killed %3, implicit $exec
247+
%7:vgpr_32 = V_LSHLREV_B32_e64 16, killed %2, implicit $exec
248+
%8:vgpr_32 = V_OR_B32_e64 %5, killed %7, implicit $exec
249+
250+
S_ENDPGM 0
251+
252+
...

0 commit comments

Comments
 (0)