Skip to content

Commit a845ea3

Browse files
authored
[AMDGPU] Fix SDWA 'preserve' transformation for instructions in different basic blocks. (#82406)
This fixes a crash that occurs when the operand sources for a V_OR instruction reside in different basic blocks.
1 parent ce0687e commit a845ea3

File tree

2 files changed

+60
-4
lines changed

2 files changed

+60
-4
lines changed

llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -472,12 +472,11 @@ bool SDWADstPreserveOperand::convertToSDWA(MachineInstr &MI,
472472
}
473473

474474
// Move MI before v_or_b32
475-
auto MBB = MI.getParent();
476-
MBB->remove(&MI);
477-
MBB->insert(getParentInst(), &MI);
475+
MI.getParent()->remove(&MI);
476+
getParentInst()->getParent()->insert(getParentInst(), &MI);
478477

479478
// Add Implicit use of preserved register
480-
MachineInstrBuilder MIB(*MBB->getParent(), MI);
479+
MachineInstrBuilder MIB(*MI.getMF(), MI);
481480
MIB.addReg(getPreservedOperand()->getReg(),
482481
RegState::ImplicitKill,
483482
getPreservedOperand()->getSubReg());

llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,3 +160,60 @@ body: |
160160
S_ENDPGM 0
161161
162162
...
163+
---
164+
name: add_f16_u32_preserve_different_bb
165+
tracksRegLiveness: true
166+
body: |
167+
; SDWA-LABEL: name: add_f16_u32_preserve_different_bb
168+
; SDWA: bb.0:
169+
; SDWA-NEXT: successors: %bb.1(0x80000000)
170+
; SDWA-NEXT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31
171+
; SDWA-NEXT: {{ $}}
172+
; SDWA-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
173+
; SDWA-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
174+
; SDWA-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
175+
; SDWA-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
176+
; SDWA-NEXT: [[FLAT_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
177+
; SDWA-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 65535, [[FLAT_LOAD_DWORD]], implicit $exec
178+
; SDWA-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[FLAT_LOAD_DWORD1]], implicit $exec
179+
; SDWA-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[FLAT_LOAD_DWORD]], 8, 8, implicit $exec
180+
; SDWA-NEXT: [[V_LSHRREV_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 24, [[FLAT_LOAD_DWORD1]], implicit $exec
181+
; SDWA-NEXT: {{ $}}
182+
; SDWA-NEXT: bb.1:
183+
; SDWA-NEXT: successors: %bb.2(0x80000000)
184+
; SDWA-NEXT: {{ $}}
185+
; SDWA-NEXT: [[V_MUL_F32_sdwa:%[0-9]+]]:vgpr_32 = V_MUL_F32_sdwa 0, [[FLAT_LOAD_DWORD]], 0, [[FLAT_LOAD_DWORD1]], 0, 0, 5, 0, 1, 3, implicit $mode, implicit $exec
186+
; SDWA-NEXT: {{ $}}
187+
; SDWA-NEXT: bb.2:
188+
; SDWA-NEXT: [[V_ADD_F16_sdwa:%[0-9]+]]:vgpr_32 = V_ADD_F16_sdwa 0, [[FLAT_LOAD_DWORD]], 0, [[FLAT_LOAD_DWORD1]], 0, 0, 1, 2, 4, 5, implicit $mode, implicit $exec, implicit killed [[V_MUL_F32_sdwa]](tied-def 0)
189+
; SDWA-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_ADD_F16_sdwa]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
190+
; SDWA-NEXT: $sgpr30_sgpr31 = COPY [[COPY]]
191+
; SDWA-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
192+
bb.0:
193+
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31
194+
195+
%2:sreg_64 = COPY $sgpr30_sgpr31
196+
%1:vreg_64 = COPY $vgpr2_vgpr3
197+
%0:vreg_64 = COPY $vgpr0_vgpr1
198+
%3:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
199+
%4:vgpr_32 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
200+
201+
%5:vgpr_32 = V_AND_B32_e32 65535, %3, implicit $exec
202+
%6:vgpr_32 = V_LSHRREV_B32_e64 16, %4, implicit $exec
203+
%7:vgpr_32 = V_BFE_U32_e64 %3, 8, 8, implicit $exec
204+
%8:vgpr_32 = V_LSHRREV_B32_e32 24, %4, implicit $exec
205+
206+
%9:vgpr_32 = V_ADD_F16_e64 0, %5, 0, %6, 0, 0, implicit $mode, implicit $exec
207+
%10:vgpr_32 = V_LSHLREV_B16_e64 8, %9, implicit $exec
208+
209+
bb.1:
210+
%11:vgpr_32 = V_MUL_F32_e64 0, %7, 0, %8, 0, 0, implicit $mode, implicit $exec
211+
%12:vgpr_32 = V_LSHLREV_B32_e64 16, %11, implicit $exec
212+
213+
bb.2:
214+
%13:vgpr_32 = V_OR_B32_e64 %10, %12, implicit $exec
215+
216+
FLAT_STORE_DWORD %0, %13, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
217+
$sgpr30_sgpr31 = COPY %2
218+
S_SETPC_B64_return $sgpr30_sgpr31
219+
...

0 commit comments

Comments
 (0)