Skip to content

Commit b8b3a02

Browse files
vpykhtin authored and bcahoon committed
[AMDGPU] Fix SDWA 'preserve' transformation for instructions in different basic blocks. (llvm#82406)
This fixes a crash when the operand sources for the V_OR instruction reside in different basic blocks.

Change-Id: I1a5db649e174f76b1c749b02467768bd0d3ca440

(cherry picked from commit 68f6b4e)
1 parent a749aae commit b8b3a02

File tree

2 files changed

+36
-4
lines changed

2 files changed

+36
-4
lines changed

llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -481,12 +481,11 @@ bool SDWADstPreserveOperand::convertToSDWA(MachineInstr &MI,
481 481
}
482 482

483 483
// Move MI before v_or_b32
484-
auto MBB = MI.getParent();
485-
MBB->remove(&MI);
486-
MBB->insert(getParentInst(), &MI);
484+
MI.getParent()->remove(&MI);
485+
getParentInst()->getParent()->insert(getParentInst(), &MI);
487 486

488 487
// Add Implicit use of preserved register
489-
MachineInstrBuilder MIB(*MBB->getParent(), MI);
488+
MachineInstrBuilder MIB(*MI.getMF(), MI);
490 489
MIB.addReg(getPreservedOperand()->getReg(),
491 490
RegState::ImplicitKill,
492 491
getPreservedOperand()->getSubReg());

llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,3 +142,36 @@ body: |
142 142
S_ENDPGM 0
143 143
144144
...
145+
---
146+
147+
name: add_f16_u32_preserve_different_bb
148+
tracksRegLiveness: true
149+
body: |
150+
bb.0:
151+
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31
152+
153+
%2:sreg_64 = COPY $sgpr30_sgpr31
154+
%1:vreg_64 = COPY $vgpr2_vgpr3
155+
%0:vreg_64 = COPY $vgpr0_vgpr1
156+
%3:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
157+
%4:vgpr_32 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
158+
159+
%5:vgpr_32 = V_AND_B32_e32 65535, %3, implicit $exec
160+
%6:vgpr_32 = V_LSHRREV_B32_e64 16, %4, implicit $exec
161+
%7:vgpr_32 = V_BFE_U32_e64 %3, 8, 8, implicit $exec
162+
%8:vgpr_32 = V_LSHRREV_B32_e32 24, %4, implicit $exec
163+
164+
%9:vgpr_32 = V_ADD_F16_e64 0, %5, 0, %6, 0, 0, implicit $mode, implicit $exec
165+
%10:vgpr_32 = V_LSHLREV_B16_e64 8, %9, implicit $exec
166+
167+
bb.1:
168+
%11:vgpr_32 = V_MUL_F32_e64 0, %7, 0, %8, 0, 0, implicit $mode, implicit $exec
169+
%12:vgpr_32 = V_LSHLREV_B32_e64 16, %11, implicit $exec
170+
171+
bb.2:
172+
%13:vgpr_32 = V_OR_B32_e64 %10, %12, implicit $exec
173+
174+
FLAT_STORE_DWORD %0, %13, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
175+
$sgpr30_sgpr31 = COPY %2
176+
S_SETPC_B64_return $sgpr30_sgpr31
177+
...

0 commit comments

Comments
 (0)