Skip to content

Commit 9e0be65

Browse files
authored
[AMDGPU] Fix broken MIR generated by gfx11 simulated trap lowering (#91652)
The gfx11 simulated trap lowering was breaking the CFG connection between uses of virtual registers after the trap and their definitions before it. Fixes SWDEV-460384. Fixes a bug in #85854.
1 parent 9276a03 commit 9e0be65

File tree

3 files changed

+255
-35
lines changed

3 files changed

+255
-35
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 29 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2031,50 +2031,57 @@ MachineBasicBlock *SIInstrInfo::insertSimulatedTrap(MachineRegisterInfo &MRI,
20312031
MachineInstr &MI,
20322032
const DebugLoc &DL) const {
20332033
MachineFunction *MF = MBB.getParent();
2034-
MachineBasicBlock *SplitBB = MBB.splitAt(MI, /*UpdateLiveIns=*/false);
2035-
MachineBasicBlock *HaltLoop = MF->CreateMachineBasicBlock();
2036-
MF->push_back(HaltLoop);
2037-
20382034
constexpr unsigned DoorbellIDMask = 0x3ff;
20392035
constexpr unsigned ECQueueWaveAbort = 0x400;
20402036

2037+
MachineBasicBlock *TrapBB = &MBB;
2038+
MachineBasicBlock *ContBB = &MBB;
2039+
MachineBasicBlock *HaltLoopBB = MF->CreateMachineBasicBlock();
2040+
2041+
if (!MBB.succ_empty() || std::next(MI.getIterator()) != MBB.end()) {
2042+
ContBB = MBB.splitAt(MI, /*UpdateLiveIns=*/false);
2043+
TrapBB = MF->CreateMachineBasicBlock();
2044+
BuildMI(MBB, MI, DL, get(AMDGPU::S_CBRANCH_EXECNZ)).addMBB(TrapBB);
2045+
MF->push_back(TrapBB);
2046+
MBB.addSuccessor(TrapBB);
2047+
}
2048+
20412049
// Start with a `s_trap 2`, if we're in PRIV=1 and we need the workaround this
20422050
// will be a nop.
2043-
BuildMI(MBB, MI, DL, get(AMDGPU::S_TRAP))
2051+
BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_TRAP))
20442052
.addImm(static_cast<unsigned>(GCNSubtarget::TrapID::LLVMAMDHSATrap));
20452053
Register DoorbellReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
2046-
BuildMI(MBB, MI, DL, get(AMDGPU::S_SENDMSG_RTN_B32), DoorbellReg)
2054+
BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_SENDMSG_RTN_B32),
2055+
DoorbellReg)
20472056
.addImm(AMDGPU::SendMsg::ID_RTN_GET_DOORBELL);
2048-
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)
2057+
BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)
20492058
.addUse(AMDGPU::M0);
20502059
Register DoorbellRegMasked =
20512060
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
2052-
BuildMI(MBB, MI, DL, get(AMDGPU::S_AND_B32), DoorbellRegMasked)
2061+
BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_AND_B32), DoorbellRegMasked)
20532062
.addUse(DoorbellReg)
20542063
.addImm(DoorbellIDMask);
20552064
Register SetWaveAbortBit =
20562065
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
2057-
BuildMI(MBB, MI, DL, get(AMDGPU::S_OR_B32), SetWaveAbortBit)
2066+
BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_OR_B32), SetWaveAbortBit)
20582067
.addUse(DoorbellRegMasked)
20592068
.addImm(ECQueueWaveAbort);
2060-
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
2069+
BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
20612070
.addUse(SetWaveAbortBit);
2062-
BuildMI(MBB, MI, DL, get(AMDGPU::S_SENDMSG))
2071+
BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_SENDMSG))
20632072
.addImm(AMDGPU::SendMsg::ID_INTERRUPT);
2064-
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
2073+
BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
20652074
.addUse(AMDGPU::TTMP2);
2066-
BuildMI(MBB, MI, DL, get(AMDGPU::S_BRANCH)).addMBB(HaltLoop);
2067-
2068-
BuildMI(*HaltLoop, HaltLoop->end(), DL, get(AMDGPU::S_SETHALT)).addImm(5);
2069-
BuildMI(*HaltLoop, HaltLoop->end(), DL, get(AMDGPU::S_BRANCH))
2070-
.addMBB(HaltLoop);
2075+
BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_BRANCH)).addMBB(HaltLoopBB);
2076+
TrapBB->addSuccessor(HaltLoopBB);
20712077

2072-
if (SplitBB != &MBB)
2073-
MBB.removeSuccessor(SplitBB);
2074-
MBB.addSuccessor(HaltLoop);
2075-
HaltLoop->addSuccessor(HaltLoop);
2078+
BuildMI(*HaltLoopBB, HaltLoopBB->end(), DL, get(AMDGPU::S_SETHALT)).addImm(5);
2079+
BuildMI(*HaltLoopBB, HaltLoopBB->end(), DL, get(AMDGPU::S_BRANCH))
2080+
.addMBB(HaltLoopBB);
2081+
MF->push_back(HaltLoopBB);
2082+
HaltLoopBB->addSuccessor(HaltLoopBB);
20762083

2077-
return SplitBB;
2084+
return ContBB;
20782085
}
20792086

20802087
unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) {
Lines changed: 76 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,28 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
2-
# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -o - -run-pass=legalizer %s | FileCheck -check-prefix=GFX1100 %s
3-
# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx11-generic --amdhsa-code-object-version=6 -o - -run-pass=legalizer %s | FileCheck -check-prefix=GFX1100 %s
4-
# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1150 -o - -run-pass=legalizer %s | FileCheck -check-prefix=GFX1150 %s
2+
# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -o - -run-pass=legalizer %s -verify-machineinstrs | FileCheck -check-prefix=GFX1100 %s
3+
# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx11-generic --amdhsa-code-object-version=6 -o - -run-pass=legalizer %s -verify-machineinstrs | FileCheck -check-prefix=GFX1100 %s
4+
# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1150 -o - -run-pass=legalizer %s -verify-machineinstrs | FileCheck -check-prefix=GFX1150 %s
55

66
---
77
name: test_trap
88
body: |
99
bb.0:
1010
; GFX1100-LABEL: name: test_trap
11-
; GFX1100: successors: %bb.2(0x80000000)
11+
; GFX1100: successors: %bb.1(0x40000000), %bb.2(0x40000000)
1212
; GFX1100-NEXT: {{ $}}
1313
; GFX1100-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1414
; GFX1100-NEXT: [[C1:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
1515
; GFX1100-NEXT: G_STORE [[C]](s32), [[C1]](p1) :: (store (s8), addrspace 1)
16+
; GFX1100-NEXT: S_CBRANCH_EXECNZ %bb.2, implicit $exec
17+
; GFX1100-NEXT: {{ $}}
18+
; GFX1100-NEXT: .1:
19+
; GFX1100-NEXT: successors:
20+
; GFX1100-NEXT: {{ $}}
21+
; GFX1100-NEXT: G_STORE [[C]](s32), [[C1]](p1) :: (store (s8), addrspace 1)
22+
; GFX1100-NEXT: {{ $}}
23+
; GFX1100-NEXT: .2:
24+
; GFX1100-NEXT: successors: %bb.3(0x80000000)
25+
; GFX1100-NEXT: {{ $}}
1626
; GFX1100-NEXT: S_TRAP 2
1727
; GFX1100-NEXT: [[S_SENDMSG_RTN_B32_:%[0-9]+]]:sreg_32 = S_SENDMSG_RTN_B32 128
1828
; GFX1100-NEXT: $ttmp2 = S_MOV_B32 $m0
@@ -21,18 +31,13 @@ body: |
2131
; GFX1100-NEXT: $m0 = S_MOV_B32 [[S_OR_B32_]]
2232
; GFX1100-NEXT: S_SENDMSG 1, implicit $exec, implicit $m0
2333
; GFX1100-NEXT: $m0 = S_MOV_B32 $ttmp2
24-
; GFX1100-NEXT: S_BRANCH %bb.2
25-
; GFX1100-NEXT: {{ $}}
26-
; GFX1100-NEXT: .1:
27-
; GFX1100-NEXT: successors:
34+
; GFX1100-NEXT: S_BRANCH %bb.3
2835
; GFX1100-NEXT: {{ $}}
29-
; GFX1100-NEXT: G_STORE [[C]](s32), [[C1]](p1) :: (store (s8), addrspace 1)
30-
; GFX1100-NEXT: {{ $}}
31-
; GFX1100-NEXT: .2:
32-
; GFX1100-NEXT: successors: %bb.2(0x80000000)
36+
; GFX1100-NEXT: .3:
37+
; GFX1100-NEXT: successors: %bb.3(0x80000000)
3338
; GFX1100-NEXT: {{ $}}
3439
; GFX1100-NEXT: S_SETHALT 5
35-
; GFX1100-NEXT: S_BRANCH %bb.2
40+
; GFX1100-NEXT: S_BRANCH %bb.3
3641
;
3742
; GFX1150-LABEL: name: test_trap
3843
; GFX1150: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
@@ -45,5 +50,63 @@ body: |
4550
G_STORE %0, %1 :: (store 1, addrspace 1)
4651
G_TRAP
4752
G_STORE %0, %1 :: (store 1, addrspace 1)
53+
...
54+
55+
---
56+
name: test_fallthrough_trap
57+
body: |
58+
; GFX1100-LABEL: name: test_fallthrough_trap
59+
; GFX1100: bb.0:
60+
; GFX1100-NEXT: successors: %bb.1(0x80000000), %bb.2(0x00000000)
61+
; GFX1100-NEXT: {{ $}}
62+
; GFX1100-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
63+
; GFX1100-NEXT: [[C1:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
64+
; GFX1100-NEXT: G_STORE [[C]](s32), [[C1]](p1) :: (store (s8), addrspace 1)
65+
; GFX1100-NEXT: S_CBRANCH_EXECNZ %bb.2, implicit $exec
66+
; GFX1100-NEXT: {{ $}}
67+
; GFX1100-NEXT: bb.1:
68+
; GFX1100-NEXT: successors:
69+
; GFX1100-NEXT: {{ $}}
70+
; GFX1100-NEXT: G_STORE [[C]](s32), [[C1]](p1) :: (store (s8), addrspace 1)
71+
; GFX1100-NEXT: {{ $}}
72+
; GFX1100-NEXT: bb.2:
73+
; GFX1100-NEXT: successors: %bb.3(0x80000000)
74+
; GFX1100-NEXT: {{ $}}
75+
; GFX1100-NEXT: S_TRAP 2
76+
; GFX1100-NEXT: [[S_SENDMSG_RTN_B32_:%[0-9]+]]:sreg_32 = S_SENDMSG_RTN_B32 128
77+
; GFX1100-NEXT: $ttmp2 = S_MOV_B32 $m0
78+
; GFX1100-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_SENDMSG_RTN_B32_]], 1023, implicit-def $scc
79+
; GFX1100-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_AND_B32_]], 1024, implicit-def $scc
80+
; GFX1100-NEXT: $m0 = S_MOV_B32 [[S_OR_B32_]]
81+
; GFX1100-NEXT: S_SENDMSG 1, implicit $exec, implicit $m0
82+
; GFX1100-NEXT: $m0 = S_MOV_B32 $ttmp2
83+
; GFX1100-NEXT: S_BRANCH %bb.3
84+
; GFX1100-NEXT: {{ $}}
85+
; GFX1100-NEXT: bb.3:
86+
; GFX1100-NEXT: successors: %bb.3(0x80000000)
87+
; GFX1100-NEXT: {{ $}}
88+
; GFX1100-NEXT: S_SETHALT 5
89+
; GFX1100-NEXT: S_BRANCH %bb.3
90+
;
91+
; GFX1150-LABEL: name: test_fallthrough_trap
92+
; GFX1150: bb.0:
93+
; GFX1150-NEXT: successors: %bb.1(0x80000000)
94+
; GFX1150-NEXT: {{ $}}
95+
; GFX1150-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
96+
; GFX1150-NEXT: [[C1:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
97+
; GFX1150-NEXT: G_STORE [[C]](s32), [[C1]](p1) :: (store (s8), addrspace 1)
98+
; GFX1150-NEXT: S_TRAP 2
99+
; GFX1150-NEXT: {{ $}}
100+
; GFX1150-NEXT: bb.1:
101+
; GFX1150-NEXT: G_STORE [[C]](s32), [[C1]](p1) :: (store (s8), addrspace 1)
102+
bb.0:
103+
successors: %bb.1
104+
105+
%0:_(s8) = G_CONSTANT i8 0
106+
%1:_(p1) = G_CONSTANT i64 0
107+
G_STORE %0, %1 :: (store 1, addrspace 1)
108+
G_TRAP
48109
110+
bb.1:
111+
G_STORE %0, %1 :: (store 1, addrspace 1)
49112
...

llvm/test/CodeGen/AMDGPU/trap-abis.ll

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,142 @@ ret:
264264
ret void
265265
}
266266

267+
define amdgpu_kernel void @trap_with_use_after(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1) {
268+
; NOHSA-TRAP-GFX900-LABEL: trap_with_use_after:
269+
; NOHSA-TRAP-GFX900: ; %bb.0:
270+
; NOHSA-TRAP-GFX900-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
271+
; NOHSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v0, 0
272+
; NOHSA-TRAP-GFX900-NEXT: s_waitcnt lgkmcnt(0)
273+
; NOHSA-TRAP-GFX900-NEXT: global_load_dword v1, v0, s[0:1] glc
274+
; NOHSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0)
275+
; NOHSA-TRAP-GFX900-NEXT: s_cbranch_execnz .LBB2_2
276+
; NOHSA-TRAP-GFX900-NEXT: ; %bb.1:
277+
; NOHSA-TRAP-GFX900-NEXT: global_store_dword v0, v1, s[2:3]
278+
; NOHSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0)
279+
; NOHSA-TRAP-GFX900-NEXT: .LBB2_2:
280+
; NOHSA-TRAP-GFX900-NEXT: s_endpgm
281+
;
282+
; HSA-TRAP-GFX803-LABEL: trap_with_use_after:
283+
; HSA-TRAP-GFX803: ; %bb.0:
284+
; HSA-TRAP-GFX803-NEXT: s_mov_b64 s[0:1], s[4:5]
285+
; HSA-TRAP-GFX803-NEXT: s_load_dwordx4 s[4:7], s[6:7], 0x0
286+
; HSA-TRAP-GFX803-NEXT: s_waitcnt lgkmcnt(0)
287+
; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v0, s4
288+
; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v1, s5
289+
; HSA-TRAP-GFX803-NEXT: flat_load_dword v2, v[0:1] glc
290+
; HSA-TRAP-GFX803-NEXT: s_waitcnt vmcnt(0)
291+
; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v0, s6
292+
; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v1, s7
293+
; HSA-TRAP-GFX803-NEXT: s_trap 2
294+
; HSA-TRAP-GFX803-NEXT: flat_store_dword v[0:1], v2
295+
; HSA-TRAP-GFX803-NEXT: s_waitcnt vmcnt(0)
296+
; HSA-TRAP-GFX803-NEXT: s_endpgm
297+
;
298+
; HSA-TRAP-GFX900-LABEL: trap_with_use_after:
299+
; HSA-TRAP-GFX900: ; %bb.0:
300+
; HSA-TRAP-GFX900-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
301+
; HSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v0, 0
302+
; HSA-TRAP-GFX900-NEXT: s_waitcnt lgkmcnt(0)
303+
; HSA-TRAP-GFX900-NEXT: global_load_dword v1, v0, s[0:1] glc
304+
; HSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0)
305+
; HSA-TRAP-GFX900-NEXT: s_trap 2
306+
; HSA-TRAP-GFX900-NEXT: global_store_dword v0, v1, s[2:3]
307+
; HSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0)
308+
; HSA-TRAP-GFX900-NEXT: s_endpgm
309+
;
310+
; HSA-NOTRAP-GFX900-LABEL: trap_with_use_after:
311+
; HSA-NOTRAP-GFX900: ; %bb.0:
312+
; HSA-NOTRAP-GFX900-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
313+
; HSA-NOTRAP-GFX900-NEXT: v_mov_b32_e32 v0, 0
314+
; HSA-NOTRAP-GFX900-NEXT: s_waitcnt lgkmcnt(0)
315+
; HSA-NOTRAP-GFX900-NEXT: global_load_dword v1, v0, s[0:1] glc
316+
; HSA-NOTRAP-GFX900-NEXT: s_waitcnt vmcnt(0)
317+
; HSA-NOTRAP-GFX900-NEXT: s_cbranch_execnz .LBB2_2
318+
; HSA-NOTRAP-GFX900-NEXT: ; %bb.1:
319+
; HSA-NOTRAP-GFX900-NEXT: global_store_dword v0, v1, s[2:3]
320+
; HSA-NOTRAP-GFX900-NEXT: s_waitcnt vmcnt(0)
321+
; HSA-NOTRAP-GFX900-NEXT: .LBB2_2:
322+
; HSA-NOTRAP-GFX900-NEXT: s_endpgm
323+
;
324+
; HSA-TRAP-GFX1100-LABEL: trap_with_use_after:
325+
; HSA-TRAP-GFX1100: ; %bb.0:
326+
; HSA-TRAP-GFX1100-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
327+
; HSA-TRAP-GFX1100-NEXT: v_mov_b32_e32 v0, 0
328+
; HSA-TRAP-GFX1100-NEXT: s_waitcnt lgkmcnt(0)
329+
; HSA-TRAP-GFX1100-NEXT: global_load_b32 v1, v0, s[0:1] glc dlc
330+
; HSA-TRAP-GFX1100-NEXT: s_waitcnt vmcnt(0)
331+
; HSA-TRAP-GFX1100-NEXT: s_cbranch_execnz .LBB2_2
332+
; HSA-TRAP-GFX1100-NEXT: ; %bb.1:
333+
; HSA-TRAP-GFX1100-NEXT: global_store_b32 v0, v1, s[2:3] dlc
334+
; HSA-TRAP-GFX1100-NEXT: s_waitcnt_vscnt null, 0x0
335+
; HSA-TRAP-GFX1100-NEXT: s_nop 0
336+
; HSA-TRAP-GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
337+
; HSA-TRAP-GFX1100-NEXT: s_endpgm
338+
; HSA-TRAP-GFX1100-NEXT: .LBB2_2:
339+
; HSA-TRAP-GFX1100-NEXT: s_trap 2
340+
; HSA-TRAP-GFX1100-NEXT: s_sendmsg_rtn_b32 s0, sendmsg(MSG_RTN_GET_DOORBELL)
341+
; HSA-TRAP-GFX1100-NEXT: s_mov_b32 ttmp2, m0
342+
; HSA-TRAP-GFX1100-NEXT: s_waitcnt lgkmcnt(0)
343+
; HSA-TRAP-GFX1100-NEXT: s_and_b32 s0, s0, 0x3ff
344+
; HSA-TRAP-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
345+
; HSA-TRAP-GFX1100-NEXT: s_bitset1_b32 s0, 10
346+
; HSA-TRAP-GFX1100-NEXT: s_mov_b32 m0, s0
347+
; HSA-TRAP-GFX1100-NEXT: s_sendmsg sendmsg(MSG_INTERRUPT)
348+
; HSA-TRAP-GFX1100-NEXT: s_mov_b32 m0, ttmp2
349+
; HSA-TRAP-GFX1100-NEXT: .LBB2_3: ; =>This Inner Loop Header: Depth=1
350+
; HSA-TRAP-GFX1100-NEXT: s_sethalt 5
351+
; HSA-TRAP-GFX1100-NEXT: s_branch .LBB2_3
352+
;
353+
; HSA-TRAP-GFX1100-O0-LABEL: trap_with_use_after:
354+
; HSA-TRAP-GFX1100-O0: ; %bb.0:
355+
; HSA-TRAP-GFX1100-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
356+
; HSA-TRAP-GFX1100-O0-NEXT: v_mov_b32_e32 v0, 0
357+
; HSA-TRAP-GFX1100-O0-NEXT: scratch_store_b32 off, v0, off offset:8 ; 4-byte Folded Spill
358+
; HSA-TRAP-GFX1100-O0-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
359+
; HSA-TRAP-GFX1100-O0-NEXT: s_load_b64 s[2:3], s[4:5], 0x8
360+
; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt lgkmcnt(0)
361+
; HSA-TRAP-GFX1100-O0-NEXT: v_writelane_b32 v1, s2, 0
362+
; HSA-TRAP-GFX1100-O0-NEXT: v_writelane_b32 v1, s3, 1
363+
; HSA-TRAP-GFX1100-O0-NEXT: s_or_saveexec_b32 s6, -1
364+
; HSA-TRAP-GFX1100-O0-NEXT: scratch_store_b32 off, v1, off offset:4 ; 4-byte Folded Spill
365+
; HSA-TRAP-GFX1100-O0-NEXT: s_mov_b32 exec_lo, s6
366+
; HSA-TRAP-GFX1100-O0-NEXT: global_load_b32 v0, v0, s[0:1] glc dlc
367+
; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt vmcnt(0)
368+
; HSA-TRAP-GFX1100-O0-NEXT: scratch_store_b32 off, v0, off ; 4-byte Folded Spill
369+
; HSA-TRAP-GFX1100-O0-NEXT: s_cbranch_execnz .LBB2_2
370+
; HSA-TRAP-GFX1100-O0-NEXT: ; %bb.1:
371+
; HSA-TRAP-GFX1100-O0-NEXT: s_or_saveexec_b32 s6, -1
372+
; HSA-TRAP-GFX1100-O0-NEXT: scratch_load_b32 v0, off, off offset:4 ; 4-byte Folded Reload
373+
; HSA-TRAP-GFX1100-O0-NEXT: s_mov_b32 exec_lo, s6
374+
; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt vmcnt(0)
375+
; HSA-TRAP-GFX1100-O0-NEXT: v_readlane_b32 s0, v0, 0
376+
; HSA-TRAP-GFX1100-O0-NEXT: v_readlane_b32 s1, v0, 1
377+
; HSA-TRAP-GFX1100-O0-NEXT: scratch_load_b32 v1, off, off offset:8 ; 4-byte Folded Reload
378+
; HSA-TRAP-GFX1100-O0-NEXT: scratch_load_b32 v2, off, off ; 4-byte Folded Reload
379+
; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt vmcnt(0)
380+
; HSA-TRAP-GFX1100-O0-NEXT: global_store_b32 v1, v2, s[0:1] dlc
381+
; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt_vscnt null, 0x0
382+
; HSA-TRAP-GFX1100-O0-NEXT: ; kill: killed $vgpr0
383+
; HSA-TRAP-GFX1100-O0-NEXT: s_endpgm
384+
; HSA-TRAP-GFX1100-O0-NEXT: .LBB2_2:
385+
; HSA-TRAP-GFX1100-O0-NEXT: s_trap 2
386+
; HSA-TRAP-GFX1100-O0-NEXT: s_sendmsg_rtn_b32 s0, sendmsg(MSG_RTN_GET_DOORBELL)
387+
; HSA-TRAP-GFX1100-O0-NEXT: s_mov_b32 ttmp2, m0
388+
; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt lgkmcnt(0)
389+
; HSA-TRAP-GFX1100-O0-NEXT: s_and_b32 s0, s0, 0x3ff
390+
; HSA-TRAP-GFX1100-O0-NEXT: s_or_b32 s0, s0, 0x400
391+
; HSA-TRAP-GFX1100-O0-NEXT: s_mov_b32 m0, s0
392+
; HSA-TRAP-GFX1100-O0-NEXT: s_sendmsg sendmsg(MSG_INTERRUPT)
393+
; HSA-TRAP-GFX1100-O0-NEXT: s_mov_b32 m0, ttmp2
394+
; HSA-TRAP-GFX1100-O0-NEXT: .LBB2_3: ; =>This Inner Loop Header: Depth=1
395+
; HSA-TRAP-GFX1100-O0-NEXT: s_sethalt 5
396+
; HSA-TRAP-GFX1100-O0-NEXT: s_branch .LBB2_3
397+
%tmp = load volatile i32, ptr addrspace(1) %arg0
398+
call void @llvm.trap()
399+
store volatile i32 %tmp, ptr addrspace(1) %arg1
400+
ret void
401+
}
402+
267403
define amdgpu_kernel void @debugtrap(ptr addrspace(1) nocapture readonly %arg0) {
268404
; NOHSA-TRAP-GFX900-LABEL: debugtrap:
269405
; NOHSA-TRAP-GFX900: ; %bb.0:
@@ -334,6 +470,20 @@ define amdgpu_kernel void @debugtrap(ptr addrspace(1) nocapture readonly %arg0)
334470
; HSA-TRAP-GFX1100-NEXT: s_nop 0
335471
; HSA-TRAP-GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
336472
; HSA-TRAP-GFX1100-NEXT: s_endpgm
473+
;
474+
; HSA-TRAP-GFX1100-O0-LABEL: debugtrap:
475+
; HSA-TRAP-GFX1100-O0: ; %bb.0:
476+
; HSA-TRAP-GFX1100-O0-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
477+
; HSA-TRAP-GFX1100-O0-NEXT: v_mov_b32_e32 v0, 0
478+
; HSA-TRAP-GFX1100-O0-NEXT: v_mov_b32_e32 v1, 1
479+
; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt lgkmcnt(0)
480+
; HSA-TRAP-GFX1100-O0-NEXT: global_store_b32 v0, v1, s[0:1] dlc
481+
; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt_vscnt null, 0x0
482+
; HSA-TRAP-GFX1100-O0-NEXT: s_trap 3
483+
; HSA-TRAP-GFX1100-O0-NEXT: v_mov_b32_e32 v1, 2
484+
; HSA-TRAP-GFX1100-O0-NEXT: global_store_b32 v0, v1, s[0:1] dlc
485+
; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt_vscnt null, 0x0
486+
; HSA-TRAP-GFX1100-O0-NEXT: s_endpgm
337487
store volatile i32 1, ptr addrspace(1) %arg0
338488
call void @llvm.debugtrap()
339489
store volatile i32 2, ptr addrspace(1) %arg0

0 commit comments

Comments
 (0)