Skip to content

[AMDGPU] Introduce AMDGPU::SGPR_SPILL asm comment flag #67091

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,21 @@ void AMDGPUAsmPrinter::emitFunctionBodyEnd() {
Streamer.popSection();
}

void AMDGPUAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
Register RegNo = MI->getOperand(0).getReg();

SmallString<128> Str;
raw_svector_ostream OS(Str);
OS << "implicit-def: "
<< printReg(RegNo, MF->getSubtarget().getRegisterInfo());

if (MI->getAsmPrinterFlags() & AMDGPU::SGPR_SPILL)
OS << " : SGPR spill to VGPR lane";

OutStreamer->AddComment(OS.str());
OutStreamer->addBlankLine();
}

void AMDGPUAsmPrinter::emitFunctionEntryLabel() {
if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
AsmPrinter::emitFunctionEntryLabel();
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ class AMDGPUAsmPrinter final : public AsmPrinter {

void emitFunctionBodyEnd() override;

/// Emit the "implicit-def: <reg>" assembly comment for \p MI, appending
/// " : SGPR spill to VGPR lane" when the AMDGPU::SGPR_SPILL asm printer flag
/// is set on the instruction.
void emitImplicitDef(const MachineInstr *MI) const override;

void emitFunctionEntryLabel() override;

void emitBasicBlockStart(const MachineBasicBlock &MBB) override;
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1397,6 +1397,13 @@ namespace AMDGPU {

} // end namespace AMDGPU

namespace AMDGPU {
/// AMDGPU-specific asm printer comment flags. A flag is attached to a
/// MachineInstr with setAsmPrinterFlag() and tested against the value
/// returned by getAsmPrinterFlags().
enum AsmComments {
// For sgpr to vgpr spill instructions
// NOTE(review): presumably MachineInstr::TAsmComments is the first flag value
// reserved for target-defined comments - confirm against MachineInstr.h.
SGPR_SPILL = MachineInstr::TAsmComments
};
} // namespace AMDGPU

namespace SI {
namespace KernelInputOffsets {

Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,8 @@ void SILowerSGPRSpills::extendWWMVirtRegLiveness(MachineFunction &MF,
auto MIB = BuildMI(*SaveBlock, *InsertBefore, InsertBefore->getDebugLoc(),
TII->get(AMDGPU::IMPLICIT_DEF), Reg);
MFI->setFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG);
// Set SGPR_SPILL asm printer flag
MIB->setAsmPrinterFlag(AMDGPU::SGPR_SPILL);
if (LIS) {
LIS->InsertMachineInstrInMaps(*MIB);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) {
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b32 exec_lo, s4
; CHECK-NEXT: ; implicit-def: $vgpr8
; CHECK-NEXT: ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
; CHECK-NEXT: v_mov_b32_e32 v8, v0
; CHECK-NEXT: s_or_saveexec_b32 s21, -1
; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ define amdgpu_kernel void @test_loop(ptr addrspace(3) %ptr, i32 %n) nounwind {
; GCN_DBG-NEXT: s_mov_b32 s15, 0xe8f000
; GCN_DBG-NEXT: s_add_u32 s12, s12, s11
; GCN_DBG-NEXT: s_addc_u32 s13, s13, 0
; GCN_DBG-NEXT: ; implicit-def: $vgpr0
; GCN_DBG-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; GCN_DBG-NEXT: s_load_dword s0, s[4:5], 0x9
; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0
Expand Down Expand Up @@ -144,7 +144,7 @@ define amdgpu_kernel void @loop_const_true(ptr addrspace(3) %ptr, i32 %n) nounwi
; GCN_DBG-NEXT: s_mov_b32 s15, 0xe8f000
; GCN_DBG-NEXT: s_add_u32 s12, s12, s11
; GCN_DBG-NEXT: s_addc_u32 s13, s13, 0
; GCN_DBG-NEXT: ; implicit-def: $vgpr0
; GCN_DBG-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; GCN_DBG-NEXT: s_load_dword s0, s[4:5], 0x9
; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0
Expand Down Expand Up @@ -232,7 +232,7 @@ define amdgpu_kernel void @loop_const_false(ptr addrspace(3) %ptr, i32 %n) nounw
; GCN_DBG-NEXT: s_mov_b32 s15, 0xe8f000
; GCN_DBG-NEXT: s_add_u32 s12, s12, s11
; GCN_DBG-NEXT: s_addc_u32 s13, s13, 0
; GCN_DBG-NEXT: ; implicit-def: $vgpr0
; GCN_DBG-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; GCN_DBG-NEXT: s_load_dword s0, s[4:5], 0x9
; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0
Expand Down Expand Up @@ -321,7 +321,7 @@ define amdgpu_kernel void @loop_const_undef(ptr addrspace(3) %ptr, i32 %n) nounw
; GCN_DBG-NEXT: s_mov_b32 s15, 0xe8f000
; GCN_DBG-NEXT: s_add_u32 s12, s12, s11
; GCN_DBG-NEXT: s_addc_u32 s13, s13, 0
; GCN_DBG-NEXT: ; implicit-def: $vgpr0
; GCN_DBG-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; GCN_DBG-NEXT: s_load_dword s0, s[4:5], 0x9
; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0
Expand Down Expand Up @@ -422,7 +422,7 @@ define amdgpu_kernel void @loop_arg_0(ptr addrspace(3) %ptr, i32 %n) nounwind {
; GCN_DBG-NEXT: s_mov_b32 s15, 0xe8f000
; GCN_DBG-NEXT: s_add_u32 s12, s12, s11
; GCN_DBG-NEXT: s_addc_u32 s13, s13, 0
; GCN_DBG-NEXT: ; implicit-def: $vgpr0
; GCN_DBG-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; GCN_DBG-NEXT: s_load_dword s0, s[4:5], 0x9
; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ define amdgpu_kernel void @simple_nested_if(ptr addrspace(1) nocapture %arg) {
; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000
; GCN-O0-NEXT: s_add_u32 s12, s12, s11
; GCN-O0-NEXT: s_addc_u32 s13, s13, 0
; GCN-O0-NEXT: ; implicit-def: $vgpr1
; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
Expand Down Expand Up @@ -221,7 +221,7 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000
; GCN-O0-NEXT: s_add_u32 s12, s12, s11
; GCN-O0-NEXT: s_addc_u32 s13, s13, 0
; GCN-O0-NEXT: ; implicit-def: $vgpr1
; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
Expand Down Expand Up @@ -430,7 +430,7 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000
; GCN-O0-NEXT: s_add_u32 s12, s12, s11
; GCN-O0-NEXT: s_addc_u32 s13, s13, 0
; GCN-O0-NEXT: ; implicit-def: $vgpr1
; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
Expand Down Expand Up @@ -676,7 +676,7 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000
; GCN-O0-NEXT: s_add_u32 s12, s12, s11
; GCN-O0-NEXT: s_addc_u32 s13, s13, 0
; GCN-O0-NEXT: ; implicit-def: $vgpr1
; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
Expand Down Expand Up @@ -931,7 +931,7 @@ define amdgpu_kernel void @s_endpgm_unsafe_barrier(ptr addrspace(1) nocapture %a
; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000
; GCN-O0-NEXT: s_add_u32 s12, s12, s11
; GCN-O0-NEXT: s_addc_u32 s13, s13, 0
; GCN-O0-NEXT: ; implicit-def: $vgpr1
; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
Expand Down Expand Up @@ -1080,7 +1080,7 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b64 exec, s[4:5]
; GCN-O0-NEXT: ; implicit-def: $vgpr1
; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1
; GCN-O0-NEXT: s_waitcnt expcnt(1)
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, i32 %in) {
; FLAT_SCR_OPT-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
; FLAT_SCR_OPT-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
; FLAT_SCR_OPT-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
; FLAT_SCR_OPT-NEXT: ; implicit-def: $vgpr0
; FLAT_SCR_OPT-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; FLAT_SCR_OPT-NEXT: s_waitcnt lgkmcnt(0)
; FLAT_SCR_OPT-NEXT: v_writelane_b32 v0, s2, 0
; FLAT_SCR_OPT-NEXT: v_writelane_b32 v0, s3, 1
Expand Down Expand Up @@ -240,7 +240,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, i32 %in) {
; FLAT_SCR_ARCH-LABEL: test:
; FLAT_SCR_ARCH: ; %bb.0:
; FLAT_SCR_ARCH-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
; FLAT_SCR_ARCH-NEXT: ; implicit-def: $vgpr0
; FLAT_SCR_ARCH-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; FLAT_SCR_ARCH-NEXT: s_waitcnt lgkmcnt(0)
; FLAT_SCR_ARCH-NEXT: v_writelane_b32 v0, s2, 0
; FLAT_SCR_ARCH-NEXT: v_writelane_b32 v0, s3, 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ define amdgpu_kernel void @test_kernel(i32 %val) #0 {
; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
; CHECK-NEXT: s_add_u32 s0, s0, s17
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: ; implicit-def: $vgpr3
; CHECK-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
; CHECK-NEXT: v_writelane_b32 v3, s16, 0
; CHECK-NEXT: s_or_saveexec_b64 s[34:35], -1
; CHECK-NEXT: s_add_i32 s12, s33, 0x100200
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 {
; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
; W64-O0-NEXT: ; implicit-def: $vgpr5
; W64-O0-NEXT: ; implicit-def: $vgpr5 : SGPR spill to VGPR lane
; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; W64-O0-NEXT: v_mov_b32_e32 v5, v3
; W64-O0-NEXT: v_mov_b32_e32 v6, v2
Expand Down Expand Up @@ -497,7 +497,7 @@ define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, pt
; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
; W64-O0-NEXT: ; implicit-def: $vgpr13
; W64-O0-NEXT: ; implicit-def: $vgpr13 : SGPR spill to VGPR lane
; W64-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
; W64-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
; W64-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
Expand Down Expand Up @@ -1019,7 +1019,7 @@ define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr ad
; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
; W64-O0-NEXT: ; implicit-def: $vgpr8
; W64-O0-NEXT: ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
; W64-O0-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
; W64-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
; W64-O0-NEXT: v_mov_b32_e32 v8, v6
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ define float @mubuf_vgpr(ptr addrspace(8) %i, i32 %c) #0 {
; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
; W64-O0-NEXT: ; implicit-def: $vgpr5
; W64-O0-NEXT: ; implicit-def: $vgpr5 : SGPR spill to VGPR lane
; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; W64-O0-NEXT: v_mov_b32_e32 v6, v2
Expand Down Expand Up @@ -511,7 +511,7 @@ define void @mubuf_vgpr_adjacent_in_block(ptr addrspace(8) %i, ptr addrspace(8)
; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
; W64-O0-NEXT: ; implicit-def: $vgpr13
; W64-O0-NEXT: ; implicit-def: $vgpr13 : SGPR spill to VGPR lane
; W64-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
; W64-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
; W64-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
Expand Down Expand Up @@ -1058,7 +1058,7 @@ define void @mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j,
; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
; W64-O0-NEXT: ; implicit-def: $vgpr8
; W64-O0-NEXT: ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
; W64-O0-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
; W64-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
; W64-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ define amdgpu_kernel void @kernel_call() {
; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
; CHECK-NEXT: s_add_u32 s0, s0, s17
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: ; implicit-def: $vgpr3
; CHECK-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
; CHECK-NEXT: v_writelane_b32 v3, s16, 0
; CHECK-NEXT: s_or_saveexec_b64 s[24:25], -1
; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:4 ; 4-byte Folded Spill
Expand Down Expand Up @@ -139,7 +139,7 @@ define amdgpu_kernel void @kernel_tailcall() {
; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
; CHECK-NEXT: s_add_u32 s0, s0, s17
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: ; implicit-def: $vgpr3
; CHECK-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
; CHECK-NEXT: v_writelane_b32 v3, s16, 0
; CHECK-NEXT: s_or_saveexec_b64 s[24:25], -1
; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:4 ; 4-byte Folded Spill
Expand Down Expand Up @@ -260,7 +260,7 @@ define protected amdgpu_kernel void @kernel() {
; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
; CHECK-NEXT: s_add_u32 s0, s0, s17
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: ; implicit-def: $vgpr3
; CHECK-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
; CHECK-NEXT: v_writelane_b32 v3, s16, 0
; CHECK-NEXT: s_or_saveexec_b64 s[24:25], -1
; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:4 ; 4-byte Folded Spill
Expand Down
18 changes: 9 additions & 9 deletions llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(ptr addrspace(1) %out,
; GCN-NEXT: s_mov_b32 s95, 0xe8f000
; GCN-NEXT: s_add_u32 s92, s92, s11
; GCN-NEXT: s_addc_u32 s93, s93, 0
; GCN-NEXT: ; implicit-def: $vgpr0
; GCN-NEXT: ; implicit-def: $vgpr1
; GCN-NEXT: ; implicit-def: $vgpr2
; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; GCN-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
; GCN-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
; GCN-NEXT: s_load_dword s0, s[4:5], 0xb
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:11]
Expand Down Expand Up @@ -488,8 +488,8 @@ define amdgpu_kernel void @split_sgpr_spill_2_vgprs(ptr addrspace(1) %out, i32 %
; GCN-NEXT: s_mov_b32 s55, 0xe8f000
; GCN-NEXT: s_add_u32 s52, s52, s11
; GCN-NEXT: s_addc_u32 s53, s53, 0
; GCN-NEXT: ; implicit-def: $vgpr0
; GCN-NEXT: ; implicit-def: $vgpr1
; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; GCN-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
; GCN-NEXT: s_load_dword s0, s[4:5], 0xb
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:19]
Expand Down Expand Up @@ -738,8 +738,8 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(ptr addrspace(1) %out, i32 %
; GCN-NEXT: s_mov_b32 s55, 0xe8f000
; GCN-NEXT: s_add_u32 s52, s52, s11
; GCN-NEXT: s_addc_u32 s53, s53, 0
; GCN-NEXT: ; implicit-def: $vgpr0
; GCN-NEXT: ; implicit-def: $vgpr0
; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; GCN-NEXT: s_load_dword s0, s[4:5], 0xb
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
Expand Down Expand Up @@ -989,8 +989,8 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 {
; GCN-NEXT: s_mov_b32 s55, 0xe8f000
; GCN-NEXT: s_add_u32 s52, s52, s11
; GCN-NEXT: s_addc_u32 s53, s53, 0
; GCN-NEXT: ; implicit-def: $vgpr0
; GCN-NEXT: ; implicit-def: $vgpr0
; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; GCN-NEXT: s_load_dword s0, s[4:5], 0x9
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
define amdgpu_kernel void @kernel0(ptr addrspace(1) %out, i32 %in) #1 {
; CHECK-LABEL: kernel0:
; CHECK: ; %bb.0:
; CHECK-NEXT: ; implicit-def: $vgpr23
; CHECK-NEXT: ; implicit-def: $vgpr23 : SGPR spill to VGPR lane
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: ;;#ASMSTART
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ define amdgpu_kernel void @partial_no_vgprs_last_sgpr_spill(ptr addrspace(1) %ou
; GCN: ; %bb.0:
; GCN-NEXT: s_add_u32 s0, s0, s15
; GCN-NEXT: s_addc_u32 s1, s1, 0
; GCN-NEXT: ; implicit-def: $vgpr0
; GCN-NEXT: ; implicit-def: $vgpr0
; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; GCN-NEXT: s_load_dword s4, s[8:9], 0x2
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ define amdgpu_kernel void @kernel() {
; GCN-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
; GCN-NEXT: s_mov_b32 s38, -1
; GCN-NEXT: s_mov_b32 s39, 0xe00000
; GCN-NEXT: ; implicit-def: $vgpr3
; GCN-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
; GCN-NEXT: s_add_u32 s36, s36, s11
; GCN-NEXT: v_writelane_b32 v3, s4, 0
; GCN-NEXT: s_movk_i32 s32, 0x400
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -715,7 +715,7 @@ define void @spill_sgpr_with_sgpr_uses() #0 {
; GCN-NEXT: buffer_store_dword v251, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v252, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v253, off, s[0:3], s32 ; 4-byte Folded Spill
; GCN-NEXT: ; implicit-def: $vgpr0
; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:440
; GCN-NEXT: s_waitcnt vmcnt(0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ define void @test() {
; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
; CHECK-NEXT: ; implicit-def: $vgpr0
; CHECK-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; CHECK-NEXT: .LBB0_1: ; %bb.1
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: s_cbranch_scc1 .LBB0_3
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/swdev380865.ll
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ define amdgpu_kernel void @_Z6kernelILi4000ELi1EEvPd(ptr addrspace(1) %x.coerce)
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_mov_b64 s[0:1], 0
; CHECK-NEXT: s_load_dword s2, s[0:1], 0x0
; CHECK-NEXT: ; implicit-def: $vgpr2
; CHECK-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
; CHECK-NEXT: ; kill: killed $sgpr0_sgpr1
; CHECK-NEXT: s_mov_b32 s7, 0x401c0000
; CHECK-NEXT: s_mov_b32 s5, 0x40280000
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ define amdgpu_kernel void @__omp_offloading_16_dd2df_main_l9() {
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_add_u32 s0, s0, s15
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: ; implicit-def: $vgpr1
; CHECK-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
; CHECK-NEXT: v_mov_b32_e32 v2, v0
; CHECK-NEXT: s_or_saveexec_b64 s[8:9], -1
; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload
Expand Down
Loading