Skip to content

Commit 364b97f

Browse files
authored
[AMDGPU][True16][CodeGen] 16bit spill support in true16 mode (llvm#128060)
Enables 16-bit values to be spilled to scratch. Note that the memory instructions used are defined as reading and writing VGPR_32, but they do not clobber the unspecified 16 bits of those registers, so spills and reloads of the lo and hi halves of the registers work.
1 parent 7b6abd8 commit 364b97f

File tree

6 files changed

+615
-4
lines changed

6 files changed

+615
-4
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1580,6 +1580,8 @@ static unsigned getSGPRSpillSaveOpcode(unsigned Size) {
15801580

15811581
static unsigned getVGPRSpillSaveOpcode(unsigned Size) {
15821582
switch (Size) {
1583+
case 2:
1584+
return AMDGPU::SI_SPILL_V16_SAVE;
15831585
case 4:
15841586
return AMDGPU::SI_SPILL_V32_SAVE;
15851587
case 8:
@@ -1807,6 +1809,8 @@ static unsigned getSGPRSpillRestoreOpcode(unsigned Size) {
18071809

18081810
static unsigned getVGPRSpillRestoreOpcode(unsigned Size) {
18091811
switch (Size) {
1812+
case 2:
1813+
return AMDGPU::SI_SPILL_V16_RESTORE;
18101814
case 4:
18111815
return AMDGPU::SI_SPILL_V32_RESTORE;
18121816
case 8:

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1003,6 +1003,7 @@ multiclass SI_SPILL_VGPR <RegisterClass vgpr_class, bit UsesTmp = 0> {
10031003
} // End UseNamedOperandTable = 1, Spill = 1, VALU = 1, SchedRW = [WriteVMEM]
10041004
}
10051005

1006+
defm SI_SPILL_V16 : SI_SPILL_VGPR <VGPR_16>;
10061007
defm SI_SPILL_V32 : SI_SPILL_VGPR <VGPR_32>;
10071008
defm SI_SPILL_V64 : SI_SPILL_VGPR <VReg_64>;
10081009
defm SI_SPILL_V96 : SI_SPILL_VGPR <VReg_96>;

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1280,6 +1280,8 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
12801280
case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
12811281
case AMDGPU::SI_SPILL_WWM_AV32_SAVE:
12821282
case AMDGPU::SI_SPILL_WWM_AV32_RESTORE:
1283+
case AMDGPU::SI_SPILL_V16_SAVE:
1284+
case AMDGPU::SI_SPILL_V16_RESTORE:
12831285
return 1;
12841286
default: llvm_unreachable("Invalid spill opcode");
12851287
}
@@ -2350,6 +2352,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
23502352
case AMDGPU::SI_SPILL_V96_SAVE:
23512353
case AMDGPU::SI_SPILL_V64_SAVE:
23522354
case AMDGPU::SI_SPILL_V32_SAVE:
2355+
case AMDGPU::SI_SPILL_V16_SAVE:
23532356
case AMDGPU::SI_SPILL_A1024_SAVE:
23542357
case AMDGPU::SI_SPILL_A512_SAVE:
23552358
case AMDGPU::SI_SPILL_A384_SAVE:
@@ -2390,8 +2393,15 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
23902393
assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
23912394
MFI->getStackPtrOffsetReg());
23922395

2393-
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
2394-
: AMDGPU::BUFFER_STORE_DWORD_OFFSET;
2396+
unsigned Opc;
2397+
if (MI->getOpcode() == AMDGPU::SI_SPILL_V16_SAVE) {
2398+
assert(ST.enableFlatScratch() && "Flat Scratch is not enabled!");
2399+
Opc = AMDGPU::SCRATCH_STORE_SHORT_SADDR_t16;
2400+
} else {
2401+
Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
2402+
: AMDGPU::BUFFER_STORE_DWORD_OFFSET;
2403+
}
2404+
23952405
auto *MBB = MI->getParent();
23962406
bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());
23972407
if (IsWWMRegSpill) {
@@ -2409,6 +2419,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
24092419
MI->eraseFromParent();
24102420
return true;
24112421
}
2422+
case AMDGPU::SI_SPILL_V16_RESTORE:
24122423
case AMDGPU::SI_SPILL_V32_RESTORE:
24132424
case AMDGPU::SI_SPILL_V64_RESTORE:
24142425
case AMDGPU::SI_SPILL_V96_RESTORE:
@@ -2458,8 +2469,14 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
24582469
assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
24592470
MFI->getStackPtrOffsetReg());
24602471

2461-
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
2462-
: AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
2472+
unsigned Opc;
2473+
if (MI->getOpcode() == AMDGPU::SI_SPILL_V16_RESTORE) {
2474+
assert(ST.enableFlatScratch() && "Flat Scratch is not enabled!");
2475+
Opc = AMDGPU::SCRATCH_LOAD_SHORT_D16_SADDR_t16;
2476+
} else {
2477+
Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
2478+
: AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
2479+
}
24632480
auto *MBB = MI->getParent();
24642481
bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());
24652482
if (IsWWMRegSpill) {
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -march=amdgcn -verify-machineinstrs -mcpu=gfx1100 -mattr=+real-true16 -run-pass=prologepilog -o - %s | FileCheck -check-prefix=EXPANDED %s
3+
4+
---
5+
name: spill_restore_vgpr16
6+
tracksRegLiveness: true
7+
stack:
8+
- { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 }
9+
- { id: 1, name: '', type: spill-slot, offset: 4, size: 4, alignment: 4 }
10+
machineFunctionInfo:
11+
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
12+
stackPtrOffsetReg: $sgpr32
13+
hasSpilledVGPRs: true
14+
body: |
15+
; EXPANDED-LABEL: name: spill_restore_vgpr16
16+
; EXPANDED: bb.0:
17+
; EXPANDED-NEXT: successors: %bb.1(0x80000000)
18+
; EXPANDED-NEXT: {{ $}}
19+
; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16
20+
; EXPANDED-NEXT: SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.1, align 4, addrspace 5)
21+
; EXPANDED-NEXT: S_NOP 0, implicit renamable $vgpr0_lo16
22+
; EXPANDED-NEXT: SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.0, align 4, addrspace 5)
23+
; EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
24+
; EXPANDED-NEXT: {{ $}}
25+
; EXPANDED-NEXT: bb.1:
26+
; EXPANDED-NEXT: successors: %bb.2(0x80000000)
27+
; EXPANDED-NEXT: {{ $}}
28+
; EXPANDED-NEXT: S_NOP 1
29+
; EXPANDED-NEXT: {{ $}}
30+
; EXPANDED-NEXT: bb.2:
31+
; EXPANDED-NEXT: $vgpr0_lo16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.0, align 4, addrspace 5)
32+
; EXPANDED-NEXT: $vgpr0_hi16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.1, align 4, addrspace 5)
33+
; EXPANDED-NEXT: S_NOP 0, implicit killed renamable $vgpr0_lo16, implicit killed renamable $vgpr0_hi16
34+
bb.0:
35+
successors: %bb.1(0x80000000)
36+
S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16
37+
SI_SPILL_V16_SAVE killed $vgpr0_hi16, %stack.1, $sgpr32, 0, implicit $exec :: (store (s16) into %stack.1, addrspace 5)
38+
S_NOP 0, implicit renamable $vgpr0_lo16
39+
SI_SPILL_V16_SAVE killed $vgpr0_lo16, %stack.0, $sgpr32, 0, implicit $exec :: (store (s16) into %stack.0, addrspace 5)
40+
S_CBRANCH_SCC1 %bb.1, implicit undef $scc
41+
bb.1:
42+
successors: %bb.2(0x80000000)
43+
S_NOP 1
44+
bb.2:
45+
$vgpr0_lo16 = SI_SPILL_V16_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s16) from %stack.0, addrspace 5)
46+
$vgpr0_hi16 = SI_SPILL_V16_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s16) from %stack.1, addrspace 5)
47+
S_NOP 0, implicit killed renamable $vgpr0_lo16, implicit killed renamable $vgpr0_hi16
48+
...
49+
50+
---
51+
name: spill_restore_vgpr16_middle_of_block
52+
tracksRegLiveness: true
53+
stack:
54+
- { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 }
55+
- { id: 1, name: '', type: spill-slot, offset: 4, size: 4, alignment: 4 }
56+
machineFunctionInfo:
57+
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
58+
stackPtrOffsetReg: $sgpr32
59+
hasSpilledVGPRs: true
60+
body: |
61+
; EXPANDED-LABEL: name: spill_restore_vgpr16_middle_of_block
62+
; EXPANDED: bb.0:
63+
; EXPANDED-NEXT: successors: %bb.1(0x80000000)
64+
; EXPANDED-NEXT: {{ $}}
65+
; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16
66+
; EXPANDED-NEXT: SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.1, align 4, addrspace 5)
67+
; EXPANDED-NEXT: S_NOP 0, implicit renamable $vgpr0_lo16
68+
; EXPANDED-NEXT: SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.0, align 4, addrspace 5)
69+
; EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
70+
; EXPANDED-NEXT: {{ $}}
71+
; EXPANDED-NEXT: bb.1:
72+
; EXPANDED-NEXT: successors: %bb.2(0x80000000)
73+
; EXPANDED-NEXT: {{ $}}
74+
; EXPANDED-NEXT: S_NOP 1
75+
; EXPANDED-NEXT: {{ $}}
76+
; EXPANDED-NEXT: bb.2:
77+
; EXPANDED-NEXT: S_NOP 1
78+
; EXPANDED-NEXT: $vgpr0_lo16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.0, align 4, addrspace 5)
79+
; EXPANDED-NEXT: $vgpr0_hi16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.1, align 4, addrspace 5)
80+
; EXPANDED-NEXT: S_NOP 0, implicit killed renamable $vgpr0_lo16, implicit killed renamable $vgpr0_hi16
81+
bb.0:
82+
successors: %bb.1(0x80000000)
83+
S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16
84+
SI_SPILL_V16_SAVE killed $vgpr0_hi16, %stack.1, $sgpr32, 0, implicit $exec :: (store (s16) into %stack.1, addrspace 5)
85+
S_NOP 0, implicit renamable $vgpr0_lo16
86+
SI_SPILL_V16_SAVE killed $vgpr0_lo16, %stack.0, $sgpr32, 0, implicit $exec :: (store (s16) into %stack.0, addrspace 5)
87+
S_CBRANCH_SCC1 %bb.1, implicit undef $scc
88+
bb.1:
89+
successors: %bb.2(0x80000000)
90+
S_NOP 1
91+
bb.2:
92+
S_NOP 1
93+
$vgpr0_lo16 = SI_SPILL_V16_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s16) from %stack.0, addrspace 5)
94+
$vgpr0_hi16 = SI_SPILL_V16_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s16) from %stack.1, addrspace 5)
95+
S_NOP 0, implicit killed renamable $vgpr0_lo16, implicit killed renamable $vgpr0_hi16
96+
...
97+
98+
---
99+
name: spill_restore_vgpr16_end_of_block
100+
tracksRegLiveness: true
101+
stack:
102+
- { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 }
103+
- { id: 1, name: '', type: spill-slot, offset: 4, size: 4, alignment: 4 }
104+
machineFunctionInfo:
105+
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
106+
stackPtrOffsetReg: $sgpr32
107+
hasSpilledVGPRs: true
108+
body: |
109+
; EXPANDED-LABEL: name: spill_restore_vgpr16_end_of_block
110+
; EXPANDED: bb.0:
111+
; EXPANDED-NEXT: successors: %bb.1(0x80000000)
112+
; EXPANDED-NEXT: {{ $}}
113+
; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16
114+
; EXPANDED-NEXT: SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.1, align 4, addrspace 5)
115+
; EXPANDED-NEXT: S_NOP 0, implicit renamable $vgpr0_lo16
116+
; EXPANDED-NEXT: SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.0, align 4, addrspace 5)
117+
; EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
118+
; EXPANDED-NEXT: {{ $}}
119+
; EXPANDED-NEXT: bb.1:
120+
; EXPANDED-NEXT: successors: %bb.2(0x80000000)
121+
; EXPANDED-NEXT: {{ $}}
122+
; EXPANDED-NEXT: S_NOP 1
123+
; EXPANDED-NEXT: {{ $}}
124+
; EXPANDED-NEXT: bb.2:
125+
; EXPANDED-NEXT: $vgpr0_lo16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.0, align 4, addrspace 5)
126+
; EXPANDED-NEXT: $vgpr0_hi16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.1, align 4, addrspace 5)
127+
bb.0:
128+
successors: %bb.1(0x80000000)
129+
S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16
130+
SI_SPILL_V16_SAVE killed $vgpr0_hi16, %stack.1, $sgpr32, 0, implicit $exec :: (store (s16) into %stack.1, addrspace 5)
131+
S_NOP 0, implicit renamable $vgpr0_lo16
132+
SI_SPILL_V16_SAVE killed $vgpr0_lo16, %stack.0, $sgpr32, 0, implicit $exec :: (store (s16) into %stack.0, addrspace 5)
133+
S_CBRANCH_SCC1 %bb.1, implicit undef $scc
134+
bb.1:
135+
successors: %bb.2(0x80000000)
136+
S_NOP 1
137+
bb.2:
138+
$vgpr0_lo16 = SI_SPILL_V16_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s16) from %stack.0, addrspace 5)
139+
$vgpr0_hi16 = SI_SPILL_V16_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s16) from %stack.1, addrspace 5)
140+
...

0 commit comments

Comments
 (0)