Skip to content

Commit 7425af4

Browse files
authored
AMDGPU: Add pseudoinstruction for agpr or vgpr constants (#130042)
1 parent 2e53856 commit 7425af4

File tree

9 files changed

+590
-1
lines changed

9 files changed

+590
-1
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1337,6 +1337,7 @@ bool SIInstrInfo::getConstValDefinedInReg(const MachineInstr &MI,
13371337
case AMDGPU::S_MOV_B64:
13381338
case AMDGPU::V_MOV_B64_e32:
13391339
case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
1340+
case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
13401341
case AMDGPU::S_MOV_B64_IMM_PSEUDO:
13411342
case AMDGPU::V_MOV_B64_PSEUDO: {
13421343
const MachineOperand &Src0 = MI.getOperand(1);
@@ -2186,7 +2187,13 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
21862187
MI.getMF()->getRegInfo().constrainRegClass(MI.getOperand(0).getReg(),
21872188
&AMDGPU::SReg_32_XM0RegClass);
21882189
break;
2189-
2190+
case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
2191+
Register Dst = MI.getOperand(0).getReg();
2192+
bool IsAGPR = SIRegisterInfo::isAGPRClass(RI.getPhysRegBaseClass(Dst));
2193+
MI.setDesc(
2194+
get(IsAGPR ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::V_MOV_B32_e32));
2195+
break;
2196+
}
21902197
case AMDGPU::V_MOV_B64_PSEUDO: {
21912198
Register Dst = MI.getOperand(0).getReg();
21922199
Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
@@ -3423,6 +3430,7 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
34233430
case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
34243431
case AMDGPU::V_ACCVGPR_READ_B32_e64:
34253432
case AMDGPU::V_ACCVGPR_MOV_B32:
3433+
case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
34263434
return true;
34273435
default:
34283436
return false;

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,25 @@ def V_MOV_B64_PSEUDO : VPseudoInstSI <(outs VReg_64:$vdst),
140140
let UseNamedOperandTable = 1;
141141
}
142142

143+
// 32-bit materialize immediate which supports AGPR or VGPR. Typically
144+
// this should just expand to V_MOV_B32, unless $vdst happens to be
145+
// allocated to an AGPR in which case it will lower to
146+
// V_ACCVGPR_WRITE_B32. This should always use an inline immediate
147+
// operand, as v_accvgpr_write_b32 does not support literal constants.
148+
def AV_MOV_B32_IMM_PSEUDO
149+
: VPseudoInstSI<(outs AV_32:$vdst), (ins VCSrc_b32:$src0)> {
150+
let isReMaterializable = 1;
151+
let isAsCheapAsAMove = 1;
152+
153+
// Imprecise, technically if AGPR it's VOP3 and VOP1 for VGPR. But
154+
// this tricks the rematerialize logic into working for it.
155+
let VOP3 = 1;
156+
let isMoveImm = 1;
157+
let SchedRW = [Write32Bit];
158+
let Size = 4;
159+
let UseNamedOperandTable = 1;
160+
}
161+
143162
// 64-bit vector move with dpp. Expanded post-RA.
144163
def V_MOV_B64_DPP_PSEUDO : VOP_DPP_Pseudo <"v_mov_b64_dpp", VOP_I64_I64> {
145164
let Size = 16; // Requires two 8-byte v_mov_b32_dpp to complete.
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=postrapseudos %s -o - | FileCheck %s
3+
# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -run-pass=postrapseudos %s -o - | FileCheck %s
4+
5+
---
6+
name: av_mov_b32_imm_pseudo_agpr_0
7+
tracksRegLiveness: true
8+
body: |
9+
bb.0:
10+
; CHECK-LABEL: name: av_mov_b32_imm_pseudo_agpr_0
11+
; CHECK: $agpr0 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
12+
$agpr0 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
13+
...
14+
15+
---
16+
name: av_mov_b32_imm_pseudo_agpr_64
17+
tracksRegLiveness: true
18+
body: |
19+
bb.0:
20+
; CHECK-LABEL: name: av_mov_b32_imm_pseudo_agpr_64
21+
; CHECK: $agpr0 = V_ACCVGPR_WRITE_B32_e64 64, implicit $exec
22+
$agpr0 = AV_MOV_B32_IMM_PSEUDO 64, implicit $exec
23+
...
24+
25+
---
26+
name: av_mov_b32_imm_pseudo_vgpr_0
27+
tracksRegLiveness: true
28+
body: |
29+
bb.0:
30+
; CHECK-LABEL: name: av_mov_b32_imm_pseudo_vgpr_0
31+
; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
32+
$vgpr0 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
33+
...
34+
35+
---
36+
name: av_mov_b32_imm_pseudo_vgpr_64
37+
tracksRegLiveness: true
38+
body: |
39+
bb.0:
40+
; CHECK-LABEL: name: av_mov_b32_imm_pseudo_vgpr_64
41+
; CHECK: $vgpr0 = V_MOV_B32_e32 64, implicit $exec
42+
$vgpr0 = AV_MOV_B32_IMM_PSEUDO 64, implicit $exec
43+
...
44+
45+
---
46+
name: av_mov_b32_imm_pseudo_agpr_vgpr
47+
tracksRegLiveness: true
48+
body: |
49+
bb.0:
50+
liveins: $vgpr0
51+
; CHECK-LABEL: name: av_mov_b32_imm_pseudo_agpr_vgpr
52+
; CHECK: liveins: $vgpr0
53+
; CHECK-NEXT: {{ $}}
54+
; CHECK-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec
55+
$agpr1 = AV_MOV_B32_IMM_PSEUDO $vgpr0, implicit $exec
56+
...

llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,3 +109,128 @@ body: |
109109
S_ENDPGM 0
110110
111111
...
112+
113+
---
114+
name: av_mov_b32_imm_pseudo_copy_av_32_to_physreg_agpr
115+
tracksRegLiveness: true
116+
body: |
117+
bb.0:
118+
; GCN-LABEL: name: av_mov_b32_imm_pseudo_copy_av_32_to_physreg_agpr
119+
; GCN: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
120+
; GCN-NEXT: $agpr0 = COPY [[AV_MOV_]]
121+
; GCN-NEXT: S_ENDPGM 0, implicit $agpr0
122+
%0:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
123+
$agpr0 = COPY %0
124+
S_ENDPGM 0, implicit $agpr0
125+
126+
...
127+
128+
---
129+
name: av_mov_b32_imm_pseudo_copy_av_32_to_physreg_vgpr
130+
tracksRegLiveness: true
131+
body: |
132+
bb.0:
133+
; GCN-LABEL: name: av_mov_b32_imm_pseudo_copy_av_32_to_physreg_vgpr
134+
; GCN: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
135+
; GCN-NEXT: $vgpr0 = COPY [[AV_MOV_]]
136+
; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0
137+
%0:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
138+
$vgpr0 = COPY %0
139+
S_ENDPGM 0, implicit $vgpr0
140+
141+
...
142+
143+
---
144+
name: av_mov_b32_imm_pseudo_copy_av_32_to_virtreg_agpr
145+
tracksRegLiveness: true
146+
body: |
147+
bb.0:
148+
; GCN-LABEL: name: av_mov_b32_imm_pseudo_copy_av_32_to_virtreg_agpr
149+
; GCN: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
150+
; GCN-NEXT: S_ENDPGM 0, implicit [[V_ACCVGPR_WRITE_B32_e64_]]
151+
%0:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
152+
%1:agpr_32 = COPY %0
153+
S_ENDPGM 0, implicit %1
154+
155+
...
156+
157+
---
158+
name: av_mov_b32_imm_pseudo_copy_av_32_to_virtreg_vgpr
159+
tracksRegLiveness: true
160+
body: |
161+
bb.0:
162+
; GCN-LABEL: name: av_mov_b32_imm_pseudo_copy_av_32_to_virtreg_vgpr
163+
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
164+
; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
165+
%0:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
166+
%1:vgpr_32 = COPY %0
167+
S_ENDPGM 0, implicit %1
168+
169+
...
170+
171+
---
172+
name: v_mov_b32_imm_literal_copy_v_to_agpr_32
173+
tracksRegLiveness: true
174+
body: |
175+
bb.0:
176+
; GCN-LABEL: name: v_mov_b32_imm_literal_copy_v_to_agpr_32
177+
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
178+
; GCN-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY [[V_MOV_B32_e32_]]
179+
; GCN-NEXT: $agpr0 = COPY [[COPY]]
180+
; GCN-NEXT: S_ENDPGM 0
181+
%0:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
182+
%1:agpr_32 = COPY %0
183+
$agpr0 = COPY %1
184+
S_ENDPGM 0
185+
186+
...
187+
188+
# FIXME: Register class restrictions of av register not respected,
189+
# issue 130020
190+
191+
# ---
192+
# name: s_mov_b32_inlineimm_copy_s_to_av_32
193+
# tracksRegLiveness: true
194+
# body: |
195+
# bb.0:
196+
# %0:sreg_32 = S_MOV_B32 32
197+
# %1:av_32 = COPY %0
198+
# $agpr0 = COPY %1
199+
# S_ENDPGM 0
200+
201+
# ...
202+
203+
# ---
204+
# name: v_mov_b32_inlineimm_copy_v_to_av_32
205+
# tracksRegLiveness: true
206+
# body: |
207+
# bb.0:
208+
# %0:vgpr_32 = V_MOV_B32_e32 32, implicit $exec
209+
# %1:av_32 = COPY %0
210+
# $agpr0 = COPY %1
211+
# S_ENDPGM 0
212+
# ...
213+
214+
# ---
215+
# name: s_mov_b32_imm_literal_copy_s_to_av_32
216+
# tracksRegLiveness: true
217+
# body: |
218+
# bb.0:
219+
# %0:sreg_32 = S_MOV_B32 999
220+
# %1:av_32 = COPY %0
221+
# $agpr0 = COPY %1
222+
# S_ENDPGM 0
223+
224+
# ...
225+
226+
# ---
227+
# name: v_mov_b32_imm_literal_copy_v_to_av_32
228+
# tracksRegLiveness: true
229+
# body: |
230+
# bb.0:
231+
# %0:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
232+
# %1:av_32 = COPY %0
233+
# $agpr0 = COPY %1
234+
# S_ENDPGM 0
235+
236+
# ...
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -simplify-mir -start-before=greedy,2 -stress-regalloc=3 -stop-after=postrapseudos -o - -verify-regalloc %s | FileCheck %s
3+
4+
# Compare results of using V_MOV_B32 vs. AV_MOV_B32_IMM_PSEUDO during
5+
# allocation.
6+
7+
---
8+
name: av_mov_b32_split
9+
tracksRegLiveness: true
10+
machineFunctionInfo:
11+
isEntryFunction: true
12+
scratchRSrcReg: '$sgpr72_sgpr73_sgpr74_sgpr75'
13+
stackPtrOffsetReg: '$sgpr32'
14+
occupancy: 7
15+
body: |
16+
bb.0:
17+
liveins: $vgpr0, $sgpr4_sgpr5
18+
19+
; CHECK-LABEL: name: av_mov_b32_split
20+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5
21+
; CHECK-NEXT: {{ $}}
22+
; CHECK-NEXT: renamable $agpr0 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
23+
; CHECK-NEXT: renamable $agpr1 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec
24+
; CHECK-NEXT: renamable $agpr2 = V_ACCVGPR_WRITE_B32_e64 2, implicit $exec
25+
; CHECK-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
26+
; CHECK-NEXT: renamable $agpr0 = V_ACCVGPR_WRITE_B32_e64 3, implicit $exec
27+
; CHECK-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
28+
; CHECK-NEXT: renamable $agpr0 = V_ACCVGPR_WRITE_B32_e64 4, implicit $exec
29+
; CHECK-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
30+
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec
31+
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr0
32+
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr1
33+
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr2
34+
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec
35+
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr0
36+
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec
37+
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr0
38+
%0:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
39+
%1:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 1, implicit $exec
40+
%2:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 2, implicit $exec
41+
%3:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 3, implicit $exec
42+
%4:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 4, implicit $exec
43+
44+
%5:agpr_32 = COPY %0
45+
%6:agpr_32 = COPY %1
46+
%7:agpr_32 = COPY %2
47+
%8:agpr_32 = COPY %3
48+
%9:agpr_32 = COPY %4
49+
50+
S_NOP 0, implicit %5
51+
S_NOP 0, implicit %6
52+
S_NOP 0, implicit %7
53+
S_NOP 0, implicit %8
54+
S_NOP 0, implicit %9
55+
56+
...
57+
58+
---
59+
name: v_mov_b32_split
60+
tracksRegLiveness: true
61+
machineFunctionInfo:
62+
isEntryFunction: true
63+
scratchRSrcReg: '$sgpr72_sgpr73_sgpr74_sgpr75'
64+
stackPtrOffsetReg: '$sgpr32'
65+
occupancy: 7
66+
body: |
67+
bb.0:
68+
liveins: $vgpr0, $sgpr4_sgpr5
69+
70+
; CHECK-LABEL: name: v_mov_b32_split
71+
; CHECK: liveins: $vgpr0, $vgpr3, $vgpr4, $vgpr5, $sgpr4_sgpr5
72+
; CHECK-NEXT: {{ $}}
73+
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
74+
; CHECK-NEXT: renamable $vgpr1 = V_MOV_B32_e32 1, implicit $exec
75+
; CHECK-NEXT: renamable $vgpr2 = V_MOV_B32_e32 2, implicit $exec
76+
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec
77+
; CHECK-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
78+
; CHECK-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $exec
79+
; CHECK-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $exec
80+
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 3, implicit $exec
81+
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec
82+
; CHECK-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
83+
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 4, implicit $exec
84+
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec
85+
; CHECK-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
86+
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec
87+
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr0
88+
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr1
89+
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr2
90+
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec
91+
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr0
92+
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec
93+
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr0
94+
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
95+
%1:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
96+
%2:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
97+
%3:vgpr_32 = V_MOV_B32_e32 3, implicit $exec
98+
%4:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
99+
100+
%5:agpr_32 = COPY %0
101+
%6:agpr_32 = COPY %1
102+
%7:agpr_32 = COPY %2
103+
%8:agpr_32 = COPY %3
104+
%9:agpr_32 = COPY %4
105+
106+
S_NOP 0, implicit %5
107+
S_NOP 0, implicit %6
108+
S_NOP 0, implicit %7
109+
S_NOP 0, implicit %8
110+
S_NOP 0, implicit %9
111+
112+
...
113+

0 commit comments

Comments
 (0)