Skip to content

Commit 12fe9b2

Browse files
committed
AMDGPU/GlobalISel: Select G_SEXT_INREG
1 parent 0693e82 commit 12fe9b2

File tree

4 files changed

+341
-24
lines changed

4 files changed

+341
-24
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1271,15 +1271,17 @@ const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(
12711271
}
12721272

12731273
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
1274-
bool Signed = I.getOpcode() == AMDGPU::G_SEXT;
1274+
bool InReg = I.getOpcode() == AMDGPU::G_SEXT_INREG;
1275+
bool Signed = I.getOpcode() == AMDGPU::G_SEXT || InReg;
12751276
const DebugLoc &DL = I.getDebugLoc();
12761277
MachineBasicBlock &MBB = *I.getParent();
12771278
const Register DstReg = I.getOperand(0).getReg();
12781279
const Register SrcReg = I.getOperand(1).getReg();
12791280

12801281
const LLT DstTy = MRI->getType(DstReg);
12811282
const LLT SrcTy = MRI->getType(SrcReg);
1282-
const unsigned SrcSize = SrcTy.getSizeInBits();
1283+
const unsigned SrcSize = I.getOpcode() == AMDGPU::G_SEXT_INREG ?
1284+
I.getOperand(2).getImm() : SrcTy.getSizeInBits();
12831285
const unsigned DstSize = DstTy.getSizeInBits();
12841286
if (!DstTy.isScalar())
12851287
return false;
@@ -1315,7 +1317,9 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
13151317
}
13161318

13171319
if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
1318-
if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, *MRI))
1320+
const TargetRegisterClass &SrcRC = InReg && DstSize > 32 ?
1321+
AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;
1322+
if (!RBI.constrainGenericRegister(SrcReg, SrcRC, *MRI))
13191323
return false;
13201324

13211325
if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
@@ -1331,13 +1335,15 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
13311335
const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
13321336

13331337
// Scalar BFE is encoded as S1[5:0] = offset, S1[22:16]= width.
1334-
if (DstSize > 32 && SrcSize <= 32) {
1338+
if (DstSize > 32 && (SrcSize <= 32 || InReg)) {
13351339
// We need a 64-bit register source, but the high bits don't matter.
13361340
Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
13371341
Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
1342+
unsigned SubReg = InReg ? AMDGPU::sub0 : 0;
1343+
13381344
BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
13391345
BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
1340-
.addReg(SrcReg)
1346+
.addReg(SrcReg, 0, SubReg)
13411347
.addImm(AMDGPU::sub0)
13421348
.addReg(UndefReg)
13431349
.addImm(AMDGPU::sub1);
@@ -1956,6 +1962,7 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
19561962
case TargetOpcode::G_SEXT:
19571963
case TargetOpcode::G_ZEXT:
19581964
case TargetOpcode::G_ANYEXT:
1965+
case TargetOpcode::G_SEXT_INREG:
19591966
if (selectImpl(I, *CoverageInfo))
19601967
return true;
19611968
return selectG_SZA_EXT(I);
Lines changed: 323 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,323 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck -check-prefix=GCN %s
3+
4+
---
5+
6+
name: sext_inreg_sgpr_s32_1
7+
legalized: true
8+
regBankSelected: true
9+
body: |
10+
bb.0:
11+
liveins: $sgpr0
12+
13+
; GCN-LABEL: name: sext_inreg_sgpr_s32_1
14+
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
15+
; GCN: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[COPY]], 65536, implicit-def $scc
16+
; GCN: $sgpr0 = COPY [[S_BFE_I32_]]
17+
%0:sgpr(s32) = COPY $sgpr0
18+
%1:sgpr(s32) = G_SEXT_INREG %0, 1
19+
$sgpr0 = COPY %1
20+
...
21+
22+
---
23+
24+
name: sext_inreg_sgpr_s32_2
25+
legalized: true
26+
regBankSelected: true
27+
body: |
28+
bb.0:
29+
liveins: $sgpr0
30+
31+
; GCN-LABEL: name: sext_inreg_sgpr_s32_2
32+
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
33+
; GCN: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[COPY]], 131072, implicit-def $scc
34+
; GCN: $sgpr0 = COPY [[S_BFE_I32_]]
35+
%0:sgpr(s32) = COPY $sgpr0
36+
%1:sgpr(s32) = G_SEXT_INREG %0, 2
37+
$sgpr0 = COPY %1
38+
...
39+
40+
---
41+
42+
name: sext_inreg_sgpr_s32_8
43+
legalized: true
44+
regBankSelected: true
45+
body: |
46+
bb.0:
47+
liveins: $sgpr0
48+
49+
; GCN-LABEL: name: sext_inreg_sgpr_s32_8
50+
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
51+
; GCN: [[S_SEXT_I32_I8_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I8 [[COPY]]
52+
; GCN: $sgpr0 = COPY [[S_SEXT_I32_I8_]]
53+
%0:sgpr(s32) = COPY $sgpr0
54+
%1:sgpr(s32) = G_SEXT_INREG %0, 8
55+
$sgpr0 = COPY %1
56+
...
57+
58+
---
59+
60+
name: sext_inreg_sgpr_s32_16
61+
legalized: true
62+
regBankSelected: true
63+
body: |
64+
bb.0:
65+
liveins: $sgpr0
66+
67+
; GCN-LABEL: name: sext_inreg_sgpr_s32_16
68+
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
69+
; GCN: [[S_SEXT_I32_I16_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I16 [[COPY]]
70+
; GCN: $sgpr0 = COPY [[S_SEXT_I32_I16_]]
71+
%0:sgpr(s32) = COPY $sgpr0
72+
%1:sgpr(s32) = G_SEXT_INREG %0, 16
73+
$sgpr0 = COPY %1
74+
...
75+
76+
---
77+
78+
name: sext_inreg_sgpr_s32_31
79+
legalized: true
80+
regBankSelected: true
81+
body: |
82+
bb.0:
83+
liveins: $sgpr0
84+
85+
; GCN-LABEL: name: sext_inreg_sgpr_s32_31
86+
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
87+
; GCN: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[COPY]], 2031616, implicit-def $scc
88+
; GCN: $sgpr0 = COPY [[S_BFE_I32_]]
89+
%0:sgpr(s32) = COPY $sgpr0
90+
%1:sgpr(s32) = G_SEXT_INREG %0, 31
91+
$sgpr0 = COPY %1
92+
...
93+
94+
---
95+
96+
name: sext_inreg_sgpr_s64_1
97+
legalized: true
98+
regBankSelected: true
99+
body: |
100+
bb.0:
101+
liveins: $sgpr0_sgpr1
102+
103+
; GCN-LABEL: name: sext_inreg_sgpr_s64_1
104+
; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
105+
; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
106+
; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1
107+
; GCN: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 65536, implicit-def $scc
108+
; GCN: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]]
109+
%0:sgpr(s64) = COPY $sgpr0_sgpr1
110+
%1:sgpr(s64) = G_SEXT_INREG %0, 1
111+
$sgpr0_sgpr1 = COPY %1
112+
...
113+
114+
---
115+
116+
name: sext_inreg_sgpr_s64_2
117+
legalized: true
118+
regBankSelected: true
119+
body: |
120+
bb.0:
121+
liveins: $sgpr0_sgpr1
122+
123+
; GCN-LABEL: name: sext_inreg_sgpr_s64_2
124+
; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
125+
; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
126+
; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1
127+
; GCN: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 131072, implicit-def $scc
128+
; GCN: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]]
129+
%0:sgpr(s64) = COPY $sgpr0_sgpr1
130+
%1:sgpr(s64) = G_SEXT_INREG %0, 2
131+
$sgpr0_sgpr1 = COPY %1
132+
...
133+
134+
---
135+
136+
name: sext_inreg_sgpr_s64_8
137+
legalized: true
138+
regBankSelected: true
139+
body: |
140+
bb.0:
141+
liveins: $sgpr0_sgpr1
142+
143+
; GCN-LABEL: name: sext_inreg_sgpr_s64_8
144+
; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
145+
; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
146+
; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1
147+
; GCN: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 524288, implicit-def $scc
148+
; GCN: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]]
149+
%0:sgpr(s64) = COPY $sgpr0_sgpr1
150+
%1:sgpr(s64) = G_SEXT_INREG %0, 8
151+
$sgpr0_sgpr1 = COPY %1
152+
...
153+
154+
---
155+
156+
name: sext_inreg_sgpr_s64_16
157+
legalized: true
158+
regBankSelected: true
159+
body: |
160+
bb.0:
161+
liveins: $sgpr0_sgpr1
162+
163+
; GCN-LABEL: name: sext_inreg_sgpr_s64_16
164+
; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
165+
; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
166+
; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1
167+
; GCN: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 1048576, implicit-def $scc
168+
; GCN: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]]
169+
%0:sgpr(s64) = COPY $sgpr0_sgpr1
170+
%1:sgpr(s64) = G_SEXT_INREG %0, 16
171+
$sgpr0_sgpr1 = COPY %1
172+
...
173+
174+
---
175+
176+
name: sext_inreg_sgpr_s64_31
177+
legalized: true
178+
regBankSelected: true
179+
body: |
180+
bb.0:
181+
liveins: $sgpr0_sgpr1
182+
183+
; GCN-LABEL: name: sext_inreg_sgpr_s64_31
184+
; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
185+
; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
186+
; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1
187+
; GCN: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 2031616, implicit-def $scc
188+
; GCN: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]]
189+
%0:sgpr(s64) = COPY $sgpr0_sgpr1
190+
%1:sgpr(s64) = G_SEXT_INREG %0, 31
191+
$sgpr0_sgpr1 = COPY %1
192+
...
193+
194+
# Ideally this degenerate case would have been replaced with a 32-bit shift by combines.
195+
---
196+
197+
name: sext_inreg_sgpr_s64_32
198+
legalized: true
199+
regBankSelected: true
200+
body: |
201+
bb.0:
202+
liveins: $sgpr0_sgpr1
203+
204+
; GCN-LABEL: name: sext_inreg_sgpr_s64_32
205+
; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
206+
; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
207+
; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1
208+
; GCN: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 2097152, implicit-def $scc
209+
; GCN: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]]
210+
%0:sgpr(s64) = COPY $sgpr0_sgpr1
211+
%1:sgpr(s64) = G_SEXT_INREG %0, 32
212+
$sgpr0_sgpr1 = COPY %1
213+
...
214+
215+
---
216+
217+
name: sext_inreg_sgpr_s64_63
218+
legalized: true
219+
regBankSelected: true
220+
body: |
221+
bb.0:
222+
liveins: $sgpr0_sgpr1
223+
224+
; GCN-LABEL: name: sext_inreg_sgpr_s64_63
225+
; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
226+
; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
227+
; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1
228+
; GCN: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 4128768, implicit-def $scc
229+
; GCN: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]]
230+
%0:sgpr(s64) = COPY $sgpr0_sgpr1
231+
%1:sgpr(s64) = G_SEXT_INREG %0, 63
232+
$sgpr0_sgpr1 = COPY %1
233+
...
234+
235+
---
236+
237+
name: sext_inreg_vgpr_s32_1
238+
legalized: true
239+
regBankSelected: true
240+
body: |
241+
bb.0:
242+
liveins: $vgpr0
243+
244+
; GCN-LABEL: name: sext_inreg_vgpr_s32_1
245+
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
246+
; GCN: [[V_BFE_I32_:%[0-9]+]]:vgpr_32 = V_BFE_I32 [[COPY]], 0, 1, implicit $exec
247+
; GCN: $vgpr0 = COPY [[V_BFE_I32_]]
248+
%0:vgpr(s32) = COPY $vgpr0
249+
%1:vgpr(s32) = G_SEXT_INREG %0, 1
250+
$vgpr0 = COPY %1
251+
...
252+
253+
---
254+
255+
name: sext_inreg_vgpr_s32_2
256+
legalized: true
257+
regBankSelected: true
258+
body: |
259+
bb.0:
260+
liveins: $vgpr0
261+
262+
; GCN-LABEL: name: sext_inreg_vgpr_s32_2
263+
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
264+
; GCN: [[V_BFE_I32_:%[0-9]+]]:vgpr_32 = V_BFE_I32 [[COPY]], 0, 2, implicit $exec
265+
; GCN: $vgpr0 = COPY [[V_BFE_I32_]]
266+
%0:vgpr(s32) = COPY $vgpr0
267+
%1:vgpr(s32) = G_SEXT_INREG %0, 2
268+
$vgpr0 = COPY %1
269+
...
270+
271+
---
272+
273+
name: sext_inreg_vgpr_s32_8
274+
legalized: true
275+
regBankSelected: true
276+
body: |
277+
bb.0:
278+
liveins: $vgpr0
279+
280+
; GCN-LABEL: name: sext_inreg_vgpr_s32_8
281+
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
282+
; GCN: [[V_BFE_I32_:%[0-9]+]]:vgpr_32 = V_BFE_I32 [[COPY]], 0, 8, implicit $exec
283+
; GCN: $vgpr0 = COPY [[V_BFE_I32_]]
284+
%0:vgpr(s32) = COPY $vgpr0
285+
%1:vgpr(s32) = G_SEXT_INREG %0, 8
286+
$vgpr0 = COPY %1
287+
...
288+
289+
---
290+
291+
name: sext_inreg_vgpr_s32_16
292+
legalized: true
293+
regBankSelected: true
294+
body: |
295+
bb.0:
296+
liveins: $vgpr0
297+
298+
; GCN-LABEL: name: sext_inreg_vgpr_s32_16
299+
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
300+
; GCN: [[V_BFE_I32_:%[0-9]+]]:vgpr_32 = V_BFE_I32 [[COPY]], 0, 16, implicit $exec
301+
; GCN: $vgpr0 = COPY [[V_BFE_I32_]]
302+
%0:vgpr(s32) = COPY $vgpr0
303+
%1:vgpr(s32) = G_SEXT_INREG %0, 16
304+
$vgpr0 = COPY %1
305+
...
306+
307+
---
308+
309+
name: sext_inreg_vgpr_s32_31
310+
legalized: true
311+
regBankSelected: true
312+
body: |
313+
bb.0:
314+
liveins: $vgpr0
315+
316+
; GCN-LABEL: name: sext_inreg_vgpr_s32_31
317+
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
318+
; GCN: [[V_BFE_I32_:%[0-9]+]]:vgpr_32 = V_BFE_I32 [[COPY]], 0, 31, implicit $exec
319+
; GCN: $vgpr0 = COPY [[V_BFE_I32_]]
320+
%0:vgpr(s32) = COPY $vgpr0
321+
%1:vgpr(s32) = G_SEXT_INREG %0, 31
322+
$vgpr0 = COPY %1
323+
...

0 commit comments

Comments
 (0)