Skip to content

Commit 704f303

Browse files
jwanggit86 and Jun Wang authored
[AMDGPU] Set register bank for i1 register copies (#96155)
Set register bank in copies between a physical register and a virtual register of s1 type.

---------

Co-authored-by: Jun Wang <[email protected]>
1 parent 9616399 commit 704f303

File tree

2 files changed

+314
-5
lines changed

2 files changed

+314
-5
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 16 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -3739,17 +3739,28 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
37393739
const MachineRegisterInfo &MRI = MF.getRegInfo();
37403740

37413741
if (MI.isCopy() || MI.getOpcode() == AMDGPU::G_FREEZE) {
3742+
Register DstReg = MI.getOperand(0).getReg();
3743+
Register SrcReg = MI.getOperand(1).getReg();
3744+
37423745
// The default logic bothers to analyze impossible alternative mappings. We
37433746
// want the most straightforward mapping, so just directly handle this.
3744-
const RegisterBank *DstBank = getRegBank(MI.getOperand(0).getReg(), MRI,
3745-
*TRI);
3746-
const RegisterBank *SrcBank = getRegBank(MI.getOperand(1).getReg(), MRI,
3747-
*TRI);
3747+
const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
3748+
const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI);
37483749
assert(SrcBank && "src bank should have been assigned already");
3750+
3751+
// For COPY between a physical reg and an s1, there is no type associated so
3752+
// we need to take the virtual register's type as a hint on how to interpret
3753+
// s1 values.
3754+
if (!SrcReg.isVirtual() && !DstBank &&
3755+
MRI.getType(DstReg) == LLT::scalar(1))
3756+
DstBank = &AMDGPU::VCCRegBank;
3757+
else if (!DstReg.isVirtual() && MRI.getType(SrcReg) == LLT::scalar(1))
3758+
DstBank = &AMDGPU::VCCRegBank;
3759+
37493760
if (!DstBank)
37503761
DstBank = SrcBank;
37513762

3752-
unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
3763+
unsigned Size = getSizeInBits(DstReg, MRI, *TRI);
37533764
if (MI.getOpcode() != AMDGPU::G_FREEZE &&
37543765
cannotCopy(*DstBank, *SrcBank, TypeSize::getFixed(Size)))
37553766
return getInvalidInstructionMapping();

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-copy.mir

Lines changed: 298 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
22
# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -run-pass=amdgpu-regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck %s
33
# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -run-pass=amdgpu-regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck %s
4+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck --check-prefix=WAVE32 %s
5+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck --check-prefix=WAVE32 %s
46

57
---
68
name: copy_s32_vgpr_to_vgpr
@@ -201,3 +203,299 @@ body: |
201203
%2:vcc(s1) = COPY %1
202204
S_ENDPGM 0, implicit %2
203205
...
206+
207+
---
208+
name: wave64_copy_sgpr_64_to_s1
209+
legalized: true
210+
211+
body: |
212+
bb.0:
213+
liveins: $sgpr4_sgpr5
214+
; CHECK-LABEL: name: wave64_copy_sgpr_64_to_s1
215+
; CHECK: liveins: $sgpr4_sgpr5
216+
; CHECK-NEXT: {{ $}}
217+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY $sgpr4_sgpr5
218+
; CHECK-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
219+
; CHECK-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
220+
; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY]](s1), [[CONST1]], [[CONST2]]
221+
%0:_(s1) = COPY $sgpr4_sgpr5
222+
%1:_(s32) = G_ZEXT %0:_(s1)
223+
...
224+
225+
---
226+
name: wave32_copy_sgpr_32_to_s1
227+
legalized: true
228+
229+
body: |
230+
bb.0:
231+
liveins: $sgpr0
232+
; WAVE32-LABEL: name: wave32_copy_sgpr_32_to_s1
233+
; WAVE32: liveins: $sgpr0
234+
; WAVE32-NEXT: {{ $}}
235+
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY $sgpr0
236+
; WAVE32-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
237+
; WAVE32-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
238+
; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY]](s1), [[CONST1]], [[CONST2]]
239+
%0:_(s1) = COPY $sgpr0
240+
%1:_(s32) = G_ZEXT %0:_(s1)
241+
...
242+
243+
---
244+
name: wave64_copy2_sgpr_64_to_s1
245+
legalized: true
246+
247+
body: |
248+
bb.0:
249+
liveins: $sgpr4_sgpr5, $sgpr6_sgpr7
250+
; CHECK-LABEL: name: wave64_copy2_sgpr_64_to_s1
251+
; CHECK: liveins: $sgpr4_sgpr5, $sgpr6_sgpr7
252+
; CHECK-NEXT: {{ $}}
253+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY $sgpr4_sgpr5
254+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY $sgpr6_sgpr7
255+
; CHECK-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
256+
; CHECK-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
257+
; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY1]](s1), [[CONST1]], [[CONST2]]
258+
; CHECK-NEXT: [[CONST3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
259+
; CHECK-NEXT: [[CONST4:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
260+
; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY2]](s1), [[CONST3]], [[CONST4]]
261+
%0:_(s1) = COPY $sgpr4_sgpr5
262+
%1:_(s1) = COPY $sgpr6_sgpr7
263+
%2:_(s32) = G_ZEXT %0:_(s1)
264+
%3:_(s32) = G_ZEXT %1:_(s1)
265+
...
266+
267+
---
268+
name: wave32_copy2_sgpr_32_to_s1
269+
legalized: true
270+
271+
body: |
272+
bb.0:
273+
liveins: $sgpr0, $sgpr1
274+
; WAVE32-LABEL: name: wave32_copy2_sgpr_32_to_s1
275+
; WAVE32: liveins: $sgpr0, $sgpr1
276+
; WAVE32-NEXT: {{ $}}
277+
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY $sgpr0
278+
; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY $sgpr1
279+
; WAVE32-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
280+
; WAVE32-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
281+
; WAVE32-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY1]](s1), [[CONST1]], [[CONST2]]
282+
; WAVE32-NEXT: [[CONST3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
283+
; WAVE32-NEXT: [[CONST4:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
284+
; WAVE32-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY2]](s1), [[CONST3]], [[CONST4]]
285+
%0:_(s1) = COPY $sgpr0
286+
%1:_(s1) = COPY $sgpr1
287+
%2:_(s32) = G_ZEXT %0:_(s1)
288+
%3:_(s32) = G_ZEXT %1:_(s1)
289+
...
290+
291+
---
292+
name: copy_sgpr_64_to_s1_vgpr
293+
legalized: true
294+
295+
body: |
296+
bb.0:
297+
liveins: $sgpr4_sgpr5
298+
; CHECK-LABEL: name: copy_sgpr_64_to_s1_vgpr
299+
; CHECK: liveins: $sgpr4_sgpr5
300+
; CHECK-NEXT: {{ $}}
301+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s1) = COPY $sgpr4_sgpr5
302+
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[COPY]](s1)
303+
;
304+
; WAVE32-LABEL: name: copy_sgpr_64_to_s1_vgpr
305+
; WAVE32: liveins: $sgpr4_sgpr5
306+
; WAVE32-NEXT: {{ $}}
307+
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s1) = COPY $sgpr4_sgpr5
308+
; WAVE32-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[COPY]](s1)
309+
%0:vgpr(s1) = COPY $sgpr4_sgpr5
310+
%1:_(s32) = G_ZEXT %0:vgpr(s1)
311+
...
312+
313+
---
314+
name: copy_sgpr_32_to_s1_vgpr
315+
legalized: true
316+
317+
body: |
318+
bb.0:
319+
liveins: $sgpr0
320+
; CHECK-LABEL: name: copy_sgpr_32_to_s1_vgpr
321+
; CHECK: liveins: $sgpr0
322+
; CHECK-NEXT: {{ $}}
323+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s1) = COPY $sgpr0
324+
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[COPY]](s1)
325+
;
326+
; WAVE32-LABEL: name: copy_sgpr_32_to_s1_vgpr
327+
; WAVE32: liveins: $sgpr0
328+
; WAVE32-NEXT: {{ $}}
329+
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s1) = COPY $sgpr0
330+
; WAVE32-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[COPY]](s1)
331+
%0:vgpr(s1) = COPY $sgpr0
332+
%1:_(s32) = G_ZEXT %0:vgpr(s1)
333+
...
334+
335+
---
336+
name: wave64_copy_sgpr_64_to_s1_vcc
337+
legalized: true
338+
339+
body: |
340+
bb.0:
341+
liveins: $sgpr4_sgpr5
342+
; CHECK-LABEL: name: wave64_copy_sgpr_64_to_s1_vcc
343+
; CHECK: liveins: $sgpr4_sgpr5
344+
; CHECK-NEXT: {{ $}}
345+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY $sgpr4_sgpr5
346+
; CHECK-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
347+
; CHECK-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
348+
; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY]](s1), [[CONST1]], [[CONST2]]
349+
%0:vcc(s1) = COPY $sgpr4_sgpr5
350+
%1:_(s32) = G_ZEXT %0:vcc(s1)
351+
...
352+
353+
---
354+
name: wave32_copy_sgpr_32_to_s1_vcc
355+
legalized: true
356+
357+
body: |
358+
bb.0:
359+
liveins: $sgpr0
360+
; WAVE32-LABEL: name: wave32_copy_sgpr_32_to_s1_vcc
361+
; WAVE32: liveins: $sgpr0
362+
; WAVE32-NEXT: {{ $}}
363+
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY $sgpr0
364+
; WAVE32-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
365+
; WAVE32-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
366+
; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY]](s1), [[CONST1]], [[CONST2]]
367+
%0:vcc(s1) = COPY $sgpr0
368+
%1:_(s32) = G_ZEXT %0:vcc(s1)
369+
...
370+
371+
---
372+
name: copy_virt_reg_to_s1
373+
legalized: true
374+
375+
body: |
376+
bb.0:
377+
liveins: $vgpr0
378+
; CHECK-LABEL: name: copy_virt_reg_to_s1
379+
; CHECK: liveins: $vgpr0
380+
; CHECK-NEXT: {{ $}}
381+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
382+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
383+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s1) = COPY [[TRUNC]](s1)
384+
;
385+
; WAVE32-LABEL: name: copy_virt_reg_to_s1
386+
; WAVE32: liveins: $vgpr0
387+
; WAVE32-NEXT: {{ $}}
388+
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
389+
; WAVE32-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
390+
; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s1) = COPY [[TRUNC]](s1)
391+
%0:_(s32) = COPY $vgpr0
392+
%1:_(s1) = G_TRUNC %0
393+
%2:_(s1) = COPY %1
394+
...
395+
396+
---
397+
name: copy_virt_reg_to_s1_vgpr
398+
legalized: true
399+
400+
body: |
401+
bb.0:
402+
liveins: $vgpr0
403+
; CHECK-LABEL: name: copy_virt_reg_to_s1_vgpr
404+
; CHECK: liveins: $vgpr0
405+
; CHECK-NEXT: {{ $}}
406+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
407+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
408+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s1) = COPY [[TRUNC]](s1)
409+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s1) = COPY [[COPY2]](s1)
410+
;
411+
; WAVE32-LABEL: name: copy_virt_reg_to_s1_vgpr
412+
; WAVE32: liveins: $vgpr0
413+
; WAVE32-NEXT: {{ $}}
414+
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
415+
; WAVE32-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
416+
; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s1) = COPY [[TRUNC]](s1)
417+
; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(s1) = COPY [[COPY2]](s1)
418+
%0:_(s32) = COPY $vgpr0
419+
%1:_(s1) = G_TRUNC %0
420+
%2:vgpr(s1) = COPY %1
421+
%3:_(s1) = COPY %2
422+
...
423+
424+
425+
---
426+
name: copy_virt_reg_to_s1_vcc
427+
legalized: true
428+
429+
body: |
430+
bb.0:
431+
liveins: $vgpr0
432+
; CHECK-LABEL: name: copy_virt_reg_to_s1_vcc
433+
; CHECK: liveins: $vgpr0
434+
; CHECK-NEXT: {{ $}}
435+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
436+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
437+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
438+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[COPY2]](s1)
439+
;
440+
; WAVE32-LABEL: name: copy_virt_reg_to_s1_vcc
441+
; WAVE32: liveins: $vgpr0
442+
; WAVE32-NEXT: {{ $}}
443+
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
444+
; WAVE32-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
445+
; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
446+
; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[COPY2]](s1)
447+
%0:_(s32) = COPY $vgpr0
448+
%1:_(s1) = G_TRUNC %0
449+
%2:vcc(s1) = COPY %1
450+
%3:_(s1) = COPY %2
451+
...
452+
453+
---
454+
name: copy_s1_to_sgpr_64
455+
legalized: true
456+
457+
body: |
458+
bb.0:
459+
liveins: $vgpr0
460+
; CHECK-LABEL: name: copy_s1_to_sgpr_64
461+
; CHECK: liveins: $vgpr0
462+
; CHECK-NEXT: {{ $}}
463+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
464+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
465+
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[TRUNC]](s1)
466+
;
467+
; WAVE32-LABEL: name: copy_s1_to_sgpr_64
468+
; WAVE32: liveins: $vgpr0
469+
; WAVE32-NEXT: {{ $}}
470+
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
471+
; WAVE32-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
472+
; WAVE32-NEXT: $sgpr4_sgpr5 = COPY [[TRUNC]](s1)
473+
%0:_(s32) = COPY $vgpr0
474+
%1:_(s1) = G_TRUNC %0
475+
$sgpr4_sgpr5 = COPY %1
476+
...
477+
478+
---
479+
name: copy_s1_to_sgpr_32
480+
legalized: true
481+
482+
body: |
483+
bb.0:
484+
liveins: $vgpr0
485+
; CHECK-LABEL: name: copy_s1_to_sgpr_32
486+
; CHECK: liveins: $vgpr0
487+
; CHECK-NEXT: {{ $}}
488+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
489+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
490+
; CHECK-NEXT: $sgpr0 = COPY [[TRUNC]](s1)
491+
;
492+
; WAVE32-LABEL: name: copy_s1_to_sgpr_32
493+
; WAVE32: liveins: $vgpr0
494+
; WAVE32-NEXT: {{ $}}
495+
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
496+
; WAVE32-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
497+
; WAVE32-NEXT: $sgpr0 = COPY [[TRUNC]](s1)
498+
%0:_(s32) = COPY $vgpr0
499+
%1:_(s1) = G_TRUNC %0
500+
$sgpr0 = COPY %1
501+
...

0 commit comments

Comments
 (0)