Skip to content

Commit d3d78d1

Browse files
author
Jun Wang
committed
In this commit: (1) comments are updated; (2) the reg bank is set only for
dst instead of for both src and dst; (3) a run line for GFX10 is added to the test file.
1 parent 19a291f commit d3d78d1

File tree

2 files changed

+131
-16
lines changed

2 files changed

+131
-16
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3735,29 +3735,27 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
37353735
const MachineRegisterInfo &MRI = MF.getRegInfo();
37363736

37373737
if (MI.isCopy() || MI.getOpcode() == AMDGPU::G_FREEZE) {
3738+
Register DstReg = MI.getOperand(0).getReg();
3739+
Register SrcReg = MI.getOperand(1).getReg();
3740+
37383741
// The default logic bothers to analyze impossible alternative mappings. We
37393742
// want the most straightforward mapping, so just directly handle this.
3740-
const RegisterBank *DstBank = getRegBank(MI.getOperand(0).getReg(), MRI,
3741-
*TRI);
3742-
const RegisterBank *SrcBank = getRegBank(MI.getOperand(1).getReg(), MRI,
3743-
*TRI);
3743+
const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
3744+
const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI);
37443745
assert(SrcBank && "src bank should have been assigned already");
37453746

3746-
// For copy from a physical reg to s1 dest, the call of getRegBank() above
3747-
// gives incorrect result. We set both src and dst banks to VCCRegBank.
3748-
if (!MI.getOperand(1).getReg().isVirtual() && !DstBank &&
3749-
MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(1))
3750-
DstBank = SrcBank = &AMDGPU::VCCRegBank;
3751-
// For copy from s1 src to a physical reg, we set both src and dst banks to
3752-
// VCCRegBank.
3753-
else if (!MI.getOperand(0).getReg().isVirtual() &&
3754-
MRI.getType(MI.getOperand(1).getReg()) == LLT::scalar(1))
3755-
DstBank = SrcBank = &AMDGPU::VCCRegBank;
3747+
// For COPY between a physical reg and an s1, set dst bank to VCCRegBank
3748+
// so that the copy is allowed.
3749+
if (!SrcReg.isVirtual() && !DstBank &&
3750+
MRI.getType(DstReg) == LLT::scalar(1))
3751+
DstBank = &AMDGPU::VCCRegBank;
3752+
else if (!DstReg.isVirtual() && MRI.getType(SrcReg) == LLT::scalar(1))
3753+
DstBank = &AMDGPU::VCCRegBank;
37563754

37573755
if (!DstBank)
37583756
DstBank = SrcBank;
37593757

3760-
unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
3758+
unsigned Size = getSizeInBits(DstReg, MRI, *TRI);
37613759
if (MI.getOpcode() != AMDGPU::G_FREEZE &&
37623760
cannotCopy(*DstBank, *SrcBank, TypeSize::getFixed(Size)))
37633761
return getInvalidInstructionMapping();

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-copy.mir

Lines changed: 118 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
22
# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -run-pass=amdgpu-regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck %s
33
# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -run-pass=amdgpu-regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck %s
4+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck --check-prefix=GFX10 %s
5+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck --check-prefix=GFX10 %s
46

57
---
68
name: copy_s32_vgpr_to_vgpr
@@ -216,6 +218,14 @@ body: |
216218
; CHECK-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
217219
; CHECK-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
218220
; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY]](s1), [[CONST1]], [[CONST2]]
221+
;
222+
; GFX10-LABEL: name: copy_sgpr_64_to_s1
223+
; GFX10: liveins: $sgpr4_sgpr5
224+
; GFX10-NEXT: {{ $}}
225+
; GFX10-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY $sgpr4_sgpr5
226+
; GFX10-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
227+
; GFX10-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
228+
; GFX10-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY]](s1), [[CONST1]], [[CONST2]]
219229
%0:_(s1) = COPY $sgpr4_sgpr5
220230
%1:_(s32) = G_ZEXT %0:_(s1)
221231
...
@@ -234,6 +244,14 @@ body: |
234244
; CHECK-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
235245
; CHECK-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
236246
; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY]](s1), [[CONST1]], [[CONST2]]
247+
;
248+
; GFX10-LABEL: name: copy_sgpr_32_to_s1
249+
; GFX10: liveins: $sgpr0
250+
; GFX10-NEXT: {{ $}}
251+
; GFX10-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY $sgpr0
252+
; GFX10-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
253+
; GFX10-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
254+
; GFX10-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY]](s1), [[CONST1]], [[CONST2]]
237255
%0:_(s1) = COPY $sgpr0
238256
%1:_(s32) = G_ZEXT %0:_(s1)
239257
...
@@ -256,6 +274,18 @@ body: |
256274
; CHECK-NEXT: [[CONST3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
257275
; CHECK-NEXT: [[CONST4:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
258276
; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY2]](s1), [[CONST3]], [[CONST4]]
277+
;
278+
; GFX10-LABEL: name: copy2_sgpr_64_to_s1
279+
; GFX10: liveins: $sgpr4_sgpr5, $sgpr6_sgpr7
280+
; GFX10-NEXT: {{ $}}
281+
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY $sgpr4_sgpr5
282+
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY $sgpr6_sgpr7
283+
; GFX10-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
284+
; GFX10-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
285+
; GFX10-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY1]](s1), [[CONST1]], [[CONST2]]
286+
; GFX10-NEXT: [[CONST3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
287+
; GFX10-NEXT: [[CONST4:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
288+
; GFX10-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY2]](s1), [[CONST3]], [[CONST4]]
259289
%0:_(s1) = COPY $sgpr4_sgpr5
260290
%1:_(s1) = COPY $sgpr6_sgpr7
261291
%2:_(s32) = G_ZEXT %0:_(s1)
@@ -280,6 +310,18 @@ body: |
280310
; CHECK-NEXT: [[CONST3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
281311
; CHECK-NEXT: [[CONST4:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
282312
; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY2]](s1), [[CONST3]], [[CONST4]]
313+
;
314+
; GFX10-LABEL: name: copy2_sgpr_32_to_s1
315+
; GFX10: liveins: $sgpr0, $sgpr1
316+
; GFX10-NEXT: {{ $}}
317+
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY $sgpr0
318+
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY $sgpr1
319+
; GFX10-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
320+
; GFX10-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
321+
; GFX10-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY1]](s1), [[CONST1]], [[CONST2]]
322+
; GFX10-NEXT: [[CONST3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
323+
; GFX10-NEXT: [[CONST4:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
324+
; GFX10-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY2]](s1), [[CONST3]], [[CONST4]]
283325
%0:_(s1) = COPY $sgpr0
284326
%1:_(s1) = COPY $sgpr1
285327
%2:_(s32) = G_ZEXT %0:_(s1)
@@ -303,6 +345,17 @@ body: |
303345
; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY1]](s1), [[CONST1]], [[CONST2]]
304346
; CHECK-NEXT: [[CONST3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
305347
; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[COPY2]], [[CONST3]]
348+
;
349+
; GFX10-LABEL: name: copy_sgpr_64_and_sgpr_32_to_s1
350+
; GFX10: liveins: $sgpr6, $sgpr4_sgpr5
351+
; GFX10-NEXT: {{ $}}
352+
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY $sgpr4_sgpr5
353+
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
354+
; GFX10-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
355+
; GFX10-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
356+
; GFX10-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY1]](s1), [[CONST1]], [[CONST2]]
357+
; GFX10-NEXT: [[CONST3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
358+
; GFX10-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[COPY2]], [[CONST3]]
306359
%0:_(s1) = COPY $sgpr4_sgpr5
307360
%2:_(s32) = COPY $sgpr6
308361
%7:_(s32) = G_ZEXT %0:_(s1)
@@ -322,6 +375,12 @@ body: |
322375
; CHECK-NEXT: {{ $}}
323376
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s1) = COPY $sgpr4_sgpr5
324377
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[COPY]](s1)
378+
;
379+
; GFX10-LABEL: name: copy_sgpr_64_to_s1_vgpr
380+
; GFX10: liveins: $sgpr4_sgpr5
381+
; GFX10-NEXT: {{ $}}
382+
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(s1) = COPY $sgpr4_sgpr5
383+
; GFX10-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[COPY]](s1)
325384
%0:vgpr(s1) = COPY $sgpr4_sgpr5
326385
%1:_(s32) = G_ZEXT %0:vgpr(s1)
327386
...
@@ -338,6 +397,12 @@ body: |
338397
; CHECK-NEXT: {{ $}}
339398
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s1) = COPY $sgpr0
340399
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[COPY]](s1)
400+
;
401+
; GFX10-LABEL: name: copy_sgpr_32_to_s1_vgpr
402+
; GFX10: liveins: $sgpr0
403+
; GFX10-NEXT: {{ $}}
404+
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(s1) = COPY $sgpr0
405+
; GFX10-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[COPY]](s1)
341406
%0:vgpr(s1) = COPY $sgpr0
342407
%1:_(s32) = G_ZEXT %0:vgpr(s1)
343408
...
@@ -356,6 +421,14 @@ body: |
356421
; CHECK-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
357422
; CHECK-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
358423
; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY]](s1), [[CONST1]], [[CONST2]]
424+
;
425+
; GFX10-LABEL: name: copy_sgpr_64_to_s1_vcc
426+
; GFX10: liveins: $sgpr4_sgpr5
427+
; GFX10-NEXT: {{ $}}
428+
; GFX10-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY $sgpr4_sgpr5
429+
; GFX10-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
430+
; GFX10-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
431+
; GFX10-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY]](s1), [[CONST1]], [[CONST2]]
359432
%0:vcc(s1) = COPY $sgpr4_sgpr5
360433
%1:_(s32) = G_ZEXT %0:vcc(s1)
361434
...
@@ -374,6 +447,14 @@ body: |
374447
; CHECK-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
375448
; CHECK-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
376449
; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY]](s1), [[CONST1]], [[CONST2]]
450+
;
451+
; GFX10-LABEL: name: copy_sgpr_32_to_s1_vcc
452+
; GFX10: liveins: $sgpr0
453+
; GFX10-NEXT: {{ $}}
454+
; GFX10-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY $sgpr0
455+
; GFX10-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
456+
; GFX10-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
457+
; GFX10-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY]](s1), [[CONST1]], [[CONST2]]
377458
%0:vcc(s1) = COPY $sgpr0
378459
%1:_(s32) = G_ZEXT %0:vcc(s1)
379460
...
@@ -391,6 +472,13 @@ body: |
391472
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
392473
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
393474
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s1) = COPY [[TRUNC]](s1)
475+
;
476+
; GFX10-LABEL: name: copy_virt_reg_to_s1
477+
; GFX10: liveins: $vgpr0
478+
; GFX10-NEXT: {{ $}}
479+
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
480+
; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
481+
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr(s1) = COPY [[TRUNC]](s1)
394482
%0:_(s32) = COPY $vgpr0
395483
%1:_(s1) = G_TRUNC %0
396484
%2:_(s1) = COPY %1
@@ -410,6 +498,14 @@ body: |
410498
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
411499
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s1) = COPY [[TRUNC]](s1)
412500
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s1) = COPY [[COPY2]](s1)
501+
;
502+
; GFX10-LABEL: name: copy_virt_reg_to_s1_vgpr
503+
; GFX10: liveins: $vgpr0
504+
; GFX10-NEXT: {{ $}}
505+
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
506+
; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
507+
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr(s1) = COPY [[TRUNC]](s1)
508+
; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr(s1) = COPY [[COPY2]](s1)
413509
%0:_(s32) = COPY $vgpr0
414510
%1:_(s1) = G_TRUNC %0
415511
%2:vgpr(s1) = COPY %1
@@ -431,6 +527,14 @@ body: |
431527
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
432528
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
433529
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[COPY2]](s1)
530+
;
531+
; GFX10-LABEL: name: copy_virt_reg_to_s1_vcc
532+
; GFX10: liveins: $vgpr0
533+
; GFX10-NEXT: {{ $}}
534+
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
535+
; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
536+
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
537+
; GFX10-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[COPY2]](s1)
434538
%0:_(s32) = COPY $vgpr0
435539
%1:_(s1) = G_TRUNC %0
436540
%2:vcc(s1) = COPY %1
@@ -450,6 +554,13 @@ body: |
450554
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
451555
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
452556
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[TRUNC]](s1)
557+
;
558+
; GFX10-LABEL: name: copy_s1_to_sgpr_64
559+
; GFX10: liveins: $vgpr0
560+
; GFX10-NEXT: {{ $}}
561+
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
562+
; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
563+
; GFX10-NEXT: $sgpr4_sgpr5 = COPY [[TRUNC]](s1)
453564
%0:_(s32) = COPY $vgpr0
454565
%1:_(s1) = G_TRUNC %0
455566
$sgpr4_sgpr5 = COPY %1
@@ -468,8 +579,14 @@ body: |
468579
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
469580
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
470581
; CHECK-NEXT: $sgpr0 = COPY [[TRUNC]](s1)
582+
;
583+
; GFX10-LABEL: name: copy_s1_to_sgpr_32
584+
; GFX10: liveins: $vgpr0
585+
; GFX10-NEXT: {{ $}}
586+
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
587+
; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
588+
; GFX10-NEXT: $sgpr0 = COPY [[TRUNC]](s1)
471589
%0:_(s32) = COPY $vgpr0
472590
%1:_(s1) = G_TRUNC %0
473591
$sgpr0 = COPY %1
474592
...
475-

0 commit comments

Comments
 (0)