Skip to content

Commit ab1dce6

Browse files
committed
AMDGPU: Add pseudoinstruction for agpr or vgpr constants
Currently constants are materialized with v_mov_b32, which may fold into v_accvgpr_write_b32 if it happens to be copied into an AGPR use. This is fine until the register allocator wants to introduce temporary registers using the combined AV_ superclasses. Since each of these instructions is restricted to writing the specific subclass, they block instances where we could inflate the use register class. As v_accvgpr_write_b32 cannot use a literal constant, only inline immediate values should be used with the pseudo. Introduce a pseudo with a flexible result register class. Alternatively we would need to teach allocation about how to rewrite or rematerialize with a change of opcode which would require a lot more machinery. We may want a 64-bit variant, just in case we can make use of v_mov_b64. This does not yet attempt to make use of it, and only adds the boilerplate and tests on basic optimizations.
1 parent 00fdc52 commit ab1dce6

File tree

9 files changed

+544
-1
lines changed

9 files changed

+544
-1
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1337,6 +1337,7 @@ bool SIInstrInfo::getConstValDefinedInReg(const MachineInstr &MI,
13371337
case AMDGPU::S_MOV_B64:
13381338
case AMDGPU::V_MOV_B64_e32:
13391339
case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
1340+
case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
13401341
case AMDGPU::S_MOV_B64_IMM_PSEUDO:
13411342
case AMDGPU::V_MOV_B64_PSEUDO: {
13421343
const MachineOperand &Src0 = MI.getOperand(1);
@@ -2186,7 +2187,13 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
21862187
MI.getMF()->getRegInfo().constrainRegClass(MI.getOperand(0).getReg(),
21872188
&AMDGPU::SReg_32_XM0RegClass);
21882189
break;
2189-
2190+
case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
2191+
Register Dst = MI.getOperand(0).getReg();
2192+
bool IsAGPR = SIRegisterInfo::isAGPRClass(RI.getPhysRegBaseClass(Dst));
2193+
MI.setDesc(
2194+
get(IsAGPR ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::V_MOV_B32_e32));
2195+
break;
2196+
}
21902197
case AMDGPU::V_MOV_B64_PSEUDO: {
21912198
Register Dst = MI.getOperand(0).getReg();
21922199
Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
@@ -3423,6 +3430,7 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
34233430
case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
34243431
case AMDGPU::V_ACCVGPR_READ_B32_e64:
34253432
case AMDGPU::V_ACCVGPR_MOV_B32:
3433+
case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
34263434
return true;
34273435
default:
34283436
return false;

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,25 @@ def V_MOV_B64_PSEUDO : VPseudoInstSI <(outs VReg_64:$vdst),
140140
let UseNamedOperandTable = 1;
141141
}
142142

143+
// 32-bit materialize immediate which supports AGPR or VGPR. Typically
144+
// this should just expand to V_MOV_B32, unless $vdst happens to be
145+
// allocated to an AGPR, in which case it will lower to
146+
// V_ACCVGPR_WRITE_B32. This should always use an inline immediate
147+
// operand, as v_accvgpr_write_b32 does not support literal constants.
148+
def AV_MOV_B32_IMM_PSEUDO
149+
: VPseudoInstSI<(outs AV_32:$vdst), (ins VCSrc_b32:$src0)> {
150+
let isReMaterializable = 1;
151+
let isAsCheapAsAMove = 1;
152+
153+
// Imprecise: technically it's VOP3 if the result is an AGPR, and VOP1 for a VGPR. But
154+
// this tricks the rematerialize logic into working for it.
155+
let VOP3 = 1;
156+
let isMoveImm = 1;
157+
let SchedRW = [Write32Bit];
158+
let Size = 4;
159+
let UseNamedOperandTable = 1;
160+
}
161+
143162
// 64-bit vector move with dpp. Expanded post-RA.
144163
def V_MOV_B64_DPP_PSEUDO : VOP_DPP_Pseudo <"v_mov_b64_dpp", VOP_I64_I64> {
145164
let Size = 16; // Requires two 8-byte v_mov_b32_dpp to complete.
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=postrapseudos %s -o - | FileCheck %s
3+
# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -run-pass=postrapseudos %s -o - | FileCheck %s
4+
5+
---
6+
name: av_mov_b32_imm_pseudo_agpr_0
7+
tracksRegLiveness: true
8+
body: |
9+
bb.0:
10+
; CHECK-LABEL: name: av_mov_b32_imm_pseudo_agpr_0
11+
; CHECK: $agpr0 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
12+
$agpr0 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
13+
...
14+
15+
---
16+
name: av_mov_b32_imm_pseudo_agpr_64
17+
tracksRegLiveness: true
18+
body: |
19+
bb.0:
20+
; CHECK-LABEL: name: av_mov_b32_imm_pseudo_agpr_64
21+
; CHECK: $agpr0 = V_ACCVGPR_WRITE_B32_e64 64, implicit $exec
22+
$agpr0 = AV_MOV_B32_IMM_PSEUDO 64, implicit $exec
23+
...
24+
25+
---
26+
name: av_mov_b32_imm_pseudo_vgpr_0
27+
tracksRegLiveness: true
28+
body: |
29+
bb.0:
30+
; CHECK-LABEL: name: av_mov_b32_imm_pseudo_vgpr_0
31+
; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
32+
$vgpr0 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
33+
...
34+
35+
---
36+
name: av_mov_b32_imm_pseudo_vgpr_64
37+
tracksRegLiveness: true
38+
body: |
39+
bb.0:
40+
; CHECK-LABEL: name: av_mov_b32_imm_pseudo_vgpr_64
41+
; CHECK: $vgpr0 = V_MOV_B32_e32 64, implicit $exec
42+
$vgpr0 = AV_MOV_B32_IMM_PSEUDO 64, implicit $exec
43+
...
44+
45+
---
46+
name: av_mov_b32_imm_pseudo_agpr_vgpr
47+
tracksRegLiveness: true
48+
body: |
49+
bb.0:
50+
liveins: $vgpr0
51+
; CHECK-LABEL: name: av_mov_b32_imm_pseudo_agpr_vgpr
52+
; CHECK: liveins: $vgpr0
53+
; CHECK-NEXT: {{ $}}
54+
; CHECK-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec
55+
$agpr1 = AV_MOV_B32_IMM_PSEUDO $vgpr0, implicit $exec
56+
...

llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,3 +109,78 @@ body: |
109109
S_ENDPGM 0
110110
111111
...
112+
113+
---
114+
name: av_mov_b32_imm_pseudo_copy_av_32_to_physreg_agpr
115+
tracksRegLiveness: true
116+
body: |
117+
bb.0:
118+
; GCN-LABEL: name: av_mov_b32_imm_pseudo_copy_av_32_to_physreg_agpr
119+
; GCN: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
120+
; GCN-NEXT: $agpr0 = COPY [[AV_MOV_]]
121+
; GCN-NEXT: S_ENDPGM 0, implicit $agpr0
122+
%0:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
123+
$agpr0 = COPY %0
124+
S_ENDPGM 0, implicit $agpr0
125+
126+
...
127+
128+
---
129+
name: av_mov_b32_imm_pseudo_copy_av_32_to_physreg_vgpr
130+
tracksRegLiveness: true
131+
body: |
132+
bb.0:
133+
; GCN-LABEL: name: av_mov_b32_imm_pseudo_copy_av_32_to_physreg_vgpr
134+
; GCN: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
135+
; GCN-NEXT: $vgpr0 = COPY [[AV_MOV_]]
136+
; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0
137+
%0:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
138+
$vgpr0 = COPY %0
139+
S_ENDPGM 0, implicit $vgpr0
140+
141+
...
142+
143+
---
144+
name: av_mov_b32_imm_pseudo_copy_av_32_to_virtreg_agpr
145+
tracksRegLiveness: true
146+
body: |
147+
bb.0:
148+
; GCN-LABEL: name: av_mov_b32_imm_pseudo_copy_av_32_to_virtreg_agpr
149+
; GCN: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
150+
; GCN-NEXT: S_ENDPGM 0, implicit [[V_ACCVGPR_WRITE_B32_e64_]]
151+
%0:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
152+
%1:agpr_32 = COPY %0
153+
S_ENDPGM 0, implicit %1
154+
155+
...
156+
157+
---
158+
name: av_mov_b32_imm_pseudo_copy_av_32_to_virtreg_vgpr
159+
tracksRegLiveness: true
160+
body: |
161+
bb.0:
162+
; GCN-LABEL: name: av_mov_b32_imm_pseudo_copy_av_32_to_virtreg_vgpr
163+
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
164+
; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
165+
%0:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
166+
%1:vgpr_32 = COPY %0
167+
S_ENDPGM 0, implicit %1
168+
169+
...
170+
171+
---
172+
name: v_mov_b32_imm_literal_copy_v_to_agpr_32
173+
tracksRegLiveness: true
174+
body: |
175+
bb.0:
176+
; GCN-LABEL: name: v_mov_b32_imm_literal_copy_v_to_agpr_32
177+
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
178+
; GCN-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY [[V_MOV_B32_e32_]]
179+
; GCN-NEXT: $agpr0 = COPY [[COPY]]
180+
; GCN-NEXT: S_ENDPGM 0
181+
%0:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
182+
%1:agpr_32 = COPY %0
183+
$agpr0 = COPY %1
184+
S_ENDPGM 0
185+
186+
...
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -simplify-mir -start-before=greedy,2 -stress-regalloc=3 -stop-after=postrapseudos -o - -verify-regalloc %s | FileCheck %s
3+
4+
# Compare results of using V_MOV_B32 vs. AV_MOV_B32_IMM_PSEUDO during
5+
# allocation.
6+
7+
---
8+
name: av_mov_b32_split
9+
tracksRegLiveness: true
10+
machineFunctionInfo:
11+
isEntryFunction: true
12+
scratchRSrcReg: '$sgpr72_sgpr73_sgpr74_sgpr75'
13+
stackPtrOffsetReg: '$sgpr32'
14+
occupancy: 7
15+
vgprForAGPRCopy: '$vgpr255'
16+
sgprForEXECCopy: '$sgpr74_sgpr75'
17+
body: |
18+
bb.0:
19+
liveins: $vgpr0, $sgpr4_sgpr5
20+
21+
; CHECK-LABEL: name: av_mov_b32_split
22+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5
23+
; CHECK-NEXT: {{ $}}
24+
; CHECK-NEXT: renamable $agpr0 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
25+
; CHECK-NEXT: renamable $agpr1 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec
26+
; CHECK-NEXT: renamable $agpr2 = V_ACCVGPR_WRITE_B32_e64 2, implicit $exec
27+
; CHECK-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
28+
; CHECK-NEXT: renamable $agpr0 = V_ACCVGPR_WRITE_B32_e64 3, implicit $exec
29+
; CHECK-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
30+
; CHECK-NEXT: renamable $agpr0 = V_ACCVGPR_WRITE_B32_e64 4, implicit $exec
31+
; CHECK-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
32+
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec
33+
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr0
34+
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr1
35+
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr2
36+
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec
37+
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr0
38+
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec
39+
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr0
40+
%0:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
41+
%1:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 1, implicit $exec
42+
%2:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 2, implicit $exec
43+
%3:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 3, implicit $exec
44+
%4:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 4, implicit $exec
45+
46+
%5:agpr_32 = COPY %0
47+
%6:agpr_32 = COPY %1
48+
%7:agpr_32 = COPY %2
49+
%8:agpr_32 = COPY %3
50+
%9:agpr_32 = COPY %4
51+
52+
S_NOP 0, implicit %5
53+
S_NOP 0, implicit %6
54+
S_NOP 0, implicit %7
55+
S_NOP 0, implicit %8
56+
S_NOP 0, implicit %9
57+
58+
...
59+
60+
---
61+
name: v_mov_b32_split
62+
tracksRegLiveness: true
63+
machineFunctionInfo:
64+
isEntryFunction: true
65+
scratchRSrcReg: '$sgpr72_sgpr73_sgpr74_sgpr75'
66+
stackPtrOffsetReg: '$sgpr32'
67+
occupancy: 7
68+
vgprForAGPRCopy: '$vgpr255'
69+
sgprForEXECCopy: '$sgpr74_sgpr75'
70+
body: |
71+
bb.0:
72+
liveins: $vgpr0, $sgpr4_sgpr5
73+
74+
; CHECK-LABEL: name: v_mov_b32_split
75+
; CHECK: liveins: $vgpr0, $vgpr3, $vgpr4, $vgpr5, $sgpr4_sgpr5
76+
; CHECK-NEXT: {{ $}}
77+
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
78+
; CHECK-NEXT: renamable $vgpr1 = V_MOV_B32_e32 1, implicit $exec
79+
; CHECK-NEXT: renamable $vgpr2 = V_MOV_B32_e32 2, implicit $exec
80+
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec
81+
; CHECK-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
82+
; CHECK-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $exec
83+
; CHECK-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $exec
84+
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 3, implicit $exec
85+
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec
86+
; CHECK-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
87+
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 4, implicit $exec
88+
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec
89+
; CHECK-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
90+
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec
91+
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr0
92+
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr1
93+
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr2
94+
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec
95+
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr0
96+
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec
97+
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr0
98+
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
99+
%1:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
100+
%2:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
101+
%3:vgpr_32 = V_MOV_B32_e32 3, implicit $exec
102+
%4:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
103+
104+
%5:agpr_32 = COPY %0
105+
%6:agpr_32 = COPY %1
106+
%7:agpr_32 = COPY %2
107+
%8:agpr_32 = COPY %3
108+
%9:agpr_32 = COPY %4
109+
110+
S_NOP 0, implicit %5
111+
S_NOP 0, implicit %6
112+
S_NOP 0, implicit %7
113+
S_NOP 0, implicit %8
114+
S_NOP 0, implicit %9
115+
116+
...
117+

llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -563,3 +563,68 @@ body: |
563563
SI_RETURN_TO_EPILOG %1
564564
565565
...
566+
567+
---
568+
name: fold_v_mov_b32_e32_literal_to_agpr
569+
body: |
570+
bb.0:
571+
; GCN-LABEL: name: fold_v_mov_b32_e32_literal_to_agpr
572+
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
573+
; GCN-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY killed [[V_MOV_B32_e32_]]
574+
; GCN-NEXT: SI_RETURN_TO_EPILOG implicit [[COPY]]
575+
%0:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
576+
%1:agpr_32 = COPY killed %0
577+
SI_RETURN_TO_EPILOG implicit %1
578+
...
579+
580+
---
581+
name: fold_v_mov_b32_e32_inlineimm_to_agpr
582+
body: |
583+
bb.0:
584+
; GCN-LABEL: name: fold_v_mov_b32_e32_inlineimm_to_agpr
585+
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 64, implicit $exec
586+
; GCN-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 64, implicit $exec
587+
; GCN-NEXT: SI_RETURN_TO_EPILOG implicit [[V_ACCVGPR_WRITE_B32_e64_]]
588+
%0:vgpr_32 = V_MOV_B32_e32 64, implicit $exec
589+
%1:agpr_32 = COPY killed %0
590+
SI_RETURN_TO_EPILOG implicit %1
591+
...
592+
593+
---
594+
name: fold_av_mov_b32_imm_pseudo_inlineimm_to_vgpr
595+
body: |
596+
bb.0:
597+
; GCN-LABEL: name: fold_av_mov_b32_imm_pseudo_inlineimm_to_vgpr
598+
; GCN: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO 64, implicit $exec
599+
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 64, implicit $exec
600+
; GCN-NEXT: SI_RETURN_TO_EPILOG implicit [[V_MOV_B32_e32_]]
601+
%0:av_32 = AV_MOV_B32_IMM_PSEUDO 64, implicit $exec
602+
%1:vgpr_32 = COPY killed %0
603+
SI_RETURN_TO_EPILOG implicit %1
604+
...
605+
606+
---
607+
name: fold_av_mov_b32_imm_pseudo_inlineimm_to_agpr
608+
body: |
609+
bb.0:
610+
; GCN-LABEL: name: fold_av_mov_b32_imm_pseudo_inlineimm_to_agpr
611+
; GCN: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO 64, implicit $exec
612+
; GCN-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 64, implicit $exec
613+
; GCN-NEXT: SI_RETURN_TO_EPILOG implicit [[V_ACCVGPR_WRITE_B32_e64_]]
614+
%0:av_32 = AV_MOV_B32_IMM_PSEUDO 64, implicit $exec
615+
%1:agpr_32 = COPY killed %0
616+
SI_RETURN_TO_EPILOG implicit %1
617+
...
618+
619+
---
620+
name: fold_av_mov_b32_imm_pseudo_inlineimm_to_av
621+
body: |
622+
bb.0:
623+
; GCN-LABEL: name: fold_av_mov_b32_imm_pseudo_inlineimm_to_av
624+
; GCN: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO 64, implicit $exec
625+
; GCN-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY killed [[AV_MOV_]]
626+
; GCN-NEXT: SI_RETURN_TO_EPILOG implicit [[COPY]]
627+
%0:av_32 = AV_MOV_B32_IMM_PSEUDO 64, implicit $exec
628+
%1:av_32 = COPY killed %0
629+
SI_RETURN_TO_EPILOG implicit %1
630+
...

0 commit comments

Comments
 (0)