Skip to content

Commit b726d07

Browse files
committed
[AMDGPU] Reject moving PHI to VALU if the only VGPR input originated from move immediate
Summary: A PHI's result register class is set to VGPR or SGPR depending on the cross-block value divergence. In some cases a uniform PHI needs to be converted to return a VGPR to prevent an odd number of moves of values from VGPR to SGPR and back. A PHI should certainly return a VGPR if it has at least one VGPR input. This change adds an exception: we don't want to convert a uniform PHI to VGPR when the only VGPR input is a VGPR-to-SGPR COPY and the definition of the source VGPR in this COPY is a move immediate. bb.0: %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec %2:sreg_32 = ..... bb.1: %3:sreg_32 = PHI %1, %bb.3, %2, %bb.1 S_BRANCH %bb.3 bb.3: %1:sreg_32 = COPY %0 S_BRANCH %bb.2 Reviewers: rampitec Reviewed By: rampitec Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, kerbowa, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D80434
1 parent 2321ab9 commit b726d07

File tree

2 files changed

+113
-2
lines changed

2 files changed

+113
-2
lines changed

llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -835,8 +835,22 @@ void SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
835835
}
836836
else if (Def->isCopy() &&
837837
TRI->isVectorRegister(*MRI, Def->getOperand(1).getReg())) {
838-
hasVGPRInput = true;
839-
break;
838+
Register SrcReg = Def->getOperand(1).getReg();
839+
MachineInstr *SrcDef = MRI->getVRegDef(SrcReg);
840+
unsigned SMovOp;
841+
int64_t Imm;
842+
if (!isSafeToFoldImmIntoCopy(Def, SrcDef, TII, SMovOp, Imm)) {
843+
hasVGPRInput = true;
844+
break;
845+
} else {
846+
// Formally, if we did not do this right away
847+
// it would be done on the next iteration of the
848+
// runOnMachineFunction main loop. But why not if we can?
849+
MachineFunction *MF = MI.getParent()->getParent();
850+
Def->getOperand(1).ChangeToImmediate(Imm);
851+
Def->addImplicitDefUseOperands(*MF);
852+
Def->setDesc(TII->get(SMovOp));
853+
}
840854
}
841855
}
842856

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=si-fix-sgpr-copies -o - %s | FileCheck -check-prefix=GCN %s
2+
---
3+
# GCN-LABEL: phi_moveimm_input
4+
# GCN-NOT: %{{[0-9]+}}:vgpr_32 = PHI %{{[0-9]+}}, %bb.3, %{{[0-9]+}}, %bb.1
5+
# GCN: %{{[0-9]+}}:sreg_32 = PHI %{{[0-9]+}}, %bb.3, %{{[0-9]+}}, %bb.1
6+
7+
name: phi_moveimm_input
8+
tracksRegLiveness: true
9+
body: |
10+
bb.0:
11+
successors: %bb.1
12+
liveins: $sgpr0, $sgpr1
13+
14+
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
15+
16+
%4:sreg_32 = COPY $sgpr0
17+
%5:sreg_32 = COPY $sgpr1
18+
19+
bb.1:
20+
successors: %bb.2
21+
%2:sreg_32 = S_ADD_U32 %4, %5, implicit-def $scc
22+
S_BRANCH %bb.2
23+
24+
bb.2:
25+
successors: %bb.3
26+
%3:sreg_32 = PHI %1, %bb.3, %2, %bb.1
27+
S_BRANCH %bb.3
28+
29+
bb.3:
30+
successors: %bb.2
31+
%1:sreg_32 = COPY %0
32+
S_BRANCH %bb.2
33+
...
34+
35+
---
36+
# GCN-LABEL: phi_moveimm_subreg_input
37+
# GCN-NOT: %{{[0-9]+}}:sreg_64 = PHI %{{[0-9]+}}, %bb.3, %{{[0-9]+}}, %bb.1
38+
# GCN: %{{[0-9]+}}:vreg_64 = PHI %{{[0-9]+}}, %bb.3, %{{[0-9]+}}, %bb.1
39+
name: phi_moveimm_subreg_input
40+
tracksRegLiveness: true
41+
body: |
42+
bb.0:
43+
successors: %bb.1
44+
liveins: $sgpr0, $sgpr1
45+
46+
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
47+
48+
%4:sreg_32 = COPY $sgpr0
49+
%5:sreg_32 = COPY $sgpr1
50+
51+
bb.1:
52+
successors: %bb.2
53+
undef %2.sub0:sreg_64 = S_ADD_U32 %4, %5, implicit-def $scc
54+
S_BRANCH %bb.2
55+
56+
bb.2:
57+
successors: %bb.3
58+
%3:sreg_64 = PHI %1, %bb.3, %2, %bb.1
59+
S_BRANCH %bb.3
60+
61+
bb.3:
62+
successors: %bb.2
63+
undef %1.sub0:sreg_64 = COPY %0
64+
S_BRANCH %bb.2
65+
...
66+
67+
68+
---
69+
# GCN-LABEL: phi_moveimm_bad_opcode_input
70+
# GCN-NOT: %{{[0-9]+}}:sreg_32 = PHI %{{[0-9]+}}, %bb.3, %{{[0-9]+}}, %bb.1
71+
# GCN: %{{[0-9]+}}:vgpr_32 = PHI %{{[0-9]+}}, %bb.3, %{{[0-9]+}}, %bb.1
72+
name: phi_moveimm_bad_opcode_input
73+
tracksRegLiveness: true
74+
body: |
75+
bb.0:
76+
successors: %bb.1
77+
liveins: $sgpr0, $sgpr1, $vgpr0
78+
%6:vgpr_32 = COPY $vgpr0
79+
%0:vgpr_32 = V_MOV_B32_sdwa 0, %6:vgpr_32, 0, 5, 2, 4, implicit $exec, implicit %6:vgpr_32(tied-def 0)
80+
81+
%4:sreg_32 = COPY $sgpr0
82+
%5:sreg_32 = COPY $sgpr1
83+
84+
bb.1:
85+
86+
successors: %bb.2
87+
%2:sreg_32 = S_ADD_U32 %4, %5, implicit-def $scc
88+
S_BRANCH %bb.2
89+
bb.2:
90+
successors: %bb.3
91+
%3:sreg_32 = PHI %1, %bb.3, %2, %bb.1
92+
S_BRANCH %bb.3
93+
bb.3:
94+
successors: %bb.2
95+
%1:sreg_32 = COPY %0
96+
S_BRANCH %bb.2
97+
...

0 commit comments

Comments
 (0)