Skip to content

Commit 9ef166e

Browse files
committed
[AMDGPU] Fix FoldImmediate for 16 bit operand
Differential Revision: https://reviews.llvm.org/D79362
1 parent 55b9b11 commit 9ef166e

File tree

3 files changed: 288 additions & 9 deletions

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2509,15 +2509,41 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
25092509

25102510
unsigned Opc = UseMI.getOpcode();
25112511
if (Opc == AMDGPU::COPY) {
2512-
bool isVGPRCopy = RI.isVGPR(*MRI, UseMI.getOperand(0).getReg());
2512+
Register DstReg = UseMI.getOperand(0).getReg();
2513+
Register SrcReg = UseMI.getOperand(1).getReg();
2514+
bool Is16Bit = getOpSize(UseMI, 0) == 2;
2515+
bool isVGPRCopy = RI.isVGPR(*MRI, DstReg);
25132516
unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
2514-
if (RI.isAGPR(*MRI, UseMI.getOperand(0).getReg())) {
2515-
if (!isInlineConstant(*ImmOp, AMDGPU::OPERAND_REG_INLINE_AC_INT32))
2517+
APInt Imm(32, ImmOp->getImm());
2518+
2519+
if (UseMI.getOperand(1).getSubReg() == AMDGPU::hi16)
2520+
Imm = Imm.ashr(16);
2521+
2522+
if (RI.isAGPR(*MRI, DstReg)) {
2523+
if (!isInlineConstant(Imm))
25162524
return false;
25172525
NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32;
25182526
}
2527+
2528+
if (Is16Bit) {
2529+
if (isVGPRCopy)
2530+
return false; // Do not clobber vgpr_hi16
2531+
2532+
if (DstReg.isVirtual() &&
2533+
UseMI.getOperand(0).getSubReg() != AMDGPU::lo16)
2534+
return false;
2535+
2536+
UseMI.getOperand(0).setSubReg(0);
2537+
if (DstReg.isPhysical()) {
2538+
DstReg = RI.get32BitRegister(DstReg);
2539+
UseMI.getOperand(0).setReg(DstReg);
2540+
}
2541+
assert(SrcReg.isVirtual());
2542+
}
2543+
25192544
UseMI.setDesc(get(NewOpc));
2520-
UseMI.getOperand(1).ChangeToImmediate(ImmOp->getImm());
2545+
UseMI.getOperand(1).ChangeToImmediate(Imm.getSExtValue());
2546+
UseMI.getOperand(1).setTargetFlags(0);
25212547
UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent());
25222548
return true;
25232549
}

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -827,11 +827,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
827827
const MachineOperand &MO = MI.getOperand(OpNo);
828828
if (MO.isReg()) {
829829
if (unsigned SubReg = MO.getSubReg()) {
830-
assert(RI.getRegSizeInBits(*RI.getSubClassWithSubReg(
831-
MI.getParent()->getParent()->getRegInfo().
832-
getRegClass(MO.getReg()), SubReg)) >= 32 &&
833-
"Sub-dword subregs are not supported");
834-
return RI.getNumChannelsFromSubReg(SubReg) * 4;
830+
return RI.getSubRegIdxSize(SubReg) / 8;
835831
}
836832
}
837833
return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
Lines changed: 257 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,257 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx908 -verify-machineinstrs -run-pass peephole-opt -o - %s | FileCheck -check-prefix=GCN %s
3+
4+
---
5+
name: fold_simm_16_sub_to_lo
6+
body: |
7+
bb.0:
8+
9+
; GCN-LABEL: name: fold_simm_16_sub_to_lo
10+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
11+
; GCN: [[COPY:%[0-9]+]]:sgpr_lo16 = COPY killed [[S_MOV_B32_]].lo16
12+
; GCN: SI_RETURN_TO_EPILOG [[COPY]]
13+
%0:sreg_32 = S_MOV_B32 2048
14+
%1:sgpr_lo16 = COPY killed %0.lo16
15+
SI_RETURN_TO_EPILOG %1
16+
17+
...
18+
19+
---
20+
name: fold_simm_16_sub_to_sub
21+
body: |
22+
bb.0:
23+
24+
; GCN-LABEL: name: fold_simm_16_sub_to_sub
25+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
26+
; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
27+
; GCN: SI_RETURN_TO_EPILOG [[S_MOV_B32_1]]
28+
%0:sreg_32 = S_MOV_B32 2048
29+
%1.lo16:sreg_32 = COPY killed %0.lo16
30+
SI_RETURN_TO_EPILOG %1
31+
32+
...
33+
34+
---
35+
name: fold_simm_16_sub_to_phys
36+
body: |
37+
bb.0:
38+
39+
; GCN-LABEL: name: fold_simm_16_sub_to_phys
40+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
41+
; GCN: $sgpr0 = S_MOV_B32 2048
42+
; GCN: SI_RETURN_TO_EPILOG $sgpr0_lo16
43+
%0:sreg_32 = S_MOV_B32 2048
44+
$sgpr0_lo16 = COPY killed %0.lo16
45+
SI_RETURN_TO_EPILOG $sgpr0_lo16
46+
47+
...
48+
49+
---
50+
name: fold_aimm_16_sub_to_sub_2048
51+
body: |
52+
bb.0:
53+
54+
; GCN-LABEL: name: fold_aimm_16_sub_to_sub_2048
55+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
56+
; GCN: %1.lo16:agpr_32 = COPY killed [[S_MOV_B32_]].lo16
57+
; GCN: SI_RETURN_TO_EPILOG %1
58+
%0:sreg_32 = S_MOV_B32 2048
59+
%1.lo16:agpr_32 = COPY killed %0.lo16
60+
SI_RETURN_TO_EPILOG %1
61+
62+
...
63+
64+
---
65+
name: fold_aimm_16_sub_to_sub_0
66+
body: |
67+
bb.0:
68+
69+
; GCN-LABEL: name: fold_aimm_16_sub_to_sub_0
70+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
71+
; GCN: [[V_ACCVGPR_WRITE_B32_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32 0, implicit $exec
72+
; GCN: SI_RETURN_TO_EPILOG [[V_ACCVGPR_WRITE_B32_]]
73+
%0:sreg_32 = S_MOV_B32 0
74+
%1.lo16:agpr_32 = COPY killed %0.lo16
75+
SI_RETURN_TO_EPILOG %1
76+
77+
...
78+
79+
---
80+
name: fold_aimm_16_sub_to_phys
81+
body: |
82+
bb.0:
83+
84+
; GCN-LABEL: name: fold_aimm_16_sub_to_phys
85+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
86+
; GCN: $agpr0 = V_ACCVGPR_WRITE_B32 0, implicit $exec
87+
; GCN: SI_RETURN_TO_EPILOG $agpr0_lo16
88+
%0:sreg_32 = S_MOV_B32 0
89+
$agpr0_lo16 = COPY killed %0.lo16
90+
SI_RETURN_TO_EPILOG $agpr0_lo16
91+
92+
...
93+
94+
---
95+
name: fold_vimm_16_sub_to_lo
96+
body: |
97+
bb.0:
98+
99+
; GCN-LABEL: name: fold_vimm_16_sub_to_lo
100+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
101+
; GCN: [[COPY:%[0-9]+]]:vgpr_lo16 = COPY killed [[S_MOV_B32_]].lo16
102+
; GCN: SI_RETURN_TO_EPILOG [[COPY]]
103+
%0:sreg_32 = S_MOV_B32 2048
104+
%1:vgpr_lo16 = COPY killed %0.lo16
105+
SI_RETURN_TO_EPILOG %1
106+
107+
...
108+
109+
---
110+
name: fold_vimm_16_sub_to_sub
111+
body: |
112+
bb.0:
113+
114+
; GCN-LABEL: name: fold_vimm_16_sub_to_sub
115+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
116+
; GCN: %1.lo16:vgpr_32 = COPY killed [[S_MOV_B32_]].lo16
117+
; GCN: SI_RETURN_TO_EPILOG %1
118+
%0:sreg_32 = S_MOV_B32 2048
119+
%1.lo16:vgpr_32 = COPY killed %0.lo16
120+
SI_RETURN_TO_EPILOG %1
121+
122+
...
123+
124+
---
125+
name: fold_vimm_16_sub_to_phys
126+
body: |
127+
bb.0:
128+
129+
; GCN-LABEL: name: fold_vimm_16_sub_to_phys
130+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
131+
; GCN: $vgpr0_lo16 = COPY killed [[S_MOV_B32_]].lo16
132+
; GCN: SI_RETURN_TO_EPILOG $vgpr0_lo16
133+
%0:sreg_32 = S_MOV_B32 2048
134+
$vgpr0_lo16 = COPY killed %0.lo16
135+
SI_RETURN_TO_EPILOG $vgpr0_lo16
136+
137+
...
138+
139+
---
140+
name: fold_vimm_16_lo_to_hi
141+
body: |
142+
bb.0:
143+
144+
; GCN-LABEL: name: fold_vimm_16_lo_to_hi
145+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
146+
; GCN: %1.hi16:vgpr_32 = COPY killed [[S_MOV_B32_]].lo16
147+
; GCN: SI_RETURN_TO_EPILOG %1
148+
%0:sreg_32 = S_MOV_B32 2048
149+
%1.hi16:vgpr_32 = COPY killed %0.lo16
150+
SI_RETURN_TO_EPILOG %1
151+
152+
...
153+
154+
---
155+
name: fold_vimm_16_hi_to_lo
156+
body: |
157+
bb.0:
158+
159+
; GCN-LABEL: name: fold_vimm_16_hi_to_lo
160+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
161+
; GCN: %1.lo16:vgpr_32 = COPY killed [[S_MOV_B32_]].hi16
162+
; GCN: SI_RETURN_TO_EPILOG %1
163+
%0:sreg_32 = S_MOV_B32 2048
164+
%1.lo16:vgpr_32 = COPY killed %0.hi16
165+
SI_RETURN_TO_EPILOG %1
166+
167+
...
168+
169+
---
170+
name: fold_simm_16_sub_to_sub_lo_to_hi
171+
body: |
172+
bb.0:
173+
174+
; GCN-LABEL: name: fold_simm_16_sub_to_sub_lo_to_hi
175+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
176+
; GCN: %1.hi16:sreg_32 = COPY killed [[S_MOV_B32_]].lo16
177+
; GCN: SI_RETURN_TO_EPILOG %1
178+
%0:sreg_32 = S_MOV_B32 2048
179+
%1.hi16:sreg_32 = COPY killed %0.lo16
180+
SI_RETURN_TO_EPILOG %1
181+
182+
...
183+
184+
---
185+
name: fold_simm_16_sub_to_sub_hi_to_lo_2048
186+
body: |
187+
bb.0:
188+
189+
; GCN-LABEL: name: fold_simm_16_sub_to_sub_hi_to_lo_2048
190+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
191+
; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
192+
; GCN: SI_RETURN_TO_EPILOG [[S_MOV_B32_1]]
193+
%0:sreg_32 = S_MOV_B32 2048
194+
%1.lo16:sreg_32 = COPY killed %0.hi16
195+
SI_RETURN_TO_EPILOG %1
196+
197+
...
198+
199+
---
200+
name: fold_simm_16_sub_to_sub_hi_to_lo_shifted_2048
201+
body: |
202+
bb.0:
203+
204+
; GCN-LABEL: name: fold_simm_16_sub_to_sub_hi_to_lo_shifted_2048
205+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 134217728
206+
; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
207+
; GCN: SI_RETURN_TO_EPILOG [[S_MOV_B32_1]]
208+
%0:sreg_32 = S_MOV_B32 134217728
209+
%1.lo16:sreg_32 = COPY killed %0.hi16
210+
SI_RETURN_TO_EPILOG %1
211+
212+
...
213+
214+
---
215+
name: fold_aimm_16_sub_to_sub_hi_to_lo_2048
216+
body: |
217+
bb.0:
218+
219+
; GCN-LABEL: name: fold_aimm_16_sub_to_sub_hi_to_lo_2048
220+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
221+
; GCN: [[V_ACCVGPR_WRITE_B32_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32 0, implicit $exec
222+
; GCN: SI_RETURN_TO_EPILOG [[V_ACCVGPR_WRITE_B32_]]
223+
%0:sreg_32 = S_MOV_B32 2048
224+
%1.lo16:agpr_32 = COPY killed %0.hi16
225+
SI_RETURN_TO_EPILOG %1
226+
227+
...
228+
229+
---
230+
name: fold_aimm_16_sub_to_sub_hi_to_lo_shifted_1
231+
body: |
232+
bb.0:
233+
234+
; GCN-LABEL: name: fold_aimm_16_sub_to_sub_hi_to_lo_shifted_1
235+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65536
236+
; GCN: [[V_ACCVGPR_WRITE_B32_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32 1, implicit $exec
237+
; GCN: SI_RETURN_TO_EPILOG [[V_ACCVGPR_WRITE_B32_]]
238+
%0:sreg_32 = S_MOV_B32 65536
239+
%1.lo16:agpr_32 = COPY killed %0.hi16
240+
SI_RETURN_TO_EPILOG %1
241+
242+
...
243+
244+
---
245+
name: fold_aimm_16_sub_to_sub_hi_to_lo_shifted_2048
246+
body: |
247+
bb.0:
248+
249+
; GCN-LABEL: name: fold_aimm_16_sub_to_sub_hi_to_lo_shifted_2048
250+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 134217728
251+
; GCN: %1.lo16:agpr_32 = COPY killed [[S_MOV_B32_]].hi16
252+
; GCN: SI_RETURN_TO_EPILOG %1
253+
%0:sreg_32 = S_MOV_B32 134217728
254+
%1.lo16:agpr_32 = COPY killed %0.hi16
255+
SI_RETURN_TO_EPILOG %1
256+
257+
...

0 commit comments