Skip to content

Commit a22a1fe

Browse files
authored
[AMDGPU] support 64-bit immediates in SIInstrInfo::FoldImmediate (llvm#69260)
This is a part of llvm#67781. Until we select more 64-bit move immediates the impact is minimal.
1 parent 66775f8 commit a22a1fe

File tree

5 files changed

+270
-25
lines changed

5 files changed

+270
-25
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 38 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3203,11 +3203,10 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
32033203
switch (DefMI.getOpcode()) {
32043204
default:
32053205
return false;
3206+
case AMDGPU::V_MOV_B64_e32:
32063207
case AMDGPU::S_MOV_B64:
3207-
// TODO: We could fold 64-bit immediates, but this get complicated
3208-
// when there are sub-registers.
3209-
return false;
3210-
3208+
case AMDGPU::V_MOV_B64_PSEUDO:
3209+
case AMDGPU::S_MOV_B64_IMM_PSEUDO:
32113210
case AMDGPU::V_MOV_B32_e32:
32123211
case AMDGPU::S_MOV_B32:
32133212
case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
@@ -3220,19 +3219,45 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
32203219
if (!ImmOp->isImm())
32213220
return false;
32223221

3222+
auto getImmFor = [ImmOp](const MachineOperand &UseOp) -> int64_t {
3223+
int64_t Imm = ImmOp->getImm();
3224+
switch (UseOp.getSubReg()) {
3225+
default:
3226+
return Imm;
3227+
case AMDGPU::sub0:
3228+
return Lo_32(Imm);
3229+
case AMDGPU::sub1:
3230+
return Hi_32(Imm);
3231+
case AMDGPU::lo16:
3232+
return APInt(16, Imm).getSExtValue();
3233+
case AMDGPU::hi16:
3234+
return APInt(32, Imm).ashr(16).getSExtValue();
3235+
case AMDGPU::sub1_lo16:
3236+
return APInt(16, Hi_32(Imm)).getSExtValue();
3237+
case AMDGPU::sub1_hi16:
3238+
return APInt(32, Hi_32(Imm)).ashr(16).getSExtValue();
3239+
}
3240+
};
3241+
3242+
assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form");
3243+
32233244
unsigned Opc = UseMI.getOpcode();
32243245
if (Opc == AMDGPU::COPY) {
3246+
assert(!UseMI.getOperand(0).getSubReg() && "Expected SSA form");
3247+
32253248
Register DstReg = UseMI.getOperand(0).getReg();
3226-
bool Is16Bit = getOpSize(UseMI, 0) == 2;
3249+
unsigned OpSize = getOpSize(UseMI, 0);
3250+
bool Is16Bit = OpSize == 2;
3251+
bool Is64Bit = OpSize == 8;
32273252
bool isVGPRCopy = RI.isVGPR(*MRI, DstReg);
3228-
unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
3229-
APInt Imm(32, ImmOp->getImm());
3230-
3231-
if (UseMI.getOperand(1).getSubReg() == AMDGPU::hi16)
3232-
Imm = Imm.ashr(16);
3253+
unsigned NewOpc = isVGPRCopy ? Is64Bit ? AMDGPU::V_MOV_B64_PSEUDO
3254+
: AMDGPU::V_MOV_B32_e32
3255+
: Is64Bit ? AMDGPU::S_MOV_B64_IMM_PSEUDO
3256+
: AMDGPU::S_MOV_B32;
3257+
APInt Imm(Is64Bit ? 64 : 32, getImmFor(UseMI.getOperand(1)));
32333258

32343259
if (RI.isAGPR(*MRI, DstReg)) {
3235-
if (!isInlineConstant(Imm))
3260+
if (Is64Bit || !isInlineConstant(Imm))
32363261
return false;
32373262
NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
32383263
}
@@ -3317,7 +3342,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
33173342
if (NewOpc == AMDGPU::V_FMAMK_F16_t16)
33183343
return false;
33193344

3320-
const int64_t Imm = ImmOp->getImm();
3345+
const int64_t Imm = getImmFor(RegSrc == Src1 ? *Src0 : *Src1);
33213346

33223347
// FIXME: This would be a lot easier if we could return a new instruction
33233348
// instead of having to modify in place.
@@ -3401,8 +3426,6 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
34013426
if (NewOpc == AMDGPU::V_FMAAK_F16_t16)
34023427
return false;
34033428

3404-
const int64_t Imm = ImmOp->getImm();
3405-
34063429
// FIXME: This would be a lot easier if we could return a new instruction
34073430
// instead of having to modify in place.
34083431

@@ -3413,7 +3436,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
34133436
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
34143437

34153438
// ChangingToImmediate adds Src2 back to the instruction.
3416-
Src2->ChangeToImmediate(Imm);
3439+
Src2->ChangeToImmediate(getImmFor(*Src2));
34173440

34183441
// These come before src2.
34193442
removeModOperands(UseMI);

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ def S_MOV_B64_IMM_PSEUDO : SPseudoInstSI <(outs SReg_64:$sdst),
151151
let SchedRW = [WriteSALU, Write64Bit];
152152
let Size = 16; // Needs maximum 2 s_mov_b32 instructions 8 byte long each.
153153
let Uses = [];
154+
let UseNamedOperandTable = 1;
154155
}
155156

156157
// Pseudoinstruction for @llvm.amdgcn.wqm. It is turned into a copy after the

llvm/test/CodeGen/AMDGPU/ipra-return-address-save-restore.ll

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,10 @@ declare void @llvm.lifetime.end.p5(i64 immarg, ptr addrspace(5) nocapture) #1
2828
; Function Attrs: norecurse
2929
define internal fastcc void @svm_node_closure_bsdf(ptr addrspace(1) %sd, ptr %stack, <4 x i32> %node, ptr %offset, i32 %0, i8 %trunc, float %1, float %2, float %mul80, i1 %cmp412.old, <4 x i32> %3, float %4, i32 %5, i1 %cmp440, i1 %cmp442, i1 %or.cond1306, float %.op, ptr addrspace(1) %arrayidx.i.i2202, ptr addrspace(1) %retval.0.i.i22089, ptr addrspace(1) %retval.1.i221310, i1 %cmp575, ptr addrspace(1) %num_closure_left.i2215, i32 %6, i1 %cmp.i2216, i32 %7, i64 %idx.ext.i2223, i32 %sub5.i2221) #2 {
3030
; GCN-LABEL: {{^}}svm_node_closure_bsdf:
31-
; GCN-DAG: v_writelane_b32 [[CSR_VGPR:v[0-9]+]], s30,
32-
; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31,
33-
; GCN: s_movk_i32 s30, 0x60
31+
; GCN-NOT: v_writelane_b32
32+
; GCN: s_movk_i32 s28, 0x60
3433
; GCN-NOT: s31
35-
; GCN-DAG: v_readlane_b32 s31, [[CSR_VGPR]],
36-
; GCN-DAG: v_readlane_b32 s30, [[CSR_VGPR]],
34+
; GCN-NOT: v_readlane_b32
3735
; GCN: s_waitcnt vmcnt(0)
3836
; GCN: s_setpc_b64 s[30:31]
3937
entry:

llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir

Lines changed: 226 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx908 -verify-machineinstrs -run-pass peephole-opt -o - %s | FileCheck -check-prefix=GCN %s
2+
# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -verify-machineinstrs -run-pass peephole-opt -o - %s | FileCheck -check-prefix=GCN %s
33

44
---
55
name: fold_simm_virtual
@@ -119,3 +119,228 @@ body: |
119119
SI_RETURN_TO_EPILOG $vgpr0_lo16
120120
121121
...
122+
123+
---
124+
name: fold_sreg_64_sub0_to_vgpr_32
125+
body: |
126+
bb.0:
127+
128+
; GCN-LABEL: name: fold_sreg_64_sub0_to_vgpr_32
129+
; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
130+
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1412567312, implicit $exec
131+
; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_MOV_B32_e32_]]
132+
%0:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
133+
%1:vgpr_32 = COPY killed %0.sub0
134+
SI_RETURN_TO_EPILOG %1
135+
136+
...
137+
138+
---
139+
name: fold_sreg_64_sub1_to_vgpr_32
140+
body: |
141+
bb.0:
142+
143+
; GCN-LABEL: name: fold_sreg_64_sub1_to_vgpr_32
144+
; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 1311768467750121200
145+
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 305419896, implicit $exec
146+
; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_MOV_B32_e32_]]
147+
%0:sreg_64 = S_MOV_B64 1311768467750121200
148+
%1:vgpr_32 = COPY killed %0.sub1
149+
SI_RETURN_TO_EPILOG %1
150+
151+
...
152+
153+
---
154+
name: fold_vreg_64_sub1_to_vgpr_32
155+
body: |
156+
bb.0:
157+
158+
; GCN-LABEL: name: fold_vreg_64_sub1_to_vgpr_32
159+
; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
160+
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 305419896, implicit $exec
161+
; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_MOV_B32_e32_]]
162+
%0:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
163+
%1:vgpr_32 = COPY killed %0.sub1
164+
SI_RETURN_TO_EPILOG %1
165+
166+
...
167+
168+
---
169+
name: fold_sreg_64_to_vreg_64
170+
body: |
171+
bb.0:
172+
173+
; GCN-LABEL: name: fold_sreg_64_to_vreg_64
174+
; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
175+
; GCN-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
176+
; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_MOV_B]]
177+
%0:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
178+
%1:vreg_64_align2 = COPY killed %0
179+
SI_RETURN_TO_EPILOG %1
180+
181+
...
182+
183+
---
184+
name: fold_sreg_64_to_sreg_64
185+
body: |
186+
bb.0:
187+
188+
; GCN-LABEL: name: fold_sreg_64_to_sreg_64
189+
; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 1311768467750121200
190+
; GCN-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
191+
; GCN-NEXT: SI_RETURN_TO_EPILOG [[S_MOV_B]]
192+
%0:sreg_64 = S_MOV_B64 1311768467750121200
193+
%1:sreg_64 = COPY killed %0
194+
SI_RETURN_TO_EPILOG %1
195+
196+
...
197+
198+
---
199+
name: fold_sreg_64_lo16_to_sgpr_lo16
200+
body: |
201+
bb.0:
202+
203+
; GCN-LABEL: name: fold_sreg_64_lo16_to_sgpr_lo16
204+
; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1125912791875585
205+
; GCN-NEXT: $sgpr0 = S_MOV_B32 1
206+
; GCN-NEXT: SI_RETURN_TO_EPILOG $sgpr0_lo16
207+
%0:sreg_64 = S_MOV_B64_IMM_PSEUDO 1125912791875585
208+
$sgpr0_lo16 = COPY killed %0.lo16
209+
SI_RETURN_TO_EPILOG $sgpr0_lo16
210+
211+
...
212+
213+
---
214+
name: fold_sreg_64_hi16_to_sgpr_lo16
215+
body: |
216+
bb.0:
217+
218+
; GCN-LABEL: name: fold_sreg_64_hi16_to_sgpr_lo16
219+
; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1125912791875585
220+
; GCN-NEXT: $sgpr0 = S_MOV_B32 2
221+
; GCN-NEXT: SI_RETURN_TO_EPILOG $sgpr0_lo16
222+
%0:sreg_64 = S_MOV_B64_IMM_PSEUDO 1125912791875585
223+
$sgpr0_lo16 = COPY killed %0.hi16
224+
SI_RETURN_TO_EPILOG $sgpr0_lo16
225+
226+
...
227+
228+
---
229+
name: fold_sreg_64_sub1_lo16_to_sgpr_lo16
230+
body: |
231+
bb.0:
232+
233+
; GCN-LABEL: name: fold_sreg_64_sub1_lo16_to_sgpr_lo16
234+
; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1125912791875585
235+
; GCN-NEXT: $sgpr0 = S_MOV_B32 3
236+
; GCN-NEXT: SI_RETURN_TO_EPILOG $sgpr0_lo16
237+
%0:sreg_64 = S_MOV_B64_IMM_PSEUDO 1125912791875585
238+
$sgpr0_lo16 = COPY killed %0.sub1_lo16
239+
SI_RETURN_TO_EPILOG $sgpr0_lo16
240+
241+
...
242+
243+
---
244+
name: fold_sreg_64_sub1_hi16_to_sgpr_lo16
245+
body: |
246+
bb.0:
247+
248+
; GCN-LABEL: name: fold_sreg_64_sub1_hi16_to_sgpr_lo16
249+
; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1125912791875585
250+
; GCN-NEXT: $sgpr0 = S_MOV_B32 4
251+
; GCN-NEXT: SI_RETURN_TO_EPILOG $sgpr0_lo16
252+
%0:sreg_64 = S_MOV_B64_IMM_PSEUDO 1125912791875585
253+
$sgpr0_lo16 = COPY killed %0.sub1_hi16
254+
SI_RETURN_TO_EPILOG $sgpr0_lo16
255+
256+
...
257+
258+
---
259+
name: fmac_sreg_64_sub0_src0_to_fmamk
260+
tracksRegLiveness: true
261+
body: |
262+
bb.0:
263+
264+
; GCN-LABEL: name: fmac_sreg_64_sub0_src0_to_fmamk
265+
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
266+
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
267+
; GCN-NEXT: [[V_FMAMK_F32_:%[0-9]+]]:vgpr_32 = V_FMAMK_F32 [[DEF]], 2882399984, [[DEF1]], implicit $mode, implicit $exec
268+
; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_FMAMK_F32_]]
269+
%0:vgpr_32 = IMPLICIT_DEF
270+
%1:vgpr_32 = IMPLICIT_DEF
271+
%2:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
272+
%3:vgpr_32 = V_FMAC_F32_e64 0, %2.sub0, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
273+
SI_RETURN_TO_EPILOG %3
274+
...
275+
276+
---
277+
name: fmac_sreg_64_sub1_src0_to_fmamk
278+
tracksRegLiveness: true
279+
body: |
280+
bb.0:
281+
282+
; GCN-LABEL: name: fmac_sreg_64_sub1_src0_to_fmamk
283+
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
284+
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
285+
; GCN-NEXT: [[V_FMAMK_F32_:%[0-9]+]]:vgpr_32 = V_FMAMK_F32 [[DEF]], 305419896, [[DEF1]], implicit $mode, implicit $exec
286+
; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_FMAMK_F32_]]
287+
%0:vgpr_32 = IMPLICIT_DEF
288+
%1:vgpr_32 = IMPLICIT_DEF
289+
%2:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
290+
%3:vgpr_32 = V_FMAC_F32_e64 0, %2.sub1, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
291+
SI_RETURN_TO_EPILOG %3
292+
...
293+
294+
---
295+
name: fmac_sreg_64_sub1_src1_to_fmaak
296+
tracksRegLiveness: true
297+
body: |
298+
bb.0:
299+
300+
; GCN-LABEL: name: fmac_sreg_64_sub1_src1_to_fmaak
301+
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
302+
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
303+
; GCN-NEXT: [[V_FMAMK_F32_:%[0-9]+]]:vgpr_32 = V_FMAMK_F32 [[DEF]], 305419896, [[DEF1]], implicit $mode, implicit $exec
304+
; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_FMAMK_F32_]]
305+
%0:vgpr_32 = IMPLICIT_DEF
306+
%1:vgpr_32 = IMPLICIT_DEF
307+
%2:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
308+
%3:vgpr_32 = V_FMAC_F32_e64 0, %0, 0, %2.sub1, 0, %1, 0, 0, implicit $mode, implicit $exec
309+
SI_RETURN_TO_EPILOG %3
310+
...
311+
312+
---
313+
name: fma_sreg_64_sub0_to_fmaak
314+
tracksRegLiveness: true
315+
body: |
316+
bb.0:
317+
318+
; GCN-LABEL: name: fma_sreg_64_sub0_to_fmaak
319+
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
320+
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
321+
; GCN-NEXT: [[V_FMAAK_F32_:%[0-9]+]]:vgpr_32 = V_FMAAK_F32 [[DEF]], [[DEF1]], 2882399984, implicit $mode, implicit $exec
322+
; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_FMAAK_F32_]]
323+
%0:vgpr_32 = IMPLICIT_DEF
324+
%1:vgpr_32 = IMPLICIT_DEF
325+
%2:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
326+
%3:vgpr_32 = V_FMA_F32_e64 0, %0, 0, %1, 0, %2.sub0, 0, 0, implicit $mode, implicit $exec
327+
SI_RETURN_TO_EPILOG %3
328+
...
329+
330+
---
331+
name: fma_sreg_64_sub1_to_fmaak
332+
tracksRegLiveness: true
333+
body: |
334+
bb.0:
335+
336+
; GCN-LABEL: name: fma_sreg_64_sub1_to_fmaak
337+
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
338+
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
339+
; GCN-NEXT: [[V_FMAAK_F32_:%[0-9]+]]:vgpr_32 = V_FMAAK_F32 [[DEF]], [[DEF1]], 305419896, implicit $mode, implicit $exec
340+
; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_FMAAK_F32_]]
341+
%0:vgpr_32 = IMPLICIT_DEF
342+
%1:vgpr_32 = IMPLICIT_DEF
343+
%2:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
344+
%3:vgpr_32 = V_FMA_F32_e64 0, %0, 0, %1, 0, %2.sub1, 0, 0, implicit $mode, implicit $exec
345+
SI_RETURN_TO_EPILOG %3
346+
...

llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -573,8 +573,7 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) {
573573
; GFX900-NEXT: v_mov_b32_e32 v3, s35
574574
; GFX900-NEXT: v_add_co_u32_e32 v1, vcc, s34, v1
575575
; GFX900-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v3, vcc
576-
; GFX900-NEXT: s_movk_i32 s0, 0x5000
577-
; GFX900-NEXT: v_add_co_u32_e32 v1, vcc, s0, v1
576+
; GFX900-NEXT: v_add_co_u32_e32 v1, vcc, 0x5000, v1
578577
; GFX900-NEXT: v_mov_b32_e32 v3, 0
579578
; GFX900-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
580579
; GFX900-NEXT: s_movk_i32 s2, 0x7f
@@ -805,8 +804,7 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) {
805804
; GFX90A-NEXT: v_mov_b32_e32 v2, s35
806805
; GFX90A-NEXT: v_add_co_u32_e32 v1, vcc, s34, v1
807806
; GFX90A-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v2, vcc
808-
; GFX90A-NEXT: s_movk_i32 s0, 0x5000
809-
; GFX90A-NEXT: v_add_co_u32_e32 v2, vcc, s0, v1
807+
; GFX90A-NEXT: v_add_co_u32_e32 v2, vcc, 0x5000, v1
810808
; GFX90A-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
811809
; GFX90A-NEXT: s_movk_i32 s2, 0x7f
812810
; GFX90A-NEXT: v_pk_mov_b32 v[4:5], 0, 0

0 commit comments

Comments (0)