Skip to content

Commit 265d5c6

Browse files
author
Jun Wang
committed
(1) Fix a problem with reserving ScratchRSrcReg (2) Update test files.
1 parent 6f2289b commit 265d5c6

13 files changed

+2614
-1871
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp

Lines changed: 12 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -317,8 +317,12 @@ bool AMDGPUCallLowering::canLowerReturn(MachineFunction &MF,
317317
return checkReturn(CCInfo, Outs, TLI.CCAssignFnForReturn(CallConv, IsVarArg));
318318
}
319319

320-
/// Special handling for i1 return val: based on determineAndHandleAssignments()
321-
bool AMDGPUCallLowering::determineAndHandleAssignmentsForI1Return(
320+
/// Replace CallLowering::determineAndHandleAssignments() because we need to
321+
/// reserve ScratchRSrcReg when necessary.
322+
/// TODO: Investigate if reserving ScratchRSrcReg can be moved to calling conv
323+
/// functions. If so, then this function is not needed anymore -- we can just
324+
/// use CallLowering::determineAndHandleAssignments() as before.
325+
bool AMDGPUCallLowering::determineAndHandleAssignmentsLocal(
322326
ValueHandler &Handler, ValueAssigner &Assigner,
323327
SmallVectorImpl<ArgInfo> &Args, MachineIRBuilder &MIRBuilder,
324328
CallingConv::ID CallConv, bool IsVarArg) const {
@@ -405,12 +409,8 @@ bool AMDGPUCallLowering::lowerReturnVal(MachineIRBuilder &B,
405409
OutgoingValueAssigner Assigner(AssignFn);
406410
AMDGPUOutgoingValueHandler RetHandler(B, *MRI, Ret);
407411

408-
if (SplitEVTs.size() == 1 && SplitEVTs[0] == MVT::i1)
409-
return determineAndHandleAssignmentsForI1Return(
410-
RetHandler, Assigner, SplitRetInfos, B, CC, F.isVarArg());
411-
else
412-
return determineAndHandleAssignments(RetHandler, Assigner, SplitRetInfos, B,
413-
CC, F.isVarArg());
412+
return determineAndHandleAssignmentsLocal(RetHandler, Assigner, SplitRetInfos,
413+
B, CC, F.isVarArg());
414414
}
415415

416416
bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B, const Value *Val,
@@ -1575,16 +1575,10 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
15751575
Info.IsVarArg);
15761576
IncomingValueAssigner Assigner(RetAssignFn);
15771577
CallReturnHandler Handler(MIRBuilder, MRI, MIB);
1578-
if (Info.OrigRet.Ty->isIntegerTy(1)) {
1579-
if (!determineAndHandleAssignmentsForI1Return(Handler, Assigner, InArgs,
1580-
MIRBuilder, Info.CallConv,
1581-
Info.IsVarArg))
1582-
return false;
1583-
} else {
1584-
if (!determineAndHandleAssignments(Handler, Assigner, InArgs, MIRBuilder,
1585-
Info.CallConv, Info.IsVarArg))
1586-
return false;
1587-
}
1578+
if (!determineAndHandleAssignmentsLocal(Handler, Assigner, InArgs,
1579+
MIRBuilder, Info.CallConv,
1580+
Info.IsVarArg))
1581+
return false;
15881582
}
15891583

15901584
uint64_t CalleePopBytes = NumBytes;

llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -37,12 +37,12 @@ class AMDGPUCallLowering final : public CallLowering {
3737
bool lowerReturnVal(MachineIRBuilder &B, const Value *Val,
3838
ArrayRef<Register> VRegs, MachineInstrBuilder &Ret) const;
3939

40-
bool determineAndHandleAssignmentsForI1Return(ValueHandler &Handler,
41-
ValueAssigner &Assigner,
42-
SmallVectorImpl<ArgInfo> &Args,
43-
MachineIRBuilder &MIRBuilder,
44-
CallingConv::ID CallConv,
45-
bool IsVarArg) const;
40+
bool determineAndHandleAssignmentsLocal(ValueHandler &Handler,
41+
ValueAssigner &Assigner,
42+
SmallVectorImpl<ArgInfo> &Args,
43+
MachineIRBuilder &MIRBuilder,
44+
CallingConv::ID CallConv,
45+
bool IsVarArg) const;
4646

4747
public:
4848
AMDGPUCallLowering(const AMDGPUTargetLowering &TLI);

llvm/test/CodeGen/AMDGPU/GlobalISel/function-call-i1-return.ll

Lines changed: 299 additions & 5 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/GlobalISel/function-i1-args.ll

Lines changed: 81 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -232,6 +232,87 @@ define void @test_call_void_func_a2i1() {
232232
ret void
233233
}
234234

235+
define void @void_func_v2i1(<2 x i1> %arg0) {
236+
; GFX9-LABEL: name: void_func_v2i1
237+
; GFX9: bb.1 (%ir-block.0):
238+
; GFX9-NEXT: liveins: $vgpr0, $vgpr1
239+
; GFX9-NEXT: {{ $}}
240+
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
241+
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
242+
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
243+
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
244+
; GFX9-NEXT: [[BUILDVEC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
245+
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s1>) = G_TRUNC [[BUILDVEC]](<2 x s16>)
246+
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
247+
; GFX9-NEXT: G_STORE [[TRUNC2]](<2 x s1>), [[DEF]](p1) :: (store (<2 x s1>) into `ptr addrspace(1) undef`, addrspace 1)
248+
; GFX9-NEXT: SI_RETURN
249+
;
250+
; GFX11-LABEL: name: void_func_v2i1
251+
; GFX11: bb.1 (%ir-block.0):
252+
; GFX11-NEXT: liveins: $vgpr0, $vgpr1
253+
; GFX11-NEXT: {{ $}}
254+
; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
255+
; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
256+
; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
257+
; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
258+
; GFX11-NEXT: [[BUILDVEC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
259+
; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s1>) = G_TRUNC [[BUILDVEC]](<2 x s16>)
260+
; GFX11-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
261+
; GFX11-NEXT: G_STORE [[TRUNC2]](<2 x s1>), [[DEF]](p1) :: (store (<2 x s1>) into `ptr addrspace(1) undef`, addrspace 1)
262+
; GFX11-NEXT: SI_RETURN
263+
store <2 x i1> %arg0, ptr addrspace(1) undef
264+
ret void
265+
}
266+
267+
define void @test_call_void_func_v2i1(ptr addrspace(1) %in) {
268+
; GFX9-LABEL: name: test_call_void_func_v2i1
269+
; GFX9: bb.1 (%ir-block.0):
270+
; GFX9-NEXT: liveins: $vgpr0, $vgpr1
271+
; GFX9-NEXT: {{ $}}
272+
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
273+
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
274+
; GFX9-NEXT: [[MERGE:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
275+
; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s1>) = G_LOAD [[MERGE]](p1) :: (load (<2 x s1>) from %ir.in, addrspace 1)
276+
; GFX9-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
277+
; GFX9-NEXT: [[GLOBAL:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @[[CALLEE:void_func_v2i1]]
278+
; GFX9-NEXT: [[UNMERGE:%[0-9]+]]:_(s1), [[UNMERGE1:%[0-9]+]]:_(s1) = G_UNMERGE_VALUES [[LOAD]](<2 x s1>)
279+
; GFX9-NEXT: [[EXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UNMERGE]](s1)
280+
; GFX9-NEXT: [[EXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UNMERGE1]](s1)
281+
; GFX9-NEXT: [[EXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[EXT]](s16)
282+
; GFX9-NEXT: $vgpr0 = COPY [[EXT2]](s32)
283+
; GFX9-NEXT: [[EXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[EXT1]](s16)
284+
; GFX9-NEXT: $vgpr1 = COPY [[EXT3]](s32)
285+
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
286+
; GFX9-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
287+
; GFX9-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GLOBAL]](p0), @[[CALLEE]], csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
288+
; GFX9-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
289+
; GFX9-NEXT: SI_RETURN
290+
;
291+
; GFX11-LABEL: name: test_call_void_func_v2i1
292+
; GFX11: bb.1 (%ir-block.0):
293+
; GFX11-NEXT: liveins: $vgpr0, $vgpr1
294+
; GFX11-NEXT: {{ $}}
295+
; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
296+
; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
297+
; GFX11-NEXT: [[MERGE:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
298+
; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s1>) = G_LOAD [[MERGE]](p1) :: (load (<2 x s1>) from %ir.in, addrspace 1)
299+
; GFX11-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
300+
; GFX11-NEXT: [[GLOBAL:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @[[CALLEE:void_func_v2i1]]
301+
; GFX11-NEXT: [[UNMERGE:%[0-9]+]]:_(s1), [[UNMERGE1:%[0-9]+]]:_(s1) = G_UNMERGE_VALUES [[LOAD]](<2 x s1>)
302+
; GFX11-NEXT: [[EXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UNMERGE]](s1)
303+
; GFX11-NEXT: [[EXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UNMERGE1]](s1)
304+
; GFX11-NEXT: [[EXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[EXT]](s16)
305+
; GFX11-NEXT: $vgpr0 = COPY [[EXT2]](s32)
306+
; GFX11-NEXT: [[EXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[EXT1]](s16)
307+
; GFX11-NEXT: $vgpr1 = COPY [[EXT3]](s32)
308+
; GFX11-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GLOBAL]](p0), @[[CALLEE]], csr_amdgpu, implicit $vgpr0, implicit $vgpr1
309+
; GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
310+
; GFX11-NEXT: SI_RETURN
311+
%a = load <2 x i1>, ptr addrspace(1) %in
312+
call void @void_func_v2i1(<2 x i1> %a)
313+
ret void
314+
}
315+
235316
define void @void_func_i1_i1(i1 %arg0, i1 %arg1) {
236317
; GFX9-LABEL: name: void_func_i1_i1
237318
; GFX9: bb.1 (%ir-block.0):

llvm/test/CodeGen/AMDGPU/allow-check.ll

Lines changed: 15 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,20 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
22
; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -global-isel=0 -fast-isel=0 | FileCheck %s
3-
; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -global-isel=1 -fast-isel=0 | FileCheck %s
3+
; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -global-isel=1 -fast-isel=0 | FileCheck -check-prefixes=GISEL %s
44
; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -global-isel=0 -fast-isel=1 | FileCheck %s
55

66
define i1 @test_runtime() local_unnamed_addr {
77
; CHECK-LABEL: test_runtime:
88
; CHECK: ; %bb.0: ; %entry
99
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10-
; CHECK-NEXT: v_mov_b32_e32 v0, 1
10+
; CHECK-NEXT: s_mov_b64 s[4:5], -1
1111
; CHECK-NEXT: s_setpc_b64 s[30:31]
12+
;
13+
; GISEL-LABEL: test_runtime:
14+
; GISEL: ; %bb.0: ; %entry
15+
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16+
; GISEL-NEXT: s_mov_b64 s[4:5], 1
17+
; GISEL-NEXT: s_setpc_b64 s[30:31]
1218
entry:
1319
%allow = call i1 @llvm.allow.runtime.check(metadata !"test_check")
1420
ret i1 %allow
@@ -20,8 +26,14 @@ define i1 @test_ubsan() local_unnamed_addr {
2026
; CHECK-LABEL: test_ubsan:
2127
; CHECK: ; %bb.0: ; %entry
2228
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23-
; CHECK-NEXT: v_mov_b32_e32 v0, 1
29+
; CHECK-NEXT: s_mov_b64 s[4:5], -1
2430
; CHECK-NEXT: s_setpc_b64 s[30:31]
31+
;
32+
; GISEL-LABEL: test_ubsan:
33+
; GISEL: ; %bb.0: ; %entry
34+
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
35+
; GISEL-NEXT: s_mov_b64 s[4:5], 1
36+
; GISEL-NEXT: s_setpc_b64 s[30:31]
2537
entry:
2638
%allow = call i1 @llvm.allow.ubsan.check(i8 7)
2739
ret i1 %allow

llvm/test/CodeGen/AMDGPU/bf16.ll

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -33540,28 +33540,28 @@ define bfloat @v_select_fneg_lhs_bf16(i1 %cond, bfloat %a, bfloat %b) {
3354033540
; GFX8-LABEL: v_select_fneg_lhs_bf16:
3354133541
; GFX8: ; %bb.0:
3354233542
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33543-
; GFX8-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0
33543+
; GFX8-NEXT: v_xor_b32_e32 v0, 0x8000, v0
3354433544
; GFX8-NEXT: v_cndmask_b32_e64 v0, v1, v0, s[4:5]
3354533545
; GFX8-NEXT: s_setpc_b64 s[30:31]
3354633546
;
3354733547
; GFX9-LABEL: v_select_fneg_lhs_bf16:
3354833548
; GFX9: ; %bb.0:
3354933549
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33550-
; GFX9-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0
33550+
; GFX9-NEXT: v_xor_b32_e32 v0, 0x8000, v0
3355133551
; GFX9-NEXT: v_cndmask_b32_e64 v0, v1, v0, s[4:5]
3355233552
; GFX9-NEXT: s_setpc_b64 s[30:31]
3355333553
;
3355433554
; GFX10-LABEL: v_select_fneg_lhs_bf16:
3355533555
; GFX10: ; %bb.0:
3355633556
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33557-
; GFX10-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0
33557+
; GFX10-NEXT: v_xor_b32_e32 v0, 0x8000, v0
3355833558
; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, v0, s4
3355933559
; GFX10-NEXT: s_setpc_b64 s[30:31]
3356033560
;
3356133561
; GFX11-LABEL: v_select_fneg_lhs_bf16:
3356233562
; GFX11: ; %bb.0:
3356333563
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33564-
; GFX11-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0
33564+
; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v0
3356533565
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
3356633566
; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, v0, s0
3356733567
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -33592,28 +33592,28 @@ define bfloat @v_select_fneg_rhs_bf16(i1 %cond, bfloat %a, bfloat %b) {
3359233592
; GFX8-LABEL: v_select_fneg_rhs_bf16:
3359333593
; GFX8: ; %bb.0:
3359433594
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33595-
; GFX8-NEXT: v_xor_b32_e32 v1, 0xffff8000, v1
33595+
; GFX8-NEXT: v_xor_b32_e32 v1, 0x8000, v1
3359633596
; GFX8-NEXT: v_cndmask_b32_e64 v0, v1, v0, s[4:5]
3359733597
; GFX8-NEXT: s_setpc_b64 s[30:31]
3359833598
;
3359933599
; GFX9-LABEL: v_select_fneg_rhs_bf16:
3360033600
; GFX9: ; %bb.0:
3360133601
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33602-
; GFX9-NEXT: v_xor_b32_e32 v1, 0xffff8000, v1
33602+
; GFX9-NEXT: v_xor_b32_e32 v1, 0x8000, v1
3360333603
; GFX9-NEXT: v_cndmask_b32_e64 v0, v1, v0, s[4:5]
3360433604
; GFX9-NEXT: s_setpc_b64 s[30:31]
3360533605
;
3360633606
; GFX10-LABEL: v_select_fneg_rhs_bf16:
3360733607
; GFX10: ; %bb.0:
3360833608
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33609-
; GFX10-NEXT: v_xor_b32_e32 v1, 0xffff8000, v1
33609+
; GFX10-NEXT: v_xor_b32_e32 v1, 0x8000, v1
3361033610
; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, v0, s4
3361133611
; GFX10-NEXT: s_setpc_b64 s[30:31]
3361233612
;
3361333613
; GFX11-LABEL: v_select_fneg_rhs_bf16:
3361433614
; GFX11: ; %bb.0:
3361533615
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33616-
; GFX11-NEXT: v_xor_b32_e32 v1, 0xffff8000, v1
33616+
; GFX11-NEXT: v_xor_b32_e32 v1, 0x8000, v1
3361733617
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
3361833618
; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, v0, s0
3361933619
; GFX11-NEXT: s_setpc_b64 s[30:31]

0 commit comments

Comments
 (0)