Skip to content

Commit 256343a

Browse files
authored
Revert "Update amdgpu_gfx functions to use s0-s3 for inreg SGPR arguments on targets using scratch instructions for stack #78226" (#86273)
Reverts #81394. This reverts commit 3ac243b. The reverted patch does not handle the RSrc registers s0-s3 correctly. This leads to a broken test, which expects s0-s3 as function arguments and also uses them as the RSrc register. We need to revisit the patch, but apparently we only want to have s0-s3 as argument registers if we don't need them as RSrc registers.
1 parent fbc2473 commit 256343a

12 files changed

+2563
-2072
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -715,13 +715,10 @@ bool AMDGPUCallLowering::lowerFormalArguments(
715715
if (!IsEntryFunc && !IsGraphics) {
716716
// For the fixed ABI, pass workitem IDs in the last argument register.
717717
TLI.allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info);
718-
}
719718

720-
if (!IsEntryFunc) {
721719
if (!Subtarget.enableFlatScratch())
722720
CCInfo.AllocateReg(Info->getScratchRSrcReg());
723-
if (!IsGraphics)
724-
TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
721+
TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
725722
}
726723

727724
IncomingValueAssigner Assigner(AssignFn);

llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ def CC_SI_Gfx : CallingConv<[
2323
// 33 is reserved for the frame pointer
2424
// 34 is reserved for the base pointer
2525
CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16] , CCAssignToReg<[
26-
SGPR0, SGPR1, SGPR2, SGPR3,
2726
SGPR4, SGPR5, SGPR6, SGPR7,
2827
SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
2928
SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2859,13 +2859,12 @@ SDValue SITargetLowering::LowerFormalArguments(
28592859
} else if (!IsGraphics) {
28602860
// For the fixed ABI, pass workitem IDs in the last argument register.
28612861
allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info);
2862-
}
28632862

2864-
if (!IsEntryFunc) {
2863+
// FIXME: Sink this into allocateSpecialInputSGPRs
28652864
if (!Subtarget->enableFlatScratch())
28662865
CCInfo.AllocateReg(Info->getScratchRSrcReg());
2867-
if (!IsGraphics)
2868-
allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
2866+
2867+
allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
28692868
}
28702869

28712870
if (!IsKernel) {

llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -50,10 +50,10 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg
5050
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
5151
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
5252
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32_inreg
53-
; CHECK-NEXT: $sgpr0 = COPY [[C]](s32)
53+
; CHECK-NEXT: $sgpr4 = COPY [[C]](s32)
5454
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
5555
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
56-
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
56+
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3
5757
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
5858
; CHECK-NEXT: SI_RETURN
5959
call amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg 42)
@@ -99,11 +99,11 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #
9999
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32_inreg
100100
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8)
101101
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
102-
; CHECK-NEXT: $sgpr0 = COPY [[ANYEXT1]](s32)
103-
; CHECK-NEXT: $sgpr1 = COPY [[LOAD2]](s32)
102+
; CHECK-NEXT: $sgpr4 = COPY [[ANYEXT1]](s32)
103+
; CHECK-NEXT: $sgpr5 = COPY [[LOAD2]](s32)
104104
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
105105
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
106-
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
106+
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3
107107
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
108108
; CHECK-NEXT: SI_RETURN
109109
%ptr0 = load ptr addrspace(1), ptr addrspace(4) undef

llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -942,10 +942,10 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg
942942
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
943943
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
944944
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32_inreg
945-
; CHECK-NEXT: $sgpr0 = COPY [[C]](s32)
945+
; CHECK-NEXT: $sgpr4 = COPY [[C]](s32)
946946
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
947947
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
948-
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
948+
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3
949949
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
950950
; CHECK-NEXT: SI_RETURN
951951
call amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg 42)
@@ -3984,11 +3984,11 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #
39843984
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32_inreg
39853985
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8)
39863986
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
3987-
; CHECK-NEXT: $sgpr0 = COPY [[ANYEXT1]](s32)
3988-
; CHECK-NEXT: $sgpr1 = COPY [[LOAD2]](s32)
3987+
; CHECK-NEXT: $sgpr4 = COPY [[ANYEXT1]](s32)
3988+
; CHECK-NEXT: $sgpr5 = COPY [[LOAD2]](s32)
39893989
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
39903990
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
3991-
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
3991+
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3
39923992
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
39933993
; CHECK-NEXT: SI_RETURN
39943994
%ptr0 = load ptr addrspace(1), ptr addrspace(4) undef

llvm/test/CodeGen/AMDGPU/bf16.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3337,7 +3337,7 @@ define amdgpu_gfx void @test_inreg_arg_store(bfloat inreg %in, ptr addrspace(1)
33373337
; GFX11-LABEL: test_inreg_arg_store:
33383338
; GFX11: ; %bb.0:
33393339
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3340-
; GFX11-NEXT: v_mov_b32_e32 v2, s0
3340+
; GFX11-NEXT: v_mov_b32_e32 v2, s4
33413341
; GFX11-NEXT: global_store_b16 v[0:1], v2, off
33423342
; GFX11-NEXT: s_setpc_b64 s[30:31]
33433343
store bfloat %in, ptr addrspace(1) %out

llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -472,7 +472,7 @@ define amdgpu_gfx void @test34(i32 inreg %arg1, i32 inreg %arg2) {
472472
; GCN-LABEL: test34:
473473
; GCN: ; %bb.0:
474474
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
475-
; GCN-NEXT: s_min_i32 s0, s0, s1
475+
; GCN-NEXT: s_min_i32 s0, s4, s5
476476
; GCN-NEXT: v_mov_b32_e32 v0, 0
477477
; GCN-NEXT: s_cmpk_lt_i32 s0, 0x3e9
478478
; GCN-NEXT: v_mov_b32_e32 v1, 0
@@ -492,7 +492,7 @@ define amdgpu_gfx void @test35(i32 inreg %arg1, i32 inreg %arg2) {
492492
; GCN-LABEL: test35:
493493
; GCN: ; %bb.0:
494494
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
495-
; GCN-NEXT: s_max_i32 s0, s0, s1
495+
; GCN-NEXT: s_max_i32 s0, s4, s5
496496
; GCN-NEXT: v_mov_b32_e32 v0, 0
497497
; GCN-NEXT: s_cmpk_gt_i32 s0, 0x3e8
498498
; GCN-NEXT: v_mov_b32_e32 v1, 0
@@ -512,9 +512,9 @@ define amdgpu_gfx void @test36(i32 inreg %arg1, i32 inreg %arg2, i32 inreg %arg3
512512
; GCN-LABEL: test36:
513513
; GCN: ; %bb.0:
514514
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
515-
; GCN-NEXT: s_min_u32 s0, s0, s1
515+
; GCN-NEXT: s_min_u32 s0, s4, s5
516516
; GCN-NEXT: v_mov_b32_e32 v0, 0
517-
; GCN-NEXT: s_cmp_lt_u32 s0, s2
517+
; GCN-NEXT: s_cmp_lt_u32 s0, s6
518518
; GCN-NEXT: v_mov_b32_e32 v1, 0
519519
; GCN-NEXT: s_cselect_b32 s0, -1, 0
520520
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
@@ -532,9 +532,9 @@ define amdgpu_gfx void @test37(i32 inreg %arg1, i32 inreg %arg2, i32 inreg %arg3
532532
; GCN-LABEL: test37:
533533
; GCN: ; %bb.0:
534534
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
535-
; GCN-NEXT: s_max_i32 s0, s0, s1
535+
; GCN-NEXT: s_max_i32 s0, s4, s5
536536
; GCN-NEXT: v_mov_b32_e32 v0, 0
537-
; GCN-NEXT: s_cmp_ge_i32 s0, s2
537+
; GCN-NEXT: s_cmp_ge_i32 s0, s6
538538
; GCN-NEXT: v_mov_b32_e32 v1, 0
539539
; GCN-NEXT: s_cselect_b32 s0, -1, 0
540540
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
@@ -552,7 +552,7 @@ define amdgpu_gfx void @test38(i32 inreg %arg1, i32 inreg %arg2) {
552552
; GCN-LABEL: test38:
553553
; GCN: ; %bb.0:
554554
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
555-
; GCN-NEXT: s_max_u32 s0, s0, s1
555+
; GCN-NEXT: s_max_u32 s0, s4, s5
556556
; GCN-NEXT: v_mov_b32_e32 v0, 0
557557
; GCN-NEXT: s_cmpk_lt_u32 s0, 0x3e9
558558
; GCN-NEXT: v_mov_b32_e32 v1, 0
@@ -572,7 +572,7 @@ define amdgpu_gfx void @test39(i32 inreg %arg1, i32 inreg %arg2) {
572572
; GCN-LABEL: test39:
573573
; GCN: ; %bb.0:
574574
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
575-
; GCN-NEXT: s_min_i32 s0, s0, s1
575+
; GCN-NEXT: s_min_i32 s0, s4, s5
576576
; GCN-NEXT: v_mov_b32_e32 v0, 0
577577
; GCN-NEXT: s_cmpk_gt_i32 s0, 0x3e7
578578
; GCN-NEXT: v_mov_b32_e32 v1, 0
@@ -592,9 +592,9 @@ define amdgpu_gfx void @test40(i32 inreg %arg1, i32 inreg %arg2, i32 inreg %arg3
592592
; GCN-LABEL: test40:
593593
; GCN: ; %bb.0:
594594
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
595-
; GCN-NEXT: s_max_i32 s0, s0, s1
595+
; GCN-NEXT: s_max_i32 s0, s4, s5
596596
; GCN-NEXT: v_mov_b32_e32 v0, 0
597-
; GCN-NEXT: s_cmp_le_i32 s0, s2
597+
; GCN-NEXT: s_cmp_le_i32 s0, s6
598598
; GCN-NEXT: v_mov_b32_e32 v1, 0
599599
; GCN-NEXT: s_cselect_b32 s0, -1, 0
600600
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
@@ -612,9 +612,9 @@ define amdgpu_gfx void @test41(i32 inreg %arg1, i32 inreg %arg2, i32 inreg %arg3
612612
; GCN-LABEL: test41:
613613
; GCN: ; %bb.0:
614614
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
615-
; GCN-NEXT: s_min_u32 s0, s0, s1
615+
; GCN-NEXT: s_min_u32 s0, s4, s5
616616
; GCN-NEXT: v_mov_b32_e32 v0, 0
617-
; GCN-NEXT: s_cmp_ge_u32 s0, s2
617+
; GCN-NEXT: s_cmp_ge_u32 s0, s6
618618
; GCN-NEXT: v_mov_b32_e32 v1, 0
619619
; GCN-NEXT: s_cselect_b32 s0, -1, 0
620620
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0

llvm/test/CodeGen/AMDGPU/function-args-inreg.ll

Lines changed: 0 additions & 133 deletions
Original file line numberDiff line numberDiff line change
@@ -2176,93 +2176,6 @@ define void @void_func_a5i32_inreg([5 x i32] inreg %arg0, ptr addrspace(1) %ptr)
21762176
declare void @extern()
21772177

21782178
define void @void_func_a13i32_inreg([13 x i32] inreg %arg0, ptr addrspace(1) %ptr) {
2179-
; GFX9-LABEL: void_func_a13i32_inreg:
2180-
; GFX9: ; %bb.0:
2181-
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2182-
; GFX9-NEXT: s_mov_b32 s27, s33
2183-
; GFX9-NEXT: s_mov_b32 s33, s32
2184-
; GFX9-NEXT: s_or_saveexec_b64 s[28:29], -1
2185-
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
2186-
; GFX9-NEXT: s_mov_b64 exec, s[28:29]
2187-
; GFX9-NEXT: v_mov_b32_e32 v2, s26
2188-
; GFX9-NEXT: global_store_dword v[0:1], v2, off offset:48
2189-
; GFX9-NEXT: v_mov_b32_e32 v5, s25
2190-
; GFX9-NEXT: v_mov_b32_e32 v4, s24
2191-
; GFX9-NEXT: v_mov_b32_e32 v3, s23
2192-
; GFX9-NEXT: v_mov_b32_e32 v2, s22
2193-
; GFX9-NEXT: s_addk_i32 s32, 0x400
2194-
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:32
2195-
; GFX9-NEXT: v_writelane_b32 v40, s27, 2
2196-
; GFX9-NEXT: v_mov_b32_e32 v5, s21
2197-
; GFX9-NEXT: v_mov_b32_e32 v4, s20
2198-
; GFX9-NEXT: v_mov_b32_e32 v3, s19
2199-
; GFX9-NEXT: v_mov_b32_e32 v2, s18
2200-
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:16
2201-
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
2202-
; GFX9-NEXT: v_mov_b32_e32 v5, s17
2203-
; GFX9-NEXT: v_mov_b32_e32 v4, s16
2204-
; GFX9-NEXT: s_getpc_b64 s[16:17]
2205-
; GFX9-NEXT: s_add_u32 s16, s16, extern@gotpcrel32@lo+4
2206-
; GFX9-NEXT: s_addc_u32 s17, s17, extern@gotpcrel32@hi+12
2207-
; GFX9-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0
2208-
; GFX9-NEXT: v_mov_b32_e32 v3, s7
2209-
; GFX9-NEXT: v_mov_b32_e32 v2, s6
2210-
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
2211-
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
2212-
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2213-
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
2214-
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
2215-
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
2216-
; GFX9-NEXT: v_readlane_b32 s4, v40, 2
2217-
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
2218-
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
2219-
; GFX9-NEXT: s_mov_b64 exec, s[6:7]
2220-
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
2221-
; GFX9-NEXT: s_mov_b32 s33, s4
2222-
; GFX9-NEXT: s_waitcnt vmcnt(0)
2223-
; GFX9-NEXT: s_setpc_b64 s[30:31]
2224-
;
2225-
; GFX11-LABEL: void_func_a13i32_inreg:
2226-
; GFX11: ; %bb.0:
2227-
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2228-
; GFX11-NEXT: s_mov_b32 s23, s33
2229-
; GFX11-NEXT: s_mov_b32 s33, s32
2230-
; GFX11-NEXT: s_or_saveexec_b32 s24, -1
2231-
; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
2232-
; GFX11-NEXT: s_mov_b32 exec_lo, s24
2233-
; GFX11-NEXT: s_add_i32 s32, s32, 16
2234-
; GFX11-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v3, s19
2235-
; GFX11-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v9, s17
2236-
; GFX11-NEXT: s_getpc_b64 s[18:19]
2237-
; GFX11-NEXT: s_add_u32 s18, s18, extern@gotpcrel32@lo+4
2238-
; GFX11-NEXT: s_addc_u32 s19, s19, extern@gotpcrel32@hi+12
2239-
; GFX11-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v7, s7
2240-
; GFX11-NEXT: s_load_b64 s[16:17], s[18:19], 0x0
2241-
; GFX11-NEXT: v_writelane_b32 v40, s23, 2
2242-
; GFX11-NEXT: v_dual_mov_b32 v14, s22 :: v_dual_mov_b32 v5, s21
2243-
; GFX11-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v13, s3
2244-
; GFX11-NEXT: v_writelane_b32 v40, s30, 0
2245-
; GFX11-NEXT: v_dual_mov_b32 v12, s2 :: v_dual_mov_b32 v11, s1
2246-
; GFX11-NEXT: v_mov_b32_e32 v10, s0
2247-
; GFX11-NEXT: s_clause 0x3
2248-
; GFX11-NEXT: global_store_b32 v[0:1], v14, off offset:48
2249-
; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off offset:32
2250-
; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off offset:16
2251-
; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off
2252-
; GFX11-NEXT: v_writelane_b32 v40, s31, 1
2253-
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
2254-
; GFX11-NEXT: s_swappc_b64 s[30:31], s[16:17]
2255-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2256-
; GFX11-NEXT: v_readlane_b32 s31, v40, 1
2257-
; GFX11-NEXT: v_readlane_b32 s30, v40, 0
2258-
; GFX11-NEXT: v_readlane_b32 s0, v40, 2
2259-
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
2260-
; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
2261-
; GFX11-NEXT: s_mov_b32 exec_lo, s1
2262-
; GFX11-NEXT: s_add_i32 s32, s32, -16
2263-
; GFX11-NEXT: s_mov_b32 s33, s0
2264-
; GFX11-NEXT: s_waitcnt vmcnt(0)
2265-
; GFX11-NEXT: s_setpc_b64 s[30:31]
22662179
store [13 x i32] %arg0, ptr addrspace(1) %ptr
22672180
call void @extern()
22682181
ret void
@@ -2290,52 +2203,6 @@ define void @void_func_a13i32_inreg([13 x i32] inreg %arg0, ptr addrspace(1) %p
22902203

22912204
; FIXME: Should still fail
22922205
define void @void_func_a16i32_inreg__noimplicit([16 x i32] inreg %arg0, ptr addrspace(1) %ptr) {
2293-
; GFX9-LABEL: void_func_a16i32_inreg__noimplicit:
2294-
; GFX9: ; %bb.0:
2295-
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2296-
; GFX9-NEXT: v_mov_b32_e32 v5, s19
2297-
; GFX9-NEXT: v_mov_b32_e32 v4, s18
2298-
; GFX9-NEXT: v_mov_b32_e32 v3, s17
2299-
; GFX9-NEXT: v_mov_b32_e32 v2, s16
2300-
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:48
2301-
; GFX9-NEXT: s_nop 0
2302-
; GFX9-NEXT: v_mov_b32_e32 v5, s15
2303-
; GFX9-NEXT: v_mov_b32_e32 v4, s14
2304-
; GFX9-NEXT: v_mov_b32_e32 v3, s13
2305-
; GFX9-NEXT: v_mov_b32_e32 v2, s12
2306-
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:32
2307-
; GFX9-NEXT: s_nop 0
2308-
; GFX9-NEXT: v_mov_b32_e32 v5, s11
2309-
; GFX9-NEXT: v_mov_b32_e32 v4, s10
2310-
; GFX9-NEXT: v_mov_b32_e32 v3, s9
2311-
; GFX9-NEXT: v_mov_b32_e32 v2, s8
2312-
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:16
2313-
; GFX9-NEXT: s_nop 0
2314-
; GFX9-NEXT: v_mov_b32_e32 v5, s7
2315-
; GFX9-NEXT: v_mov_b32_e32 v4, s6
2316-
; GFX9-NEXT: v_mov_b32_e32 v3, s5
2317-
; GFX9-NEXT: v_mov_b32_e32 v2, s4
2318-
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
2319-
; GFX9-NEXT: s_waitcnt vmcnt(0)
2320-
; GFX9-NEXT: s_setpc_b64 s[30:31]
2321-
;
2322-
; GFX11-LABEL: void_func_a16i32_inreg__noimplicit:
2323-
; GFX11: ; %bb.0:
2324-
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2325-
; GFX11-NEXT: v_dual_mov_b32 v5, s15 :: v_dual_mov_b32 v4, s14
2326-
; GFX11-NEXT: v_dual_mov_b32 v3, s13 :: v_dual_mov_b32 v2, s12
2327-
; GFX11-NEXT: v_dual_mov_b32 v9, s11 :: v_dual_mov_b32 v8, s10
2328-
; GFX11-NEXT: v_dual_mov_b32 v7, s9 :: v_dual_mov_b32 v6, s8
2329-
; GFX11-NEXT: v_dual_mov_b32 v13, s7 :: v_dual_mov_b32 v12, s6
2330-
; GFX11-NEXT: v_dual_mov_b32 v11, s5 :: v_dual_mov_b32 v10, s4
2331-
; GFX11-NEXT: v_dual_mov_b32 v17, s3 :: v_dual_mov_b32 v16, s2
2332-
; GFX11-NEXT: v_dual_mov_b32 v15, s1 :: v_dual_mov_b32 v14, s0
2333-
; GFX11-NEXT: s_clause 0x3
2334-
; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off offset:48
2335-
; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off offset:32
2336-
; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off offset:16
2337-
; GFX11-NEXT: global_store_b128 v[0:1], v[14:17], off
2338-
; GFX11-NEXT: s_setpc_b64 s[30:31]
23392206
store [16 x i32] %arg0, ptr addrspace(1) %ptr
23402207
ret void
23412208
}

0 commit comments

Comments
 (0)