@@ -29,8 +29,6 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_no_stack({ptr, i32, <4 x i32>} inre
29
29
ret void
30
30
}
31
31
32
- ; FIXME: Setup s32.
33
-
34
32
define amdgpu_cs_chain void @amdgpu_cs_chain_simple_call (<4 x i32 > inreg %sgpr , <4 x i32 > %vgpr ) {
35
33
; GISEL-GFX11-LABEL: amdgpu_cs_chain_simple_call:
36
34
; GISEL-GFX11: ; %bb.0:
@@ -41,7 +39,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_simple_call(<4 x i32> inreg %sgpr,
41
39
; GISEL-GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
42
40
; GISEL-GFX11-NEXT: s_mov_b32 s4, use@abs32@lo
43
41
; GISEL-GFX11-NEXT: s_mov_b32 s5, use@abs32@hi
44
- ; GISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
42
+ ; GISEL-GFX11-NEXT: s_mov_b32 s32, 0
45
43
; GISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[4:5]
46
44
; GISEL-GFX11-NEXT: s_endpgm
47
45
;
@@ -60,6 +58,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_simple_call(<4 x i32> inreg %sgpr,
60
58
; GISEL-GFX10-NEXT: s_mov_b32 s4, use@abs32@lo
61
59
; GISEL-GFX10-NEXT: s_mov_b32 s5, use@abs32@hi
62
60
; GISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
61
+ ; GISEL-GFX10-NEXT: s_mov_b32 s32, 0
63
62
; GISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
64
63
; GISEL-GFX10-NEXT: s_endpgm
65
64
;
@@ -72,7 +71,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_simple_call(<4 x i32> inreg %sgpr,
72
71
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
73
72
; DAGISEL-GFX11-NEXT: s_mov_b32 s5, use@abs32@hi
74
73
; DAGISEL-GFX11-NEXT: s_mov_b32 s4, use@abs32@lo
75
- ; DAGISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
74
+ ; DAGISEL-GFX11-NEXT: s_mov_b32 s32, 0
76
75
; DAGISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[4:5]
77
76
; DAGISEL-GFX11-NEXT: s_endpgm
78
77
;
@@ -91,18 +90,19 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_simple_call(<4 x i32> inreg %sgpr,
91
90
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, use@abs32@hi
92
91
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, use@abs32@lo
93
92
; DAGISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
93
+ ; DAGISEL-GFX10-NEXT: s_mov_b32 s32, 0
94
94
; DAGISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
95
95
; DAGISEL-GFX10-NEXT: s_endpgm
96
96
call amdgpu_gfx void @use (<4 x i32 > %sgpr , <4 x i32 > %vgpr )
97
97
ret void
98
98
}
99
99
100
- ; FIXME: Setup s32.
101
-
102
100
define amdgpu_cs_chain void @amdgpu_cs_chain_spill (<24 x i32 > inreg %sgprs , <24 x i32 > %vgprs ) {
103
101
; GISEL-GFX11-LABEL: amdgpu_cs_chain_spill:
104
102
; GISEL-GFX11: ; %bb.0:
105
103
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
104
+ ; GISEL-GFX11-NEXT: s_mov_b32 s32, 0
105
+ ; GISEL-GFX11-NEXT: v_dual_mov_b32 v32, v8 :: v_dual_mov_b32 v33, v9
106
106
; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 4
107
107
; GISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32
108
108
; GISEL-GFX11-NEXT: scratch_store_b32 off, v17, s24
@@ -123,7 +123,6 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24
123
123
; GISEL-GFX11-NEXT: scratch_store_b32 off, v24, s24
124
124
; GISEL-GFX11-NEXT: scratch_store_b32 off, v25, s25
125
125
; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 40
126
- ; GISEL-GFX11-NEXT: v_dual_mov_b32 v32, v8 :: v_dual_mov_b32 v33, v9
127
126
; GISEL-GFX11-NEXT: v_dual_mov_b32 v34, v10 :: v_dual_mov_b32 v35, v11
128
127
; GISEL-GFX11-NEXT: v_dual_mov_b32 v36, v12 :: v_dual_mov_b32 v37, v13
129
128
; GISEL-GFX11-NEXT: v_dual_mov_b32 v38, v14 :: v_dual_mov_b32 v39, v15
@@ -171,6 +170,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24
171
170
; GISEL-GFX10-NEXT: v_mov_b32_e32 v37, v13
172
171
; GISEL-GFX10-NEXT: v_mov_b32_e32 v38, v14
173
172
; GISEL-GFX10-NEXT: v_mov_b32_e32 v39, v15
173
+ ; GISEL-GFX10-NEXT: s_mov_b32 s32, 0
174
174
; GISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32
175
175
; GISEL-GFX10-NEXT: buffer_store_dword v17, off, s[48:51], s32 offset:4
176
176
; GISEL-GFX10-NEXT: buffer_store_dword v18, off, s[48:51], s32 offset:8
@@ -229,6 +229,8 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24
229
229
; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_spill:
230
230
; DAGISEL-GFX11: ; %bb.0:
231
231
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
232
+ ; DAGISEL-GFX11-NEXT: s_mov_b32 s32, 0
233
+ ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v32, v15 :: v_dual_mov_b32 v33, v14
232
234
; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 60
233
235
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32
234
236
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v31, s24
@@ -249,7 +251,6 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24
249
251
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v24, s24
250
252
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v23, s25
251
253
; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 24
252
- ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v32, v15 :: v_dual_mov_b32 v33, v14
253
254
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v34, v13 :: v_dual_mov_b32 v35, v12
254
255
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v36, v11 :: v_dual_mov_b32 v37, v10
255
256
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v38, v9 :: v_dual_mov_b32 v39, v8
@@ -297,6 +298,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24
297
298
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v37, v10
298
299
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v38, v9
299
300
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v39, v8
301
+ ; DAGISEL-GFX10-NEXT: s_mov_b32 s32, 0
300
302
; DAGISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32
301
303
; DAGISEL-GFX10-NEXT: buffer_store_dword v17, off, s[48:51], s32 offset:4
302
304
; DAGISEL-GFX10-NEXT: buffer_store_dword v18, off, s[48:51], s32 offset:8
@@ -796,6 +798,81 @@ define amdgpu_cs_chain void @chain_to_chain_more_args(<3 x i32> inreg %a, <3 x i
796
798
unreachable
797
799
}
798
800
801
+ define amdgpu_cs_chain void @amdgpu_cs_chain_dont_realign_stack (i32 %idx ) { ; Test case: an align-32 addrspace(5) alloca indexed by %idx; per the name, the expected output below should show no stack realignment (s32 is set to 0 and used as-is).
802
+ ; GISEL-GFX11-LABEL: amdgpu_cs_chain_dont_realign_stack:
803
+ ; GISEL-GFX11: ; %bb.0:
804
+ ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
805
+ ; GISEL-GFX11-NEXT: s_mov_b32 s3, 4
806
+ ; GISEL-GFX11-NEXT: s_mov_b32 s2, 3
807
+ ; GISEL-GFX11-NEXT: s_mov_b32 s1, 2
808
+ ; GISEL-GFX11-NEXT: s_mov_b32 s0, 1
809
+ ; GISEL-GFX11-NEXT: v_lshlrev_b32_e32 v0, 4, v8
810
+ ; GISEL-GFX11-NEXT: s_mov_b32 s32, 0
811
+ ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
812
+ ; GISEL-GFX11-NEXT: v_add_nc_u32_e32 v4, s32, v0
813
+ ; GISEL-GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
814
+ ; GISEL-GFX11-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
815
+ ; GISEL-GFX11-NEXT: scratch_store_b128 v4, v[0:3], off dlc
816
+ ; GISEL-GFX11-NEXT: s_waitcnt_vscnt null, 0x0
817
+ ; GISEL-GFX11-NEXT: s_endpgm
818
+ ;
819
+ ; GISEL-GFX10-LABEL: amdgpu_cs_chain_dont_realign_stack:
820
+ ; GISEL-GFX10: ; %bb.0:
821
+ ; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
822
+ ; GISEL-GFX10-NEXT: s_mov_b32 s32, 0
823
+ ; GISEL-GFX10-NEXT: v_lshlrev_b32_e32 v0, 4, v8
824
+ ; GISEL-GFX10-NEXT: v_lshrrev_b32_e64 v2, 5, s32
825
+ ; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 1
826
+ ; GISEL-GFX10-NEXT: v_mov_b32_e32 v3, 3
827
+ ; GISEL-GFX10-NEXT: v_mov_b32_e32 v4, 4
828
+ ; GISEL-GFX10-NEXT: v_add_nc_u32_e32 v0, v2, v0
829
+ ; GISEL-GFX10-NEXT: v_mov_b32_e32 v2, 2
830
+ ; GISEL-GFX10-NEXT: buffer_store_dword v1, v0, s[48:51], 0 offen
831
+ ; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
832
+ ; GISEL-GFX10-NEXT: buffer_store_dword v2, v0, s[48:51], 0 offen offset:4
833
+ ; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
834
+ ; GISEL-GFX10-NEXT: buffer_store_dword v3, v0, s[48:51], 0 offen offset:8
835
+ ; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
836
+ ; GISEL-GFX10-NEXT: buffer_store_dword v4, v0, s[48:51], 0 offen offset:12
837
+ ; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
838
+ ; GISEL-GFX10-NEXT: s_endpgm
839
+ ;
840
+ ; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_dont_realign_stack:
841
+ ; DAGISEL-GFX11: ; %bb.0:
842
+ ; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
843
+ ; DAGISEL-GFX11-NEXT: s_mov_b32 s32, 0
844
+ ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
845
+ ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4
846
+ ; DAGISEL-GFX11-NEXT: v_lshl_add_u32 v4, v8, 4, s32
847
+ ; DAGISEL-GFX11-NEXT: scratch_store_b128 v4, v[0:3], off dlc
848
+ ; DAGISEL-GFX11-NEXT: s_waitcnt_vscnt null, 0x0
849
+ ; DAGISEL-GFX11-NEXT: s_endpgm
850
+ ;
851
+ ; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_dont_realign_stack:
852
+ ; DAGISEL-GFX10: ; %bb.0:
853
+ ; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
854
+ ; DAGISEL-GFX10-NEXT: s_mov_b32 s32, 0
855
+ ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v0, 4
856
+ ; DAGISEL-GFX10-NEXT: v_lshrrev_b32_e64 v2, 5, s32
857
+ ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v3, 2
858
+ ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v4, 1
859
+ ; DAGISEL-GFX10-NEXT: v_lshl_add_u32 v1, v8, 4, v2
860
+ ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v2, 3
861
+ ; DAGISEL-GFX10-NEXT: buffer_store_dword v0, v1, s[48:51], 0 offen offset:12
862
+ ; DAGISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
863
+ ; DAGISEL-GFX10-NEXT: buffer_store_dword v2, v1, s[48:51], 0 offen offset:8
864
+ ; DAGISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
865
+ ; DAGISEL-GFX10-NEXT: buffer_store_dword v3, v1, s[48:51], 0 offen offset:4
866
+ ; DAGISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
867
+ ; DAGISEL-GFX10-NEXT: buffer_store_dword v4, v1, s[48:51], 0 offen
868
+ ; DAGISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
869
+ ; DAGISEL-GFX10-NEXT: s_endpgm
870
+ %alloca.align32 = alloca [8 x <4 x i32 >], align 32 , addrspace (5 ) ; eight <4 x i32> slots (16 bytes each) in addrspace(5), requested alignment 32
871
+ %gep0 = getelementptr inbounds [8 x <4 x i32 >], ptr addrspace (5 ) %alloca.align32 , i32 0 , i32 %idx ; address of slot %idx; the expected output shifts v8 left by 4, presumably %idx * 16
872
+ store volatile <4 x i32 > <i32 1 , i32 2 , i32 3 , i32 4 >, ptr addrspace (5 ) %gep0 , align 32 ; NOTE(review): slots are 16 bytes apart, so align 32 on this store holds only for even %idx - confirm this is intended
873
+ ret void
874
+ }
875
+
799
876
declare void @llvm.amdgcn.cs.chain.v2i32 (ptr , i32 , <2 x i32 >, <2 x i32 >, i32 , ...)
800
877
declare void @llvm.amdgcn.cs.chain.v3i32 (ptr , i32 , <3 x i32 >, <3 x i32 >, i32 , ...)
801
878
declare void @llvm.amdgcn.cs.chain.v4i32 (ptr , i32 , <4 x i32 >, <4 x i32 >, i32 , ...)
0 commit comments