1
1
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2
- # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -run-pass =si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=SGPR_SPILLED %s
2
+ # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -stress-regalloc=3 -start-before=greedy -stop-after=si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=SGPR_SPILLED %s
3
3
4
4
# This file contains 3 test cases to observe the optimal stack slot usage for SGPR spills when utilizing the Stack Slot Coloring pass.
5
5
# @stack-slot-share-equal-sized-spills: In this test, the stack slot indices are shared among the spill stack objects of equal size.
@@ -13,17 +13,11 @@ name: stack-slot-share-equal-sized-spills
13
13
tracksRegLiveness : true
14
14
frameInfo :
15
15
adjustsStack : true
16
- stack :
17
- - { id: 0, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
18
- - { id: 1, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
19
- - { id: 2, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
20
- - { id: 3, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
21
- - { id: 4, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
16
+ hasCalls : true
22
17
machineFunctionInfo :
23
- hasSpilledSGPRs : true
24
- scratchRSrcReg : ' $sgpr0_sgpr1_sgpr2_sgpr3'
25
- frameOffsetReg : ' $sgpr32'
26
- stackPtrOffsetReg : ' $sgpr32'
18
+ scratchRSrcReg : $sgpr0_sgpr1_sgpr2_sgpr3
19
+ frameOffsetReg : $sgpr32
20
+ stackPtrOffsetReg : $sgpr32
27
21
body : |
28
22
bb.0:
29
23
; SGPR_SPILLED-LABEL: name: stack-slot-share-equal-sized-spills
@@ -60,45 +54,36 @@ body: |
60
54
; SGPR_SPILLED-NEXT: $sgpr2 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 6, implicit-def $sgpr2_sgpr3
61
55
; SGPR_SPILLED-NEXT: $sgpr3 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 7
62
56
; SGPR_SPILLED-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
63
- SI_SPILL_S32_SAVE $sgpr32, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.2, addrspace 5)
64
- SI_SPILL_S64_SAVE $sgpr0_sgpr1, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.1, align 4, addrspace 5)
65
- %2:vreg_64 = IMPLICIT_DEF
66
- %3:vgpr_32 = FLAT_LOAD_DWORD %2, 0, 0, implicit $exec, implicit $flat_scr
67
- renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) 4, target-flags(amdgpu-rel32-hi) 4, implicit-def dead $scc
68
- SI_SPILL_S64_SAVE killed renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5)
57
+ %0:sreg_32_xm0 = COPY $sgpr32
58
+ %5:sreg_64 = COPY $sgpr0_sgpr1
59
+ %1:vreg_64 = IMPLICIT_DEF
60
+ %2:vgpr_32 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr
61
+ %3:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) 4, target-flags(amdgpu-rel32-hi) 4, implicit-def dead $scc
69
62
ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
70
- renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
71
- dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit undef $vgpr0
72
- $sgpr32 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.2, addrspace 5)
73
- SI_SPILL_S32_SAVE $sgpr32, %stack.3, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.3, addrspace 5)
74
- $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.1, align 4, addrspace 5)
75
- SI_SPILL_S64_SAVE $sgpr2_sgpr3, %stack.4, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.4, align 4, addrspace 5)
63
+ dead $sgpr30_sgpr31 = SI_CALL %3, 0, csr_amdgpu, implicit undef $vgpr0
64
+ $sgpr32 = COPY %0
65
+ %4:sreg_32_xm0 = COPY $sgpr32
66
+ $sgpr0_sgpr1 = COPY %5
67
+ %6:sreg_64 = COPY $sgpr2_sgpr3
76
68
ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
77
69
ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
78
- $vgpr0 = COPY %3
79
- renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
80
- dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit killed $vgpr0
81
- $sgpr32 = SI_SPILL_S32_RESTORE %stack.3, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.3, addrspace 5)
82
- $sgpr2_sgpr3 = SI_SPILL_S64_RESTORE %stack.4, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.4, align 4, addrspace 5)
70
+ $vgpr0 = COPY %2
71
+ dead $sgpr30_sgpr31 = SI_CALL %3, 0, csr_amdgpu, implicit killed $vgpr0
72
+ $sgpr32 = COPY %4
73
+ $sgpr2_sgpr3 = COPY %6
83
74
ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
84
-
85
75
...
76
+
86
77
---
87
78
name : stack-slot-share-unequal-sized-spills-with-large-spill-first
88
79
tracksRegLiveness : true
89
80
frameInfo :
90
81
adjustsStack : true
91
- stack :
92
- - { id: 0, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
93
- - { id: 1, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
94
- - { id: 2, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
95
- - { id: 3, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
96
- - { id: 4, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
82
+ hasCalls : true
97
83
machineFunctionInfo :
98
- hasSpilledSGPRs : true
99
- scratchRSrcReg : ' $sgpr0_sgpr1_sgpr2_sgpr3'
100
- frameOffsetReg : ' $sgpr32'
101
- stackPtrOffsetReg : ' $sgpr32'
84
+ scratchRSrcReg : $sgpr0_sgpr1_sgpr2_sgpr3
85
+ frameOffsetReg : $sgpr32
86
+ stackPtrOffsetReg : $sgpr32
102
87
body : |
103
88
bb.0:
104
89
; SGPR_SPILLED-LABEL: name: stack-slot-share-unequal-sized-spills-with-large-spill-first
@@ -133,45 +118,36 @@ body: |
133
118
; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 5
134
119
; SGPR_SPILLED-NEXT: $sgpr2 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 6
135
120
; SGPR_SPILLED-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
136
- SI_SPILL_S32_SAVE $sgpr32, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.2, addrspace 5)
137
- SI_SPILL_S64_SAVE $sgpr2_sgpr3, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.1, align 4, addrspace 5)
138
- %2:vreg_64 = IMPLICIT_DEF
139
- %3:vgpr_32 = FLAT_LOAD_DWORD %2, 0, 0, implicit $exec, implicit $flat_scr
140
- renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) 4, target-flags(amdgpu-rel32-hi) 4, implicit-def dead $scc
141
- SI_SPILL_S64_SAVE killed renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5)
121
+ %0:sreg_32_xm0 = COPY $sgpr32
122
+ %5:sreg_64 = COPY $sgpr2_sgpr3
123
+ %1:vreg_64 = IMPLICIT_DEF
124
+ %2:vgpr_32 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr
125
+ %3:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) 4, target-flags(amdgpu-rel32-hi) 4, implicit-def dead $scc
142
126
ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
143
- renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
144
- dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit undef $vgpr0
145
- $sgpr32 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.2, addrspace 5)
146
- SI_SPILL_S32_SAVE $sgpr32, %stack.3, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.3, addrspace 5)
147
- $sgpr2_sgpr3 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.1, align 4, addrspace 5)
148
- SI_SPILL_S32_SAVE $sgpr2, %stack.4, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.4, addrspace 5)
127
+ dead $sgpr30_sgpr31 = SI_CALL %3, 0, csr_amdgpu, implicit undef $vgpr0
128
+ $sgpr32 = COPY %0
129
+ %4:sreg_32_xm0 = COPY $sgpr32
130
+ $sgpr2_sgpr3 = COPY %5
131
+ %6:sreg_32 = COPY $sgpr2
149
132
ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
150
133
ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
151
- $vgpr0 = COPY %3
152
- renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
153
- dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit killed $vgpr0
154
- $sgpr32 = SI_SPILL_S32_RESTORE %stack.3, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.3, addrspace 5)
155
- $sgpr2 = SI_SPILL_S32_RESTORE %stack.4, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.4, addrspace 5)
134
+ $vgpr0 = COPY %2
135
+ dead $sgpr30_sgpr31 = SI_CALL %3, 0, csr_amdgpu, implicit killed $vgpr0
136
+ $sgpr32 = COPY %4
137
+ $sgpr2 = COPY %6
156
138
ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
157
-
158
139
...
140
+
159
141
---
160
142
name : stack-slot-share-unequal-sized-spills-with-small-spill-first
161
143
tracksRegLiveness : true
162
144
frameInfo :
163
145
adjustsStack : true
164
- stack :
165
- - { id: 0, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
166
- - { id: 1, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
167
- - { id: 2, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
168
- - { id: 3, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
169
- - { id: 4, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
146
+ hasCalls : true
170
147
machineFunctionInfo :
171
- hasSpilledSGPRs : true
172
- scratchRSrcReg : ' $sgpr0_sgpr1_sgpr2_sgpr3'
173
- frameOffsetReg : ' $sgpr32'
174
- stackPtrOffsetReg : ' $sgpr32'
148
+ scratchRSrcReg : $sgpr0_sgpr1_sgpr2_sgpr3
149
+ frameOffsetReg : $sgpr32
150
+ stackPtrOffsetReg : $sgpr32
175
151
body : |
176
152
bb.0:
177
153
; SGPR_SPILLED-LABEL: name: stack-slot-share-unequal-sized-spills-with-small-spill-first
@@ -206,26 +182,22 @@ body: |
206
182
; SGPR_SPILLED-NEXT: $sgpr2 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 5, implicit-def $sgpr2_sgpr3
207
183
; SGPR_SPILLED-NEXT: $sgpr3 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 6
208
184
; SGPR_SPILLED-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
209
- SI_SPILL_S32_SAVE $sgpr32, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.2, addrspace 5)
210
- SI_SPILL_S32_SAVE $sgpr0, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.1, addrspace 5)
211
- %2:vreg_64 = IMPLICIT_DEF
212
- %3:vgpr_32 = FLAT_LOAD_DWORD %2, 0, 0, implicit $exec, implicit $flat_scr
213
- renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) 4, target-flags(amdgpu-rel32-hi) 4, implicit-def dead $scc
214
- SI_SPILL_S64_SAVE killed renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5)
185
+ %0:sreg_32_xm0 = COPY $sgpr32
186
+ %5:sreg_32 = COPY $sgpr0
187
+ %1:vreg_64 = IMPLICIT_DEF
188
+ %2:vgpr_32 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr
189
+ %3:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) 4, target-flags(amdgpu-rel32-hi) 4, implicit-def dead $scc
215
190
ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
216
- renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
217
- dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit undef $vgpr0
218
- $sgpr32 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.2, addrspace 5)
219
- SI_SPILL_S32_SAVE $sgpr32, %stack.3, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.3, addrspace 5)
220
- $sgpr0 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.1, addrspace 5)
221
- SI_SPILL_S64_SAVE $sgpr2_sgpr3, %stack.4, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.4, align 4, addrspace 5)
191
+ dead $sgpr30_sgpr31 = SI_CALL %3, 0, csr_amdgpu, implicit undef $vgpr0
192
+ $sgpr32 = COPY %0
193
+ %4:sreg_32_xm0 = COPY $sgpr32
194
+ $sgpr0 = COPY %5
195
+ %6:sreg_64 = COPY $sgpr2_sgpr3
222
196
ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
223
197
ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
224
- $vgpr0 = COPY %3
225
- renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
226
- dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit killed $vgpr0
227
- $sgpr32 = SI_SPILL_S32_RESTORE %stack.3, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.3, addrspace 5)
228
- $sgpr2_sgpr3 = SI_SPILL_S64_RESTORE %stack.4, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.4, align 4, addrspace 5)
198
+ $vgpr0 = COPY %2
199
+ dead $sgpr30_sgpr31 = SI_CALL %3, 0, csr_amdgpu, implicit killed $vgpr0
200
+ $sgpr32 = COPY %4
201
+ $sgpr2_sgpr3 = COPY %6
229
202
ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
230
-
231
203
...
0 commit comments