41
41
ret void
42
42
}
43
43
44
+ define amdgpu_kernel void @high_register_collision() {
45
+ ret void
46
+ }
47
+
44
48
...
45
49
---
46
50
@@ -57,27 +61,31 @@ name: flat_zero_waitcnt
57
61
body : |
58
62
; GCN-LABEL: name: flat_zero_waitcnt
59
63
; GCN: bb.0:
60
- ; GCN: successors: %bb.1(0x80000000)
61
- ; GCN: S_WAITCNT 0
62
- ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.global4, addrspace 1)
63
- ; GCN: $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16, addrspace 1)
64
- ; GCN: S_WAITCNT 3953
65
- ; GCN: $agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec
66
- ; GCN: S_BRANCH %bb.1
67
- ; GCN: bb.1:
68
- ; GCN: successors: %bb.2(0x80000000)
69
- ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
70
- ; GCN: S_WAITCNT 3952
71
- ; GCN: $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16, addrspace 1)
72
- ; GCN: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
73
- ; GCN: S_BRANCH %bb.2
74
- ; GCN: bb.2:
75
- ; GCN: S_WAITCNT 49279
76
- ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.flat4)
77
- ; GCN: S_WAITCNT 3952
78
- ; GCN: $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.flat16)
79
- ; GCN: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
80
- ; GCN: S_ENDPGM 0
64
+ ; GCN-NEXT: successors: %bb.1(0x80000000)
65
+ ; GCN-NEXT: {{ $}}
66
+ ; GCN-NEXT: S_WAITCNT 0
67
+ ; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.global4, addrspace 1)
68
+ ; GCN-NEXT: $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16, addrspace 1)
69
+ ; GCN-NEXT: S_WAITCNT 3953
70
+ ; GCN-NEXT: $agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec
71
+ ; GCN-NEXT: S_BRANCH %bb.1
72
+ ; GCN-NEXT: {{ $}}
73
+ ; GCN-NEXT: bb.1:
74
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
75
+ ; GCN-NEXT: {{ $}}
76
+ ; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
77
+ ; GCN-NEXT: S_WAITCNT 3952
78
+ ; GCN-NEXT: $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16, addrspace 1)
79
+ ; GCN-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
80
+ ; GCN-NEXT: S_BRANCH %bb.2
81
+ ; GCN-NEXT: {{ $}}
82
+ ; GCN-NEXT: bb.2:
83
+ ; GCN-NEXT: S_WAITCNT 49279
84
+ ; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.flat4)
85
+ ; GCN-NEXT: S_WAITCNT 3952
86
+ ; GCN-NEXT: $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.flat16)
87
+ ; GCN-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
88
+ ; GCN-NEXT: S_ENDPGM 0
81
89
bb.0:
82
90
successors: %bb.1
83
91
$agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.global4)
@@ -108,14 +116,16 @@ name: single_fallthrough_successor_no_end_block_wait
108
116
body : |
109
117
; GCN-LABEL: name: single_fallthrough_successor_no_end_block_wait
110
118
; GCN: bb.0:
111
- ; GCN: successors: %bb.1(0x80000000)
112
- ; GCN: S_WAITCNT 0
113
- ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
114
- ; GCN: bb.1:
115
- ; GCN: $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
116
- ; GCN: S_WAITCNT 112
117
- ; GCN: FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
118
- ; GCN: S_ENDPGM 0
119
+ ; GCN-NEXT: successors: %bb.1(0x80000000)
120
+ ; GCN-NEXT: {{ $}}
121
+ ; GCN-NEXT: S_WAITCNT 0
122
+ ; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
123
+ ; GCN-NEXT: {{ $}}
124
+ ; GCN-NEXT: bb.1:
125
+ ; GCN-NEXT: $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
126
+ ; GCN-NEXT: S_WAITCNT 112
127
+ ; GCN-NEXT: FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
128
+ ; GCN-NEXT: S_ENDPGM 0
119
129
bb.0:
120
130
successors: %bb.1
121
131
$agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
@@ -137,18 +147,21 @@ name: single_branch_successor_not_next_block
137
147
body : |
138
148
; GCN-LABEL: name: single_branch_successor_not_next_block
139
149
; GCN: bb.0:
140
- ; GCN: successors: %bb.2(0x80000000)
141
- ; GCN: S_WAITCNT 0
142
- ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
143
- ; GCN: S_BRANCH %bb.2
144
- ; GCN: bb.1:
145
- ; GCN: FLAT_STORE_DWORD $vgpr8_vgpr9, $agpr10, 0, 0, implicit $exec, implicit $flat_scr
146
- ; GCN: S_ENDPGM 0
147
- ; GCN: bb.2:
148
- ; GCN: $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
149
- ; GCN: S_WAITCNT 112
150
- ; GCN: FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
151
- ; GCN: S_ENDPGM 0
150
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
151
+ ; GCN-NEXT: {{ $}}
152
+ ; GCN-NEXT: S_WAITCNT 0
153
+ ; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
154
+ ; GCN-NEXT: S_BRANCH %bb.2
155
+ ; GCN-NEXT: {{ $}}
156
+ ; GCN-NEXT: bb.1:
157
+ ; GCN-NEXT: FLAT_STORE_DWORD $vgpr8_vgpr9, $agpr10, 0, 0, implicit $exec, implicit $flat_scr
158
+ ; GCN-NEXT: S_ENDPGM 0
159
+ ; GCN-NEXT: {{ $}}
160
+ ; GCN-NEXT: bb.2:
161
+ ; GCN-NEXT: $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
162
+ ; GCN-NEXT: S_WAITCNT 112
163
+ ; GCN-NEXT: FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
164
+ ; GCN-NEXT: S_ENDPGM 0
152
165
bb.0:
153
166
successors: %bb.2
154
167
$agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
@@ -192,13 +205,14 @@ body: |
192
205
liveins: $vgpr1_vgpr2
193
206
; GCN-LABEL: name: bundle_no_waitcnt
194
207
; GCN: liveins: $vgpr1_vgpr2
195
- ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
196
- ; GCN: BUNDLE {
197
- ; GCN: S_NOP 0
198
- ; GCN: S_NOP 0
199
- ; GCN: }
200
- ; GCN: S_WAITCNT 112
201
- ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
208
+ ; GCN-NEXT: {{ $}}
209
+ ; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
210
+ ; GCN-NEXT: BUNDLE {
211
+ ; GCN-NEXT: S_NOP 0
212
+ ; GCN-NEXT: S_NOP 0
213
+ ; GCN-NEXT: }
214
+ ; GCN-NEXT: S_WAITCNT 112
215
+ ; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
202
216
$agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
203
217
BUNDLE {
204
218
S_NOP 0
@@ -220,12 +234,13 @@ body: |
220
234
liveins: $vgpr1_vgpr2
221
235
; GCN-LABEL: name: preexisting_waitcnt_in_bundle
222
236
; GCN: liveins: $vgpr1_vgpr2
223
- ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
224
- ; GCN: BUNDLE {
225
- ; GCN: S_NOP 0
226
- ; GCN: S_WAITCNT 0
227
- ; GCN: }
228
- ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
237
+ ; GCN-NEXT: {{ $}}
238
+ ; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
239
+ ; GCN-NEXT: BUNDLE {
240
+ ; GCN-NEXT: S_NOP 0
241
+ ; GCN-NEXT: S_WAITCNT 0
242
+ ; GCN-NEXT: }
243
+ ; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
229
244
$agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
230
245
BUNDLE {
231
246
S_NOP 0
@@ -248,11 +263,12 @@ body: |
248
263
liveins: $vgpr1_vgpr2
249
264
; GCN-LABEL: name: insert_in_bundle
250
265
; GCN: liveins: $vgpr1_vgpr2
251
- ; GCN: BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
252
- ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
253
- ; GCN: S_WAITCNT 112
254
- ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, internal $agpr0, 0, 0, implicit $exec, implicit $flat_scr
255
- ; GCN: }
266
+ ; GCN-NEXT: {{ $}}
267
+ ; GCN-NEXT: BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
268
+ ; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
269
+ ; GCN-NEXT: S_WAITCNT 112
270
+ ; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, internal $agpr0, 0, 0, implicit $exec, implicit $flat_scr
271
+ ; GCN-NEXT: }
256
272
BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
257
273
$agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
258
274
FLAT_STORE_DWORD $vgpr2_vgpr3, internal $agpr0, 0, 0, implicit $exec, implicit $flat_scr
@@ -273,11 +289,12 @@ body: |
273
289
liveins: $vgpr1_vgpr2
274
290
; GCN-LABEL: name: exit_bundle
275
291
; GCN: liveins: $vgpr1_vgpr2
276
- ; GCN: BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
277
- ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
278
- ; GCN: }
279
- ; GCN: S_WAITCNT 112
280
- ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
292
+ ; GCN-NEXT: {{ $}}
293
+ ; GCN-NEXT: BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
294
+ ; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
295
+ ; GCN-NEXT: }
296
+ ; GCN-NEXT: S_WAITCNT 112
297
+ ; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
281
298
BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
282
299
$agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
283
300
}
@@ -300,17 +317,43 @@ body: |
300
317
liveins: $vgpr1_vgpr2
301
318
; GCN-LABEL: name: cross_bundle
302
319
; GCN: liveins: $vgpr1_vgpr2
303
- ; GCN: BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
304
- ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
305
- ; GCN: }
306
- ; GCN: S_WAITCNT 112
307
- ; GCN: BUNDLE implicit $agpr0, implicit $vgpr2_vgpr3 {
308
- ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
309
- ; GCN: }
320
+ ; GCN-NEXT: {{ $}}
321
+ ; GCN-NEXT: BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
322
+ ; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
323
+ ; GCN-NEXT: }
324
+ ; GCN-NEXT: S_WAITCNT 112
325
+ ; GCN-NEXT: BUNDLE implicit $agpr0, implicit $vgpr2_vgpr3 {
326
+ ; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
327
+ ; GCN-NEXT: }
310
328
BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
311
329
$agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
312
330
}
313
331
BUNDLE implicit $agpr0, implicit $vgpr2_vgpr3 {
314
332
FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
315
333
}
316
334
...
335
+
336
+ ---
337
+ # AGPRs should be disjoint from VGPRs and tracked separately.
338
+ # vgpr226 and agpr0 erroneously share a waitcnt storage index, so an unneeded S_WAITCNT is inserted before the store of agpr0.
339
+
340
+ name : high_register_collision
341
+
342
+ body : |
343
+ bb.0:
344
+ ; GCN-LABEL: name: high_register_collision
345
+ ; GCN: S_WAITCNT 0
346
+ ; GCN-NEXT: $agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec
347
+ ; GCN-NEXT: $vgpr226 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
348
+ ; GCN-NEXT: $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
349
+ ; GCN-NEXT: FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr1, 0, 0, implicit $exec, implicit $flat_scr
350
+ ; GCN-NEXT: S_WAITCNT 112
351
+ ; GCN-NEXT: FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
352
+ ; GCN-NEXT: S_ENDPGM 0
353
+ $agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec
354
+ $vgpr226 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
355
+ $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
356
+ FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr1, 0, 0, implicit $exec, implicit $flat_scr
357
+ FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
358
+ S_ENDPGM 0
359
+ ...
0 commit comments