@@ -25,15 +25,15 @@ define amdgpu_kernel void @v_insert_v64i32_varidx(<64 x i32> addrspace(1)* %out.
25
25
; GCN-NEXT: v_mov_b32_e32 v6, s15
26
26
; GCN-NEXT: v_mov_b32_e32 v8, s16
27
27
; GCN-NEXT: v_mov_b32_e32 v10, s17
28
+ ; GCN-NEXT: v_mov_b32_e32 v12, s18
29
+ ; GCN-NEXT: v_mov_b32_e32 v14, s19
28
30
; GCN-NEXT: s_movk_i32 s5, 0x60
29
31
; GCN-NEXT: v_add_u32_e32 v2, 8, v0
30
32
; GCN-NEXT: v_add_u32_e32 v3, 12, v0
31
33
; GCN-NEXT: v_add_u32_e32 v7, 16, v0
32
34
; GCN-NEXT: v_add_u32_e32 v9, 20, v0
33
35
; GCN-NEXT: v_add_u32_e32 v11, 24, v0
34
- ; GCN-NEXT: v_mov_b32_e32 v12, s18
35
36
; GCN-NEXT: v_add_u32_e32 v13, 28, v0
36
- ; GCN-NEXT: v_mov_b32_e32 v14, s19
37
37
; GCN-NEXT: v_add_u32_e32 v15, 32, v0
38
38
; GCN-NEXT: v_mov_b32_e32 v16, s20
39
39
; GCN-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen
@@ -71,7 +71,7 @@ define amdgpu_kernel void @v_insert_v64i32_varidx(<64 x i32> addrspace(1)* %out.
71
71
; GCN-NEXT: buffer_store_dword v28, v27, s[0:3], 0 offen
72
72
; GCN-NEXT: buffer_store_dword v30, v29, s[0:3], 0 offen
73
73
; GCN-NEXT: buffer_store_dword v32, v31, s[0:3], 0 offen
74
- ; GCN-NEXT: s_movk_i32 s10 , 0x70
74
+ ; GCN-NEXT: s_movk_i32 s13 , 0x70
75
75
; GCN-NEXT: v_add_u32_e32 v35, 0x48, v0
76
76
; GCN-NEXT: v_mov_b32_e32 v36, s70
77
77
; GCN-NEXT: v_add_u32_e32 v37, 0x4c, v0
@@ -96,19 +96,19 @@ define amdgpu_kernel void @v_insert_v64i32_varidx(<64 x i32> addrspace(1)* %out.
96
96
; GCN-NEXT: v_add_u32_e32 v26, 0x64, v0
97
97
; GCN-NEXT: v_mov_b32_e32 v14, s77
98
98
; GCN-NEXT: v_mov_b32_e32 v4, s81
99
- ; GCN-NEXT: s_movk_i32 s11 , 0x90
100
- ; GCN-NEXT: s_movk_i32 s13 , 0xa0
99
+ ; GCN-NEXT: s_movk_i32 s14 , 0x90
100
+ ; GCN-NEXT: s_movk_i32 s15 , 0xa0
101
101
; GCN-NEXT: v_add_u32_e32 v28, 0x68, v0
102
102
; GCN-NEXT: v_mov_b32_e32 v16, s78
103
103
; GCN-NEXT: v_add_u32_e32 v30, 0x6c, v0
104
104
; GCN-NEXT: v_mov_b32_e32 v18, s79
105
+ ; GCN-NEXT: v_add_u32_e32 v32, s13, v0
105
106
; GCN-NEXT: v_mov_b32_e32 v20, s80
106
- ; GCN-NEXT: v_mov_b32_e32 v5, s82
107
- ; GCN-NEXT: v_mov_b32_e32 v6, s83
108
- ; GCN-NEXT: v_add_u32_e32 v32, s10, v0
109
107
; GCN-NEXT: v_add_u32_e32 v34, 0x74, v0
110
108
; GCN-NEXT: v_add_u32_e32 v36, 0x78, v0
109
+ ; GCN-NEXT: v_mov_b32_e32 v5, s82
111
110
; GCN-NEXT: v_add_u32_e32 v43, 0x7c, v0
111
+ ; GCN-NEXT: v_mov_b32_e32 v6, s83
112
112
; GCN-NEXT: v_add_u32_e32 v44, 0x80, v0
113
113
; GCN-NEXT: v_mov_b32_e32 v8, s52
114
114
; GCN-NEXT: buffer_store_dword v14, v26, s[0:3], 0 offen
@@ -121,20 +121,20 @@ define amdgpu_kernel void @v_insert_v64i32_varidx(<64 x i32> addrspace(1)* %out.
121
121
; GCN-NEXT: buffer_store_dword v8, v44, s[0:3], 0 offen
122
122
; GCN-NEXT: v_add_u32_e32 v45, 0x84, v0
123
123
; GCN-NEXT: v_mov_b32_e32 v4, s53
124
- ; GCN-NEXT: s_movk_i32 s14 , 0xb0
124
+ ; GCN-NEXT: s_movk_i32 s16 , 0xb0
125
125
; GCN-NEXT: v_add_u32_e32 v46, 0x88, v0
126
126
; GCN-NEXT: v_mov_b32_e32 v5, s54
127
127
; GCN-NEXT: v_add_u32_e32 v47, 0x8c, v0
128
128
; GCN-NEXT: v_mov_b32_e32 v6, s55
129
- ; GCN-NEXT: v_add_u32_e32 v48, s11 , v0
129
+ ; GCN-NEXT: v_add_u32_e32 v48, s14 , v0
130
130
; GCN-NEXT: v_mov_b32_e32 v8, s56
131
131
; GCN-NEXT: v_add_u32_e32 v49, 0x94, v0
132
132
; GCN-NEXT: v_mov_b32_e32 v10, s57
133
133
; GCN-NEXT: v_add_u32_e32 v50, 0x98, v0
134
134
; GCN-NEXT: v_mov_b32_e32 v12, s58
135
135
; GCN-NEXT: v_add_u32_e32 v51, 0x9c, v0
136
136
; GCN-NEXT: v_mov_b32_e32 v14, s59
137
- ; GCN-NEXT: v_add_u32_e32 v52, s13 , v0
137
+ ; GCN-NEXT: v_add_u32_e32 v52, s15 , v0
138
138
; GCN-NEXT: v_mov_b32_e32 v16, s60
139
139
; GCN-NEXT: buffer_store_dword v4, v45, s[0:3], 0 offen
140
140
; GCN-NEXT: buffer_store_dword v5, v46, s[0:3], 0 offen
@@ -146,13 +146,13 @@ define amdgpu_kernel void @v_insert_v64i32_varidx(<64 x i32> addrspace(1)* %out.
146
146
; GCN-NEXT: buffer_store_dword v16, v52, s[0:3], 0 offen
147
147
; GCN-NEXT: v_add_u32_e32 v53, 0xa4, v0
148
148
; GCN-NEXT: v_mov_b32_e32 v4, s61
149
- ; GCN-NEXT: s_movk_i32 s15 , 0xd0
150
- ; GCN-NEXT: s_movk_i32 s16 , 0xe0
149
+ ; GCN-NEXT: s_movk_i32 s17 , 0xd0
150
+ ; GCN-NEXT: s_movk_i32 s18 , 0xe0
151
151
; GCN-NEXT: v_add_u32_e32 v54, 0xa8, v0
152
152
; GCN-NEXT: v_mov_b32_e32 v5, s62
153
153
; GCN-NEXT: v_add_u32_e32 v55, 0xac, v0
154
154
; GCN-NEXT: v_mov_b32_e32 v6, s63
155
- ; GCN-NEXT: v_add_u32_e32 v56, s14 , v0
155
+ ; GCN-NEXT: v_add_u32_e32 v56, s16 , v0
156
156
; GCN-NEXT: v_mov_b32_e32 v8, s64
157
157
; GCN-NEXT: v_add_u32_e32 v57, 0xb4, v0
158
158
; GCN-NEXT: v_mov_b32_e32 v10, s65
@@ -173,20 +173,20 @@ define amdgpu_kernel void @v_insert_v64i32_varidx(<64 x i32> addrspace(1)* %out.
173
173
; GCN-NEXT: v_add_u32_e32 v61, 0xc4, v0
174
174
; GCN-NEXT: v_mov_b32_e32 v4, s37
175
175
; GCN-NEXT: s_and_b32 s7, s7, 63
176
- ; GCN-NEXT: s_movk_i32 s17 , 0xf0
176
+ ; GCN-NEXT: s_movk_i32 s19 , 0xf0
177
177
; GCN-NEXT: v_add_u32_e32 v62, 0xc8, v0
178
178
; GCN-NEXT: v_mov_b32_e32 v5, s38
179
179
; GCN-NEXT: v_add_u32_e32 v63, 0xcc, v0
180
180
; GCN-NEXT: v_mov_b32_e32 v6, s39
181
- ; GCN-NEXT: v_add_u32_e32 v64, s15 , v0
181
+ ; GCN-NEXT: v_add_u32_e32 v64, s17 , v0
182
182
; GCN-NEXT: v_mov_b32_e32 v8, s40
183
183
; GCN-NEXT: v_add_u32_e32 v65, 0xd4, v0
184
184
; GCN-NEXT: v_mov_b32_e32 v10, s41
185
185
; GCN-NEXT: v_add_u32_e32 v66, 0xd8, v0
186
186
; GCN-NEXT: v_mov_b32_e32 v12, s42
187
187
; GCN-NEXT: v_add_u32_e32 v67, 0xdc, v0
188
188
; GCN-NEXT: v_mov_b32_e32 v14, s43
189
- ; GCN-NEXT: v_add_u32_e32 v68, s16 , v0
189
+ ; GCN-NEXT: v_add_u32_e32 v68, s18 , v0
190
190
; GCN-NEXT: v_mov_b32_e32 v16, s44
191
191
; GCN-NEXT: buffer_store_dword v4, v61, s[0:3], 0 offen
192
192
; GCN-NEXT: buffer_store_dword v5, v62, s[0:3], 0 offen
@@ -202,7 +202,7 @@ define amdgpu_kernel void @v_insert_v64i32_varidx(<64 x i32> addrspace(1)* %out.
202
202
; GCN-NEXT: v_mov_b32_e32 v5, s46
203
203
; GCN-NEXT: v_add_u32_e32 v71, 0xec, v0
204
204
; GCN-NEXT: v_mov_b32_e32 v6, s47
205
- ; GCN-NEXT: v_add_u32_e32 v72, s17 , v0
205
+ ; GCN-NEXT: v_add_u32_e32 v72, s19 , v0
206
206
; GCN-NEXT: v_mov_b32_e32 v8, s48
207
207
; GCN-NEXT: v_add_u32_e32 v73, 0xf4, v0
208
208
; GCN-NEXT: v_mov_b32_e32 v10, s49
@@ -217,9 +217,9 @@ define amdgpu_kernel void @v_insert_v64i32_varidx(<64 x i32> addrspace(1)* %out.
217
217
; GCN-NEXT: v_mov_b32_e32 v4, s12
218
218
; GCN-NEXT: s_lshl_b32 s7, s7, 2
219
219
; GCN-NEXT: v_add_u32_e32 v75, 0xfc, v0
220
- ; GCN-NEXT: v_mov_b32_e32 v5 , s51
220
+ ; GCN-NEXT: v_mov_b32_e32 v14 , s51
221
221
; GCN-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:256
222
- ; GCN-NEXT: buffer_store_dword v5 , v75, s[0:3], 0 offen
222
+ ; GCN-NEXT: buffer_store_dword v14 , v75, s[0:3], 0 offen
223
223
; GCN-NEXT: v_mov_b32_e32 v4, s6
224
224
; GCN-NEXT: v_add_u32_e32 v0, s7, v0
225
225
; GCN-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen
@@ -289,78 +289,78 @@ define amdgpu_kernel void @v_insert_v64i32_varidx(<64 x i32> addrspace(1)* %out.
289
289
; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:256
290
290
; GCN-NEXT: s_add_u32 s6, s8, 16
291
291
; GCN-NEXT: s_addc_u32 s7, s9, 0
292
- ; GCN-NEXT: v_mov_b32_e32 v65, s9
293
292
; GCN-NEXT: v_mov_b32_e32 v67, s7
294
293
; GCN-NEXT: v_mov_b32_e32 v66, s6
295
294
; GCN-NEXT: s_add_u32 s6, s8, 32
296
- ; GCN-NEXT: v_mov_b32_e32 v64, s8
297
295
; GCN-NEXT: s_addc_u32 s7, s9, 0
296
+ ; GCN-NEXT: v_mov_b32_e32 v65, s9
297
+ ; GCN-NEXT: s_add_u32 s10, s8, 48
298
+ ; GCN-NEXT: v_mov_b32_e32 v64, s8
299
+ ; GCN-NEXT: s_addc_u32 s11, s9, 0
298
300
; GCN-NEXT: s_waitcnt vmcnt(0)
299
301
; GCN-NEXT: global_store_dwordx4 v[64:65], v[0:3], off
300
302
; GCN-NEXT: global_store_dwordx4 v[66:67], v[4:7], off
301
303
; GCN-NEXT: v_mov_b32_e32 v0, s6
302
304
; GCN-NEXT: v_mov_b32_e32 v1, s7
303
- ; GCN-NEXT: s_add_u32 s6, s8, 48
304
- ; GCN-NEXT: s_addc_u32 s7, s9, 0
305
- ; GCN-NEXT: v_mov_b32_e32 v2, s6
306
- ; GCN-NEXT: v_mov_b32_e32 v3, s7
307
305
; GCN-NEXT: s_add_u32 s6, s8, 64
306
+ ; GCN-NEXT: v_mov_b32_e32 v2, s10
307
+ ; GCN-NEXT: s_addc_u32 s7, s9, 0
308
+ ; GCN-NEXT: v_mov_b32_e32 v3, s11
309
+ ; GCN-NEXT: s_add_u32 s10, s8, s4
310
+ ; GCN-NEXT: s_addc_u32 s11, s9, 0
311
+ ; GCN-NEXT: s_add_u32 s4, s8, s5
308
312
; GCN-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
309
313
; GCN-NEXT: global_store_dwordx4 v[2:3], v[12:15], off
310
- ; GCN-NEXT: s_addc_u32 s7, s9, 0
311
314
; GCN-NEXT: v_mov_b32_e32 v0, s6
315
+ ; GCN-NEXT: s_addc_u32 s5, s9, 0
312
316
; GCN-NEXT: v_mov_b32_e32 v1, s7
313
- ; GCN-NEXT: s_add_u32 s6, s8, s4
314
- ; GCN-NEXT: s_addc_u32 s7, s9, 0
315
- ; GCN-NEXT: s_add_u32 s4, s8, s5
316
- ; GCN-NEXT: v_mov_b32_e32 v2, s6
317
- ; GCN-NEXT: v_mov_b32_e32 v3, s7
317
+ ; GCN-NEXT: s_add_u32 s6, s8, s13
318
+ ; GCN-NEXT: v_mov_b32_e32 v2, s10
319
+ ; GCN-NEXT: v_mov_b32_e32 v3, s11
318
320
; GCN-NEXT: global_store_dwordx4 v[0:1], v[16:19], off
319
321
; GCN-NEXT: global_store_dwordx4 v[2:3], v[20:23], off
320
- ; GCN-NEXT: s_addc_u32 s5 , s9, 0
322
+ ; GCN-NEXT: s_addc_u32 s7 , s9, 0
321
323
; GCN-NEXT: v_mov_b32_e32 v0, s4
322
324
; GCN-NEXT: v_mov_b32_e32 v1, s5
323
- ; GCN-NEXT: s_add_u32 s4, s8, s10
324
- ; GCN-NEXT: s_addc_u32 s5, s9, 0
325
- ; GCN-NEXT: v_mov_b32_e32 v2, s4
326
- ; GCN-NEXT: v_mov_b32_e32 v3, s5
327
325
; GCN-NEXT: s_add_u32 s4, s8, 0x80
326
+ ; GCN-NEXT: v_mov_b32_e32 v2, s6
327
+ ; GCN-NEXT: s_addc_u32 s5, s9, 0
328
+ ; GCN-NEXT: v_mov_b32_e32 v3, s7
329
+ ; GCN-NEXT: s_add_u32 s6, s8, s14
328
330
; GCN-NEXT: global_store_dwordx4 v[0:1], v[24:27], off
329
331
; GCN-NEXT: global_store_dwordx4 v[2:3], v[28:31], off
330
- ; GCN-NEXT: s_addc_u32 s5 , s9, 0
332
+ ; GCN-NEXT: s_addc_u32 s7 , s9, 0
331
333
; GCN-NEXT: v_mov_b32_e32 v0, s4
332
334
; GCN-NEXT: v_mov_b32_e32 v1, s5
333
- ; GCN-NEXT: s_add_u32 s4, s8, s11
335
+ ; GCN-NEXT: s_add_u32 s4, s8, s15
336
+ ; GCN-NEXT: v_mov_b32_e32 v2, s6
334
337
; GCN-NEXT: s_addc_u32 s5, s9, 0
335
- ; GCN-NEXT: v_mov_b32_e32 v2, s4
336
- ; GCN-NEXT: v_mov_b32_e32 v3, s5
337
- ; GCN-NEXT: s_add_u32 s4, s8, s13
338
+ ; GCN-NEXT: v_mov_b32_e32 v3, s7
339
+ ; GCN-NEXT: s_add_u32 s6, s8, s16
338
340
; GCN-NEXT: global_store_dwordx4 v[0:1], v[32:35], off
339
341
; GCN-NEXT: global_store_dwordx4 v[2:3], v[36:39], off
340
- ; GCN-NEXT: s_addc_u32 s5 , s9, 0
342
+ ; GCN-NEXT: s_addc_u32 s7 , s9, 0
341
343
; GCN-NEXT: v_mov_b32_e32 v0, s4
342
344
; GCN-NEXT: v_mov_b32_e32 v1, s5
343
- ; GCN-NEXT: s_add_u32 s4, s8, s14
344
- ; GCN-NEXT: s_addc_u32 s5, s9, 0
345
- ; GCN-NEXT: v_mov_b32_e32 v2, s4
346
- ; GCN-NEXT: v_mov_b32_e32 v3, s5
347
345
; GCN-NEXT: s_add_u32 s4, s8, 0xc0
346
+ ; GCN-NEXT: v_mov_b32_e32 v2, s6
347
+ ; GCN-NEXT: v_mov_b32_e32 v3, s7
348
348
; GCN-NEXT: global_store_dwordx4 v[0:1], v[40:43], off
349
349
; GCN-NEXT: global_store_dwordx4 v[2:3], v[44:47], off
350
350
; GCN-NEXT: s_addc_u32 s5, s9, 0
351
351
; GCN-NEXT: v_mov_b32_e32 v0, s4
352
352
; GCN-NEXT: v_mov_b32_e32 v1, s5
353
- ; GCN-NEXT: s_add_u32 s4, s8, s15
353
+ ; GCN-NEXT: s_add_u32 s4, s8, s17
354
354
; GCN-NEXT: s_addc_u32 s5, s9, 0
355
355
; GCN-NEXT: v_mov_b32_e32 v2, s4
356
356
; GCN-NEXT: v_mov_b32_e32 v3, s5
357
- ; GCN-NEXT: s_add_u32 s4, s8, s16
357
+ ; GCN-NEXT: s_add_u32 s4, s8, s18
358
358
; GCN-NEXT: global_store_dwordx4 v[0:1], v[48:51], off
359
359
; GCN-NEXT: global_store_dwordx4 v[2:3], v[52:55], off
360
360
; GCN-NEXT: s_addc_u32 s5, s9, 0
361
361
; GCN-NEXT: v_mov_b32_e32 v0, s4
362
362
; GCN-NEXT: v_mov_b32_e32 v1, s5
363
- ; GCN-NEXT: s_add_u32 s4, s8, s17
363
+ ; GCN-NEXT: s_add_u32 s4, s8, s19
364
364
; GCN-NEXT: s_addc_u32 s5, s9, 0
365
365
; GCN-NEXT: v_mov_b32_e32 v2, s4
366
366
; GCN-NEXT: v_mov_b32_e32 v3, s5
0 commit comments