9
9
; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
10
10
; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
11
11
12
- ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x320 , [[BASE]]
13
- ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x640 , [[BASE]]
14
- ; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x960 , [[BASE]]
12
+ ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x200 , [[BASE]]
13
+ ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x400 , [[BASE]]
14
+ ; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x800 , [[BASE]]
15
15
16
16
; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:100
17
- ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:100
18
- ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:100
19
- ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:100
17
+ ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset0:72 offset1:172
18
+ ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset0:144 offset1:244
19
+ ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B3]] offset0:88 offset1:188
20
20
define amdgpu_kernel void @ds_read32_combine_stride_400 (float addrspace (3 )* nocapture readonly %arg , float *nocapture %arg1 ) {
21
21
bb:
22
22
%tmp = load float , float addrspace (3 )* %arg , align 4
52
52
53
53
; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
54
54
; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
55
- ; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
56
- ; VI-DAG: v_add_u32_e32 [[B4:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
57
55
58
- ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x640, [[BASE]]
59
- ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x6e0, [[BASE]]
60
- ; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x780, [[BASE]]
61
- ; GFX9-DAG: v_add_u32_e32 [[B4:v[0-9]+]], 0x820, [[BASE]]
56
+ ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x400, [[BASE]]
57
+ ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x800, [[BASE]]
62
58
63
- ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:20
64
- ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B2 ]] offset1:20
65
- ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B3 ]] offset1:20
66
- ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B4 ]] offset1:20
59
+ ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset0:144 offset1:164
60
+ ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B1 ]] offset0:184 offset1:204
61
+ ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B1 ]] offset0:224 offset1:244
62
+ ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B2 ]] offset0:8 offset1:28
67
63
define amdgpu_kernel void @ds_read32_combine_stride_20 (float addrspace (3 )* nocapture readonly %arg , float *nocapture %arg1 ) {
68
64
bb:
69
65
%tmp = getelementptr inbounds float , float addrspace (3 )* %arg , i32 400
102
98
; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
103
99
; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
104
100
105
- ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x320 , [[BASE]]
106
- ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x640 , [[BASE]]
107
- ; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x960 , [[BASE]]
101
+ ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x800 , [[BASE]]
102
+ ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x400 , [[BASE]]
103
+ ; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x200 , [[BASE]]
108
104
109
105
; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:100
110
- ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:100
111
- ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:100
112
- ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:100
106
+ ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset0:88 offset1:188
107
+ ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset0:144 offset1:244
108
+ ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B3]] offset0:72 offset1:172
113
109
define amdgpu_kernel void @ds_read32_combine_stride_400_back (float addrspace (3 )* nocapture readonly %arg , float *nocapture %arg1 ) {
114
110
bb:
115
111
%tmp = getelementptr inbounds float , float addrspace (3 )* %arg , i32 700
@@ -180,16 +176,11 @@ bb:
180
176
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
181
177
182
178
; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
183
- ; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
184
- ; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
185
-
186
179
; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 8, [[BASE]]
187
- ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x4008, [[BASE]]
188
- ; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x8008, [[BASE]]
189
180
190
181
; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:32
191
- ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B2 ]] offset1:32
192
- ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B3 ]] offset1:32
182
+ ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B1 ]] offset0:64 offset1:96
183
+ ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B1 ]] offset0:128 offset1:160
193
184
define amdgpu_kernel void @ds_read32_combine_stride_8192_shifted (float addrspace (3 )* nocapture readonly %arg , float *nocapture %arg1 ) {
194
185
bb:
195
186
%tmp = getelementptr inbounds float , float addrspace (3 )* %arg , i32 2
@@ -219,12 +210,12 @@ bb:
219
210
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
220
211
221
212
; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
222
- ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x960 , [[BASE]]
213
+ ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x800 , [[BASE]]
223
214
224
215
; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:50
225
216
; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:100 offset1:150
226
217
; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:200 offset1:250
227
- ; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:50
218
+ ; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[B1]] offset0:44 offset1:94
228
219
define amdgpu_kernel void @ds_read64_combine_stride_400 (double addrspace (3 )* nocapture readonly %arg , double *nocapture %arg1 ) {
229
220
bb:
230
221
%tmp = load double , double addrspace (3 )* %arg , align 8
@@ -259,16 +250,11 @@ bb:
259
250
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
260
251
261
252
; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
262
- ; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
263
- ; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
264
-
265
253
; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 8, [[BASE]]
266
- ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x4008, [[BASE]]
267
- ; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x8008, [[BASE]]
268
254
269
255
; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:16
270
- ; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B2 ]] offset1:16
271
- ; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B3 ]] offset1:16
256
+ ; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B1 ]] offset0:32 offset1:48
257
+ ; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B1 ]] offset0:64 offset1:80
272
258
define amdgpu_kernel void @ds_read64_combine_stride_8192_shifted (double addrspace (3 )* nocapture readonly %arg , double *nocapture %arg1 ) {
273
259
bb:
274
260
%tmp = getelementptr inbounds double , double addrspace (3 )* %arg , i32 1
@@ -301,14 +287,14 @@ bb:
301
287
; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
302
288
; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
303
289
304
- ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x320 , [[BASE]]
305
- ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x640 , [[BASE]]
306
- ; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x960 , [[BASE]]
290
+ ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x200 , [[BASE]]
291
+ ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x400 , [[BASE]]
292
+ ; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x800 , [[BASE]]
307
293
308
294
; GCN-DAG: ds_write2_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
309
- ; GCN-DAG: ds_write2_b32 [[B1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
310
- ; GCN-DAG: ds_write2_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
311
- ; GCN-DAG: ds_write2_b32 [[B3]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
295
+ ; GCN-DAG: ds_write2_b32 [[B1]], v{{[0-9]+}}, v{{[0-9]+}} offset0:72 offset1:172
296
+ ; GCN-DAG: ds_write2_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset0:144 offset1:244
297
+ ; GCN-DAG: ds_write2_b32 [[B3]], v{{[0-9]+}}, v{{[0-9]+}} offset0:88 offset1:188
312
298
define amdgpu_kernel void @ds_write32_combine_stride_400 (float addrspace (3 )* nocapture %arg ) {
313
299
bb:
314
300
store float 1 .000000e+00 , float addrspace (3 )* %arg , align 4
@@ -337,14 +323,14 @@ bb:
337
323
; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
338
324
; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
339
325
340
- ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x320 , [[BASE]]
341
- ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x640 , [[BASE]]
342
- ; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x960 , [[BASE]]
326
+ ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x800 , [[BASE]]
327
+ ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x400 , [[BASE]]
328
+ ; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x200 , [[BASE]]
343
329
330
+ ; GCN-DAG: ds_write2_b32 [[B1]], v{{[0-9]+}}, v{{[0-9]+}} offset0:88 offset1:188
331
+ ; GCN-DAG: ds_write2_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset0:144 offset1:244
332
+ ; GCN-DAG: ds_write2_b32 [[B3]], v{{[0-9]+}}, v{{[0-9]+}} offset0:72 offset1:172
344
333
; GCN-DAG: ds_write2_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
345
- ; GCN-DAG: ds_write2_b32 [[B1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
346
- ; GCN-DAG: ds_write2_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
347
- ; GCN-DAG: ds_write2_b32 [[B3]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
348
334
define amdgpu_kernel void @ds_write32_combine_stride_400_back (float addrspace (3 )* nocapture %arg ) {
349
335
bb:
350
336
%tmp = getelementptr inbounds float , float addrspace (3 )* %arg , i32 700
@@ -396,17 +382,12 @@ bb:
396
382
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
397
383
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
398
384
399
- ; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 4, [[BASE]]
400
- ; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
401
- ; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
402
-
403
- ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 4, [[BASE]]
404
- ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x4004, [[BASE]]
405
- ; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x8004, [[BASE]]
385
+ ; VI-DAG: v_add_u32_e32 [[BASE:v[0-9]+]], vcc, 4, [[BASE]]
386
+ ; GFX9-DAG: v_add_u32_e32 [[BASE:v[0-9]+]], 4, [[BASE]]
406
387
407
- ; GCN-DAG: ds_write2st64_b32 [[B1 ]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32
408
- ; GCN-DAG: ds_write2st64_b32 [[B2 ]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32
409
- ; GCN-DAG: ds_write2st64_b32 [[B3 ]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32
388
+ ; GCN-DAG: ds_write2st64_b32 [[BASE ]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32
389
+ ; GCN-DAG: ds_write2st64_b32 [[BASE ]], v{{[0-9]+}}, v{{[0-9]+}} offset0:64 offset1:96
390
+ ; GCN-DAG: ds_write2st64_b32 [[BASE ]], v{{[0-9]+}}, v{{[0-9]+}} offset0:128 offset1:160
410
391
define amdgpu_kernel void @ds_write32_combine_stride_8192_shifted (float addrspace (3 )* nocapture %arg ) {
411
392
bb:
412
393
%tmp = getelementptr inbounds float , float addrspace (3 )* %arg , i32 1
@@ -429,12 +410,12 @@ bb:
429
410
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
430
411
431
412
; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
432
- ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x960 , [[BASE]]
413
+ ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x800 , [[BASE]]
433
414
434
415
; GCN-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:50
435
416
; GCN-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset0:100 offset1:150
436
417
; GCN-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset0:200 offset1:250
437
- ; GCN-DAG: ds_write2_b64 [[B1]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:50
418
+ ; GCN-DAG: ds_write2_b64 [[B1]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset0:44 offset1:94
438
419
define amdgpu_kernel void @ds_write64_combine_stride_400 (double addrspace (3 )* nocapture %arg ) {
439
420
bb:
440
421
store double 1 .000000e+00 , double addrspace (3 )* %arg , align 8
@@ -459,17 +440,12 @@ bb:
459
440
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
460
441
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
461
442
462
- ; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
463
- ; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
464
- ; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
465
-
466
- ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 8, [[BASE]]
467
- ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x4008, [[BASE]]
468
- ; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x8008, [[BASE]]
443
+ ; VI-DAG: v_add_u32_e32 [[BASE]], vcc, 8, [[BASE]]
444
+ ; GFX9-DAG: v_add_u32_e32 [[BASE]], 8, [[BASE]]
469
445
470
- ; GCN-DAG: ds_write2st64_b64 [[B1 ]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16
471
- ; GCN-DAG: ds_write2st64_b64 [[B2 ]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16
472
- ; GCN-DAG: ds_write2st64_b64 [[B3 ]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16
446
+ ; GCN-DAG: ds_write2st64_b64 [[BASE ]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16
447
+ ; GCN-DAG: ds_write2st64_b64 [[BASE ]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset0:32 offset1:48
448
+ ; GCN-DAG: ds_write2st64_b64 [[BASE ]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset0:64 offset1:80
473
449
define amdgpu_kernel void @ds_write64_combine_stride_8192_shifted (double addrspace (3 )* nocapture %arg ) {
474
450
bb:
475
451
%tmp = getelementptr inbounds double , double addrspace (3 )* %arg , i32 1
0 commit comments