@@ -297,21 +297,194 @@ define i32 @test_v64i32_load_store(ptr addrspace(1) %ptr, i32 %idx, ptr addrspac
297
297
ret i32 %elt
298
298
}
299
299
300
- ;TODO: This test should be enabled in the upstream later. It currently causes a crash
301
- ; during branch relaxation as the gfx1250 real opcode definition for V_LSHL_ADD_U64
302
- ; is not yet upstreamed.
303
- ;define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %ptr_b, ptr addrspace(1) %out) {
304
- ; %a = load <16 x i64>, ptr addrspace(1) %ptr_a, align 4
305
- ; %in_a = insertelement <16 x i64> %a, i64 100, i32 5
306
- ; store <16 x i64> %in_a, ptr addrspace(1) null
307
- ; %b = load <16 x i64>, ptr addrspace(1) %ptr_b, align 4
308
- ; %in_b = insertelement <16 x i64> %a, i64 200, i32 10
309
- ; store <16 x i64> %in_b, ptr addrspace(1) null
310
- ; %add = add <16 x i64> %in_a, %in_b
311
- ; store <16 x i64> %add, ptr addrspace(1) %out, align 4
312
- ; %elt = extractelement <16 x i64> %add, i32 1
313
- ; ret i64 %elt
314
- ;}
300
; Test function: <16 x i64> global load / insertelement / store / add sequence.
;   - loads %a from %ptr_a and %b from %ptr_b (both loads use align 4),
;   - builds %in_a (%a with element 5 set to 100) and %in_b (%a with element 10 set to 200),
;   - stores %in_a and then %in_b through the null addrspace(1) pointer,
;   - stores %in_a + %in_b to %out and returns element 1 of the sum.
; The GCN-SDAG / GCN-GISEL lines are FileCheck expectations for two check
; prefixes (presumably SelectionDAG and GlobalISel runs; confirm against the
; RUN lines at the top of the file). They look autogenerated; regenerate them
; rather than editing by hand if the IR body changes.
+ define i64 @test_v16i64_load_store (ptr addrspace (1 ) %ptr_a , ptr addrspace (1 ) %ptr_b , ptr addrspace (1 ) %out ) {
301
+ ; GCN-SDAG-LABEL: test_v16i64_load_store:
302
+ ; GCN-SDAG: ; %bb.0:
303
+ ; GCN-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
304
+ ; GCN-SDAG-NEXT: s_wait_kmcnt 0x0
305
+ ; GCN-SDAG-NEXT: s_clause 0x3
306
+ ; GCN-SDAG-NEXT: scratch_store_b32 off, v40, s32 offset:12
307
+ ; GCN-SDAG-NEXT: scratch_store_b32 off, v41, s32 offset:8
308
+ ; GCN-SDAG-NEXT: scratch_store_b32 off, v42, s32 offset:4
309
+ ; GCN-SDAG-NEXT: scratch_store_b32 off, v43, s32
310
+ ; GCN-SDAG-NEXT: s_clause 0x7
311
+ ; GCN-SDAG-NEXT: global_load_b128 v[6:9], v[0:1], off offset:112
312
+ ; GCN-SDAG-NEXT: global_load_b128 v[10:13], v[0:1], off offset:96
313
+ ; GCN-SDAG-NEXT: global_load_b128 v[18:21], v[0:1], off offset:80
314
+ ; GCN-SDAG-NEXT: global_load_b128 v[34:37], v[0:1], off offset:48
315
+ ; GCN-SDAG-NEXT: global_load_b128 v[30:33], v[0:1], off offset:32
316
+ ; GCN-SDAG-NEXT: global_load_b128 v[22:25], v[0:1], off offset:16
317
+ ; GCN-SDAG-NEXT: global_load_b128 v[26:29], v[0:1], off
318
+ ; GCN-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:64
319
+ ; GCN-SDAG-NEXT: v_mov_b32_e32 v16, 0x70
320
+ ; GCN-SDAG-NEXT: v_dual_mov_b32 v17, 0 :: v_dual_mov_b32 v50, 0x60
321
+ ; GCN-SDAG-NEXT: v_dual_mov_b32 v51, 0 :: v_dual_mov_b32 v52, 48
322
+ ; GCN-SDAG-NEXT: v_dual_mov_b32 v38, 0x50 :: v_dual_mov_b32 v53, 0
323
+ ; GCN-SDAG-NEXT: v_mov_b32_e32 v54, 32
324
+ ; GCN-SDAG-NEXT: v_dual_mov_b32 v14, 0xc8 :: v_dual_mov_b32 v15, 0
325
+ ; GCN-SDAG-NEXT: v_dual_mov_b32 v39, 0 :: v_dual_mov_b32 v48, 64
326
+ ; GCN-SDAG-NEXT: v_dual_mov_b32 v55, 0 :: v_dual_mov_b32 v40, 16
327
+ ; GCN-SDAG-NEXT: v_mov_b32_e32 v49, 0
328
+ ; GCN-SDAG-NEXT: v_dual_mov_b32 v41, 0 :: v_dual_mov_b32 v42, 0
329
+ ; GCN-SDAG-NEXT: v_mov_b32_e32 v43, 0
330
+ ; GCN-SDAG-NEXT: s_wait_loadcnt 0x7
331
+ ; GCN-SDAG-NEXT: global_store_b128 v[16:17], v[6:9], off
332
+ ; GCN-SDAG-NEXT: s_wait_loadcnt 0x6
333
+ ; GCN-SDAG-NEXT: global_store_b128 v[50:51], v[10:13], off
334
+ ; GCN-SDAG-NEXT: s_wait_loadcnt 0x5
335
+ ; GCN-SDAG-NEXT: s_wait_xcnt 0x1
336
+ ; GCN-SDAG-NEXT: v_dual_mov_b32 v16, v20 :: v_dual_mov_b32 v17, v21
337
+ ; GCN-SDAG-NEXT: s_wait_xcnt 0x0
338
+ ; GCN-SDAG-NEXT: v_lshl_add_u64 v[12:13], v[12:13], 0, v[12:13]
339
+ ; GCN-SDAG-NEXT: v_lshl_add_u64 v[10:11], v[10:11], 0, v[10:11]
340
+ ; GCN-SDAG-NEXT: v_lshl_add_u64 v[8:9], v[8:9], 0, v[8:9]
341
+ ; GCN-SDAG-NEXT: v_lshl_add_u64 v[6:7], v[6:7], 0, v[6:7]
342
+ ; GCN-SDAG-NEXT: s_wait_loadcnt 0x4
343
+ ; GCN-SDAG-NEXT: global_store_b128 v[52:53], v[34:37], off
344
+ ; GCN-SDAG-NEXT: s_wait_loadcnt 0x3
345
+ ; GCN-SDAG-NEXT: global_store_b128 v[54:55], v[30:33], off
346
+ ; GCN-SDAG-NEXT: s_wait_loadcnt 0x2
347
+ ; GCN-SDAG-NEXT: global_store_b128 v[40:41], v[22:25], off
348
+ ; GCN-SDAG-NEXT: s_wait_loadcnt 0x1
349
+ ; GCN-SDAG-NEXT: global_store_b128 v[42:43], v[26:29], off
350
+ ; GCN-SDAG-NEXT: s_wait_loadcnt 0x0
351
+ ; GCN-SDAG-NEXT: s_wait_xcnt 0x3
352
+ ; GCN-SDAG-NEXT: v_lshl_add_u64 v[52:53], v[2:3], 0, v[2:3]
353
+ ; GCN-SDAG-NEXT: v_lshl_add_u64 v[50:51], v[0:1], 0, v[0:1]
354
+ ; GCN-SDAG-NEXT: s_wait_xcnt 0x1
355
+ ; GCN-SDAG-NEXT: v_lshl_add_u64 v[24:25], v[24:25], 0, v[24:25]
356
+ ; GCN-SDAG-NEXT: v_lshl_add_u64 v[22:23], v[22:23], 0, v[22:23]
357
+ ; GCN-SDAG-NEXT: s_wait_xcnt 0x0
358
+ ; GCN-SDAG-NEXT: v_lshl_add_u64 v[28:29], v[28:29], 0, v[28:29]
359
+ ; GCN-SDAG-NEXT: v_lshl_add_u64 v[26:27], v[26:27], 0, v[26:27]
360
+ ; GCN-SDAG-NEXT: v_lshl_add_u64 v[36:37], v[36:37], 0, v[36:37]
361
+ ; GCN-SDAG-NEXT: v_lshl_add_u64 v[34:35], v[34:35], 0, v[34:35]
362
+ ; GCN-SDAG-NEXT: v_lshl_add_u64 v[32:33], v[32:33], 0, 0x64
363
+ ; GCN-SDAG-NEXT: v_lshl_add_u64 v[30:31], v[30:31], 0, v[30:31]
364
+ ; GCN-SDAG-NEXT: v_lshl_add_u64 v[20:21], v[20:21], 0, v[20:21]
365
+ ; GCN-SDAG-NEXT: v_lshl_add_u64 v[18:19], v[18:19], 0, 0xc8
366
+ ; GCN-SDAG-NEXT: s_clause 0x1
367
+ ; GCN-SDAG-NEXT: global_store_b128 v[38:39], v[14:17], off
368
+ ; GCN-SDAG-NEXT: global_store_b128 v[48:49], v[0:3], off
369
+ ; GCN-SDAG-NEXT: s_clause 0x7
370
+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[10:13], off offset:96
371
+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[6:9], off offset:112
372
+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[50:53], off offset:64
373
+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[18:21], off offset:80
374
+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[30:33], off offset:32
375
+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[34:37], off offset:48
376
+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[26:29], off
377
+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[22:25], off offset:16
378
+ ; GCN-SDAG-NEXT: s_clause 0x3
379
+ ; GCN-SDAG-NEXT: scratch_load_b32 v43, off, s32
380
+ ; GCN-SDAG-NEXT: scratch_load_b32 v42, off, s32 offset:4
381
+ ; GCN-SDAG-NEXT: scratch_load_b32 v41, off, s32 offset:8
382
+ ; GCN-SDAG-NEXT: scratch_load_b32 v40, off, s32 offset:12
383
+ ; GCN-SDAG-NEXT: s_wait_xcnt 0xc
384
+ ; GCN-SDAG-NEXT: v_dual_mov_b32 v0, v28 :: v_dual_mov_b32 v1, v29
385
+ ; GCN-SDAG-NEXT: s_wait_loadcnt 0x0
386
+ ; GCN-SDAG-NEXT: s_set_pc_i64 s[30:31]
387
+ ;
388
+ ; GCN-GISEL-LABEL: test_v16i64_load_store:
389
+ ; GCN-GISEL: ; %bb.0:
390
+ ; GCN-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
391
+ ; GCN-GISEL-NEXT: s_wait_kmcnt 0x0
392
+ ; GCN-GISEL-NEXT: s_clause 0x5
393
+ ; GCN-GISEL-NEXT: scratch_store_b32 off, v40, s32 offset:20
394
+ ; GCN-GISEL-NEXT: scratch_store_b32 off, v41, s32 offset:16
395
+ ; GCN-GISEL-NEXT: scratch_store_b32 off, v42, s32 offset:12
396
+ ; GCN-GISEL-NEXT: scratch_store_b32 off, v43, s32 offset:8
397
+ ; GCN-GISEL-NEXT: scratch_store_b32 off, v44, s32 offset:4
398
+ ; GCN-GISEL-NEXT: scratch_store_b32 off, v45, s32
399
+ ; GCN-GISEL-NEXT: s_clause 0x7
400
+ ; GCN-GISEL-NEXT: global_load_b128 v[6:9], v[0:1], off offset:80
401
+ ; GCN-GISEL-NEXT: global_load_b128 v[10:13], v[0:1], off
402
+ ; GCN-GISEL-NEXT: global_load_b128 v[14:17], v[0:1], off offset:16
403
+ ; GCN-GISEL-NEXT: global_load_b128 v[18:21], v[0:1], off offset:32
404
+ ; GCN-GISEL-NEXT: global_load_b128 v[22:25], v[0:1], off offset:48
405
+ ; GCN-GISEL-NEXT: global_load_b128 v[26:29], v[0:1], off offset:96
406
+ ; GCN-GISEL-NEXT: global_load_b128 v[30:33], v[0:1], off offset:112
407
+ ; GCN-GISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:64
408
+ ; GCN-GISEL-NEXT: v_mov_b32_e32 v34, 0xc8
409
+ ; GCN-GISEL-NEXT: v_dual_mov_b32 v35, 0 :: v_dual_mov_b32 v38, 0
410
+ ; GCN-GISEL-NEXT: v_dual_mov_b32 v39, 0 :: v_dual_mov_b32 v48, 16
411
+ ; GCN-GISEL-NEXT: v_dual_mov_b32 v49, 0 :: v_dual_mov_b32 v50, 32
412
+ ; GCN-GISEL-NEXT: v_dual_mov_b32 v52, 48 :: v_dual_mov_b32 v51, 0
413
+ ; GCN-GISEL-NEXT: v_dual_mov_b32 v53, 0 :: v_dual_mov_b32 v54, 64
414
+ ; GCN-GISEL-NEXT: v_dual_mov_b32 v40, 0x50 :: v_dual_mov_b32 v55, 0
415
+ ; GCN-GISEL-NEXT: v_dual_mov_b32 v41, 0 :: v_dual_mov_b32 v42, 0x60
416
+ ; GCN-GISEL-NEXT: v_dual_mov_b32 v44, 0x70 :: v_dual_mov_b32 v43, 0
417
+ ; GCN-GISEL-NEXT: v_mov_b32_e32 v45, 0
418
+ ; GCN-GISEL-NEXT: s_wait_loadcnt 0x7
419
+ ; GCN-GISEL-NEXT: v_dual_mov_b32 v37, v9 :: v_dual_mov_b32 v36, v8
420
+ ; GCN-GISEL-NEXT: v_lshl_add_u64 v[6:7], v[6:7], 0, 0xc8
421
+ ; GCN-GISEL-NEXT: v_lshl_add_u64 v[8:9], v[8:9], 0, v[8:9]
422
+ ; GCN-GISEL-NEXT: s_wait_loadcnt 0x6
423
+ ; GCN-GISEL-NEXT: global_store_b128 v[38:39], v[10:13], off
424
+ ; GCN-GISEL-NEXT: s_wait_loadcnt 0x5
425
+ ; GCN-GISEL-NEXT: global_store_b128 v[48:49], v[14:17], off
426
+ ; GCN-GISEL-NEXT: s_wait_loadcnt 0x4
427
+ ; GCN-GISEL-NEXT: global_store_b128 v[50:51], v[18:21], off
428
+ ; GCN-GISEL-NEXT: s_wait_loadcnt 0x3
429
+ ; GCN-GISEL-NEXT: global_store_b128 v[52:53], v[22:25], off
430
+ ; GCN-GISEL-NEXT: s_wait_loadcnt 0x2
431
+ ; GCN-GISEL-NEXT: global_store_b128 v[42:43], v[26:29], off
432
+ ; GCN-GISEL-NEXT: s_wait_loadcnt 0x1
433
+ ; GCN-GISEL-NEXT: global_store_b128 v[44:45], v[30:33], off
434
+ ; GCN-GISEL-NEXT: s_wait_xcnt 0x5
435
+ ; GCN-GISEL-NEXT: v_lshl_add_u64 v[10:11], v[10:11], 0, v[10:11]
436
+ ; GCN-GISEL-NEXT: v_lshl_add_u64 v[12:13], v[12:13], 0, v[12:13]
437
+ ; GCN-GISEL-NEXT: s_wait_xcnt 0x4
438
+ ; GCN-GISEL-NEXT: v_lshl_add_u64 v[14:15], v[14:15], 0, v[14:15]
439
+ ; GCN-GISEL-NEXT: v_lshl_add_u64 v[16:17], v[16:17], 0, v[16:17]
440
+ ; GCN-GISEL-NEXT: s_wait_xcnt 0x3
441
+ ; GCN-GISEL-NEXT: v_lshl_add_u64 v[18:19], v[18:19], 0, v[18:19]
442
+ ; GCN-GISEL-NEXT: v_lshl_add_u64 v[20:21], v[20:21], 0, 0x64
443
+ ; GCN-GISEL-NEXT: s_wait_xcnt 0x2
444
+ ; GCN-GISEL-NEXT: v_lshl_add_u64 v[22:23], v[22:23], 0, v[22:23]
445
+ ; GCN-GISEL-NEXT: v_lshl_add_u64 v[24:25], v[24:25], 0, v[24:25]
446
+ ; GCN-GISEL-NEXT: s_wait_loadcnt 0x0
447
+ ; GCN-GISEL-NEXT: v_lshl_add_u64 v[48:49], v[0:1], 0, v[0:1]
448
+ ; GCN-GISEL-NEXT: v_lshl_add_u64 v[50:51], v[2:3], 0, v[2:3]
449
+ ; GCN-GISEL-NEXT: s_wait_xcnt 0x1
450
+ ; GCN-GISEL-NEXT: v_lshl_add_u64 v[26:27], v[26:27], 0, v[26:27]
451
+ ; GCN-GISEL-NEXT: v_lshl_add_u64 v[28:29], v[28:29], 0, v[28:29]
452
+ ; GCN-GISEL-NEXT: s_wait_xcnt 0x0
453
+ ; GCN-GISEL-NEXT: v_lshl_add_u64 v[30:31], v[30:31], 0, v[30:31]
454
+ ; GCN-GISEL-NEXT: v_lshl_add_u64 v[32:33], v[32:33], 0, v[32:33]
455
+ ; GCN-GISEL-NEXT: s_clause 0x1
456
+ ; GCN-GISEL-NEXT: global_store_b128 v[54:55], v[0:3], off
457
+ ; GCN-GISEL-NEXT: global_store_b128 v[40:41], v[34:37], off
458
+ ; GCN-GISEL-NEXT: s_clause 0x7
459
+ ; GCN-GISEL-NEXT: global_store_b128 v[4:5], v[10:13], off
460
+ ; GCN-GISEL-NEXT: global_store_b128 v[4:5], v[14:17], off offset:16
461
+ ; GCN-GISEL-NEXT: global_store_b128 v[4:5], v[18:21], off offset:32
462
+ ; GCN-GISEL-NEXT: global_store_b128 v[4:5], v[22:25], off offset:48
463
+ ; GCN-GISEL-NEXT: global_store_b128 v[4:5], v[48:51], off offset:64
464
+ ; GCN-GISEL-NEXT: global_store_b128 v[4:5], v[6:9], off offset:80
465
+ ; GCN-GISEL-NEXT: global_store_b128 v[4:5], v[26:29], off offset:96
466
+ ; GCN-GISEL-NEXT: global_store_b128 v[4:5], v[30:33], off offset:112
467
+ ; GCN-GISEL-NEXT: s_clause 0x5
468
+ ; GCN-GISEL-NEXT: scratch_load_b32 v45, off, s32
469
+ ; GCN-GISEL-NEXT: scratch_load_b32 v44, off, s32 offset:4
470
+ ; GCN-GISEL-NEXT: scratch_load_b32 v43, off, s32 offset:8
471
+ ; GCN-GISEL-NEXT: scratch_load_b32 v42, off, s32 offset:12
472
+ ; GCN-GISEL-NEXT: scratch_load_b32 v41, off, s32 offset:16
473
+ ; GCN-GISEL-NEXT: scratch_load_b32 v40, off, s32 offset:20
474
+ ; GCN-GISEL-NEXT: v_dual_mov_b32 v0, v12 :: v_dual_mov_b32 v1, v13
475
+ ; GCN-GISEL-NEXT: s_wait_loadcnt 0x0
476
+ ; GCN-GISEL-NEXT: s_set_pc_i64 s[30:31]
477
+ %a = load <16 x i64 >, ptr addrspace (1 ) %ptr_a , align 4
478
+ %in_a = insertelement <16 x i64 > %a , i64 100 , i32 5
479
+ store <16 x i64 > %in_a , ptr addrspace (1 ) null
480
+ %b = load <16 x i64 >, ptr addrspace (1 ) %ptr_b , align 4
481
; NOTE(review): %in_b is built from %a, not %b, so %b has no visible use in
; this function. If %b was intended here, the GCN-SDAG / GCN-GISEL expectations
; above would need to be regenerated. Confirm before changing.
+ %in_b = insertelement <16 x i64 > %a , i64 200 , i32 10
482
+ store <16 x i64 > %in_b , ptr addrspace (1 ) null
483
+ %add = add <16 x i64 > %in_a , %in_b
484
+ store <16 x i64 > %add , ptr addrspace (1 ) %out , align 4
485
+ %elt = extractelement <16 x i64 > %add , i32 1
486
+ ret i64 %elt
487
+ }
315
488
316
489
define amdgpu_kernel void @test_v7i16_load_store_kernel (ptr addrspace (1 ) %ptr1 , ptr addrspace (1 ) %ptr2 , ptr addrspace (1 ) %out ) {
317
490
; GCN-SDAG-LABEL: test_v7i16_load_store_kernel:
0 commit comments