Skip to content

Commit 4169c88

Browse files
committed
Enabled the one test that was previously disabled.
1 parent 40a7ef2 commit 4169c88

File tree

1 file changed

+188
-15
lines changed

1 file changed

+188
-15
lines changed

llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll

Lines changed: 188 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -297,21 +297,194 @@ define i32 @test_v64i32_load_store(ptr addrspace(1) %ptr, i32 %idx, ptr addrspac
297297
ret i32 %elt
298298
}
299299

300-
;TODO: This test should be enabled in the upstream later. It currently causes a crash
301-
; during branch relaxation as the gfx1250 real opcode definition for V_LSHL_ADD_U64
302-
; is not yet upstreamed.
303-
;define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %ptr_b, ptr addrspace(1) %out) {
304-
; %a = load <16 x i64>, ptr addrspace(1) %ptr_a, align 4
305-
; %in_a = insertelement <16 x i64> %a, i64 100, i32 5
306-
; store <16 x i64> %in_a, ptr addrspace(1) null
307-
; %b = load <16 x i64>, ptr addrspace(1) %ptr_b, align 4
308-
; %in_b = insertelement <16 x i64> %a, i64 200, i32 10
309-
; store <16 x i64> %in_b, ptr addrspace(1) null
310-
; %add = add <16 x i64> %in_a, %in_b
311-
; store <16 x i64> %add, ptr addrspace(1) %out, align 4
312-
; %elt = extractelement <16 x i64> %add, i32 1
313-
; ret i64 %elt
314-
;}
; test_v16i64_load_store: loads <16 x i64> vectors from %ptr_a and %ptr_b,
; inserts the constants 100 (index 5) and 200 (index 10), stores both
; modified vectors to the null global address, stores their element-wise sum
; to %out, and returns element 1 of the sum.
; The comment lines inside the function are FileCheck expectations for the
; GCN-SDAG and GCN-GISEL check prefixes; presumably they are autogenerated
; (update_llc_test_checks.py) and must be regenerated rather than hand-edited
; if the IR body changes -- confirm against the file header.
300+
define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %ptr_b, ptr addrspace(1) %out) {
301+
; GCN-SDAG-LABEL: test_v16i64_load_store:
302+
; GCN-SDAG: ; %bb.0:
303+
; GCN-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
304+
; GCN-SDAG-NEXT: s_wait_kmcnt 0x0
305+
; GCN-SDAG-NEXT: s_clause 0x3
306+
; GCN-SDAG-NEXT: scratch_store_b32 off, v40, s32 offset:12
307+
; GCN-SDAG-NEXT: scratch_store_b32 off, v41, s32 offset:8
308+
; GCN-SDAG-NEXT: scratch_store_b32 off, v42, s32 offset:4
309+
; GCN-SDAG-NEXT: scratch_store_b32 off, v43, s32
310+
; GCN-SDAG-NEXT: s_clause 0x7
311+
; GCN-SDAG-NEXT: global_load_b128 v[6:9], v[0:1], off offset:112
312+
; GCN-SDAG-NEXT: global_load_b128 v[10:13], v[0:1], off offset:96
313+
; GCN-SDAG-NEXT: global_load_b128 v[18:21], v[0:1], off offset:80
314+
; GCN-SDAG-NEXT: global_load_b128 v[34:37], v[0:1], off offset:48
315+
; GCN-SDAG-NEXT: global_load_b128 v[30:33], v[0:1], off offset:32
316+
; GCN-SDAG-NEXT: global_load_b128 v[22:25], v[0:1], off offset:16
317+
; GCN-SDAG-NEXT: global_load_b128 v[26:29], v[0:1], off
318+
; GCN-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:64
319+
; GCN-SDAG-NEXT: v_mov_b32_e32 v16, 0x70
320+
; GCN-SDAG-NEXT: v_dual_mov_b32 v17, 0 :: v_dual_mov_b32 v50, 0x60
321+
; GCN-SDAG-NEXT: v_dual_mov_b32 v51, 0 :: v_dual_mov_b32 v52, 48
322+
; GCN-SDAG-NEXT: v_dual_mov_b32 v38, 0x50 :: v_dual_mov_b32 v53, 0
323+
; GCN-SDAG-NEXT: v_mov_b32_e32 v54, 32
324+
; GCN-SDAG-NEXT: v_dual_mov_b32 v14, 0xc8 :: v_dual_mov_b32 v15, 0
325+
; GCN-SDAG-NEXT: v_dual_mov_b32 v39, 0 :: v_dual_mov_b32 v48, 64
326+
; GCN-SDAG-NEXT: v_dual_mov_b32 v55, 0 :: v_dual_mov_b32 v40, 16
327+
; GCN-SDAG-NEXT: v_mov_b32_e32 v49, 0
328+
; GCN-SDAG-NEXT: v_dual_mov_b32 v41, 0 :: v_dual_mov_b32 v42, 0
329+
; GCN-SDAG-NEXT: v_mov_b32_e32 v43, 0
330+
; GCN-SDAG-NEXT: s_wait_loadcnt 0x7
331+
; GCN-SDAG-NEXT: global_store_b128 v[16:17], v[6:9], off
332+
; GCN-SDAG-NEXT: s_wait_loadcnt 0x6
333+
; GCN-SDAG-NEXT: global_store_b128 v[50:51], v[10:13], off
334+
; GCN-SDAG-NEXT: s_wait_loadcnt 0x5
335+
; GCN-SDAG-NEXT: s_wait_xcnt 0x1
336+
; GCN-SDAG-NEXT: v_dual_mov_b32 v16, v20 :: v_dual_mov_b32 v17, v21
337+
; GCN-SDAG-NEXT: s_wait_xcnt 0x0
338+
; GCN-SDAG-NEXT: v_lshl_add_u64 v[12:13], v[12:13], 0, v[12:13]
339+
; GCN-SDAG-NEXT: v_lshl_add_u64 v[10:11], v[10:11], 0, v[10:11]
340+
; GCN-SDAG-NEXT: v_lshl_add_u64 v[8:9], v[8:9], 0, v[8:9]
341+
; GCN-SDAG-NEXT: v_lshl_add_u64 v[6:7], v[6:7], 0, v[6:7]
342+
; GCN-SDAG-NEXT: s_wait_loadcnt 0x4
343+
; GCN-SDAG-NEXT: global_store_b128 v[52:53], v[34:37], off
344+
; GCN-SDAG-NEXT: s_wait_loadcnt 0x3
345+
; GCN-SDAG-NEXT: global_store_b128 v[54:55], v[30:33], off
346+
; GCN-SDAG-NEXT: s_wait_loadcnt 0x2
347+
; GCN-SDAG-NEXT: global_store_b128 v[40:41], v[22:25], off
348+
; GCN-SDAG-NEXT: s_wait_loadcnt 0x1
349+
; GCN-SDAG-NEXT: global_store_b128 v[42:43], v[26:29], off
350+
; GCN-SDAG-NEXT: s_wait_loadcnt 0x0
351+
; GCN-SDAG-NEXT: s_wait_xcnt 0x3
352+
; GCN-SDAG-NEXT: v_lshl_add_u64 v[52:53], v[2:3], 0, v[2:3]
353+
; GCN-SDAG-NEXT: v_lshl_add_u64 v[50:51], v[0:1], 0, v[0:1]
354+
; GCN-SDAG-NEXT: s_wait_xcnt 0x1
355+
; GCN-SDAG-NEXT: v_lshl_add_u64 v[24:25], v[24:25], 0, v[24:25]
356+
; GCN-SDAG-NEXT: v_lshl_add_u64 v[22:23], v[22:23], 0, v[22:23]
357+
; GCN-SDAG-NEXT: s_wait_xcnt 0x0
358+
; GCN-SDAG-NEXT: v_lshl_add_u64 v[28:29], v[28:29], 0, v[28:29]
359+
; GCN-SDAG-NEXT: v_lshl_add_u64 v[26:27], v[26:27], 0, v[26:27]
360+
; GCN-SDAG-NEXT: v_lshl_add_u64 v[36:37], v[36:37], 0, v[36:37]
361+
; GCN-SDAG-NEXT: v_lshl_add_u64 v[34:35], v[34:35], 0, v[34:35]
362+
; GCN-SDAG-NEXT: v_lshl_add_u64 v[32:33], v[32:33], 0, 0x64
363+
; GCN-SDAG-NEXT: v_lshl_add_u64 v[30:31], v[30:31], 0, v[30:31]
364+
; GCN-SDAG-NEXT: v_lshl_add_u64 v[20:21], v[20:21], 0, v[20:21]
365+
; GCN-SDAG-NEXT: v_lshl_add_u64 v[18:19], v[18:19], 0, 0xc8
366+
; GCN-SDAG-NEXT: s_clause 0x1
367+
; GCN-SDAG-NEXT: global_store_b128 v[38:39], v[14:17], off
368+
; GCN-SDAG-NEXT: global_store_b128 v[48:49], v[0:3], off
369+
; GCN-SDAG-NEXT: s_clause 0x7
370+
; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[10:13], off offset:96
371+
; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[6:9], off offset:112
372+
; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[50:53], off offset:64
373+
; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[18:21], off offset:80
374+
; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[30:33], off offset:32
375+
; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[34:37], off offset:48
376+
; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[26:29], off
377+
; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[22:25], off offset:16
378+
; GCN-SDAG-NEXT: s_clause 0x3
379+
; GCN-SDAG-NEXT: scratch_load_b32 v43, off, s32
380+
; GCN-SDAG-NEXT: scratch_load_b32 v42, off, s32 offset:4
381+
; GCN-SDAG-NEXT: scratch_load_b32 v41, off, s32 offset:8
382+
; GCN-SDAG-NEXT: scratch_load_b32 v40, off, s32 offset:12
383+
; GCN-SDAG-NEXT: s_wait_xcnt 0xc
384+
; GCN-SDAG-NEXT: v_dual_mov_b32 v0, v28 :: v_dual_mov_b32 v1, v29
385+
; GCN-SDAG-NEXT: s_wait_loadcnt 0x0
386+
; GCN-SDAG-NEXT: s_set_pc_i64 s[30:31]
387+
;
388+
; GCN-GISEL-LABEL: test_v16i64_load_store:
389+
; GCN-GISEL: ; %bb.0:
390+
; GCN-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
391+
; GCN-GISEL-NEXT: s_wait_kmcnt 0x0
392+
; GCN-GISEL-NEXT: s_clause 0x5
393+
; GCN-GISEL-NEXT: scratch_store_b32 off, v40, s32 offset:20
394+
; GCN-GISEL-NEXT: scratch_store_b32 off, v41, s32 offset:16
395+
; GCN-GISEL-NEXT: scratch_store_b32 off, v42, s32 offset:12
396+
; GCN-GISEL-NEXT: scratch_store_b32 off, v43, s32 offset:8
397+
; GCN-GISEL-NEXT: scratch_store_b32 off, v44, s32 offset:4
398+
; GCN-GISEL-NEXT: scratch_store_b32 off, v45, s32
399+
; GCN-GISEL-NEXT: s_clause 0x7
400+
; GCN-GISEL-NEXT: global_load_b128 v[6:9], v[0:1], off offset:80
401+
; GCN-GISEL-NEXT: global_load_b128 v[10:13], v[0:1], off
402+
; GCN-GISEL-NEXT: global_load_b128 v[14:17], v[0:1], off offset:16
403+
; GCN-GISEL-NEXT: global_load_b128 v[18:21], v[0:1], off offset:32
404+
; GCN-GISEL-NEXT: global_load_b128 v[22:25], v[0:1], off offset:48
405+
; GCN-GISEL-NEXT: global_load_b128 v[26:29], v[0:1], off offset:96
406+
; GCN-GISEL-NEXT: global_load_b128 v[30:33], v[0:1], off offset:112
407+
; GCN-GISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:64
408+
; GCN-GISEL-NEXT: v_mov_b32_e32 v34, 0xc8
409+
; GCN-GISEL-NEXT: v_dual_mov_b32 v35, 0 :: v_dual_mov_b32 v38, 0
410+
; GCN-GISEL-NEXT: v_dual_mov_b32 v39, 0 :: v_dual_mov_b32 v48, 16
411+
; GCN-GISEL-NEXT: v_dual_mov_b32 v49, 0 :: v_dual_mov_b32 v50, 32
412+
; GCN-GISEL-NEXT: v_dual_mov_b32 v52, 48 :: v_dual_mov_b32 v51, 0
413+
; GCN-GISEL-NEXT: v_dual_mov_b32 v53, 0 :: v_dual_mov_b32 v54, 64
414+
; GCN-GISEL-NEXT: v_dual_mov_b32 v40, 0x50 :: v_dual_mov_b32 v55, 0
415+
; GCN-GISEL-NEXT: v_dual_mov_b32 v41, 0 :: v_dual_mov_b32 v42, 0x60
416+
; GCN-GISEL-NEXT: v_dual_mov_b32 v44, 0x70 :: v_dual_mov_b32 v43, 0
417+
; GCN-GISEL-NEXT: v_mov_b32_e32 v45, 0
418+
; GCN-GISEL-NEXT: s_wait_loadcnt 0x7
419+
; GCN-GISEL-NEXT: v_dual_mov_b32 v37, v9 :: v_dual_mov_b32 v36, v8
420+
; GCN-GISEL-NEXT: v_lshl_add_u64 v[6:7], v[6:7], 0, 0xc8
421+
; GCN-GISEL-NEXT: v_lshl_add_u64 v[8:9], v[8:9], 0, v[8:9]
422+
; GCN-GISEL-NEXT: s_wait_loadcnt 0x6
423+
; GCN-GISEL-NEXT: global_store_b128 v[38:39], v[10:13], off
424+
; GCN-GISEL-NEXT: s_wait_loadcnt 0x5
425+
; GCN-GISEL-NEXT: global_store_b128 v[48:49], v[14:17], off
426+
; GCN-GISEL-NEXT: s_wait_loadcnt 0x4
427+
; GCN-GISEL-NEXT: global_store_b128 v[50:51], v[18:21], off
428+
; GCN-GISEL-NEXT: s_wait_loadcnt 0x3
429+
; GCN-GISEL-NEXT: global_store_b128 v[52:53], v[22:25], off
430+
; GCN-GISEL-NEXT: s_wait_loadcnt 0x2
431+
; GCN-GISEL-NEXT: global_store_b128 v[42:43], v[26:29], off
432+
; GCN-GISEL-NEXT: s_wait_loadcnt 0x1
433+
; GCN-GISEL-NEXT: global_store_b128 v[44:45], v[30:33], off
434+
; GCN-GISEL-NEXT: s_wait_xcnt 0x5
435+
; GCN-GISEL-NEXT: v_lshl_add_u64 v[10:11], v[10:11], 0, v[10:11]
436+
; GCN-GISEL-NEXT: v_lshl_add_u64 v[12:13], v[12:13], 0, v[12:13]
437+
; GCN-GISEL-NEXT: s_wait_xcnt 0x4
438+
; GCN-GISEL-NEXT: v_lshl_add_u64 v[14:15], v[14:15], 0, v[14:15]
439+
; GCN-GISEL-NEXT: v_lshl_add_u64 v[16:17], v[16:17], 0, v[16:17]
440+
; GCN-GISEL-NEXT: s_wait_xcnt 0x3
441+
; GCN-GISEL-NEXT: v_lshl_add_u64 v[18:19], v[18:19], 0, v[18:19]
442+
; GCN-GISEL-NEXT: v_lshl_add_u64 v[20:21], v[20:21], 0, 0x64
443+
; GCN-GISEL-NEXT: s_wait_xcnt 0x2
444+
; GCN-GISEL-NEXT: v_lshl_add_u64 v[22:23], v[22:23], 0, v[22:23]
445+
; GCN-GISEL-NEXT: v_lshl_add_u64 v[24:25], v[24:25], 0, v[24:25]
446+
; GCN-GISEL-NEXT: s_wait_loadcnt 0x0
447+
; GCN-GISEL-NEXT: v_lshl_add_u64 v[48:49], v[0:1], 0, v[0:1]
448+
; GCN-GISEL-NEXT: v_lshl_add_u64 v[50:51], v[2:3], 0, v[2:3]
449+
; GCN-GISEL-NEXT: s_wait_xcnt 0x1
450+
; GCN-GISEL-NEXT: v_lshl_add_u64 v[26:27], v[26:27], 0, v[26:27]
451+
; GCN-GISEL-NEXT: v_lshl_add_u64 v[28:29], v[28:29], 0, v[28:29]
452+
; GCN-GISEL-NEXT: s_wait_xcnt 0x0
453+
; GCN-GISEL-NEXT: v_lshl_add_u64 v[30:31], v[30:31], 0, v[30:31]
454+
; GCN-GISEL-NEXT: v_lshl_add_u64 v[32:33], v[32:33], 0, v[32:33]
455+
; GCN-GISEL-NEXT: s_clause 0x1
456+
; GCN-GISEL-NEXT: global_store_b128 v[54:55], v[0:3], off
457+
; GCN-GISEL-NEXT: global_store_b128 v[40:41], v[34:37], off
458+
; GCN-GISEL-NEXT: s_clause 0x7
459+
; GCN-GISEL-NEXT: global_store_b128 v[4:5], v[10:13], off
460+
; GCN-GISEL-NEXT: global_store_b128 v[4:5], v[14:17], off offset:16
461+
; GCN-GISEL-NEXT: global_store_b128 v[4:5], v[18:21], off offset:32
462+
; GCN-GISEL-NEXT: global_store_b128 v[4:5], v[22:25], off offset:48
463+
; GCN-GISEL-NEXT: global_store_b128 v[4:5], v[48:51], off offset:64
464+
; GCN-GISEL-NEXT: global_store_b128 v[4:5], v[6:9], off offset:80
465+
; GCN-GISEL-NEXT: global_store_b128 v[4:5], v[26:29], off offset:96
466+
; GCN-GISEL-NEXT: global_store_b128 v[4:5], v[30:33], off offset:112
467+
; GCN-GISEL-NEXT: s_clause 0x5
468+
; GCN-GISEL-NEXT: scratch_load_b32 v45, off, s32
469+
; GCN-GISEL-NEXT: scratch_load_b32 v44, off, s32 offset:4
470+
; GCN-GISEL-NEXT: scratch_load_b32 v43, off, s32 offset:8
471+
; GCN-GISEL-NEXT: scratch_load_b32 v42, off, s32 offset:12
472+
; GCN-GISEL-NEXT: scratch_load_b32 v41, off, s32 offset:16
473+
; GCN-GISEL-NEXT: scratch_load_b32 v40, off, s32 offset:20
474+
; GCN-GISEL-NEXT: v_dual_mov_b32 v0, v12 :: v_dual_mov_b32 v1, v13
475+
; GCN-GISEL-NEXT: s_wait_loadcnt 0x0
476+
; GCN-GISEL-NEXT: s_set_pc_i64 s[30:31]
477+
%a = load <16 x i64>, ptr addrspace(1) %ptr_a, align 4
478+
%in_a = insertelement <16 x i64> %a, i64 100, i32 5
479+
store <16 x i64> %in_a, ptr addrspace(1) null
480+
%b = load <16 x i64>, ptr addrspace(1) %ptr_b, align 4
; NOTE(review): %in_b below is built from %a, not %b, so the value loaded into
; %b is never used in this function. Consistent with that, every checked
; global_load_b128 above uses v[0:1] as its address. If %b was intended here,
; the expected assembly must be regenerated after the fix -- confirm with the
; test author.
481+
%in_b = insertelement <16 x i64> %a, i64 200, i32 10
482+
store <16 x i64> %in_b, ptr addrspace(1) null
483+
%add = add <16 x i64> %in_a, %in_b
484+
store <16 x i64> %add, ptr addrspace(1) %out, align 4
485+
%elt = extractelement <16 x i64> %add, i32 1
486+
ret i64 %elt
487+
}
315488

316489
define amdgpu_kernel void @test_v7i16_load_store_kernel(ptr addrspace(1) %ptr1, ptr addrspace(1) %ptr2, ptr addrspace(1) %out) {
317490
; GCN-SDAG-LABEL: test_v7i16_load_store_kernel:

0 commit comments

Comments
 (0)