Skip to content

Commit 4c178d8

Browse files
committed
[AMDGPU] Pre-commit 2 new saddr load tests. NFC.
1 parent d80b04a commit 4c178d8

File tree

1 file changed

+116
-0
lines changed

1 file changed

+116
-0
lines changed

llvm/test/CodeGen/AMDGPU/global-saddr-load.ll

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2383,5 +2383,121 @@ define amdgpu_ps float @global_load_saddr_i8_offset_or_i64_imm_offset_4160(i8 ad
23832383
ret float %to.vgpr
23842384
}
23852385

2386+
; --------------------------------------------------------------------------------
2387+
; Full 64-bit scalar add.
2388+
; --------------------------------------------------------------------------------
2389+
2390+
; Loop test: each iteration performs one volatile float load from %arg indexed
; by a zero-extended i32 counter that starts at 0; the loop exits once the
; incremented counter equals 256.
; The GFX9/GFX10 checks record the generated code: s[0:1] is zeroed before the
; loop, added to s[2:3] with s_add_u32/s_addc_u32, the sum is copied into
; v[0:1], and the load is "global_load_dword ... off". s[0:1] is stepped by 4
; per iteration and s0 is compared with 0x400.
; NOTE(review): these checks look like a baseline for a later change to the
; addressing mode used by the load -- confirm before regenerating them.
define amdgpu_ps void @global_addr_64bit_lsr_iv(float addrspace(1)* inreg %arg) {
2391+
; GFX9-LABEL: global_addr_64bit_lsr_iv:
2392+
; GFX9: ; %bb.0: ; %bb
2393+
; GFX9-NEXT: s_mov_b64 s[0:1], 0
2394+
; GFX9-NEXT: BB128_1: ; %bb3
2395+
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
2396+
; GFX9-NEXT: s_add_u32 s4, s2, s0
2397+
; GFX9-NEXT: s_addc_u32 s5, s3, s1
2398+
; GFX9-NEXT: v_mov_b32_e32 v0, s4
2399+
; GFX9-NEXT: v_mov_b32_e32 v1, s5
2400+
; GFX9-NEXT: global_load_dword v0, v[0:1], off glc
2401+
; GFX9-NEXT: s_waitcnt vmcnt(0)
2402+
; GFX9-NEXT: s_add_u32 s0, s0, 4
2403+
; GFX9-NEXT: s_addc_u32 s1, s1, 0
2404+
; GFX9-NEXT: s_cmpk_eq_i32 s0, 0x400
2405+
; GFX9-NEXT: s_cbranch_scc0 BB128_1
2406+
; GFX9-NEXT: ; %bb.2: ; %bb2
2407+
; GFX9-NEXT: s_endpgm
2408+
;
2409+
; GFX10-LABEL: global_addr_64bit_lsr_iv:
2410+
; GFX10: ; %bb.0: ; %bb
2411+
; GFX10-NEXT: s_mov_b64 s[0:1], 0
2412+
; GFX10-NEXT: BB128_1: ; %bb3
2413+
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
2414+
; GFX10-NEXT: s_add_u32 s4, s2, s0
2415+
; GFX10-NEXT: s_addc_u32 s5, s3, s1
2416+
; GFX10-NEXT: v_mov_b32_e32 v0, s4
2417+
; GFX10-NEXT: v_mov_b32_e32 v1, s5
2418+
; GFX10-NEXT: s_add_u32 s0, s0, 4
2419+
; GFX10-NEXT: s_addc_u32 s1, s1, 0
2420+
; GFX10-NEXT: s_cmpk_eq_i32 s0, 0x400
2421+
; GFX10-NEXT: global_load_dword v0, v[0:1], off glc dlc
2422+
; GFX10-NEXT: s_waitcnt vmcnt(0)
2423+
; GFX10-NEXT: s_cbranch_scc0 BB128_1
2424+
; GFX10-NEXT: ; %bb.2: ; %bb2
2425+
; GFX10-NEXT: s_endpgm
2426+
bb:
2427+
br label %bb3
2428+
2429+
bb2: ; preds = %bb3
2430+
ret void
2431+
2432+
bb3: ; preds = %bb3, %bb
2433+
%i = phi i32 [ 0, %bb ], [ %i8, %bb3 ]
2434+
; Zero-extend the i32 counter so the GEP is indexed with an i64 value.
%i4 = zext i32 %i to i64
2435+
%i5 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %i4
2436+
; The loaded value %i6 is never used; the load is marked volatile.
%i6 = load volatile float, float addrspace(1)* %i5, align 4
2437+
%i8 = add nuw nsw i32 %i, 1
2438+
%i9 = icmp eq i32 %i8, 256
2439+
br i1 %i9, label %bb2, label %bb3
2440+
}
2441+
2442+
; Loop test with two volatile float loads per iteration. The i32 counter starts
; at 0, is zero-extended to index the GEPs, and the loop exits once the
; incremented counter equals 256.
; The GFX9/GFX10 checks record the generated code: one 64-bit address is formed
; with s_add_u32/s_addc_u32, copied into v[0:1], and used by both
; "global_load_dword ... off" instructions.
; NOTE(review): both loads read %i5 (derived from %arg); %i5.1 (derived from
; %arg.1) is computed but never used. Confirm whether the second load was meant
; to use %i5.1 -- changing it requires regenerating the checks.
define amdgpu_ps void @global_addr_64bit_lsr_iv_multiload(float addrspace(1)* inreg %arg, float addrspace(1)* inreg %arg.1) {
2443+
; GFX9-LABEL: global_addr_64bit_lsr_iv_multiload:
2444+
; GFX9: ; %bb.0: ; %bb
2445+
; GFX9-NEXT: s_mov_b64 s[0:1], 0
2446+
; GFX9-NEXT: BB129_1: ; %bb3
2447+
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
2448+
; GFX9-NEXT: s_add_u32 s4, s2, s0
2449+
; GFX9-NEXT: s_addc_u32 s5, s3, s1
2450+
; GFX9-NEXT: v_mov_b32_e32 v0, s4
2451+
; GFX9-NEXT: v_mov_b32_e32 v1, s5
2452+
; GFX9-NEXT: global_load_dword v2, v[0:1], off glc
2453+
; GFX9-NEXT: s_waitcnt vmcnt(0)
2454+
; GFX9-NEXT: global_load_dword v2, v[0:1], off glc
2455+
; GFX9-NEXT: s_waitcnt vmcnt(0)
2456+
; GFX9-NEXT: s_add_u32 s0, s0, 4
2457+
; GFX9-NEXT: s_addc_u32 s1, s1, 0
2458+
; GFX9-NEXT: s_cmpk_eq_i32 s0, 0x400
2459+
; GFX9-NEXT: ; kill: killed $vgpr0_vgpr1
2460+
; GFX9-NEXT: s_cbranch_scc0 BB129_1
2461+
; GFX9-NEXT: ; %bb.2: ; %bb2
2462+
; GFX9-NEXT: s_endpgm
2463+
;
2464+
; GFX10-LABEL: global_addr_64bit_lsr_iv_multiload:
2465+
; GFX10: ; %bb.0: ; %bb
2466+
; GFX10-NEXT: s_mov_b64 s[0:1], 0
2467+
; GFX10-NEXT: BB129_1: ; %bb3
2468+
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
2469+
; GFX10-NEXT: s_add_u32 s4, s2, s0
2470+
; GFX10-NEXT: s_addc_u32 s5, s3, s1
2471+
; GFX10-NEXT: v_mov_b32_e32 v0, s4
2472+
; GFX10-NEXT: v_mov_b32_e32 v1, s5
2473+
; GFX10-NEXT: s_add_u32 s0, s0, 4
2474+
; GFX10-NEXT: s_addc_u32 s1, s1, 0
2475+
; GFX10-NEXT: s_cmpk_eq_i32 s0, 0x400
2476+
; GFX10-NEXT: ; kill: killed $vgpr0_vgpr1
2477+
; GFX10-NEXT: global_load_dword v2, v[0:1], off glc dlc
2478+
; GFX10-NEXT: s_waitcnt vmcnt(0)
2479+
; GFX10-NEXT: global_load_dword v2, v[0:1], off glc dlc
2480+
; GFX10-NEXT: s_waitcnt vmcnt(0)
2481+
; GFX10-NEXT: s_cbranch_scc0 BB129_1
2482+
; GFX10-NEXT: ; %bb.2: ; %bb2
2483+
; GFX10-NEXT: s_endpgm
2484+
bb:
2485+
br label %bb3
2486+
2487+
bb2: ; preds = %bb3
2488+
ret void
2489+
2490+
bb3: ; preds = %bb3, %bb
2491+
%i = phi i32 [ 0, %bb ], [ %i8, %bb3 ]
2492+
; Zero-extend the i32 counter so the GEPs are indexed with an i64 value.
%i4 = zext i32 %i to i64
2493+
%i5 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %i4
2494+
%i6 = load volatile float, float addrspace(1)* %i5, align 4
2495+
%i5.1 = getelementptr inbounds float, float addrspace(1)* %arg.1, i64 %i4
2496+
; NOTE(review): this load uses %i5, not %i5.1, so %i5.1 above is dead -- confirm intent.
%i6.1 = load volatile float, float addrspace(1)* %i5, align 4
2497+
%i8 = add nuw nsw i32 %i, 1
2498+
%i9 = icmp eq i32 %i8, 256
2499+
br i1 %i9, label %bb2, label %bb3
2500+
}
2501+
23862502
; Metadata nodes, each a pair of i32 constants starting at 0; the instructions
; that reference them are outside this excerpt.
!0 = !{i32 0, i32 1073741824} ; (1 << 30)
23872503
!1 = !{i32 0, i32 1073741825} ; (1 << 30) + 1

0 commit comments

Comments
 (0)