@@ -2383,5 +2383,121 @@ define amdgpu_ps float @global_load_saddr_i8_offset_or_i64_imm_offset_4160(i8 ad
2383
2383
ret float %to.vgpr
2384
2384
}
2385
2385
2386
+ ; --------------------------------------------------------------------------------
2387
+ ; Full 64-bit scalar add: the loop address is formed with an s_add_u32/s_addc_u32 pair.
2388
+ ; --------------------------------------------------------------------------------
2389
+
2390
+ define amdgpu_ps void @global_addr_64bit_lsr_iv (float addrspace (1 )* inreg %arg ) { ; Counted loop: one volatile float load from %arg[i] per iteration, i = 0..255.
2391
+ ; GFX9-LABEL: global_addr_64bit_lsr_iv:
2392
+ ; GFX9: ; %bb.0: ; %bb
2393
+ ; GFX9-NEXT: s_mov_b64 s[0:1], 0
2394
+ ; GFX9-NEXT: BB128_1: ; %bb3
2395
+ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
2396
+ ; GFX9-NEXT: s_add_u32 s4, s2, s0
2397
+ ; GFX9-NEXT: s_addc_u32 s5, s3, s1
2398
+ ; GFX9-NEXT: v_mov_b32_e32 v0, s4
2399
+ ; GFX9-NEXT: v_mov_b32_e32 v1, s5
2400
+ ; GFX9-NEXT: global_load_dword v0, v[0:1], off glc
2401
+ ; GFX9-NEXT: s_waitcnt vmcnt(0)
2402
+ ; GFX9-NEXT: s_add_u32 s0, s0, 4
2403
+ ; GFX9-NEXT: s_addc_u32 s1, s1, 0
2404
+ ; GFX9-NEXT: s_cmpk_eq_i32 s0, 0x400
2405
+ ; GFX9-NEXT: s_cbranch_scc0 BB128_1
2406
+ ; GFX9-NEXT: ; %bb.2: ; %bb2
2407
+ ; GFX9-NEXT: s_endpgm
2408
+ ;
2409
+ ; GFX10-LABEL: global_addr_64bit_lsr_iv:
2410
+ ; GFX10: ; %bb.0: ; %bb
2411
+ ; GFX10-NEXT: s_mov_b64 s[0:1], 0
2412
+ ; GFX10-NEXT: BB128_1: ; %bb3
2413
+ ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
2414
+ ; GFX10-NEXT: s_add_u32 s4, s2, s0
2415
+ ; GFX10-NEXT: s_addc_u32 s5, s3, s1
2416
+ ; GFX10-NEXT: v_mov_b32_e32 v0, s4
2417
+ ; GFX10-NEXT: v_mov_b32_e32 v1, s5
2418
+ ; GFX10-NEXT: s_add_u32 s0, s0, 4
2419
+ ; GFX10-NEXT: s_addc_u32 s1, s1, 0
2420
+ ; GFX10-NEXT: s_cmpk_eq_i32 s0, 0x400
2421
+ ; GFX10-NEXT: global_load_dword v0, v[0:1], off glc dlc
2422
+ ; GFX10-NEXT: s_waitcnt vmcnt(0)
2423
+ ; GFX10-NEXT: s_cbranch_scc0 BB128_1
2424
+ ; GFX10-NEXT: ; %bb.2: ; %bb2
2425
+ ; GFX10-NEXT: s_endpgm
2426
+ bb:
2427
+ br label %bb3
2428
+
2429
+ bb2: ; preds = %bb3
2430
+ ret void
2431
+
2432
+ bb3: ; preds = %bb3, %bb
2433
+ %i = phi i32 [ 0 , %bb ], [ %i8 , %bb3 ] ; 32-bit induction variable: 0 on entry, %i8 on the back edge
2434
+ %i4 = zext i32 %i to i64 ; widen the index to i64 for the GEP
2435
+ %i5 = getelementptr inbounds float , float addrspace (1 )* %arg , i64 %i4 ; address of %arg[i]
2436
+ %i6 = load volatile float , float addrspace (1 )* %i5 , align 4 ; volatile load; %i6 has no other use in this function
2437
+ %i8 = add nuw nsw i32 %i , 1 ; next index
2438
+ %i9 = icmp eq i32 %i8 , 256 ; true once 256 iterations have run
2439
+ br i1 %i9 , label %bb2 , label %bb3 ; exit to %bb2 when done, otherwise loop
2440
+ }
2441
+
2442
+ define amdgpu_ps void @global_addr_64bit_lsr_iv_multiload (float addrspace (1 )* inreg %arg , float addrspace (1 )* inreg %arg.1 ) { ; Counted loop (i = 0..255) with two volatile float loads per iteration; see the NOTE(review) on the second load.
2443
+ ; GFX9-LABEL: global_addr_64bit_lsr_iv_multiload:
2444
+ ; GFX9: ; %bb.0: ; %bb
2445
+ ; GFX9-NEXT: s_mov_b64 s[0:1], 0
2446
+ ; GFX9-NEXT: BB129_1: ; %bb3
2447
+ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
2448
+ ; GFX9-NEXT: s_add_u32 s4, s2, s0
2449
+ ; GFX9-NEXT: s_addc_u32 s5, s3, s1
2450
+ ; GFX9-NEXT: v_mov_b32_e32 v0, s4
2451
+ ; GFX9-NEXT: v_mov_b32_e32 v1, s5
2452
+ ; GFX9-NEXT: global_load_dword v2, v[0:1], off glc
2453
+ ; GFX9-NEXT: s_waitcnt vmcnt(0)
2454
+ ; GFX9-NEXT: global_load_dword v2, v[0:1], off glc
2455
+ ; GFX9-NEXT: s_waitcnt vmcnt(0)
2456
+ ; GFX9-NEXT: s_add_u32 s0, s0, 4
2457
+ ; GFX9-NEXT: s_addc_u32 s1, s1, 0
2458
+ ; GFX9-NEXT: s_cmpk_eq_i32 s0, 0x400
2459
+ ; GFX9-NEXT: ; kill: killed $vgpr0_vgpr1
2460
+ ; GFX9-NEXT: s_cbranch_scc0 BB129_1
2461
+ ; GFX9-NEXT: ; %bb.2: ; %bb2
2462
+ ; GFX9-NEXT: s_endpgm
2463
+ ;
2464
+ ; GFX10-LABEL: global_addr_64bit_lsr_iv_multiload:
2465
+ ; GFX10: ; %bb.0: ; %bb
2466
+ ; GFX10-NEXT: s_mov_b64 s[0:1], 0
2467
+ ; GFX10-NEXT: BB129_1: ; %bb3
2468
+ ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
2469
+ ; GFX10-NEXT: s_add_u32 s4, s2, s0
2470
+ ; GFX10-NEXT: s_addc_u32 s5, s3, s1
2471
+ ; GFX10-NEXT: v_mov_b32_e32 v0, s4
2472
+ ; GFX10-NEXT: v_mov_b32_e32 v1, s5
2473
+ ; GFX10-NEXT: s_add_u32 s0, s0, 4
2474
+ ; GFX10-NEXT: s_addc_u32 s1, s1, 0
2475
+ ; GFX10-NEXT: s_cmpk_eq_i32 s0, 0x400
2476
+ ; GFX10-NEXT: ; kill: killed $vgpr0_vgpr1
2477
+ ; GFX10-NEXT: global_load_dword v2, v[0:1], off glc dlc
2478
+ ; GFX10-NEXT: s_waitcnt vmcnt(0)
2479
+ ; GFX10-NEXT: global_load_dword v2, v[0:1], off glc dlc
2480
+ ; GFX10-NEXT: s_waitcnt vmcnt(0)
2481
+ ; GFX10-NEXT: s_cbranch_scc0 BB129_1
2482
+ ; GFX10-NEXT: ; %bb.2: ; %bb2
2483
+ ; GFX10-NEXT: s_endpgm
2484
+ bb:
2485
+ br label %bb3
2486
+
2487
+ bb2: ; preds = %bb3
2488
+ ret void
2489
+
2490
+ bb3: ; preds = %bb3, %bb
2491
+ %i = phi i32 [ 0 , %bb ], [ %i8 , %bb3 ] ; 32-bit induction variable: 0 on entry, %i8 on the back edge
2492
+ %i4 = zext i32 %i to i64 ; widen the index to i64 for the GEPs
2493
+ %i5 = getelementptr inbounds float , float addrspace (1 )* %arg , i64 %i4 ; address of %arg[i]
2494
+ %i6 = load volatile float , float addrspace (1 )* %i5 , align 4 ; first volatile load, from %arg[i]
2495
+ %i5.1 = getelementptr inbounds float , float addrspace (1 )* %arg.1 , i64 %i4 ; address of %arg.1[i]; no instruction in this function uses %i5.1
2496
+ %i6.1 = load volatile float , float addrspace (1 )* %i5 , align 4 ; NOTE(review): reads %i5 again, not %i5.1 - possibly unintended; the checks above (both loads from v[0:1]) match the IR as written, so changing the operand means regenerating them
2497
+ %i8 = add nuw nsw i32 %i , 1 ; next index
2498
+ %i9 = icmp eq i32 %i8 , 256 ; true once 256 iterations have run
2499
+ br i1 %i9 , label %bb2 , label %bb3 ; exit to %bb2 when done, otherwise loop
2500
+ }
2501
+
2386
2502
!0 = !{i32 0 , i32 1073741824 } ; (1 << 30) = 0x40000000; presumably a !range-style bound pair - the user of this node is outside this view, confirm
2387
2503
!1 = !{i32 0 , i32 1073741825 } ; (1 << 30) + 1 = 0x40000001; same caveat as !0
0 commit comments