@@ -3551,13 +3551,13 @@ define amdgpu_ps float @short_exact_regions(<8 x i32> inreg %rsrc, <4 x i32> inr
3551
3551
; GFX9-W64-NEXT: s_and_saveexec_b64 s[14:15], vcc
3552
3552
; GFX9-W64-NEXT: s_cbranch_execz .LBB59_2
3553
3553
; GFX9-W64-NEXT: ; %bb.1: ; %if
3554
- ; GFX9-W64-NEXT: s_and_saveexec_b64 s[16:17], s[12:13]
3555
3554
; GFX9-W64-NEXT: global_load_dword v0, v[1:2], off
3556
3555
; GFX9-W64-NEXT: s_waitcnt vmcnt(0)
3557
- ; GFX9-W64-NEXT: v_readfirstlane_b32 s18 , v0
3558
- ; GFX9-W64-NEXT: s_buffer_load_dword s18 , s[8:11], s18 offset:0x0
3556
+ ; GFX9-W64-NEXT: v_readfirstlane_b32 s16 , v0
3557
+ ; GFX9-W64-NEXT: s_buffer_load_dword s16 , s[8:11], s16 offset:0x0
3559
3558
; GFX9-W64-NEXT: s_waitcnt lgkmcnt(0)
3560
- ; GFX9-W64-NEXT: v_mov_b32_e32 v0, s18
3559
+ ; GFX9-W64-NEXT: v_mov_b32_e32 v0, s16
3560
+ ; GFX9-W64-NEXT: s_and_saveexec_b64 s[16:17], s[12:13]
3561
3561
; GFX9-W64-NEXT: buffer_store_dwordx4 v[3:6], v0, s[0:3], 0 idxen
3562
3562
; GFX9-W64-NEXT: s_mov_b64 exec, s[16:17]
3563
3563
; GFX9-W64-NEXT: .LBB59_2: ; %endif
@@ -3581,13 +3581,13 @@ define amdgpu_ps float @short_exact_regions(<8 x i32> inreg %rsrc, <4 x i32> inr
3581
3581
; GFX10-W32-NEXT: v_cmpx_gt_u32_e32 16, v0
3582
3582
; GFX10-W32-NEXT: s_cbranch_execz .LBB59_2
3583
3583
; GFX10-W32-NEXT: ; %bb.1: ; %if
3584
- ; GFX10-W32-NEXT: s_and_saveexec_b32 s14, s12
3585
3584
; GFX10-W32-NEXT: global_load_dword v0, v[1:2], off
3586
3585
; GFX10-W32-NEXT: s_waitcnt vmcnt(0)
3587
- ; GFX10-W32-NEXT: v_readfirstlane_b32 s15 , v0
3588
- ; GFX10-W32-NEXT: s_buffer_load_dword s15 , s[8:11], s15 offset:0x0
3586
+ ; GFX10-W32-NEXT: v_readfirstlane_b32 s14 , v0
3587
+ ; GFX10-W32-NEXT: s_buffer_load_dword s14 , s[8:11], s14 offset:0x0
3589
3588
; GFX10-W32-NEXT: s_waitcnt lgkmcnt(0)
3590
- ; GFX10-W32-NEXT: v_mov_b32_e32 v0, s15
3589
+ ; GFX10-W32-NEXT: v_mov_b32_e32 v0, s14
3590
+ ; GFX10-W32-NEXT: s_and_saveexec_b32 s14, s12
3591
3591
; GFX10-W32-NEXT: buffer_store_dwordx4 v[3:6], v0, s[0:3], 0 idxen
3592
3592
; GFX10-W32-NEXT: s_mov_b32 exec_lo, s14
3593
3593
; GFX10-W32-NEXT: .LBB59_2: ; %endif
@@ -3633,38 +3633,37 @@ define amdgpu_ps float @short_exact_regions_2(<8 x i32> inreg %rsrc, <4 x i32> i
3633
3633
; GFX9-W64-NEXT: s_mov_b64 s[12:13], exec
3634
3634
; GFX9-W64-NEXT: s_wqm_b64 exec, exec
3635
3635
; GFX9-W64-NEXT: image_sample v[3:4], v0, s[0:7], s[8:11] dmask:0x3
3636
- ; GFX9-W64-NEXT: s_and_b64 exec, exec, s[12:13]
3637
- ; GFX9-W64-NEXT: global_load_dword v0, v[1:2], off
3638
- ; GFX9-W64-NEXT: s_waitcnt vmcnt(1)
3639
- ; GFX9-W64-NEXT: image_sample v5, v3, s[0:7], s[8:11] dmask:0x4
3640
- ; GFX9-W64-NEXT: ; kill: killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6 killed $sgpr7
3641
- ; GFX9-W64-NEXT: ; kill: killed $vgpr3
3636
+ ; GFX9-W64-NEXT: global_load_dword v5, v[1:2], off
3637
+ ; GFX9-W64-NEXT: ; kill: killed $vgpr0
3642
3638
; GFX9-W64-NEXT: ; kill: killed $vgpr1 killed $vgpr2
3643
3639
; GFX9-W64-NEXT: s_waitcnt vmcnt(1)
3644
- ; GFX9-W64-NEXT: v_readfirstlane_b32 s0, v0
3640
+ ; GFX9-W64-NEXT: image_sample v0, v3, s[0:7], s[8:11] dmask:0x4
3641
+ ; GFX9-W64-NEXT: s_waitcnt vmcnt(1)
3642
+ ; GFX9-W64-NEXT: v_readfirstlane_b32 s0, v5
3645
3643
; GFX9-W64-NEXT: s_buffer_load_dword s0, s[8:11], s0 offset:0x0
3646
3644
; GFX9-W64-NEXT: s_waitcnt vmcnt(0)
3647
- ; GFX9-W64-NEXT: v_add_f32_e32 v0, v4, v5
3645
+ ; GFX9-W64-NEXT: v_add_f32_e32 v0, v4, v0
3648
3646
; GFX9-W64-NEXT: s_waitcnt lgkmcnt(0)
3649
3647
; GFX9-W64-NEXT: v_add_f32_e32 v0, s0, v0
3648
+ ; GFX9-W64-NEXT: s_and_b64 exec, exec, s[12:13]
3650
3649
; GFX9-W64-NEXT: ; return to shader part epilog
3651
3650
;
3652
3651
; GFX10-W32-LABEL: short_exact_regions_2:
3653
3652
; GFX10-W32: ; %bb.0: ; %main_body
3654
3653
; GFX10-W32-NEXT: s_mov_b32 s12, exec_lo
3655
3654
; GFX10-W32-NEXT: s_wqm_b32 exec_lo, exec_lo
3656
3655
; GFX10-W32-NEXT: image_sample v[3:4], v0, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D
3657
- ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s12
3658
3656
; GFX10-W32-NEXT: global_load_dword v0, v[1:2], off
3659
3657
; GFX10-W32-NEXT: s_waitcnt vmcnt(1)
3660
3658
; GFX10-W32-NEXT: image_sample v1, v3, s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_1D
3661
3659
; GFX10-W32-NEXT: s_waitcnt vmcnt(1)
3662
3660
; GFX10-W32-NEXT: v_readfirstlane_b32 s0, v0
3661
+ ; GFX10-W32-NEXT: s_buffer_load_dword s0, s[8:11], s0 offset:0x0
3663
3662
; GFX10-W32-NEXT: s_waitcnt vmcnt(0)
3664
3663
; GFX10-W32-NEXT: v_add_f32_e32 v0, v4, v1
3665
- ; GFX10-W32-NEXT: s_buffer_load_dword s0, s[8:11], s0 offset:0x0
3666
3664
; GFX10-W32-NEXT: s_waitcnt lgkmcnt(0)
3667
3665
; GFX10-W32-NEXT: v_add_f32_e32 v0, s0, v0
3666
+ ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s12
3668
3667
; GFX10-W32-NEXT: ; return to shader part epilog
3669
3668
main_body:
3670
3669
%tex1 = call <4 x float > @llvm.amdgcn.image.sample.1d.v4f32.f32 (i32 15 , float %c , <8 x i32 > %rsrc , <4 x i32 > %sampler , i1 false , i32 0 , i32 0 ) #0
0 commit comments