@@ -3633,37 +3633,38 @@ define amdgpu_ps float @short_exact_regions_2(<8 x i32> inreg %rsrc, <4 x i32> i
3633
3633
; GFX9-W64-NEXT: s_mov_b64 s[12:13], exec
3634
3634
; GFX9-W64-NEXT: s_wqm_b64 exec, exec
3635
3635
; GFX9-W64-NEXT: image_sample v[3:4], v0, s[0:7], s[8:11] dmask:0x3
3636
- ; GFX9-W64-NEXT: global_load_dword v5, v[1:2], off
3637
- ; GFX9-W64-NEXT: ; kill: killed $vgpr0
3638
- ; GFX9-W64-NEXT: ; kill: killed $vgpr1 killed $vgpr2
3636
+ ; GFX9-W64-NEXT: s_and_b64 exec, exec, s[12:13]
3637
+ ; GFX9-W64-NEXT: global_load_dword v0, v[1:2], off
3639
3638
; GFX9-W64-NEXT: s_waitcnt vmcnt(1)
3640
- ; GFX9-W64-NEXT: image_sample v0, v3, s[0:7], s[8:11] dmask:0x4
3639
+ ; GFX9-W64-NEXT: image_sample v5, v3, s[0:7], s[8:11] dmask:0x4
3640
+ ; GFX9-W64-NEXT: ; kill: killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6 killed $sgpr7
3641
+ ; GFX9-W64-NEXT: ; kill: killed $vgpr3
3642
+ ; GFX9-W64-NEXT: ; kill: killed $vgpr1 killed $vgpr2
3641
3643
; GFX9-W64-NEXT: s_waitcnt vmcnt(1)
3642
- ; GFX9-W64-NEXT: v_readfirstlane_b32 s0, v5
3644
+ ; GFX9-W64-NEXT: v_readfirstlane_b32 s0, v0
3643
3645
; GFX9-W64-NEXT: s_buffer_load_dword s0, s[8:11], s0 offset:0x0
3644
3646
; GFX9-W64-NEXT: s_waitcnt vmcnt(0)
3645
- ; GFX9-W64-NEXT: v_add_f32_e32 v0, v4, v0
3647
+ ; GFX9-W64-NEXT: v_add_f32_e32 v0, v4, v5
3646
3648
; GFX9-W64-NEXT: s_waitcnt lgkmcnt(0)
3647
3649
; GFX9-W64-NEXT: v_add_f32_e32 v0, s0, v0
3648
- ; GFX9-W64-NEXT: s_and_b64 exec, exec, s[12:13]
3649
3650
; GFX9-W64-NEXT: ; return to shader part epilog
3650
3651
;
3651
3652
; GFX10-W32-LABEL: short_exact_regions_2:
3652
3653
; GFX10-W32: ; %bb.0: ; %main_body
3653
3654
; GFX10-W32-NEXT: s_mov_b32 s12, exec_lo
3654
3655
; GFX10-W32-NEXT: s_wqm_b32 exec_lo, exec_lo
3655
3656
; GFX10-W32-NEXT: image_sample v[3:4], v0, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D
3657
+ ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s12
3656
3658
; GFX10-W32-NEXT: global_load_dword v0, v[1:2], off
3657
3659
; GFX10-W32-NEXT: s_waitcnt vmcnt(1)
3658
3660
; GFX10-W32-NEXT: image_sample v1, v3, s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_1D
3659
3661
; GFX10-W32-NEXT: s_waitcnt vmcnt(1)
3660
3662
; GFX10-W32-NEXT: v_readfirstlane_b32 s0, v0
3661
- ; GFX10-W32-NEXT: s_buffer_load_dword s0, s[8:11], s0 offset:0x0
3662
3663
; GFX10-W32-NEXT: s_waitcnt vmcnt(0)
3663
3664
; GFX10-W32-NEXT: v_add_f32_e32 v0, v4, v1
3665
+ ; GFX10-W32-NEXT: s_buffer_load_dword s0, s[8:11], s0 offset:0x0
3664
3666
; GFX10-W32-NEXT: s_waitcnt lgkmcnt(0)
3665
3667
; GFX10-W32-NEXT: v_add_f32_e32 v0, s0, v0
3666
- ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s12
3667
3668
; GFX10-W32-NEXT: ; return to shader part epilog
3668
3669
main_body:
3669
3670
%tex1 = call <4 x float > @llvm.amdgcn.image.sample.1d.v4f32.f32 (i32 15 , float %c , <8 x i32 > %rsrc , <4 x i32 > %sampler , i1 false , i32 0 , i32 0 ) #0
0 commit comments