Skip to content

Commit 885ed8a

Browse files
committed
Refine the condition for shortening an exact region so that shortening
is only required if the exact region will be ended and exited.
1 parent b452ca5 commit 885ed8a

File tree

2 files changed

+11
-10
lines changed

2 files changed

+11
-10
lines changed

llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1374,7 +1374,7 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, bool IsEntry) {
13741374
// exclude instructions with unexpected effects from them.
13751375
// FIXME: ideally we would branch over these when EXEC=0,
13761376
// but this requires updating implicit values, live intervals and CFG.
1377-
if (WQMToExact || ExactToWQM) {
1377+
if ((WQMToExact && (OutNeeds & StateWQM)) || ExactToWQM) {
13781378
for (MachineBasicBlock::iterator I = First; I != II; ++I) {
13791379
if (TII->hasUnwantedEffectsWhenEXECEmpty(*I)) {
13801380
PreferLast = WQMToExact;

llvm/test/CodeGen/AMDGPU/wqm.ll

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -3633,37 +3633,38 @@ define amdgpu_ps float @short_exact_regions_2(<8 x i32> inreg %rsrc, <4 x i32> i
36333633
; GFX9-W64-NEXT: s_mov_b64 s[12:13], exec
36343634
; GFX9-W64-NEXT: s_wqm_b64 exec, exec
36353635
; GFX9-W64-NEXT: image_sample v[3:4], v0, s[0:7], s[8:11] dmask:0x3
3636-
; GFX9-W64-NEXT: global_load_dword v5, v[1:2], off
3637-
; GFX9-W64-NEXT: ; kill: killed $vgpr0
3638-
; GFX9-W64-NEXT: ; kill: killed $vgpr1 killed $vgpr2
3636+
; GFX9-W64-NEXT: s_and_b64 exec, exec, s[12:13]
3637+
; GFX9-W64-NEXT: global_load_dword v0, v[1:2], off
36393638
; GFX9-W64-NEXT: s_waitcnt vmcnt(1)
3640-
; GFX9-W64-NEXT: image_sample v0, v3, s[0:7], s[8:11] dmask:0x4
3639+
; GFX9-W64-NEXT: image_sample v5, v3, s[0:7], s[8:11] dmask:0x4
3640+
; GFX9-W64-NEXT: ; kill: killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6 killed $sgpr7
3641+
; GFX9-W64-NEXT: ; kill: killed $vgpr3
3642+
; GFX9-W64-NEXT: ; kill: killed $vgpr1 killed $vgpr2
36413643
; GFX9-W64-NEXT: s_waitcnt vmcnt(1)
3642-
; GFX9-W64-NEXT: v_readfirstlane_b32 s0, v5
3644+
; GFX9-W64-NEXT: v_readfirstlane_b32 s0, v0
36433645
; GFX9-W64-NEXT: s_buffer_load_dword s0, s[8:11], s0 offset:0x0
36443646
; GFX9-W64-NEXT: s_waitcnt vmcnt(0)
3645-
; GFX9-W64-NEXT: v_add_f32_e32 v0, v4, v0
3647+
; GFX9-W64-NEXT: v_add_f32_e32 v0, v4, v5
36463648
; GFX9-W64-NEXT: s_waitcnt lgkmcnt(0)
36473649
; GFX9-W64-NEXT: v_add_f32_e32 v0, s0, v0
3648-
; GFX9-W64-NEXT: s_and_b64 exec, exec, s[12:13]
36493650
; GFX9-W64-NEXT: ; return to shader part epilog
36503651
;
36513652
; GFX10-W32-LABEL: short_exact_regions_2:
36523653
; GFX10-W32: ; %bb.0: ; %main_body
36533654
; GFX10-W32-NEXT: s_mov_b32 s12, exec_lo
36543655
; GFX10-W32-NEXT: s_wqm_b32 exec_lo, exec_lo
36553656
; GFX10-W32-NEXT: image_sample v[3:4], v0, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D
3657+
; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s12
36563658
; GFX10-W32-NEXT: global_load_dword v0, v[1:2], off
36573659
; GFX10-W32-NEXT: s_waitcnt vmcnt(1)
36583660
; GFX10-W32-NEXT: image_sample v1, v3, s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_1D
36593661
; GFX10-W32-NEXT: s_waitcnt vmcnt(1)
36603662
; GFX10-W32-NEXT: v_readfirstlane_b32 s0, v0
3661-
; GFX10-W32-NEXT: s_buffer_load_dword s0, s[8:11], s0 offset:0x0
36623663
; GFX10-W32-NEXT: s_waitcnt vmcnt(0)
36633664
; GFX10-W32-NEXT: v_add_f32_e32 v0, v4, v1
3665+
; GFX10-W32-NEXT: s_buffer_load_dword s0, s[8:11], s0 offset:0x0
36643666
; GFX10-W32-NEXT: s_waitcnt lgkmcnt(0)
36653667
; GFX10-W32-NEXT: v_add_f32_e32 v0, s0, v0
3666-
; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s12
36673668
; GFX10-W32-NEXT: ; return to shader part epilog
36683669
main_body:
36693670
%tex1 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0

0 commit comments

Comments
 (0)