Skip to content

[AMDGPU] Fix computed kill mask #122736

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -960,7 +960,7 @@ MachineInstr *SIWholeQuadMode::lowerKillI1(MachineBasicBlock &MBB,
// so exec mask needs to be factored in.
TmpReg = MRI->createVirtualRegister(TRI->getBoolRC());
ComputeKilledMaskMI =
-BuildMI(MBB, MI, DL, TII->get(XorOpc), TmpReg).add(Op).addReg(Exec);
+BuildMI(MBB, MI, DL, TII->get(AndN2Opc), TmpReg).addReg(Exec).add(Op);
MaskUpdateMI = BuildMI(MBB, MI, DL, TII->get(AndN2Opc), LiveMaskReg)
.addReg(LiveMaskReg)
.addReg(TmpReg);
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ define amdgpu_ps void @dynamic_exact(float %arg0, float %arg1) {
; SI: ; %bb.0: ; %.entry
; SI-NEXT: v_cmp_le_f32_e64 s[0:1], 0, v1
; SI-NEXT: s_mov_b64 s[2:3], exec
-; SI-NEXT: s_xor_b64 s[0:1], s[0:1], exec
+; SI-NEXT: s_andn2_b64 s[0:1], exec, s[0:1]
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
; SI-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
; SI-NEXT: s_cbranch_scc0 .LBB1_2
Expand All @@ -96,7 +96,7 @@ define amdgpu_ps void @dynamic_exact(float %arg0, float %arg1) {
; GFX9: ; %bb.0: ; %.entry
; GFX9-NEXT: v_cmp_le_f32_e64 s[0:1], 0, v1
; GFX9-NEXT: s_mov_b64 s[2:3], exec
-; GFX9-NEXT: s_xor_b64 s[0:1], s[0:1], exec
+; GFX9-NEXT: s_andn2_b64 s[0:1], exec, s[0:1]
; GFX9-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
; GFX9-NEXT: s_cbranch_scc0 .LBB1_2
Expand All @@ -115,7 +115,7 @@ define amdgpu_ps void @dynamic_exact(float %arg0, float %arg1) {
; GFX10-32-NEXT: v_cmp_le_f32_e64 s0, 0, v1
; GFX10-32-NEXT: s_mov_b32 s1, exec_lo
; GFX10-32-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0, v0
-; GFX10-32-NEXT: s_xor_b32 s0, s0, exec_lo
+; GFX10-32-NEXT: s_andn2_b32 s0, exec_lo, s0
; GFX10-32-NEXT: s_andn2_b32 s1, s1, s0
; GFX10-32-NEXT: s_cbranch_scc0 .LBB1_2
; GFX10-32-NEXT: ; %bb.1: ; %.entry
Expand All @@ -133,7 +133,7 @@ define amdgpu_ps void @dynamic_exact(float %arg0, float %arg1) {
; GFX10-64-NEXT: v_cmp_le_f32_e64 s[0:1], 0, v1
; GFX10-64-NEXT: s_mov_b64 s[2:3], exec
; GFX10-64-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
-; GFX10-64-NEXT: s_xor_b64 s[0:1], s[0:1], exec
+; GFX10-64-NEXT: s_andn2_b64 s[0:1], exec, s[0:1]
; GFX10-64-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
; GFX10-64-NEXT: s_cbranch_scc0 .LBB1_2
; GFX10-64-NEXT: ; %bb.1: ; %.entry
Expand Down Expand Up @@ -556,7 +556,7 @@ define amdgpu_ps <4 x float> @wqm_demote_dynamic(<8 x i32> inreg %rsrc, <4 x i32
; SI-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
-; SI-NEXT: s_xor_b64 s[14:15], vcc, exec
+; SI-NEXT: s_andn2_b64 s[14:15], exec, vcc
; SI-NEXT: s_andn2_b64 s[12:13], s[12:13], s[14:15]
; SI-NEXT: s_cbranch_scc0 .LBB5_2
; SI-NEXT: ; %bb.1: ; %.entry
Expand All @@ -580,7 +580,7 @@ define amdgpu_ps <4 x float> @wqm_demote_dynamic(<8 x i32> inreg %rsrc, <4 x i32
; GFX9-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
-; GFX9-NEXT: s_xor_b64 s[14:15], vcc, exec
+; GFX9-NEXT: s_andn2_b64 s[14:15], exec, vcc
; GFX9-NEXT: s_andn2_b64 s[12:13], s[12:13], s[14:15]
; GFX9-NEXT: s_cbranch_scc0 .LBB5_2
; GFX9-NEXT: ; %bb.1: ; %.entry
Expand All @@ -604,7 +604,7 @@ define amdgpu_ps <4 x float> @wqm_demote_dynamic(<8 x i32> inreg %rsrc, <4 x i32
; GFX10-32-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-32-NEXT: s_waitcnt vmcnt(0)
; GFX10-32-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0, v0
-; GFX10-32-NEXT: s_xor_b32 s13, vcc_lo, exec_lo
+; GFX10-32-NEXT: s_andn2_b32 s13, exec_lo, vcc_lo
; GFX10-32-NEXT: s_andn2_b32 s12, s12, s13
; GFX10-32-NEXT: s_cbranch_scc0 .LBB5_2
; GFX10-32-NEXT: ; %bb.1: ; %.entry
Expand All @@ -628,7 +628,7 @@ define amdgpu_ps <4 x float> @wqm_demote_dynamic(<8 x i32> inreg %rsrc, <4 x i32
; GFX10-64-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-64-NEXT: s_waitcnt vmcnt(0)
; GFX10-64-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
-; GFX10-64-NEXT: s_xor_b64 s[14:15], vcc, exec
+; GFX10-64-NEXT: s_andn2_b64 s[14:15], exec, vcc
; GFX10-64-NEXT: s_andn2_b64 s[12:13], s[12:13], s[14:15]
; GFX10-64-NEXT: s_cbranch_scc0 .LBB5_2
; GFX10-64-NEXT: ; %bb.1: ; %.entry
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ define amdgpu_gs void @false() {
; GCN: v_cmp_lt_i32
; GCN: v_cmp_lt_i32
; GCN: s_or_b64 s[0:1]
-; GCN: s_xor_b64 s[0:1], s[0:1], exec
+; GCN: s_and{{n2|_not1}}_b64 s[0:1], exec, s[0:1]
; GCN: s_and{{n2|_not1}}_b64 s[2:3], s[2:3], s[0:1]
; GCN: s_and_b64 exec, exec, s[2:3]
define amdgpu_gs void @and(i32 %a, i32 %b, i32 %c, i32 %d) {
Expand Down Expand Up @@ -238,7 +238,7 @@ define amdgpu_ps void @fcmp_x2(float %a) #0 {
; GCN: v_cmp_neq_f32_e32 vcc, 0
; GCN-DAG: s_wqm_b64 s[2:3], vcc
; GCN-DAG: s_mov_b64 s[0:1], exec
-; GCN: s_xor_b64 s[2:3], s[2:3], exec
+; GCN: s_and{{n2|_not1}}_b64 s[2:3], exec, s[2:3]
; GCN: s_and{{n2|_not1}}_b64 s[0:1], s[0:1], s[2:3]
; GCN: s_and_b64 exec, exec, s[0:1]
define amdgpu_ps float @wqm(float %a) {
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ define amdgpu_ps void @dynamic_exact(float %arg0, float %arg1) {
; SI: ; %bb.0: ; %.entry
; SI-NEXT: v_cmp_le_f32_e64 s[0:1], 0, v1
; SI-NEXT: s_mov_b64 s[2:3], exec
-; SI-NEXT: s_xor_b64 s[0:1], s[0:1], exec
+; SI-NEXT: s_andn2_b64 s[0:1], exec, s[0:1]
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
; SI-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
; SI-NEXT: s_cbranch_scc0 .LBB1_2
Expand All @@ -96,7 +96,7 @@ define amdgpu_ps void @dynamic_exact(float %arg0, float %arg1) {
; GFX9: ; %bb.0: ; %.entry
; GFX9-NEXT: v_cmp_le_f32_e64 s[0:1], 0, v1
; GFX9-NEXT: s_mov_b64 s[2:3], exec
-; GFX9-NEXT: s_xor_b64 s[0:1], s[0:1], exec
+; GFX9-NEXT: s_andn2_b64 s[0:1], exec, s[0:1]
; GFX9-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
; GFX9-NEXT: s_cbranch_scc0 .LBB1_2
Expand All @@ -115,7 +115,7 @@ define amdgpu_ps void @dynamic_exact(float %arg0, float %arg1) {
; GFX10-32-NEXT: v_cmp_le_f32_e64 s0, 0, v1
; GFX10-32-NEXT: s_mov_b32 s1, exec_lo
; GFX10-32-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0, v0
-; GFX10-32-NEXT: s_xor_b32 s0, s0, exec_lo
+; GFX10-32-NEXT: s_andn2_b32 s0, exec_lo, s0
; GFX10-32-NEXT: s_andn2_b32 s1, s1, s0
; GFX10-32-NEXT: s_cbranch_scc0 .LBB1_2
; GFX10-32-NEXT: ; %bb.1: ; %.entry
Expand All @@ -133,7 +133,7 @@ define amdgpu_ps void @dynamic_exact(float %arg0, float %arg1) {
; GFX10-64-NEXT: v_cmp_le_f32_e64 s[0:1], 0, v1
; GFX10-64-NEXT: s_mov_b64 s[2:3], exec
; GFX10-64-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
-; GFX10-64-NEXT: s_xor_b64 s[0:1], s[0:1], exec
+; GFX10-64-NEXT: s_andn2_b64 s[0:1], exec, s[0:1]
; GFX10-64-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
; GFX10-64-NEXT: s_cbranch_scc0 .LBB1_2
; GFX10-64-NEXT: ; %bb.1: ; %.entry
Expand Down Expand Up @@ -557,7 +557,7 @@ define amdgpu_ps <4 x float> @wqm_demote_dynamic(<8 x i32> inreg %rsrc, <4 x i32
; SI-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
-; SI-NEXT: s_xor_b64 s[14:15], vcc, exec
+; SI-NEXT: s_andn2_b64 s[14:15], exec, vcc
; SI-NEXT: s_andn2_b64 s[12:13], s[12:13], s[14:15]
; SI-NEXT: s_cbranch_scc0 .LBB5_2
; SI-NEXT: ; %bb.1: ; %.entry
Expand All @@ -581,7 +581,7 @@ define amdgpu_ps <4 x float> @wqm_demote_dynamic(<8 x i32> inreg %rsrc, <4 x i32
; GFX9-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
-; GFX9-NEXT: s_xor_b64 s[14:15], vcc, exec
+; GFX9-NEXT: s_andn2_b64 s[14:15], exec, vcc
; GFX9-NEXT: s_andn2_b64 s[12:13], s[12:13], s[14:15]
; GFX9-NEXT: s_cbranch_scc0 .LBB5_2
; GFX9-NEXT: ; %bb.1: ; %.entry
Expand All @@ -605,7 +605,7 @@ define amdgpu_ps <4 x float> @wqm_demote_dynamic(<8 x i32> inreg %rsrc, <4 x i32
; GFX10-32-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
; GFX10-32-NEXT: s_waitcnt vmcnt(0)
; GFX10-32-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0, v0
-; GFX10-32-NEXT: s_xor_b32 s13, vcc_lo, exec_lo
+; GFX10-32-NEXT: s_andn2_b32 s13, exec_lo, vcc_lo
; GFX10-32-NEXT: s_andn2_b32 s12, s12, s13
; GFX10-32-NEXT: s_cbranch_scc0 .LBB5_2
; GFX10-32-NEXT: ; %bb.1: ; %.entry
Expand All @@ -629,7 +629,7 @@ define amdgpu_ps <4 x float> @wqm_demote_dynamic(<8 x i32> inreg %rsrc, <4 x i32
; GFX10-64-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
; GFX10-64-NEXT: s_waitcnt vmcnt(0)
; GFX10-64-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
-; GFX10-64-NEXT: s_xor_b64 s[14:15], vcc, exec
+; GFX10-64-NEXT: s_andn2_b64 s[14:15], exec, vcc
; GFX10-64-NEXT: s_andn2_b64 s[12:13], s[12:13], s[14:15]
; GFX10-64-NEXT: s_cbranch_scc0 .LBB5_2
; GFX10-64-NEXT: ; %bb.1: ; %.entry
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,12 @@ main_body:
;CHECK: v_cmp_eq_u32_e32 [[CMP:[^,]+]], v0, v1

;WAVE64: s_wqm_b64 [[WQM:[^,]+]], [[CMP]]
-;WAVE64: s_xor_b64 [[KILL:[^,]+]], [[WQM]], exec
+;WAVE64: s_andn2_b64 [[KILL:[^,]+]], exec, [[WQM]]
;WAVE64: s_andn2_b64 [[MASK:[^,]+]], [[EXEC:[^,]+]], [[KILL]]
;WAVE64: s_and_b64 exec, exec, [[MASK]]

;WAVE32: s_wqm_b32 [[WQM:[^,]+]], [[CMP]]
-;WAVE32: s_xor_b32 [[KILL:[^,]+]], [[WQM]], exec
+;WAVE32: s_and{{n2|_not1}}_b32 [[KILL:[^,]+]], exec_lo, [[WQM]]
;WAVE32: s_and{{n2|_not1}}_b32 [[MASK:[^,]+]], [[EXEC:[^,]+]], [[KILL]]
;WAVE32: s_and_b32 exec_lo, exec_lo, [[MASK]]

Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/si-annotate-cf-kill.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ define amdgpu_ps float @uniform_kill(float %a, i32 %b, float %c) {
; SI-NEXT: ; %bb.2: ; %endif1
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
; SI-NEXT: s_wqm_b64 s[4:5], s[2:3]
-; SI-NEXT: s_xor_b64 s[4:5], s[4:5], exec
+; SI-NEXT: s_andn2_b64 s[4:5], exec, s[4:5]
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], s[4:5]
; SI-NEXT: s_cbranch_scc0 .LBB0_6
; SI-NEXT: ; %bb.3: ; %endif1
Expand Down Expand Up @@ -59,7 +59,7 @@ define amdgpu_ps float @uniform_kill(float %a, i32 %b, float %c) {
; FLAT-NEXT: ; %bb.2: ; %endif1
; FLAT-NEXT: s_or_b64 exec, exec, s[4:5]
; FLAT-NEXT: s_wqm_b64 s[4:5], s[2:3]
-; FLAT-NEXT: s_xor_b64 s[4:5], s[4:5], exec
+; FLAT-NEXT: s_andn2_b64 s[4:5], exec, s[4:5]
; FLAT-NEXT: s_andn2_b64 s[0:1], s[0:1], s[4:5]
; FLAT-NEXT: s_cbranch_scc0 .LBB0_6
; FLAT-NEXT: ; %bb.3: ; %endif1
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
Original file line number Diff line number Diff line change
Expand Up @@ -761,7 +761,7 @@ define amdgpu_ps float @test_kill_control_flow_return(i32 inreg %arg) #0 {
; SI-NEXT: s_cmp_eq_u32 s0, 1
; SI-NEXT: s_cselect_b64 s[4:5], -1, 0
; SI-NEXT: s_mov_b64 s[2:3], exec
-; SI-NEXT: s_xor_b64 s[4:5], s[4:5], exec
+; SI-NEXT: s_andn2_b64 s[4:5], exec, s[4:5]
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], s[4:5]
; SI-NEXT: s_cbranch_scc0 .LBB9_4
; SI-NEXT: ; %bb.1: ; %entry
Expand Down Expand Up @@ -798,7 +798,7 @@ define amdgpu_ps float @test_kill_control_flow_return(i32 inreg %arg) #0 {
; GFX10-WAVE64-NEXT: s_cmp_eq_u32 s0, 1
; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec
; GFX10-WAVE64-NEXT: s_cselect_b64 s[4:5], -1, 0
-; GFX10-WAVE64-NEXT: s_xor_b64 s[4:5], s[4:5], exec
+; GFX10-WAVE64-NEXT: s_andn2_b64 s[4:5], exec, s[4:5]
; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], s[4:5]
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB9_4
; GFX10-WAVE64-NEXT: ; %bb.1: ; %entry
Expand Down Expand Up @@ -835,7 +835,7 @@ define amdgpu_ps float @test_kill_control_flow_return(i32 inreg %arg) #0 {
; GFX10-WAVE32-NEXT: s_cmp_eq_u32 s0, 1
; GFX10-WAVE32-NEXT: s_mov_b32 s1, exec_lo
; GFX10-WAVE32-NEXT: s_cselect_b32 s2, -1, 0
-; GFX10-WAVE32-NEXT: s_xor_b32 s2, s2, exec_lo
+; GFX10-WAVE32-NEXT: s_andn2_b32 s2, exec_lo, s2
; GFX10-WAVE32-NEXT: s_andn2_b32 s1, s1, s2
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB9_4
; GFX10-WAVE32-NEXT: ; %bb.1: ; %entry
Expand Down Expand Up @@ -873,7 +873,7 @@ define amdgpu_ps float @test_kill_control_flow_return(i32 inreg %arg) #0 {
; GFX11-NEXT: s_mov_b64 s[2:3], exec
; GFX11-NEXT: s_cselect_b64 s[4:5], -1, 0
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT: s_xor_b64 s[4:5], s[4:5], exec
+; GFX11-NEXT: s_and_not1_b64 s[4:5], exec, s[4:5]
; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], s[4:5]
; GFX11-NEXT: s_cbranch_scc0 .LBB9_4
; GFX11-NEXT: ; %bb.1: ; %entry
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/wave32.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1767,7 +1767,7 @@ define amdgpu_gs void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d
; GFX1032-NEXT: v_cmp_lt_i32_e64 s0, v2, v3
; GFX1032-NEXT: s_mov_b32 s1, exec_lo
; GFX1032-NEXT: s_or_b32 s0, vcc_lo, s0
-; GFX1032-NEXT: s_xor_b32 s0, s0, exec_lo
+; GFX1032-NEXT: s_andn2_b32 s0, exec_lo, s0
; GFX1032-NEXT: s_andn2_b32 s1, s1, s0
; GFX1032-NEXT: s_and_b32 exec_lo, exec_lo, s1
; GFX1032-NEXT: v_mov_b32_e32 v0, 0
Expand All @@ -1783,7 +1783,7 @@ define amdgpu_gs void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d
; GFX1064-NEXT: v_cmp_lt_i32_e64 s[0:1], v2, v3
; GFX1064-NEXT: s_mov_b64 s[2:3], exec
; GFX1064-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
-; GFX1064-NEXT: s_xor_b64 s[0:1], s[0:1], exec
+; GFX1064-NEXT: s_andn2_b64 s[0:1], exec, s[0:1]
; GFX1064-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
; GFX1064-NEXT: s_and_b64 exec, exec, s[2:3]
; GFX1064-NEXT: v_mov_b32_e32 v0, 0
Expand Down Expand Up @@ -2256,7 +2256,7 @@ define amdgpu_ps void @test_wqm_vote(float %a) {
; GFX1032-NEXT: s_mov_b32 s0, exec_lo
; GFX1032-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-NEXT: s_wqm_b32 s1, vcc_lo
-; GFX1032-NEXT: s_xor_b32 s1, s1, exec_lo
+; GFX1032-NEXT: s_andn2_b32 s1, exec_lo, s1
; GFX1032-NEXT: s_andn2_b32 s0, s0, s1
; GFX1032-NEXT: s_cbranch_scc0 .LBB44_2
; GFX1032-NEXT: ; %bb.1:
Expand All @@ -2274,7 +2274,7 @@ define amdgpu_ps void @test_wqm_vote(float %a) {
; GFX1064-NEXT: s_mov_b64 s[0:1], exec
; GFX1064-NEXT: v_mov_b32_e32 v0, 0
; GFX1064-NEXT: s_wqm_b64 s[2:3], vcc
-; GFX1064-NEXT: s_xor_b64 s[2:3], s[2:3], exec
+; GFX1064-NEXT: s_andn2_b64 s[2:3], exec, s[2:3]
; GFX1064-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3]
; GFX1064-NEXT: s_cbranch_scc0 .LBB44_2
; GFX1064-NEXT: ; %bb.1:
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/wqm.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2938,7 +2938,7 @@ define amdgpu_ps float @test_strict_wqm_within_wqm_with_kill(<8 x i32> inreg %rs
; GFX9-W64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
; GFX9-W64-NEXT: s_waitcnt vmcnt(0)
; GFX9-W64-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
-; GFX9-W64-NEXT: s_xor_b64 s[0:1], vcc, exec
+; GFX9-W64-NEXT: s_andn2_b64 s[0:1], exec, vcc
; GFX9-W64-NEXT: s_andn2_b64 s[12:13], s[12:13], s[0:1]
; GFX9-W64-NEXT: s_cbranch_scc0 .LBB51_2
; GFX9-W64-NEXT: ; %bb.1: ; %main_body
Expand Down Expand Up @@ -2973,7 +2973,7 @@ define amdgpu_ps float @test_strict_wqm_within_wqm_with_kill(<8 x i32> inreg %rs
; GFX10-W32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX10-W32-NEXT: s_waitcnt vmcnt(0)
; GFX10-W32-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
-; GFX10-W32-NEXT: s_xor_b32 s0, vcc_lo, exec_lo
+; GFX10-W32-NEXT: s_andn2_b32 s0, exec_lo, vcc_lo
; GFX10-W32-NEXT: s_andn2_b32 s12, s12, s0
; GFX10-W32-NEXT: s_cbranch_scc0 .LBB51_2
; GFX10-W32-NEXT: ; %bb.1: ; %main_body
Expand Down
Loading