Skip to content

Commit 40fa7f5

Browse files
authored
[AMDGPU] Fix computed kill mask (#122736)
Replace S_XOR with S_ANDN2 when computing the kill mask in demote/kill lowering. The kill mask is now computed as exec & ~condition, which has the effect of ANDing the (negated) demote/kill condition with exec; this is needed for a proper live mask update. S_XOR is inadequate because it may return true for a lane with exec=0. This patch fixes an image corruption seen in a game. I think the issue went unnoticed because the demote/kill condition is often naturally dependent on exec, so ANDing with exec is usually not required.
1 parent 42595bd commit 40fa7f5

File tree

9 files changed

+33
-33
lines changed

9 files changed

+33
-33
lines changed

llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -960,7 +960,7 @@ MachineInstr *SIWholeQuadMode::lowerKillI1(MachineBasicBlock &MBB,
960960
// so exec mask needs to be factored in.
961961
TmpReg = MRI->createVirtualRegister(TRI->getBoolRC());
962962
ComputeKilledMaskMI =
963-
BuildMI(MBB, MI, DL, TII->get(XorOpc), TmpReg).add(Op).addReg(Exec);
963+
BuildMI(MBB, MI, DL, TII->get(AndN2Opc), TmpReg).addReg(Exec).add(Op);
964964
MaskUpdateMI = BuildMI(MBB, MI, DL, TII->get(AndN2Opc), LiveMaskReg)
965965
.addReg(LiveMaskReg)
966966
.addReg(TmpReg);

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ define amdgpu_ps void @dynamic_exact(float %arg0, float %arg1) {
7878
; SI: ; %bb.0: ; %.entry
7979
; SI-NEXT: v_cmp_le_f32_e64 s[0:1], 0, v1
8080
; SI-NEXT: s_mov_b64 s[2:3], exec
81-
; SI-NEXT: s_xor_b64 s[0:1], s[0:1], exec
81+
; SI-NEXT: s_andn2_b64 s[0:1], exec, s[0:1]
8282
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
8383
; SI-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
8484
; SI-NEXT: s_cbranch_scc0 .LBB1_2
@@ -96,7 +96,7 @@ define amdgpu_ps void @dynamic_exact(float %arg0, float %arg1) {
9696
; GFX9: ; %bb.0: ; %.entry
9797
; GFX9-NEXT: v_cmp_le_f32_e64 s[0:1], 0, v1
9898
; GFX9-NEXT: s_mov_b64 s[2:3], exec
99-
; GFX9-NEXT: s_xor_b64 s[0:1], s[0:1], exec
99+
; GFX9-NEXT: s_andn2_b64 s[0:1], exec, s[0:1]
100100
; GFX9-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
101101
; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
102102
; GFX9-NEXT: s_cbranch_scc0 .LBB1_2
@@ -115,7 +115,7 @@ define amdgpu_ps void @dynamic_exact(float %arg0, float %arg1) {
115115
; GFX10-32-NEXT: v_cmp_le_f32_e64 s0, 0, v1
116116
; GFX10-32-NEXT: s_mov_b32 s1, exec_lo
117117
; GFX10-32-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0, v0
118-
; GFX10-32-NEXT: s_xor_b32 s0, s0, exec_lo
118+
; GFX10-32-NEXT: s_andn2_b32 s0, exec_lo, s0
119119
; GFX10-32-NEXT: s_andn2_b32 s1, s1, s0
120120
; GFX10-32-NEXT: s_cbranch_scc0 .LBB1_2
121121
; GFX10-32-NEXT: ; %bb.1: ; %.entry
@@ -133,7 +133,7 @@ define amdgpu_ps void @dynamic_exact(float %arg0, float %arg1) {
133133
; GFX10-64-NEXT: v_cmp_le_f32_e64 s[0:1], 0, v1
134134
; GFX10-64-NEXT: s_mov_b64 s[2:3], exec
135135
; GFX10-64-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
136-
; GFX10-64-NEXT: s_xor_b64 s[0:1], s[0:1], exec
136+
; GFX10-64-NEXT: s_andn2_b64 s[0:1], exec, s[0:1]
137137
; GFX10-64-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
138138
; GFX10-64-NEXT: s_cbranch_scc0 .LBB1_2
139139
; GFX10-64-NEXT: ; %bb.1: ; %.entry
@@ -556,7 +556,7 @@ define amdgpu_ps <4 x float> @wqm_demote_dynamic(<8 x i32> inreg %rsrc, <4 x i32
556556
; SI-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
557557
; SI-NEXT: s_waitcnt vmcnt(0)
558558
; SI-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
559-
; SI-NEXT: s_xor_b64 s[14:15], vcc, exec
559+
; SI-NEXT: s_andn2_b64 s[14:15], exec, vcc
560560
; SI-NEXT: s_andn2_b64 s[12:13], s[12:13], s[14:15]
561561
; SI-NEXT: s_cbranch_scc0 .LBB5_2
562562
; SI-NEXT: ; %bb.1: ; %.entry
@@ -580,7 +580,7 @@ define amdgpu_ps <4 x float> @wqm_demote_dynamic(<8 x i32> inreg %rsrc, <4 x i32
580580
; GFX9-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
581581
; GFX9-NEXT: s_waitcnt vmcnt(0)
582582
; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
583-
; GFX9-NEXT: s_xor_b64 s[14:15], vcc, exec
583+
; GFX9-NEXT: s_andn2_b64 s[14:15], exec, vcc
584584
; GFX9-NEXT: s_andn2_b64 s[12:13], s[12:13], s[14:15]
585585
; GFX9-NEXT: s_cbranch_scc0 .LBB5_2
586586
; GFX9-NEXT: ; %bb.1: ; %.entry
@@ -604,7 +604,7 @@ define amdgpu_ps <4 x float> @wqm_demote_dynamic(<8 x i32> inreg %rsrc, <4 x i32
604604
; GFX10-32-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
605605
; GFX10-32-NEXT: s_waitcnt vmcnt(0)
606606
; GFX10-32-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0, v0
607-
; GFX10-32-NEXT: s_xor_b32 s13, vcc_lo, exec_lo
607+
; GFX10-32-NEXT: s_andn2_b32 s13, exec_lo, vcc_lo
608608
; GFX10-32-NEXT: s_andn2_b32 s12, s12, s13
609609
; GFX10-32-NEXT: s_cbranch_scc0 .LBB5_2
610610
; GFX10-32-NEXT: ; %bb.1: ; %.entry
@@ -628,7 +628,7 @@ define amdgpu_ps <4 x float> @wqm_demote_dynamic(<8 x i32> inreg %rsrc, <4 x i32
628628
; GFX10-64-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
629629
; GFX10-64-NEXT: s_waitcnt vmcnt(0)
630630
; GFX10-64-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
631-
; GFX10-64-NEXT: s_xor_b64 s[14:15], vcc, exec
631+
; GFX10-64-NEXT: s_andn2_b64 s[14:15], exec, vcc
632632
; GFX10-64-NEXT: s_andn2_b64 s[12:13], s[12:13], s[14:15]
633633
; GFX10-64-NEXT: s_cbranch_scc0 .LBB5_2
634634
; GFX10-64-NEXT: ; %bb.1: ; %.entry

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ define amdgpu_gs void @false() {
5555
; GCN: v_cmp_lt_i32
5656
; GCN: v_cmp_lt_i32
5757
; GCN: s_or_b64 s[0:1]
58-
; GCN: s_xor_b64 s[0:1], s[0:1], exec
58+
; GCN: s_and{{n2|_not1}}_b64 s[0:1], exec, s[0:1]
5959
; GCN: s_and{{n2|_not1}}_b64 s[2:3], s[2:3], s[0:1]
6060
; GCN: s_and_b64 exec, exec, s[2:3]
6161
define amdgpu_gs void @and(i32 %a, i32 %b, i32 %c, i32 %d) {
@@ -238,7 +238,7 @@ define amdgpu_ps void @fcmp_x2(float %a) #0 {
238238
; GCN: v_cmp_neq_f32_e32 vcc, 0
239239
; GCN-DAG: s_wqm_b64 s[2:3], vcc
240240
; GCN-DAG: s_mov_b64 s[0:1], exec
241-
; GCN: s_xor_b64 s[2:3], s[2:3], exec
241+
; GCN: s_and{{n2|_not1}}_b64 s[2:3], exec, s[2:3]
242242
; GCN: s_and{{n2|_not1}}_b64 s[0:1], s[0:1], s[2:3]
243243
; GCN: s_and_b64 exec, exec, s[0:1]
244244
define amdgpu_ps float @wqm(float %a) {

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ define amdgpu_ps void @dynamic_exact(float %arg0, float %arg1) {
7878
; SI: ; %bb.0: ; %.entry
7979
; SI-NEXT: v_cmp_le_f32_e64 s[0:1], 0, v1
8080
; SI-NEXT: s_mov_b64 s[2:3], exec
81-
; SI-NEXT: s_xor_b64 s[0:1], s[0:1], exec
81+
; SI-NEXT: s_andn2_b64 s[0:1], exec, s[0:1]
8282
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
8383
; SI-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
8484
; SI-NEXT: s_cbranch_scc0 .LBB1_2
@@ -96,7 +96,7 @@ define amdgpu_ps void @dynamic_exact(float %arg0, float %arg1) {
9696
; GFX9: ; %bb.0: ; %.entry
9797
; GFX9-NEXT: v_cmp_le_f32_e64 s[0:1], 0, v1
9898
; GFX9-NEXT: s_mov_b64 s[2:3], exec
99-
; GFX9-NEXT: s_xor_b64 s[0:1], s[0:1], exec
99+
; GFX9-NEXT: s_andn2_b64 s[0:1], exec, s[0:1]
100100
; GFX9-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
101101
; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
102102
; GFX9-NEXT: s_cbranch_scc0 .LBB1_2
@@ -115,7 +115,7 @@ define amdgpu_ps void @dynamic_exact(float %arg0, float %arg1) {
115115
; GFX10-32-NEXT: v_cmp_le_f32_e64 s0, 0, v1
116116
; GFX10-32-NEXT: s_mov_b32 s1, exec_lo
117117
; GFX10-32-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0, v0
118-
; GFX10-32-NEXT: s_xor_b32 s0, s0, exec_lo
118+
; GFX10-32-NEXT: s_andn2_b32 s0, exec_lo, s0
119119
; GFX10-32-NEXT: s_andn2_b32 s1, s1, s0
120120
; GFX10-32-NEXT: s_cbranch_scc0 .LBB1_2
121121
; GFX10-32-NEXT: ; %bb.1: ; %.entry
@@ -133,7 +133,7 @@ define amdgpu_ps void @dynamic_exact(float %arg0, float %arg1) {
133133
; GFX10-64-NEXT: v_cmp_le_f32_e64 s[0:1], 0, v1
134134
; GFX10-64-NEXT: s_mov_b64 s[2:3], exec
135135
; GFX10-64-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
136-
; GFX10-64-NEXT: s_xor_b64 s[0:1], s[0:1], exec
136+
; GFX10-64-NEXT: s_andn2_b64 s[0:1], exec, s[0:1]
137137
; GFX10-64-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
138138
; GFX10-64-NEXT: s_cbranch_scc0 .LBB1_2
139139
; GFX10-64-NEXT: ; %bb.1: ; %.entry
@@ -557,7 +557,7 @@ define amdgpu_ps <4 x float> @wqm_demote_dynamic(<8 x i32> inreg %rsrc, <4 x i32
557557
; SI-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
558558
; SI-NEXT: s_waitcnt vmcnt(0)
559559
; SI-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
560-
; SI-NEXT: s_xor_b64 s[14:15], vcc, exec
560+
; SI-NEXT: s_andn2_b64 s[14:15], exec, vcc
561561
; SI-NEXT: s_andn2_b64 s[12:13], s[12:13], s[14:15]
562562
; SI-NEXT: s_cbranch_scc0 .LBB5_2
563563
; SI-NEXT: ; %bb.1: ; %.entry
@@ -581,7 +581,7 @@ define amdgpu_ps <4 x float> @wqm_demote_dynamic(<8 x i32> inreg %rsrc, <4 x i32
581581
; GFX9-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
582582
; GFX9-NEXT: s_waitcnt vmcnt(0)
583583
; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
584-
; GFX9-NEXT: s_xor_b64 s[14:15], vcc, exec
584+
; GFX9-NEXT: s_andn2_b64 s[14:15], exec, vcc
585585
; GFX9-NEXT: s_andn2_b64 s[12:13], s[12:13], s[14:15]
586586
; GFX9-NEXT: s_cbranch_scc0 .LBB5_2
587587
; GFX9-NEXT: ; %bb.1: ; %.entry
@@ -605,7 +605,7 @@ define amdgpu_ps <4 x float> @wqm_demote_dynamic(<8 x i32> inreg %rsrc, <4 x i32
605605
; GFX10-32-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
606606
; GFX10-32-NEXT: s_waitcnt vmcnt(0)
607607
; GFX10-32-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0, v0
608-
; GFX10-32-NEXT: s_xor_b32 s13, vcc_lo, exec_lo
608+
; GFX10-32-NEXT: s_andn2_b32 s13, exec_lo, vcc_lo
609609
; GFX10-32-NEXT: s_andn2_b32 s12, s12, s13
610610
; GFX10-32-NEXT: s_cbranch_scc0 .LBB5_2
611611
; GFX10-32-NEXT: ; %bb.1: ; %.entry
@@ -629,7 +629,7 @@ define amdgpu_ps <4 x float> @wqm_demote_dynamic(<8 x i32> inreg %rsrc, <4 x i32
629629
; GFX10-64-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
630630
; GFX10-64-NEXT: s_waitcnt vmcnt(0)
631631
; GFX10-64-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
632-
; GFX10-64-NEXT: s_xor_b64 s[14:15], vcc, exec
632+
; GFX10-64-NEXT: s_andn2_b64 s[14:15], exec, vcc
633633
; GFX10-64-NEXT: s_andn2_b64 s[12:13], s[12:13], s[14:15]
634634
; GFX10-64-NEXT: s_cbranch_scc0 .LBB5_2
635635
; GFX10-64-NEXT: ; %bb.1: ; %.entry

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,12 +43,12 @@ main_body:
4343
;CHECK: v_cmp_eq_u32_e32 [[CMP:[^,]+]], v0, v1
4444

4545
;WAVE64: s_wqm_b64 [[WQM:[^,]+]], [[CMP]]
46-
;WAVE64: s_xor_b64 [[KILL:[^,]+]], [[WQM]], exec
46+
;WAVE64: s_andn2_b64 [[KILL:[^,]+]], exec, [[WQM]]
4747
;WAVE64: s_andn2_b64 [[MASK:[^,]+]], [[EXEC:[^,]+]], [[KILL]]
4848
;WAVE64: s_and_b64 exec, exec, [[MASK]]
4949

5050
;WAVE32: s_wqm_b32 [[WQM:[^,]+]], [[CMP]]
51-
;WAVE32: s_xor_b32 [[KILL:[^,]+]], [[WQM]], exec
51+
;WAVE32: s_and{{n2|_not1}}_b32 [[KILL:[^,]+]], exec_lo, [[WQM]]
5252
;WAVE32: s_and{{n2|_not1}}_b32 [[MASK:[^,]+]], [[EXEC:[^,]+]], [[KILL]]
5353
;WAVE32: s_and_b32 exec_lo, exec_lo, [[MASK]]
5454

llvm/test/CodeGen/AMDGPU/si-annotate-cf-kill.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ define amdgpu_ps float @uniform_kill(float %a, i32 %b, float %c) {
1717
; SI-NEXT: ; %bb.2: ; %endif1
1818
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
1919
; SI-NEXT: s_wqm_b64 s[4:5], s[2:3]
20-
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], exec
20+
; SI-NEXT: s_andn2_b64 s[4:5], exec, s[4:5]
2121
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], s[4:5]
2222
; SI-NEXT: s_cbranch_scc0 .LBB0_6
2323
; SI-NEXT: ; %bb.3: ; %endif1
@@ -59,7 +59,7 @@ define amdgpu_ps float @uniform_kill(float %a, i32 %b, float %c) {
5959
; FLAT-NEXT: ; %bb.2: ; %endif1
6060
; FLAT-NEXT: s_or_b64 exec, exec, s[4:5]
6161
; FLAT-NEXT: s_wqm_b64 s[4:5], s[2:3]
62-
; FLAT-NEXT: s_xor_b64 s[4:5], s[4:5], exec
62+
; FLAT-NEXT: s_andn2_b64 s[4:5], exec, s[4:5]
6363
; FLAT-NEXT: s_andn2_b64 s[0:1], s[0:1], s[4:5]
6464
; FLAT-NEXT: s_cbranch_scc0 .LBB0_6
6565
; FLAT-NEXT: ; %bb.3: ; %endif1

llvm/test/CodeGen/AMDGPU/skip-if-dead.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -761,7 +761,7 @@ define amdgpu_ps float @test_kill_control_flow_return(i32 inreg %arg) #0 {
761761
; SI-NEXT: s_cmp_eq_u32 s0, 1
762762
; SI-NEXT: s_cselect_b64 s[4:5], -1, 0
763763
; SI-NEXT: s_mov_b64 s[2:3], exec
764-
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], exec
764+
; SI-NEXT: s_andn2_b64 s[4:5], exec, s[4:5]
765765
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], s[4:5]
766766
; SI-NEXT: s_cbranch_scc0 .LBB9_4
767767
; SI-NEXT: ; %bb.1: ; %entry
@@ -798,7 +798,7 @@ define amdgpu_ps float @test_kill_control_flow_return(i32 inreg %arg) #0 {
798798
; GFX10-WAVE64-NEXT: s_cmp_eq_u32 s0, 1
799799
; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec
800800
; GFX10-WAVE64-NEXT: s_cselect_b64 s[4:5], -1, 0
801-
; GFX10-WAVE64-NEXT: s_xor_b64 s[4:5], s[4:5], exec
801+
; GFX10-WAVE64-NEXT: s_andn2_b64 s[4:5], exec, s[4:5]
802802
; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], s[4:5]
803803
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB9_4
804804
; GFX10-WAVE64-NEXT: ; %bb.1: ; %entry
@@ -835,7 +835,7 @@ define amdgpu_ps float @test_kill_control_flow_return(i32 inreg %arg) #0 {
835835
; GFX10-WAVE32-NEXT: s_cmp_eq_u32 s0, 1
836836
; GFX10-WAVE32-NEXT: s_mov_b32 s1, exec_lo
837837
; GFX10-WAVE32-NEXT: s_cselect_b32 s2, -1, 0
838-
; GFX10-WAVE32-NEXT: s_xor_b32 s2, s2, exec_lo
838+
; GFX10-WAVE32-NEXT: s_andn2_b32 s2, exec_lo, s2
839839
; GFX10-WAVE32-NEXT: s_andn2_b32 s1, s1, s2
840840
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB9_4
841841
; GFX10-WAVE32-NEXT: ; %bb.1: ; %entry
@@ -873,7 +873,7 @@ define amdgpu_ps float @test_kill_control_flow_return(i32 inreg %arg) #0 {
873873
; GFX11-NEXT: s_mov_b64 s[2:3], exec
874874
; GFX11-NEXT: s_cselect_b64 s[4:5], -1, 0
875875
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
876-
; GFX11-NEXT: s_xor_b64 s[4:5], s[4:5], exec
876+
; GFX11-NEXT: s_and_not1_b64 s[4:5], exec, s[4:5]
877877
; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], s[4:5]
878878
; GFX11-NEXT: s_cbranch_scc0 .LBB9_4
879879
; GFX11-NEXT: ; %bb.1: ; %entry

llvm/test/CodeGen/AMDGPU/wave32.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1767,7 +1767,7 @@ define amdgpu_gs void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d
17671767
; GFX1032-NEXT: v_cmp_lt_i32_e64 s0, v2, v3
17681768
; GFX1032-NEXT: s_mov_b32 s1, exec_lo
17691769
; GFX1032-NEXT: s_or_b32 s0, vcc_lo, s0
1770-
; GFX1032-NEXT: s_xor_b32 s0, s0, exec_lo
1770+
; GFX1032-NEXT: s_andn2_b32 s0, exec_lo, s0
17711771
; GFX1032-NEXT: s_andn2_b32 s1, s1, s0
17721772
; GFX1032-NEXT: s_and_b32 exec_lo, exec_lo, s1
17731773
; GFX1032-NEXT: v_mov_b32_e32 v0, 0
@@ -1783,7 +1783,7 @@ define amdgpu_gs void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d
17831783
; GFX1064-NEXT: v_cmp_lt_i32_e64 s[0:1], v2, v3
17841784
; GFX1064-NEXT: s_mov_b64 s[2:3], exec
17851785
; GFX1064-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
1786-
; GFX1064-NEXT: s_xor_b64 s[0:1], s[0:1], exec
1786+
; GFX1064-NEXT: s_andn2_b64 s[0:1], exec, s[0:1]
17871787
; GFX1064-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
17881788
; GFX1064-NEXT: s_and_b64 exec, exec, s[2:3]
17891789
; GFX1064-NEXT: v_mov_b32_e32 v0, 0
@@ -2256,7 +2256,7 @@ define amdgpu_ps void @test_wqm_vote(float %a) {
22562256
; GFX1032-NEXT: s_mov_b32 s0, exec_lo
22572257
; GFX1032-NEXT: v_mov_b32_e32 v0, 0
22582258
; GFX1032-NEXT: s_wqm_b32 s1, vcc_lo
2259-
; GFX1032-NEXT: s_xor_b32 s1, s1, exec_lo
2259+
; GFX1032-NEXT: s_andn2_b32 s1, exec_lo, s1
22602260
; GFX1032-NEXT: s_andn2_b32 s0, s0, s1
22612261
; GFX1032-NEXT: s_cbranch_scc0 .LBB44_2
22622262
; GFX1032-NEXT: ; %bb.1:
@@ -2274,7 +2274,7 @@ define amdgpu_ps void @test_wqm_vote(float %a) {
22742274
; GFX1064-NEXT: s_mov_b64 s[0:1], exec
22752275
; GFX1064-NEXT: v_mov_b32_e32 v0, 0
22762276
; GFX1064-NEXT: s_wqm_b64 s[2:3], vcc
2277-
; GFX1064-NEXT: s_xor_b64 s[2:3], s[2:3], exec
2277+
; GFX1064-NEXT: s_andn2_b64 s[2:3], exec, s[2:3]
22782278
; GFX1064-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3]
22792279
; GFX1064-NEXT: s_cbranch_scc0 .LBB44_2
22802280
; GFX1064-NEXT: ; %bb.1:

llvm/test/CodeGen/AMDGPU/wqm.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2938,7 +2938,7 @@ define amdgpu_ps float @test_strict_wqm_within_wqm_with_kill(<8 x i32> inreg %rs
29382938
; GFX9-W64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
29392939
; GFX9-W64-NEXT: s_waitcnt vmcnt(0)
29402940
; GFX9-W64-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
2941-
; GFX9-W64-NEXT: s_xor_b64 s[0:1], vcc, exec
2941+
; GFX9-W64-NEXT: s_andn2_b64 s[0:1], exec, vcc
29422942
; GFX9-W64-NEXT: s_andn2_b64 s[12:13], s[12:13], s[0:1]
29432943
; GFX9-W64-NEXT: s_cbranch_scc0 .LBB51_2
29442944
; GFX9-W64-NEXT: ; %bb.1: ; %main_body
@@ -2973,7 +2973,7 @@ define amdgpu_ps float @test_strict_wqm_within_wqm_with_kill(<8 x i32> inreg %rs
29732973
; GFX10-W32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
29742974
; GFX10-W32-NEXT: s_waitcnt vmcnt(0)
29752975
; GFX10-W32-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
2976-
; GFX10-W32-NEXT: s_xor_b32 s0, vcc_lo, exec_lo
2976+
; GFX10-W32-NEXT: s_andn2_b32 s0, exec_lo, vcc_lo
29772977
; GFX10-W32-NEXT: s_andn2_b32 s12, s12, s0
29782978
; GFX10-W32-NEXT: s_cbranch_scc0 .LBB51_2
29792979
; GFX10-W32-NEXT: ; %bb.1: ; %main_body

0 commit comments

Comments
 (0)