Skip to content

Commit 52a3247

Browse files
[AMDGPU] Select (xor i1 (divergent trunc:i32 x), -1) -> cmp_neq x, 1 (#133698)
1 parent e377a5d commit 52a3247

11 files changed

+193 -196 lines changed

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3123,6 +3123,17 @@ def IMMBitSelConst : SDNodeXForm<imm, [{
31233123
// v_cmp_ne_u32_e64 $a, 0, $a
31243124

31253125
// Handle the VALU case.
3126+
def : GCNPat <
3127+
(i1 (xor (i1 (DivergentUnaryFrag<HasOneUseUnaryOp<trunc>> i32:$a)), -1)),
3128+
(V_CMP_NE_U32_e64 (V_AND_B32_e64 (i32 1), i32:$a), (i32 1))
3129+
>;
3130+
3131+
def : GCNPat <
3132+
(i1 (xor (i1 (DivergentUnaryFrag<HasOneUseUnaryOp<trunc>> i64:$a)), -1)),
3133+
(V_CMP_NE_U32_e64 (V_AND_B32_e64 (i32 1),
3134+
(i32 (EXTRACT_SUBREG $a, sub0))), (i32 1))
3135+
>;
3136+
31263137
def : GCNPat <
31273138
(i1 (DivergentUnaryFrag<trunc> (i32 (srl i32:$a, (i32 imm:$b))))),
31283139
(V_CMP_NE_U32_e64 (V_AND_B32_e64 (i32 (IMMBitSelConst $b)), $a),

llvm/test/CodeGen/AMDGPU/extract-subvector.ll

Lines changed: 21 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -6,12 +6,11 @@ define <2 x i16> @extract_2xi16(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %
66
; GCN: ; %bb.0:
77
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
88
; GCN-NEXT: v_and_b32_e32 v4, 1, v4
9-
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v4
10-
; GCN-NEXT: s_xor_b64 s[4:5], vcc, -1
9+
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v4
1110
; GCN-NEXT: ; implicit-def: $vgpr5
1211
; GCN-NEXT: ; implicit-def: $vgpr4
13-
; GCN-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
14-
; GCN-NEXT: s_xor_b64 s[4:5], exec, s[6:7]
12+
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
13+
; GCN-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
1514
; GCN-NEXT: s_cbranch_execz .LBB0_2
1615
; GCN-NEXT: ; %bb.1: ; %F
1716
; GCN-NEXT: s_mov_b32 s10, 0
@@ -101,11 +100,10 @@ define <2 x i64> @extract_2xi64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %
101100
; GCN: ; %bb.0:
102101
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
103102
; GCN-NEXT: v_and_b32_e32 v4, 1, v4
104-
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v4
105-
; GCN-NEXT: s_xor_b64 s[4:5], vcc, -1
103+
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v4
106104
; GCN-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19
107-
; GCN-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
108-
; GCN-NEXT: s_xor_b64 s[4:5], exec, s[6:7]
105+
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
106+
; GCN-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
109107
; GCN-NEXT: s_cbranch_execz .LBB1_2
110108
; GCN-NEXT: ; %bb.1: ; %F
111109
; GCN-NEXT: s_mov_b32 s10, 0
@@ -172,11 +170,10 @@ define <4 x i64> @extract_4xi64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %
172170
; GCN: ; %bb.0:
173171
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
174172
; GCN-NEXT: v_and_b32_e32 v4, 1, v4
175-
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v4
176-
; GCN-NEXT: s_xor_b64 s[4:5], vcc, -1
173+
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v4
177174
; GCN-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19
178-
; GCN-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
179-
; GCN-NEXT: s_xor_b64 s[4:5], exec, s[6:7]
175+
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
176+
; GCN-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
180177
; GCN-NEXT: s_cbranch_execz .LBB2_2
181178
; GCN-NEXT: ; %bb.1: ; %F
182179
; GCN-NEXT: s_mov_b32 s10, 0
@@ -249,11 +246,10 @@ define <8 x i64> @extract_8xi64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %
249246
; GCN: ; %bb.0:
250247
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
251248
; GCN-NEXT: v_and_b32_e32 v4, 1, v4
252-
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v4
253-
; GCN-NEXT: s_xor_b64 s[4:5], vcc, -1
249+
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v4
254250
; GCN-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35
255-
; GCN-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
256-
; GCN-NEXT: s_xor_b64 s[4:5], exec, s[6:7]
251+
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
252+
; GCN-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
257253
; GCN-NEXT: s_cbranch_execz .LBB3_2
258254
; GCN-NEXT: ; %bb.1: ; %F
259255
; GCN-NEXT: s_mov_b32 s10, 0
@@ -353,11 +349,10 @@ define <2 x double> @extract_2xf64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i
353349
; GCN: ; %bb.0:
354350
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
355351
; GCN-NEXT: v_and_b32_e32 v4, 1, v4
356-
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v4
357-
; GCN-NEXT: s_xor_b64 s[4:5], vcc, -1
352+
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v4
358353
; GCN-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19
359-
; GCN-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
360-
; GCN-NEXT: s_xor_b64 s[4:5], exec, s[6:7]
354+
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
355+
; GCN-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
361356
; GCN-NEXT: s_cbranch_execz .LBB4_2
362357
; GCN-NEXT: ; %bb.1: ; %F
363358
; GCN-NEXT: s_mov_b32 s10, 0
@@ -424,11 +419,10 @@ define <4 x double> @extract_4xf64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i
424419
; GCN: ; %bb.0:
425420
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
426421
; GCN-NEXT: v_and_b32_e32 v4, 1, v4
427-
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v4
428-
; GCN-NEXT: s_xor_b64 s[4:5], vcc, -1
422+
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v4
429423
; GCN-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19
430-
; GCN-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
431-
; GCN-NEXT: s_xor_b64 s[4:5], exec, s[6:7]
424+
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
425+
; GCN-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
432426
; GCN-NEXT: s_cbranch_execz .LBB5_2
433427
; GCN-NEXT: ; %bb.1: ; %F
434428
; GCN-NEXT: s_mov_b32 s10, 0
@@ -501,11 +495,10 @@ define <8 x double> @extract_8xf64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i
501495
; GCN: ; %bb.0:
502496
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
503497
; GCN-NEXT: v_and_b32_e32 v4, 1, v4
504-
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v4
505-
; GCN-NEXT: s_xor_b64 s[4:5], vcc, -1
498+
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v4
506499
; GCN-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35
507-
; GCN-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
508-
; GCN-NEXT: s_xor_b64 s[4:5], exec, s[6:7]
500+
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
501+
; GCN-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
509502
; GCN-NEXT: s_cbranch_execz .LBB6_2
510503
; GCN-NEXT: ; %bb.1: ; %F
511504
; GCN-NEXT: s_mov_b32 s10, 0

llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-phi-block-end-iterator-debugloc.ll

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,7 @@ define i32 @rocrand_regression(ptr addrspace(1) %arg, i32 %arg0, i1 %cmp7) {
66
; CHECK: ; %bb.0: ; %entry
77
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
88
; CHECK-NEXT: v_and_b32_e32 v0, 1, v3
9-
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
10-
; CHECK-NEXT: s_xor_b64 s[4:5], vcc, -1
9+
; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 1, v0
1110
; CHECK-NEXT: s_mov_b32 s8, 0
1211
; CHECK-NEXT: .LBB0_1: ; %do.body
1312
; CHECK-NEXT: ; =>This Loop Header: Depth=1

llvm/test/CodeGen/AMDGPU/function-args.ll

Lines changed: 6 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -102,9 +102,8 @@ define void @i1_arg_i1_use(i1 %arg) #0 {
102102
; CIGFX89: ; %bb.0: ; %bb
103103
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
104104
; CIGFX89-NEXT: v_and_b32_e32 v0, 1, v0
105-
; CIGFX89-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
106-
; CIGFX89-NEXT: s_xor_b64 s[6:7], vcc, -1
107-
; CIGFX89-NEXT: s_and_saveexec_b64 s[4:5], s[6:7]
105+
; CIGFX89-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0
106+
; CIGFX89-NEXT: s_and_saveexec_b64 s[4:5], vcc
108107
; CIGFX89-NEXT: s_cbranch_execz .LBB3_2
109108
; CIGFX89-NEXT: ; %bb.1: ; %bb1
110109
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
@@ -120,15 +119,14 @@ define void @i1_arg_i1_use(i1 %arg) #0 {
120119
; GFX11: ; %bb.0: ; %bb
121120
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
122121
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
123-
; GFX11-NEXT: s_mov_b32 s2, -1
124-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
125-
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
126-
; GFX11-NEXT: s_xor_b32 s1, vcc_lo, -1
127-
; GFX11-NEXT: s_and_saveexec_b32 s0, s1
122+
; GFX11-NEXT: s_mov_b32 s0, exec_lo
123+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
124+
; GFX11-NEXT: v_cmpx_ne_u32_e32 1, v0
128125
; GFX11-NEXT: s_cbranch_execz .LBB3_2
129126
; GFX11-NEXT: ; %bb.1: ; %bb1
130127
; GFX11-NEXT: v_mov_b32_e32 v0, 0
131128
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
129+
; GFX11-NEXT: s_mov_b32 s2, -1
132130
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 dlc
133131
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
134132
; GFX11-NEXT: .LBB3_2: ; %bb2

0 commit comments

Comments
 (0)