@@ -89,8 +89,7 @@ define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_non_compare(i32 %v) {
89
89
; CHECK: ; %bb.0:
90
90
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
91
91
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
92
- ; CHECK-NEXT: s_cmp_eq_u32 vcc_lo, 0
93
- ; CHECK-NEXT: s_cbranch_scc1 .LBB7_2
92
+ ; CHECK-NEXT: s_cbranch_vccz .LBB7_2
94
93
; CHECK-NEXT: ; %bb.1: ; %true
95
94
; CHECK-NEXT: s_mov_b32 s0, 42
96
95
; CHECK-NEXT: s_branch .LBB7_3
@@ -112,9 +111,8 @@ define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_non_compare(i32 inreg %v) {
112
111
; CHECK-LABEL: branch_uniform_ballot_ne_zero_non_compare:
113
112
; CHECK: ; %bb.0:
114
113
; CHECK-NEXT: s_and_b32 s0, s0, 1
115
- ; CHECK-NEXT: v_cmp_ne_u32_e64 s0, s0, 0
116
- ; CHECK-NEXT: s_cmp_eq_u32 s0, 0
117
- ; CHECK-NEXT: s_cbranch_scc1 .LBB8_2
114
+ ; CHECK-NEXT: v_cmp_ne_u32_e64 vcc_lo, s0, 0
115
+ ; CHECK-NEXT: s_cbranch_vccz .LBB8_2
118
116
; CHECK-NEXT: ; %bb.1: ; %true
119
117
; CHECK-NEXT: s_mov_b32 s0, 42
120
118
; CHECK-NEXT: s_branch .LBB8_3
@@ -137,8 +135,7 @@ define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_non_compare(i32 %v) {
137
135
; CHECK: ; %bb.0:
138
136
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
139
137
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
140
- ; CHECK-NEXT: s_cmp_lg_u32 vcc_lo, 0
141
- ; CHECK-NEXT: s_cbranch_scc0 .LBB9_2
138
+ ; CHECK-NEXT: s_cbranch_vccz .LBB9_2
142
139
; CHECK-NEXT: ; %bb.1: ; %false
143
140
; CHECK-NEXT: s_mov_b32 s0, 33
144
141
; CHECK-NEXT: s_branch .LBB9_3
@@ -160,9 +157,8 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_non_compare(i32 inreg %v) {
160
157
; CHECK-LABEL: branch_uniform_ballot_eq_zero_non_compare:
161
158
; CHECK: ; %bb.0:
162
159
; CHECK-NEXT: s_and_b32 s0, s0, 1
163
- ; CHECK-NEXT: v_cmp_ne_u32_e64 s0, s0, 0
164
- ; CHECK-NEXT: s_cmp_lg_u32 s0, 0
165
- ; CHECK-NEXT: s_cbranch_scc0 .LBB10_2
160
+ ; CHECK-NEXT: v_cmp_ne_u32_e64 vcc_lo, s0, 0
161
+ ; CHECK-NEXT: s_cbranch_vccz .LBB10_2
166
162
; CHECK-NEXT: ; %bb.1: ; %false
167
163
; CHECK-NEXT: s_mov_b32 s0, 33
168
164
; CHECK-NEXT: s_branch .LBB10_3
@@ -184,8 +180,7 @@ define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_compare(i32 %v) {
184
180
; CHECK-LABEL: branch_divergent_ballot_ne_zero_compare:
185
181
; CHECK: ; %bb.0:
186
182
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
187
- ; CHECK-NEXT: s_cmp_eq_u32 vcc_lo, 0
188
- ; CHECK-NEXT: s_cbranch_scc1 .LBB11_2
183
+ ; CHECK-NEXT: s_cbranch_vccz .LBB11_2
189
184
; CHECK-NEXT: ; %bb.1: ; %true
190
185
; CHECK-NEXT: s_mov_b32 s0, 42
191
186
; CHECK-NEXT: s_branch .LBB11_3
@@ -206,9 +201,8 @@ false:
206
201
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_compare (i32 inreg %v ) {
207
202
; CHECK-LABEL: branch_uniform_ballot_ne_zero_compare:
208
203
; CHECK: ; %bb.0:
209
- ; CHECK-NEXT: v_cmp_lt_u32_e64 s0, s0, 12
210
- ; CHECK-NEXT: s_cmp_eq_u32 s0, 0
211
- ; CHECK-NEXT: s_cbranch_scc1 .LBB12_2
204
+ ; CHECK-NEXT: v_cmp_lt_u32_e64 vcc_lo, s0, 12
205
+ ; CHECK-NEXT: s_cbranch_vccz .LBB12_2
212
206
; CHECK-NEXT: ; %bb.1: ; %true
213
207
; CHECK-NEXT: s_mov_b32 s0, 42
214
208
; CHECK-NEXT: s_branch .LBB12_3
@@ -230,8 +224,7 @@ define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_compare(i32 %v) {
230
224
; CHECK-LABEL: branch_divergent_ballot_eq_zero_compare:
231
225
; CHECK: ; %bb.0:
232
226
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
233
- ; CHECK-NEXT: s_cmp_lg_u32 vcc_lo, 0
234
- ; CHECK-NEXT: s_cbranch_scc0 .LBB13_2
227
+ ; CHECK-NEXT: s_cbranch_vccz .LBB13_2
235
228
; CHECK-NEXT: ; %bb.1: ; %false
236
229
; CHECK-NEXT: s_mov_b32 s0, 33
237
230
; CHECK-NEXT: s_branch .LBB13_3
@@ -252,9 +245,8 @@ false:
252
245
define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare (i32 inreg %v ) {
253
246
; CHECK-LABEL: branch_uniform_ballot_eq_zero_compare:
254
247
; CHECK: ; %bb.0:
255
- ; CHECK-NEXT: v_cmp_lt_u32_e64 s0, s0, 12
256
- ; CHECK-NEXT: s_cmp_lg_u32 s0, 0
257
- ; CHECK-NEXT: s_cbranch_scc0 .LBB14_2
248
+ ; CHECK-NEXT: v_cmp_lt_u32_e64 vcc_lo, s0, 12
249
+ ; CHECK-NEXT: s_cbranch_vccz .LBB14_2
258
250
; CHECK-NEXT: ; %bb.1: ; %false
259
251
; CHECK-NEXT: s_mov_b32 s0, 33
260
252
; CHECK-NEXT: s_branch .LBB14_3
@@ -277,11 +269,8 @@ define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_and(i32 %v1, i32 %v2) {
277
269
; CHECK: ; %bb.0:
278
270
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
279
271
; CHECK-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
280
- ; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0
281
- ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
282
- ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
283
- ; CHECK-NEXT: s_cmp_eq_u32 vcc_lo, 0
284
- ; CHECK-NEXT: s_cbranch_scc1 .LBB15_2
272
+ ; CHECK-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
273
+ ; CHECK-NEXT: s_cbranch_vccz .LBB15_2
285
274
; CHECK-NEXT: ; %bb.1: ; %true
286
275
; CHECK-NEXT: s_mov_b32 s0, 42
287
276
; CHECK-NEXT: s_branch .LBB15_3
@@ -309,10 +298,8 @@ define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_and(i32 inreg %v1, i32 inreg
309
298
; CHECK-NEXT: s_cmp_gt_u32 s1, 34
310
299
; CHECK-NEXT: s_cselect_b32 s1, -1, 0
311
300
; CHECK-NEXT: s_and_b32 s0, s0, s1
312
- ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
313
- ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
314
- ; CHECK-NEXT: s_cmp_eq_u32 vcc_lo, 0
315
- ; CHECK-NEXT: s_cbranch_scc1 .LBB16_2
301
+ ; CHECK-NEXT: s_and_b32 s0, s0, exec_lo
302
+ ; CHECK-NEXT: s_cbranch_scc0 .LBB16_2
316
303
; CHECK-NEXT: ; %bb.1: ; %true
317
304
; CHECK-NEXT: s_mov_b32 s0, 42
318
305
; CHECK-NEXT: s_branch .LBB16_3
@@ -337,11 +324,8 @@ define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_and(i32 %v1, i32 %v2) {
337
324
; CHECK: ; %bb.0:
338
325
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
339
326
; CHECK-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
340
- ; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0
341
- ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
342
- ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
343
- ; CHECK-NEXT: s_cmp_lg_u32 vcc_lo, 0
344
- ; CHECK-NEXT: s_cbranch_scc0 .LBB17_2
327
+ ; CHECK-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
328
+ ; CHECK-NEXT: s_cbranch_vccz .LBB17_2
345
329
; CHECK-NEXT: ; %bb.1: ; %false
346
330
; CHECK-NEXT: s_mov_b32 s0, 33
347
331
; CHECK-NEXT: s_branch .LBB17_3
@@ -369,9 +353,7 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 inreg
369
353
; CHECK-NEXT: s_cmp_gt_u32 s1, 34
370
354
; CHECK-NEXT: s_cselect_b32 s1, -1, 0
371
355
; CHECK-NEXT: s_and_b32 s0, s0, s1
372
- ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
373
- ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
374
- ; CHECK-NEXT: s_cmp_lg_u32 vcc_lo, 0
356
+ ; CHECK-NEXT: s_and_b32 s0, s0, exec_lo
375
357
; CHECK-NEXT: s_cbranch_scc0 .LBB18_2
376
358
; CHECK-NEXT: ; %bb.1: ; %false
377
359
; CHECK-NEXT: s_mov_b32 s0, 33
@@ -391,3 +373,26 @@ true:
391
373
false:
392
374
ret i32 33
393
375
}
376
+
377
; Test: the inreg i32 argument %v is compared `ult 12`, the i1 result is fed to
; llvm.amdgcn.ballot.i32, and the branch condition is `icmp sgt i32 %ballot, 22`
; (a signed compare against a non-zero constant, not an eq/ne-zero compare).
; The expected output keeps an explicit scalar compare of the ballot value
; (s_cmp_lt_i32 s0, 23) followed by s_cbranch_scc1, whereas the eq/ne-zero
; ballot tests in this file expect a direct s_cbranch_vccz / s_cbranch_scc0.
; NOTE(review): presumably a negative test showing the ballot-branch folding
; is not applied to non-zero comparisons -- confirm against the commit message.
+ define amdgpu_cs i32 @branch_uniform_ballot_sgt_N_compare (i32 inreg %v ) {
378
+ ; CHECK-LABEL: branch_uniform_ballot_sgt_N_compare:
379
+ ; CHECK: ; %bb.0:
380
+ ; CHECK-NEXT: v_cmp_lt_u32_e64 s0, s0, 12
381
+ ; CHECK-NEXT: s_cmp_lt_i32 s0, 23
382
+ ; CHECK-NEXT: s_cbranch_scc1 .LBB19_2
383
+ ; CHECK-NEXT: ; %bb.1: ; %true
384
+ ; CHECK-NEXT: s_mov_b32 s0, 42
385
+ ; CHECK-NEXT: s_branch .LBB19_3
386
+ ; CHECK-NEXT: .LBB19_2: ; %false
387
+ ; CHECK-NEXT: s_mov_b32 s0, 33
388
+ ; CHECK-NEXT: s_branch .LBB19_3
389
+ ; CHECK-NEXT: .LBB19_3:
390
+ %c = icmp ult i32 %v , 12
391
+ %ballot = call i32 @llvm.amdgcn.ballot.i32 (i1 %c )
392
+ %bc = icmp sgt i32 %ballot , 22
393
+ br i1 %bc , label %true , label %false
394
+ true:
395
+ ret i32 42
396
+ false:
397
+ ret i32 33
398
+ }
0 commit comments