3
3
; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32,-wavefrontsize64 -global-isel -verify-machineinstrs < %s | FileCheck %s
4
4
5
5
declare i32 @llvm.amdgcn.ballot.i32 (i1 )
6
- declare i64 @llvm.amdgcn.ballot.i64 (i1 )
7
6
declare i32 @llvm.ctpop.i32 (i32 )
8
7
9
8
; Test ballot(0)
@@ -204,30 +203,6 @@ false:
204
203
ret i32 33
205
204
}
206
205
207
- define amdgpu_cs i32 @branch_divergent_ballot64_ne_zero_compare (i32 %v ) {
208
- ; CHECK-LABEL: branch_divergent_ballot64_ne_zero_compare:
209
- ; CHECK: ; %bb.0:
210
- ; CHECK-NEXT: v_cmp_gt_u32_e64 s0, 12, v0
211
- ; CHECK-NEXT: s_mov_b32 s1, 0
212
- ; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 0
213
- ; CHECK-NEXT: s_cbranch_scc1 .LBB12_2
214
- ; CHECK-NEXT: ; %bb.1: ; %true
215
- ; CHECK-NEXT: s_mov_b32 s0, 42
216
- ; CHECK-NEXT: s_branch .LBB12_3
217
- ; CHECK-NEXT: .LBB12_2: ; %false
218
- ; CHECK-NEXT: s_mov_b32 s0, 33
219
- ; CHECK-NEXT: s_branch .LBB12_3
220
- ; CHECK-NEXT: .LBB12_3:
221
- %c = icmp ult i32 %v , 12
222
- %ballot = call i64 @llvm.amdgcn.ballot.i64 (i1 %c )
223
- %ballot_ne_zero = icmp ne i64 %ballot , 0
224
- br i1 %ballot_ne_zero , label %true , label %false
225
- true:
226
- ret i32 42
227
- false:
228
- ret i32 33
229
- }
230
-
231
206
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_compare (i32 inreg %v ) {
232
207
; CHECK-LABEL: branch_uniform_ballot_ne_zero_compare:
233
208
; CHECK: ; %bb.0:
@@ -236,14 +211,14 @@ define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_compare(i32 inreg %v) {
236
211
; CHECK-NEXT: s_and_b32 s0, 1, s0
237
212
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
238
213
; CHECK-NEXT: s_cmp_eq_u32 s0, 0
239
- ; CHECK-NEXT: s_cbranch_scc1 .LBB13_2
214
+ ; CHECK-NEXT: s_cbranch_scc1 .LBB12_2
240
215
; CHECK-NEXT: ; %bb.1: ; %true
241
216
; CHECK-NEXT: s_mov_b32 s0, 42
242
- ; CHECK-NEXT: s_branch .LBB13_3
243
- ; CHECK-NEXT: .LBB13_2 : ; %false
217
+ ; CHECK-NEXT: s_branch .LBB12_3
218
+ ; CHECK-NEXT: .LBB12_2 : ; %false
244
219
; CHECK-NEXT: s_mov_b32 s0, 33
245
- ; CHECK-NEXT: s_branch .LBB13_3
246
- ; CHECK-NEXT: .LBB13_3 :
220
+ ; CHECK-NEXT: s_branch .LBB12_3
221
+ ; CHECK-NEXT: .LBB12_3 :
247
222
%c = icmp ult i32 %v , 12
248
223
%ballot = call i32 @llvm.amdgcn.ballot.i32 (i1 %c )
249
224
%ballot_ne_zero = icmp ne i32 %ballot , 0
@@ -259,14 +234,14 @@ define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_compare(i32 %v) {
259
234
; CHECK: ; %bb.0:
260
235
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
261
236
; CHECK-NEXT: s_cmp_lg_u32 vcc_lo, 0
262
- ; CHECK-NEXT: s_cbranch_scc0 .LBB14_2
237
+ ; CHECK-NEXT: s_cbranch_scc0 .LBB13_2
263
238
; CHECK-NEXT: ; %bb.1: ; %false
264
239
; CHECK-NEXT: s_mov_b32 s0, 33
265
- ; CHECK-NEXT: s_branch .LBB14_3
266
- ; CHECK-NEXT: .LBB14_2 : ; %true
240
+ ; CHECK-NEXT: s_branch .LBB13_3
241
+ ; CHECK-NEXT: .LBB13_2 : ; %true
267
242
; CHECK-NEXT: s_mov_b32 s0, 42
268
- ; CHECK-NEXT: s_branch .LBB14_3
269
- ; CHECK-NEXT: .LBB14_3 :
243
+ ; CHECK-NEXT: s_branch .LBB13_3
244
+ ; CHECK-NEXT: .LBB13_3 :
270
245
%c = icmp ult i32 %v , 12
271
246
%ballot = call i32 @llvm.amdgcn.ballot.i32 (i1 %c )
272
247
%ballot_eq_zero = icmp eq i32 %ballot , 0
@@ -285,14 +260,14 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare(i32 inreg %v) {
285
260
; CHECK-NEXT: s_and_b32 s0, 1, s0
286
261
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
287
262
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
288
- ; CHECK-NEXT: s_cbranch_scc0 .LBB15_2
263
+ ; CHECK-NEXT: s_cbranch_scc0 .LBB14_2
289
264
; CHECK-NEXT: ; %bb.1: ; %false
290
265
; CHECK-NEXT: s_mov_b32 s0, 33
291
- ; CHECK-NEXT: s_branch .LBB15_3
292
- ; CHECK-NEXT: .LBB15_2 : ; %true
266
+ ; CHECK-NEXT: s_branch .LBB14_3
267
+ ; CHECK-NEXT: .LBB14_2 : ; %true
293
268
; CHECK-NEXT: s_mov_b32 s0, 42
294
- ; CHECK-NEXT: s_branch .LBB15_3
295
- ; CHECK-NEXT: .LBB15_3 :
269
+ ; CHECK-NEXT: s_branch .LBB14_3
270
+ ; CHECK-NEXT: .LBB14_3 :
296
271
%c = icmp ult i32 %v , 12
297
272
%ballot = call i32 @llvm.amdgcn.ballot.i32 (i1 %c )
298
273
%ballot_eq_zero = icmp eq i32 %ballot , 0
@@ -310,14 +285,14 @@ define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_and(i32 %v1, i32 %v2) {
310
285
; CHECK-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
311
286
; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0
312
287
; CHECK-NEXT: s_cmp_eq_u32 s0, 0
313
- ; CHECK-NEXT: s_cbranch_scc1 .LBB16_2
288
+ ; CHECK-NEXT: s_cbranch_scc1 .LBB15_2
314
289
; CHECK-NEXT: ; %bb.1: ; %true
315
290
; CHECK-NEXT: s_mov_b32 s0, 42
316
- ; CHECK-NEXT: s_branch .LBB16_3
317
- ; CHECK-NEXT: .LBB16_2 : ; %false
291
+ ; CHECK-NEXT: s_branch .LBB15_3
292
+ ; CHECK-NEXT: .LBB15_2 : ; %false
318
293
; CHECK-NEXT: s_mov_b32 s0, 33
319
- ; CHECK-NEXT: s_branch .LBB16_3
320
- ; CHECK-NEXT: .LBB16_3 :
294
+ ; CHECK-NEXT: s_branch .LBB15_3
295
+ ; CHECK-NEXT: .LBB15_3 :
321
296
%v1c = icmp ult i32 %v1 , 12
322
297
%v2c = icmp ugt i32 %v2 , 34
323
298
%c = and i1 %v1c , %v2c
@@ -330,34 +305,6 @@ false:
330
305
ret i32 33
331
306
}
332
307
333
- define amdgpu_cs i32 @branch_divergent_ballot64_ne_zero_and (i32 %v1 , i32 %v2 ) {
334
- ; CHECK-LABEL: branch_divergent_ballot64_ne_zero_and:
335
- ; CHECK: ; %bb.0:
336
- ; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
337
- ; CHECK-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
338
- ; CHECK-NEXT: s_mov_b32 s1, 0
339
- ; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0
340
- ; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 0
341
- ; CHECK-NEXT: s_cbranch_scc1 .LBB17_2
342
- ; CHECK-NEXT: ; %bb.1: ; %true
343
- ; CHECK-NEXT: s_mov_b32 s0, 42
344
- ; CHECK-NEXT: s_branch .LBB17_3
345
- ; CHECK-NEXT: .LBB17_2: ; %false
346
- ; CHECK-NEXT: s_mov_b32 s0, 33
347
- ; CHECK-NEXT: s_branch .LBB17_3
348
- ; CHECK-NEXT: .LBB17_3:
349
- %v1c = icmp ult i32 %v1 , 12
350
- %v2c = icmp ugt i32 %v2 , 34
351
- %c = and i1 %v1c , %v2c
352
- %ballot = call i64 @llvm.amdgcn.ballot.i64 (i1 %c )
353
- %ballot_ne_zero = icmp ne i64 %ballot , 0
354
- br i1 %ballot_ne_zero , label %true , label %false
355
- true:
356
- ret i32 42
357
- false:
358
- ret i32 33
359
- }
360
-
361
308
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_and (i32 inreg %v1 , i32 inreg %v2 ) {
362
309
; CHECK-LABEL: branch_uniform_ballot_ne_zero_and:
363
310
; CHECK: ; %bb.0:
@@ -369,14 +316,14 @@ define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_and(i32 inreg %v1, i32 inreg
369
316
; CHECK-NEXT: s_and_b32 s0, 1, s0
370
317
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
371
318
; CHECK-NEXT: s_cmp_eq_u32 s0, 0
372
- ; CHECK-NEXT: s_cbranch_scc1 .LBB18_2
319
+ ; CHECK-NEXT: s_cbranch_scc1 .LBB16_2
373
320
; CHECK-NEXT: ; %bb.1: ; %true
374
321
; CHECK-NEXT: s_mov_b32 s0, 42
375
- ; CHECK-NEXT: s_branch .LBB18_3
376
- ; CHECK-NEXT: .LBB18_2 : ; %false
322
+ ; CHECK-NEXT: s_branch .LBB16_3
323
+ ; CHECK-NEXT: .LBB16_2 : ; %false
377
324
; CHECK-NEXT: s_mov_b32 s0, 33
378
- ; CHECK-NEXT: s_branch .LBB18_3
379
- ; CHECK-NEXT: .LBB18_3 :
325
+ ; CHECK-NEXT: s_branch .LBB16_3
326
+ ; CHECK-NEXT: .LBB16_3 :
380
327
%v1c = icmp ult i32 %v1 , 12
381
328
%v2c = icmp ugt i32 %v2 , 34
382
329
%c = and i1 %v1c , %v2c
@@ -396,14 +343,14 @@ define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_and(i32 %v1, i32 %v2) {
396
343
; CHECK-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
397
344
; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0
398
345
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
399
- ; CHECK-NEXT: s_cbranch_scc0 .LBB19_2
346
+ ; CHECK-NEXT: s_cbranch_scc0 .LBB17_2
400
347
; CHECK-NEXT: ; %bb.1: ; %false
401
348
; CHECK-NEXT: s_mov_b32 s0, 33
402
- ; CHECK-NEXT: s_branch .LBB19_3
403
- ; CHECK-NEXT: .LBB19_2 : ; %true
349
+ ; CHECK-NEXT: s_branch .LBB17_3
350
+ ; CHECK-NEXT: .LBB17_2 : ; %true
404
351
; CHECK-NEXT: s_mov_b32 s0, 42
405
- ; CHECK-NEXT: s_branch .LBB19_3
406
- ; CHECK-NEXT: .LBB19_3 :
352
+ ; CHECK-NEXT: s_branch .LBB17_3
353
+ ; CHECK-NEXT: .LBB17_3 :
407
354
%v1c = icmp ult i32 %v1 , 12
408
355
%v2c = icmp ugt i32 %v2 , 34
409
356
%c = and i1 %v1c , %v2c
@@ -427,14 +374,14 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 inreg
427
374
; CHECK-NEXT: s_and_b32 s0, 1, s0
428
375
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
429
376
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
430
- ; CHECK-NEXT: s_cbranch_scc0 .LBB20_2
377
+ ; CHECK-NEXT: s_cbranch_scc0 .LBB18_2
431
378
; CHECK-NEXT: ; %bb.1: ; %false
432
379
; CHECK-NEXT: s_mov_b32 s0, 33
433
- ; CHECK-NEXT: s_branch .LBB20_3
434
- ; CHECK-NEXT: .LBB20_2 : ; %true
380
+ ; CHECK-NEXT: s_branch .LBB18_3
381
+ ; CHECK-NEXT: .LBB18_2 : ; %true
435
382
; CHECK-NEXT: s_mov_b32 s0, 42
436
- ; CHECK-NEXT: s_branch .LBB20_3
437
- ; CHECK-NEXT: .LBB20_3 :
383
+ ; CHECK-NEXT: s_branch .LBB18_3
384
+ ; CHECK-NEXT: .LBB18_3 :
438
385
%v1c = icmp ult i32 %v1 , 12
439
386
%v2c = icmp ugt i32 %v2 , 34
440
387
%c = and i1 %v1c , %v2c
@@ -455,14 +402,14 @@ define amdgpu_cs i32 @branch_uniform_ballot_sgt_N_compare(i32 inreg %v) {
455
402
; CHECK-NEXT: s_and_b32 s0, 1, s0
456
403
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
457
404
; CHECK-NEXT: s_cmp_le_i32 s0, 22
458
- ; CHECK-NEXT: s_cbranch_scc1 .LBB21_2
405
+ ; CHECK-NEXT: s_cbranch_scc1 .LBB19_2
459
406
; CHECK-NEXT: ; %bb.1: ; %true
460
407
; CHECK-NEXT: s_mov_b32 s0, 42
461
- ; CHECK-NEXT: s_branch .LBB21_3
462
- ; CHECK-NEXT: .LBB21_2 : ; %false
408
+ ; CHECK-NEXT: s_branch .LBB19_3
409
+ ; CHECK-NEXT: .LBB19_2 : ; %false
463
410
; CHECK-NEXT: s_mov_b32 s0, 33
464
- ; CHECK-NEXT: s_branch .LBB21_3
465
- ; CHECK-NEXT: .LBB21_3 :
411
+ ; CHECK-NEXT: s_branch .LBB19_3
412
+ ; CHECK-NEXT: .LBB19_3 :
466
413
%c = icmp ult i32 %v , 12
467
414
%ballot = call i32 @llvm.amdgcn.ballot.i32 (i1 %c )
468
415
%bc = icmp sgt i32 %ballot , 22
0 commit comments