@@ -33,7 +33,8 @@ define amdgpu_cs i32 @non_compare(i32 %x) {
33
33
; CHECK-LABEL: non_compare:
34
34
; CHECK: ; %bb.0:
35
35
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
36
- ; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, v0
36
+ ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
37
+ ; CHECK-NEXT: s_and_b32 s0, vcc_lo, exec_lo
37
38
; CHECK-NEXT: ; return to shader part epilog
38
39
%trunc = trunc i32 %x to i1
39
40
%ballot = call i32 @llvm.amdgcn.ballot.i32 (i1 %trunc )
@@ -45,7 +46,8 @@ define amdgpu_cs i32 @non_compare(i32 %x) {
45
46
define amdgpu_cs i32 @compare_ints (i32 %x , i32 %y ) {
46
47
; CHECK-LABEL: compare_ints:
47
48
; CHECK: ; %bb.0:
48
- ; CHECK-NEXT: v_cmp_eq_u32_e64 s0, v0, v1
49
+ ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1
50
+ ; CHECK-NEXT: s_and_b32 s0, vcc_lo, exec_lo
49
51
; CHECK-NEXT: ; return to shader part epilog
50
52
%cmp = icmp eq i32 %x , %y
51
53
%ballot = call i32 @llvm.amdgcn.ballot.i32 (i1 %cmp )
@@ -55,7 +57,8 @@ define amdgpu_cs i32 @compare_ints(i32 %x, i32 %y) {
55
57
define amdgpu_cs i32 @compare_int_with_constant (i32 %x ) {
56
58
; CHECK-LABEL: compare_int_with_constant:
57
59
; CHECK: ; %bb.0:
58
- ; CHECK-NEXT: v_cmp_le_i32_e64 s0, 0x63, v0
60
+ ; CHECK-NEXT: v_cmp_le_i32_e32 vcc_lo, 0x63, v0
61
+ ; CHECK-NEXT: s_and_b32 s0, vcc_lo, exec_lo
59
62
; CHECK-NEXT: ; return to shader part epilog
60
63
%cmp = icmp sge i32 %x , 99
61
64
%ballot = call i32 @llvm.amdgcn.ballot.i32 (i1 %cmp )
@@ -65,7 +68,8 @@ define amdgpu_cs i32 @compare_int_with_constant(i32 %x) {
65
68
define amdgpu_cs i32 @compare_floats (float %x , float %y ) {
66
69
; CHECK-LABEL: compare_floats:
67
70
; CHECK: ; %bb.0:
68
- ; CHECK-NEXT: v_cmp_gt_f32_e64 s0, v0, v1
71
+ ; CHECK-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v1
72
+ ; CHECK-NEXT: s_and_b32 s0, vcc_lo, exec_lo
69
73
; CHECK-NEXT: ; return to shader part epilog
70
74
%cmp = fcmp ogt float %x , %y
71
75
%ballot = call i32 @llvm.amdgcn.ballot.i32 (i1 %cmp )
@@ -76,7 +80,8 @@ define amdgpu_cs i32 @ctpop_of_ballot(float %x, float %y) {
76
80
; CHECK-LABEL: ctpop_of_ballot:
77
81
; CHECK: ; %bb.0:
78
82
; CHECK-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v1
79
- ; CHECK-NEXT: s_bcnt1_i32_b32 s0, vcc_lo
83
+ ; CHECK-NEXT: s_and_b32 s0, vcc_lo, exec_lo
84
+ ; CHECK-NEXT: s_bcnt1_i32_b32 s0, s0
80
85
; CHECK-NEXT: ; return to shader part epilog
81
86
%cmp = fcmp ogt float %x , %y
82
87
%ballot = call i32 @llvm.amdgcn.ballot.i32 (i1 %cmp )
@@ -89,7 +94,8 @@ define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_non_compare(i32 %v) {
89
94
; CHECK: ; %bb.0:
90
95
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
91
96
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
92
- ; CHECK-NEXT: s_cmp_eq_u32 vcc_lo, 0
97
+ ; CHECK-NEXT: s_and_b32 s0, vcc_lo, exec_lo
98
+ ; CHECK-NEXT: s_cmp_eq_u32 s0, 0
93
99
; CHECK-NEXT: s_cbranch_scc1 .LBB7_2
94
100
; CHECK-NEXT: ; %bb.1: ; %true
95
101
; CHECK-NEXT: s_mov_b32 s0, 42
@@ -113,6 +119,7 @@ define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_non_compare(i32 inreg %v) {
113
119
; CHECK: ; %bb.0:
114
120
; CHECK-NEXT: s_and_b32 s0, 1, s0
115
121
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
122
+ ; CHECK-NEXT: s_and_b32 s0, s0, exec_lo
116
123
; CHECK-NEXT: s_cmp_eq_u32 s0, 0
117
124
; CHECK-NEXT: s_cbranch_scc1 .LBB8_2
118
125
; CHECK-NEXT: ; %bb.1: ; %true
@@ -137,7 +144,8 @@ define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_non_compare(i32 %v) {
137
144
; CHECK: ; %bb.0:
138
145
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
139
146
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
140
- ; CHECK-NEXT: s_cmp_lg_u32 vcc_lo, 0
147
+ ; CHECK-NEXT: s_and_b32 s0, vcc_lo, exec_lo
148
+ ; CHECK-NEXT: s_cmp_lg_u32 s0, 0
141
149
; CHECK-NEXT: s_cbranch_scc0 .LBB9_2
142
150
; CHECK-NEXT: ; %bb.1: ; %false
143
151
; CHECK-NEXT: s_mov_b32 s0, 33
@@ -161,6 +169,7 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_non_compare(i32 inreg %v) {
161
169
; CHECK: ; %bb.0:
162
170
; CHECK-NEXT: s_and_b32 s0, 1, s0
163
171
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
172
+ ; CHECK-NEXT: s_and_b32 s0, s0, exec_lo
164
173
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
165
174
; CHECK-NEXT: s_cbranch_scc0 .LBB10_2
166
175
; CHECK-NEXT: ; %bb.1: ; %false
@@ -184,7 +193,8 @@ define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_compare(i32 %v) {
184
193
; CHECK-LABEL: branch_divergent_ballot_ne_zero_compare:
185
194
; CHECK: ; %bb.0:
186
195
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
187
- ; CHECK-NEXT: s_cmp_eq_u32 vcc_lo, 0
196
+ ; CHECK-NEXT: s_and_b32 s0, vcc_lo, exec_lo
197
+ ; CHECK-NEXT: s_cmp_eq_u32 s0, 0
188
198
; CHECK-NEXT: s_cbranch_scc1 .LBB11_2
189
199
; CHECK-NEXT: ; %bb.1: ; %true
190
200
; CHECK-NEXT: s_mov_b32 s0, 42
@@ -210,6 +220,7 @@ define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_compare(i32 inreg %v) {
210
220
; CHECK-NEXT: s_cselect_b32 s0, 1, 0
211
221
; CHECK-NEXT: s_and_b32 s0, 1, s0
212
222
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
223
+ ; CHECK-NEXT: s_and_b32 s0, s0, exec_lo
213
224
; CHECK-NEXT: s_cmp_eq_u32 s0, 0
214
225
; CHECK-NEXT: s_cbranch_scc1 .LBB12_2
215
226
; CHECK-NEXT: ; %bb.1: ; %true
@@ -233,7 +244,8 @@ define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_compare(i32 %v) {
233
244
; CHECK-LABEL: branch_divergent_ballot_eq_zero_compare:
234
245
; CHECK: ; %bb.0:
235
246
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
236
- ; CHECK-NEXT: s_cmp_lg_u32 vcc_lo, 0
247
+ ; CHECK-NEXT: s_and_b32 s0, vcc_lo, exec_lo
248
+ ; CHECK-NEXT: s_cmp_lg_u32 s0, 0
237
249
; CHECK-NEXT: s_cbranch_scc0 .LBB13_2
238
250
; CHECK-NEXT: ; %bb.1: ; %false
239
251
; CHECK-NEXT: s_mov_b32 s0, 33
@@ -259,6 +271,7 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare(i32 inreg %v) {
259
271
; CHECK-NEXT: s_cselect_b32 s0, 1, 0
260
272
; CHECK-NEXT: s_and_b32 s0, 1, s0
261
273
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
274
+ ; CHECK-NEXT: s_and_b32 s0, s0, exec_lo
262
275
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
263
276
; CHECK-NEXT: s_cbranch_scc0 .LBB14_2
264
277
; CHECK-NEXT: ; %bb.1: ; %false
@@ -284,6 +297,7 @@ define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_and(i32 %v1, i32 %v2) {
284
297
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
285
298
; CHECK-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
286
299
; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0
300
+ ; CHECK-NEXT: s_and_b32 s0, s0, exec_lo
287
301
; CHECK-NEXT: s_cmp_eq_u32 s0, 0
288
302
; CHECK-NEXT: s_cbranch_scc1 .LBB15_2
289
303
; CHECK-NEXT: ; %bb.1: ; %true
@@ -315,6 +329,7 @@ define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_and(i32 inreg %v1, i32 inreg
315
329
; CHECK-NEXT: s_and_b32 s0, s0, s1
316
330
; CHECK-NEXT: s_and_b32 s0, 1, s0
317
331
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
332
+ ; CHECK-NEXT: s_and_b32 s0, s0, exec_lo
318
333
; CHECK-NEXT: s_cmp_eq_u32 s0, 0
319
334
; CHECK-NEXT: s_cbranch_scc1 .LBB16_2
320
335
; CHECK-NEXT: ; %bb.1: ; %true
@@ -342,6 +357,7 @@ define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_and(i32 %v1, i32 %v2) {
342
357
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
343
358
; CHECK-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
344
359
; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0
360
+ ; CHECK-NEXT: s_and_b32 s0, s0, exec_lo
345
361
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
346
362
; CHECK-NEXT: s_cbranch_scc0 .LBB17_2
347
363
; CHECK-NEXT: ; %bb.1: ; %false
@@ -373,6 +389,7 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 inreg
373
389
; CHECK-NEXT: s_and_b32 s0, s0, s1
374
390
; CHECK-NEXT: s_and_b32 s0, 1, s0
375
391
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
392
+ ; CHECK-NEXT: s_and_b32 s0, s0, exec_lo
376
393
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
377
394
; CHECK-NEXT: s_cbranch_scc0 .LBB18_2
378
395
; CHECK-NEXT: ; %bb.1: ; %false
@@ -401,6 +418,7 @@ define amdgpu_cs i32 @branch_uniform_ballot_sgt_N_compare(i32 inreg %v) {
401
418
; CHECK-NEXT: s_cselect_b32 s0, 1, 0
402
419
; CHECK-NEXT: s_and_b32 s0, 1, s0
403
420
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
421
+ ; CHECK-NEXT: s_and_b32 s0, s0, exec_lo
404
422
; CHECK-NEXT: s_cmp_le_i32 s0, 22
405
423
; CHECK-NEXT: s_cbranch_scc1 .LBB19_2
406
424
; CHECK-NEXT: ; %bb.1: ; %true
0 commit comments