@@ -204,18 +204,37 @@ define amdgpu_ps <2 x i64> @scalar_xnor_i64_mul_use(i64 inreg %a, i64 inreg %b)
204
204
}
205
205
206
206
define i32 @vector_xnor_i32_one_use (i32 %a , i32 %b ) {
207
- ; GCN-LABEL: vector_xnor_i32_one_use:
208
- ; GCN: ; %bb.0: ; %entry
209
- ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
210
- ; GCN-NEXT: v_xor_b32_e32 v0, v0, v1
211
- ; GCN-NEXT: v_not_b32_e32 v0, v0
212
- ; GCN-NEXT: s_setpc_b64 s[30:31]
207
+ ; GFX7-LABEL: vector_xnor_i32_one_use:
208
+ ; GFX7: ; %bb.0: ; %entry
209
+ ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
210
+ ; GFX7-NEXT: v_xor_b32_e32 v0, v0, v1
211
+ ; GFX7-NEXT: v_not_b32_e32 v0, v0
212
+ ; GFX7-NEXT: s_setpc_b64 s[30:31]
213
+ ;
214
+ ; GFX8-LABEL: vector_xnor_i32_one_use:
215
+ ; GFX8: ; %bb.0: ; %entry
216
+ ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
217
+ ; GFX8-NEXT: v_xor_b32_e32 v0, v0, v1
218
+ ; GFX8-NEXT: v_not_b32_e32 v0, v0
219
+ ; GFX8-NEXT: s_setpc_b64 s[30:31]
220
+ ;
221
+ ; GFX900-LABEL: vector_xnor_i32_one_use:
222
+ ; GFX900: ; %bb.0: ; %entry
223
+ ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
224
+ ; GFX900-NEXT: v_xor_b32_e32 v0, v0, v1
225
+ ; GFX900-NEXT: v_not_b32_e32 v0, v0
226
+ ; GFX900-NEXT: s_setpc_b64 s[30:31]
227
+ ;
228
+ ; GFX906-LABEL: vector_xnor_i32_one_use:
229
+ ; GFX906: ; %bb.0: ; %entry
230
+ ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
231
+ ; GFX906-NEXT: v_xnor_b32_e32 v0, v0, v1
232
+ ; GFX906-NEXT: s_setpc_b64 s[30:31]
213
233
;
214
234
; GFX10-LABEL: vector_xnor_i32_one_use:
215
235
; GFX10: ; %bb.0: ; %entry
216
236
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
217
- ; GFX10-NEXT: v_xor_b32_e32 v0, v0, v1
218
- ; GFX10-NEXT: v_not_b32_e32 v0, v0
237
+ ; GFX10-NEXT: v_xnor_b32_e32 v0, v0, v1
219
238
; GFX10-NEXT: s_setpc_b64 s[30:31]
220
239
entry:
221
240
%xor = xor i32 %a , %b
@@ -224,22 +243,45 @@ entry:
224
243
}
225
244
226
245
define i64 @vector_xnor_i64_one_use (i64 %a , i64 %b ) {
227
- ; GCN-LABEL: vector_xnor_i64_one_use:
228
- ; GCN: ; %bb.0: ; %entry
229
- ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
230
- ; GCN-NEXT: v_xor_b32_e32 v0, v0, v2
231
- ; GCN-NEXT: v_xor_b32_e32 v1, v1, v3
232
- ; GCN-NEXT: v_not_b32_e32 v0, v0
233
- ; GCN-NEXT: v_not_b32_e32 v1, v1
234
- ; GCN-NEXT: s_setpc_b64 s[30:31]
246
+ ; GFX7-LABEL: vector_xnor_i64_one_use:
247
+ ; GFX7: ; %bb.0: ; %entry
248
+ ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
249
+ ; GFX7-NEXT: v_xor_b32_e32 v0, v0, v2
250
+ ; GFX7-NEXT: v_xor_b32_e32 v1, v1, v3
251
+ ; GFX7-NEXT: v_not_b32_e32 v0, v0
252
+ ; GFX7-NEXT: v_not_b32_e32 v1, v1
253
+ ; GFX7-NEXT: s_setpc_b64 s[30:31]
254
+ ;
255
+ ; GFX8-LABEL: vector_xnor_i64_one_use:
256
+ ; GFX8: ; %bb.0: ; %entry
257
+ ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
258
+ ; GFX8-NEXT: v_xor_b32_e32 v0, v0, v2
259
+ ; GFX8-NEXT: v_xor_b32_e32 v1, v1, v3
260
+ ; GFX8-NEXT: v_not_b32_e32 v0, v0
261
+ ; GFX8-NEXT: v_not_b32_e32 v1, v1
262
+ ; GFX8-NEXT: s_setpc_b64 s[30:31]
263
+ ;
264
+ ; GFX900-LABEL: vector_xnor_i64_one_use:
265
+ ; GFX900: ; %bb.0: ; %entry
266
+ ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
267
+ ; GFX900-NEXT: v_xor_b32_e32 v0, v0, v2
268
+ ; GFX900-NEXT: v_xor_b32_e32 v1, v1, v3
269
+ ; GFX900-NEXT: v_not_b32_e32 v0, v0
270
+ ; GFX900-NEXT: v_not_b32_e32 v1, v1
271
+ ; GFX900-NEXT: s_setpc_b64 s[30:31]
272
+ ;
273
+ ; GFX906-LABEL: vector_xnor_i64_one_use:
274
+ ; GFX906: ; %bb.0: ; %entry
275
+ ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
276
+ ; GFX906-NEXT: v_xnor_b32_e32 v0, v0, v2
277
+ ; GFX906-NEXT: v_xnor_b32_e32 v1, v1, v3
278
+ ; GFX906-NEXT: s_setpc_b64 s[30:31]
235
279
;
236
280
; GFX10-LABEL: vector_xnor_i64_one_use:
237
281
; GFX10: ; %bb.0: ; %entry
238
282
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
239
- ; GFX10-NEXT: v_xor_b32_e32 v0, v0, v2
240
- ; GFX10-NEXT: v_xor_b32_e32 v1, v1, v3
241
- ; GFX10-NEXT: v_not_b32_e32 v0, v0
242
- ; GFX10-NEXT: v_not_b32_e32 v1, v1
283
+ ; GFX10-NEXT: v_xnor_b32_e32 v0, v0, v2
284
+ ; GFX10-NEXT: v_xnor_b32_e32 v1, v1, v3
243
285
; GFX10-NEXT: s_setpc_b64 s[30:31]
244
286
entry:
245
287
%xor = xor i64 %a , %b
@@ -248,16 +290,32 @@ entry:
248
290
}
249
291
250
292
define amdgpu_ps float @xnor_s_v_i32_one_use (i32 inreg %s , i32 %v ) {
251
- ; GCN-LABEL: xnor_s_v_i32_one_use:
252
- ; GCN: ; %bb.0:
253
- ; GCN-NEXT: v_xor_b32_e32 v0, s0, v0
254
- ; GCN-NEXT: v_not_b32_e32 v0, v0
255
- ; GCN-NEXT: ; return to shader part epilog
293
+ ; GFX7-LABEL: xnor_s_v_i32_one_use:
294
+ ; GFX7: ; %bb.0:
295
+ ; GFX7-NEXT: v_xor_b32_e32 v0, s0, v0
296
+ ; GFX7-NEXT: v_not_b32_e32 v0, v0
297
+ ; GFX7-NEXT: ; return to shader part epilog
298
+ ;
299
+ ; GFX8-LABEL: xnor_s_v_i32_one_use:
300
+ ; GFX8: ; %bb.0:
301
+ ; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0
302
+ ; GFX8-NEXT: v_not_b32_e32 v0, v0
303
+ ; GFX8-NEXT: ; return to shader part epilog
304
+ ;
305
+ ; GFX900-LABEL: xnor_s_v_i32_one_use:
306
+ ; GFX900: ; %bb.0:
307
+ ; GFX900-NEXT: v_xor_b32_e32 v0, s0, v0
308
+ ; GFX900-NEXT: v_not_b32_e32 v0, v0
309
+ ; GFX900-NEXT: ; return to shader part epilog
310
+ ;
311
+ ; GFX906-LABEL: xnor_s_v_i32_one_use:
312
+ ; GFX906: ; %bb.0:
313
+ ; GFX906-NEXT: v_xnor_b32_e32 v0, s0, v0
314
+ ; GFX906-NEXT: ; return to shader part epilog
256
315
;
257
316
; GFX10-LABEL: xnor_s_v_i32_one_use:
258
317
; GFX10: ; %bb.0:
259
- ; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0
260
- ; GFX10-NEXT: v_not_b32_e32 v0, v0
318
+ ; GFX10-NEXT: v_xnor_b32_e32 v0, s0, v0
261
319
; GFX10-NEXT: ; return to shader part epilog
262
320
%xor = xor i32 %s , %v
263
321
%d = xor i32 %xor , -1
@@ -266,16 +324,32 @@ define amdgpu_ps float @xnor_s_v_i32_one_use(i32 inreg %s, i32 %v) {
266
324
}
267
325
268
326
define amdgpu_ps float @xnor_v_s_i32_one_use (i32 inreg %s , i32 %v ) {
269
- ; GCN-LABEL: xnor_v_s_i32_one_use:
270
- ; GCN: ; %bb.0:
271
- ; GCN-NEXT: v_xor_b32_e32 v0, s0, v0
272
- ; GCN-NEXT: v_not_b32_e32 v0, v0
273
- ; GCN-NEXT: ; return to shader part epilog
327
+ ; GFX7-LABEL: xnor_v_s_i32_one_use:
328
+ ; GFX7: ; %bb.0:
329
+ ; GFX7-NEXT: v_xor_b32_e32 v0, s0, v0
330
+ ; GFX7-NEXT: v_not_b32_e32 v0, v0
331
+ ; GFX7-NEXT: ; return to shader part epilog
332
+ ;
333
+ ; GFX8-LABEL: xnor_v_s_i32_one_use:
334
+ ; GFX8: ; %bb.0:
335
+ ; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0
336
+ ; GFX8-NEXT: v_not_b32_e32 v0, v0
337
+ ; GFX8-NEXT: ; return to shader part epilog
338
+ ;
339
+ ; GFX900-LABEL: xnor_v_s_i32_one_use:
340
+ ; GFX900: ; %bb.0:
341
+ ; GFX900-NEXT: v_xor_b32_e32 v0, s0, v0
342
+ ; GFX900-NEXT: v_not_b32_e32 v0, v0
343
+ ; GFX900-NEXT: ; return to shader part epilog
344
+ ;
345
+ ; GFX906-LABEL: xnor_v_s_i32_one_use:
346
+ ; GFX906: ; %bb.0:
347
+ ; GFX906-NEXT: v_xnor_b32_e64 v0, v0, s0
348
+ ; GFX906-NEXT: ; return to shader part epilog
274
349
;
275
350
; GFX10-LABEL: xnor_v_s_i32_one_use:
276
351
; GFX10: ; %bb.0:
277
- ; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0
278
- ; GFX10-NEXT: v_not_b32_e32 v0, v0
352
+ ; GFX10-NEXT: v_xnor_b32_e64 v0, v0, s0
279
353
; GFX10-NEXT: ; return to shader part epilog
280
354
%xor = xor i32 %v , %s
281
355
%d = xor i32 %xor , -1
@@ -314,19 +388,15 @@ define amdgpu_ps <2 x float> @xnor_i64_s_v_one_use(i64 inreg %a, i64 %b64) {
314
388
; GFX906-LABEL: xnor_i64_s_v_one_use:
315
389
; GFX906: ; %bb.0: ; %entry
316
390
; GFX906-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
317
- ; GFX906-NEXT: v_xor_b32_e32 v0, s0, v0
318
- ; GFX906-NEXT: v_xor_b32_e32 v1, s1, v1
319
- ; GFX906-NEXT: v_not_b32_e32 v0, v0
320
- ; GFX906-NEXT: v_not_b32_e32 v1, v1
391
+ ; GFX906-NEXT: v_xnor_b32_e32 v0, s0, v0
392
+ ; GFX906-NEXT: v_xnor_b32_e32 v1, s1, v1
321
393
; GFX906-NEXT: ; return to shader part epilog
322
394
;
323
395
; GFX10-LABEL: xnor_i64_s_v_one_use:
324
396
; GFX10: ; %bb.0: ; %entry
325
397
; GFX10-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
326
- ; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0
327
- ; GFX10-NEXT: v_xor_b32_e32 v1, s1, v1
328
- ; GFX10-NEXT: v_not_b32_e32 v0, v0
329
- ; GFX10-NEXT: v_not_b32_e32 v1, v1
398
+ ; GFX10-NEXT: v_xnor_b32_e32 v0, s0, v0
399
+ ; GFX10-NEXT: v_xnor_b32_e32 v1, s1, v1
330
400
; GFX10-NEXT: ; return to shader part epilog
331
401
entry:
332
402
%b = shl i64 %b64 , 29
@@ -367,19 +437,15 @@ define amdgpu_ps <2 x float> @xnor_i64_v_s_one_use(i64 inreg %a, i64 %b64) {
367
437
; GFX906-LABEL: xnor_i64_v_s_one_use:
368
438
; GFX906: ; %bb.0:
369
439
; GFX906-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
370
- ; GFX906-NEXT: v_xor_b32_e32 v0, s0, v0
371
- ; GFX906-NEXT: v_xor_b32_e32 v1, s1, v1
372
- ; GFX906-NEXT: v_not_b32_e32 v0, v0
373
- ; GFX906-NEXT: v_not_b32_e32 v1, v1
440
+ ; GFX906-NEXT: v_xnor_b32_e64 v0, v0, s0
441
+ ; GFX906-NEXT: v_xnor_b32_e64 v1, v1, s1
374
442
; GFX906-NEXT: ; return to shader part epilog
375
443
;
376
444
; GFX10-LABEL: xnor_i64_v_s_one_use:
377
445
; GFX10: ; %bb.0:
378
446
; GFX10-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
379
- ; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0
380
- ; GFX10-NEXT: v_xor_b32_e32 v1, s1, v1
381
- ; GFX10-NEXT: v_not_b32_e32 v0, v0
382
- ; GFX10-NEXT: v_not_b32_e32 v1, v1
447
+ ; GFX10-NEXT: v_xnor_b32_e64 v0, v0, s0
448
+ ; GFX10-NEXT: v_xnor_b32_e64 v1, v1, s1
383
449
; GFX10-NEXT: ; return to shader part epilog
384
450
%b = shl i64 %b64 , 29
385
451
%xor = xor i64 %b , %a
@@ -419,7 +485,7 @@ define i32 @vector_xor_na_b_i32_one_use(i32 %a, i32 %b) {
419
485
; GFX10-LABEL: vector_xor_na_b_i32_one_use:
420
486
; GFX10: ; %bb.0: ; %entry
421
487
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
422
- ; GFX10-NEXT: v_xor3_b32 v0, v0, -1 , v1
488
+ ; GFX10-NEXT: v_xnor_b32_e32 v0, v0, v1
423
489
; GFX10-NEXT: s_setpc_b64 s[30:31]
424
490
entry:
425
491
%na = xor i32 %a , -1
@@ -458,7 +524,7 @@ define i32 @vector_xor_a_nb_i32_one_use(i32 %a, i32 %b) {
458
524
; GFX10-LABEL: vector_xor_a_nb_i32_one_use:
459
525
; GFX10: ; %bb.0: ; %entry
460
526
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
461
- ; GFX10-NEXT: v_xor3_b32 v0, v1, -1 , v0
527
+ ; GFX10-NEXT: v_xnor_b32_e32 v0, v1, v0
462
528
; GFX10-NEXT: s_setpc_b64 s[30:31]
463
529
entry:
464
530
%nb = xor i32 %b , -1
0 commit comments