@@ -398,19 +398,18 @@ define double @fneg_xor_select_f64(i1 %cond, double %arg0, double %arg1) {
398
398
; GCN-NEXT: v_and_b32_e32 v0, 1, v0
399
399
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
400
400
; GCN-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
401
- ; GCN-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
402
- ; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
401
+ ; GCN-NEXT: v_cndmask_b32_e64 v1, -v4, -v2, vcc
403
402
; GCN-NEXT: s_setpc_b64 s[30:31]
404
403
;
405
404
; GFX11-LABEL: fneg_xor_select_f64:
406
405
; GFX11: ; %bb.0:
407
406
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
408
407
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
409
408
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
410
- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
409
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
411
410
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
412
- ; GFX11-NEXT: v_dual_cndmask_b32 v0, v3, v1 :: v_dual_cndmask_b32 v1, v4, v2
413
- ; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
411
+ ; GFX11-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc_lo
412
+ ; GFX11-NEXT: v_cndmask_b32_e64 v1, -v4, -v2, vcc_lo
414
413
; GFX11-NEXT: s_setpc_b64 s[30:31]
415
414
%select = select i1 %cond, double %arg0, double %arg1
416
415
%fneg = fneg double %select
@@ -422,38 +421,38 @@ define double @fneg_xor_select_f64_multi_user(i1 %cond, double %arg0, double %ar
422
421
; GFX7: ; %bb.0:
423
422
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
424
423
; GFX7-NEXT: v_and_b32_e32 v0, 1, v0
425
- ; GFX7-NEXT: v_mov_b32_e32 v7, v1
426
424
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
427
- ; GFX7-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
428
- ; GFX7-NEXT: v_cndmask_b32_e32 v0, v3, v7, vcc
425
+ ; GFX7-NEXT: v_cndmask_b32_e64 v2, -v4, -v2, vcc
426
+ ; GFX7-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
427
+ ; GFX7-NEXT: v_xor_b32_e32 v1, 0x80000000, v2
429
428
; GFX7-NEXT: flat_store_dwordx2 v[5:6], v[0:1]
430
- ; GFX7-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
429
+ ; GFX7-NEXT: v_mov_b32_e32 v1, v2
431
430
; GFX7-NEXT: s_waitcnt vmcnt(0)
432
431
; GFX7-NEXT: s_setpc_b64 s[30:31]
433
432
;
434
433
; GFX9-LABEL: fneg_xor_select_f64_multi_user:
435
434
; GFX9: ; %bb.0:
436
435
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
437
436
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
438
- ; GFX9-NEXT: v_mov_b32_e32 v7, v1
439
437
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
440
- ; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
441
- ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v7, vcc
438
+ ; GFX9-NEXT: v_cndmask_b32_e64 v2, -v4, -v2, vcc
439
+ ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
440
+ ; GFX9-NEXT: v_xor_b32_e32 v1, 0x80000000, v2
442
441
; GFX9-NEXT: global_store_dwordx2 v[5:6], v[0:1], off
443
- ; GFX9-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
442
+ ; GFX9-NEXT: v_mov_b32_e32 v1, v2
444
443
; GFX9-NEXT: s_waitcnt vmcnt(0)
445
444
; GFX9-NEXT: s_setpc_b64 s[30:31]
446
445
;
447
446
; GFX11-LABEL: fneg_xor_select_f64_multi_user:
448
447
; GFX11: ; %bb.0:
449
448
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
450
449
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
451
- ; GFX11-NEXT: v_dual_mov_b32 v7, v1 :: v_dual_and_b32 v0, 1, v0
452
- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
450
+ ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
451
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
453
452
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
454
- ; GFX11-NEXT: v_dual_cndmask_b32 v1, v4, v2 :: v_dual_cndmask_b32 v0, v3, v7
455
- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
456
- ; GFX11-NEXT: v_xor_b32_e32 v2, 0x80000000, v1
453
+ ; GFX11-NEXT: v_cndmask_b32_e64 v2, -v4, -v2, vcc_lo
454
+ ; GFX11-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc_lo
455
+ ; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v2
457
456
; GFX11-NEXT: global_store_b64 v[5:6], v[0:1], off
458
457
; GFX11-NEXT: v_mov_b32_e32 v1, v2
459
458
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
@@ -497,14 +496,13 @@ define double @select_fneg_select_fneg_f64(i1 %cond0, i1 %cond1, double %arg0, d
497
496
; GCN: ; %bb.0:
498
497
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
499
498
; GCN-NEXT: v_and_b32_e32 v0, 1, v0
499
+ ; GCN-NEXT: v_and_b32_e32 v1, 1, v1
500
500
; GCN-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
501
501
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
502
- ; GCN-NEXT: v_and_b32_e32 v1, 1, v1
503
502
; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc
504
- ; GCN-NEXT: v_cndmask_b32_e32 v2, v3, v5, vcc
505
- ; GCN-NEXT: v_xor_b32_e32 v3, 0x80000000, v2
503
+ ; GCN-NEXT: v_cndmask_b32_e64 v2, -v3, -v5, vcc
506
504
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
507
- ; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
505
+ ; GCN-NEXT: v_cndmask_b32_e64 v1, -v2, v2, vcc
508
506
; GCN-NEXT: s_setpc_b64 s[30:31]
509
507
;
510
508
; GFX11-LABEL: select_fneg_select_fneg_f64:
@@ -513,16 +511,13 @@ define double @select_fneg_select_fneg_f64(i1 %cond0, i1 %cond1, double %arg0, d
513
511
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
514
512
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
515
513
; GFX11-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
516
- ; GFX11-NEXT: v_and_b32_e32 v1, 1, v1
517
- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
514
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
518
515
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
519
- ; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc_lo
520
- ; GFX11-NEXT: v_cndmask_b32_e32 v2, v3, v5, vcc_lo
521
- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
516
+ ; GFX11-NEXT: v_dual_cndmask_b32 v0, v2, v4 :: v_dual_and_b32 v1, 1, v1
517
+ ; GFX11-NEXT: v_cndmask_b32_e64 v2, -v3, -v5, vcc_lo
518
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
522
519
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
523
- ; GFX11-NEXT: v_xor_b32_e32 v3, 0x80000000, v2
524
- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
525
- ; GFX11-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo
520
+ ; GFX11-NEXT: v_cndmask_b32_e64 v1, -v2, v2, vcc_lo
526
521
; GFX11-NEXT: s_setpc_b64 s[30:31]
527
522
%fneg0 = fneg double %arg0
528
523
%select0 = select i1 %cond0, double %arg1, double %fneg0
@@ -894,10 +889,9 @@ define double @cospiD_pattern1(i32 %arg, double %arg1, double %arg2) {
894
889
; GCN-NEXT: v_and_b32_e32 v5, 1, v0
895
890
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
896
891
; GCN-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc
897
- ; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc
898
- ; GCN-NEXT: v_xor_b32_e32 v2, 0x80000000, v1
892
+ ; GCN-NEXT: v_cndmask_b32_e64 v1, -v2, -v4, vcc
899
893
; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 1, v0
900
- ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
894
+ ; GCN-NEXT: v_cndmask_b32_e64 v1, -v1, v1, vcc
901
895
; GCN-NEXT: v_mov_b32_e32 v0, v3
902
896
; GCN-NEXT: s_setpc_b64 s[30:31]
903
897
;
@@ -909,12 +903,11 @@ define double @cospiD_pattern1(i32 %arg, double %arg1, double %arg2) {
909
903
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_3)
910
904
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v5
911
905
; GFX11-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc_lo
912
- ; GFX11-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo
906
+ ; GFX11-NEXT: v_cndmask_b32_e64 v1, -v2, -v4, vcc_lo
913
907
; GFX11-NEXT: v_cmp_lt_i32_e32 vcc_lo, 1, v0
914
908
; GFX11-NEXT: v_mov_b32_e32 v0, v3
915
- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
916
- ; GFX11-NEXT: v_xor_b32_e32 v2, 0x80000000, v1
917
- ; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo
909
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
910
+ ; GFX11-NEXT: v_cndmask_b32_e64 v1, -v1, v1, vcc_lo
918
911
; GFX11-NEXT: s_setpc_b64 s[30:31]
919
912
%i = and i32 %arg, 1
920
913
%i3 = icmp eq i32 %i, 0
0 commit comments