@@ -416,22 +416,19 @@ define double @fneg_xor_select_f64(i1 %cond, double %arg0, double %arg1) {
416
416
; GCN: ; %bb.0:
417
417
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
418
418
; GCN-NEXT: v_and_b32_e32 v0, 1, v0
419
- ; GCN-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
420
- ; GCN-NEXT: v_xor_b32_e32 v4, 0x80000000, v4
421
419
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
422
420
; GCN-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
423
- ; GCN-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
421
+ ; GCN-NEXT: v_cndmask_b32_e64 v1, -v4, -v2, vcc
424
422
; GCN-NEXT: s_setpc_b64 s[30:31]
425
423
;
426
424
; GFX11-LABEL: fneg_xor_select_f64:
427
425
; GFX11: ; %bb.0:
428
426
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
429
- ; GFX11-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
430
- ; GFX11-NEXT: v_xor_b32_e32 v4, 0x80000000, v4
431
427
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
432
- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
428
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
433
429
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
434
- ; GFX11-NEXT: v_dual_cndmask_b32 v0, v3, v1 :: v_dual_cndmask_b32 v1, v4, v2
430
+ ; GFX11-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc_lo
431
+ ; GFX11-NEXT: v_cndmask_b32_e64 v1, -v4, -v2, vcc_lo
435
432
; GFX11-NEXT: s_setpc_b64 s[30:31]
436
433
%select = select i1 %cond , double %arg0 , double %arg1
437
434
%fneg = fneg double %select
@@ -1642,16 +1639,19 @@ define amdgpu_kernel void @multiple_uses_fneg_select_f64(double %x, double %y, i
1642
1639
; GFX7-NEXT: s_add_i32 s12, s12, s17
1643
1640
; GFX7-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
1644
1641
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1645
- ; GFX7-NEXT: s_and_b32 s6, 1, s6
1642
+ ; GFX7-NEXT: s_bitcmp1_b32 s6, 0
1643
+ ; GFX7-NEXT: s_cselect_b64 vcc, -1, 0
1644
+ ; GFX7-NEXT: s_and_b64 s[6:7], vcc, exec
1645
+ ; GFX7-NEXT: v_mov_b32_e32 v0, s3
1646
+ ; GFX7-NEXT: v_mov_b32_e32 v1, s1
1646
1647
; GFX7-NEXT: s_cselect_b32 s1, s1, s3
1647
- ; GFX7-NEXT: s_xor_b32 s3, s1, 0x80000000
1648
- ; GFX7-NEXT: s_cmp_eq_u32 s6, 1
1648
+ ; GFX7-NEXT: v_cndmask_b32_e64 v0, -v0, -v1, vcc
1649
1649
; GFX7-NEXT: s_cselect_b32 s0, s0, s2
1650
- ; GFX7-NEXT: s_cselect_b32 s1, s3, s1
1650
+ ; GFX7-NEXT: v_mov_b32_e32 v1, s1
1651
1651
; GFX7-NEXT: v_mov_b32_e32 v2, s4
1652
1652
; GFX7-NEXT: s_mov_b32 flat_scratch_lo, s13
1653
+ ; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
1653
1654
; GFX7-NEXT: v_mov_b32_e32 v0, s0
1654
- ; GFX7-NEXT: v_mov_b32_e32 v1, s1
1655
1655
; GFX7-NEXT: v_mov_b32_e32 v3, s5
1656
1656
; GFX7-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1657
1657
; GFX7-NEXT: s_endpgm
@@ -1663,32 +1663,37 @@ define amdgpu_kernel void @multiple_uses_fneg_select_f64(double %x, double %y, i
1663
1663
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x18
1664
1664
; GFX9-NEXT: v_mov_b32_e32 v2, 0
1665
1665
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1666
- ; GFX9-NEXT: s_and_b32 s6, 1, s6
1666
+ ; GFX9-NEXT: s_bitcmp1_b32 s6, 0
1667
+ ; GFX9-NEXT: s_cselect_b64 vcc, -1, 0
1668
+ ; GFX9-NEXT: s_and_b64 s[6:7], vcc, exec
1669
+ ; GFX9-NEXT: v_mov_b32_e32 v0, s3
1670
+ ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1667
1671
; GFX9-NEXT: s_cselect_b32 s1, s1, s3
1668
- ; GFX9-NEXT: s_xor_b32 s3, s1, 0x80000000
1669
- ; GFX9-NEXT: s_cmp_eq_u32 s6, 1
1672
+ ; GFX9-NEXT: v_cndmask_b32_e64 v0, -v0, -v1, vcc
1670
1673
; GFX9-NEXT: s_cselect_b32 s0, s0, s2
1671
- ; GFX9-NEXT: s_cselect_b32 s1, s3, s1
1672
- ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1673
1674
; GFX9-NEXT: v_mov_b32_e32 v1, s1
1675
+ ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
1676
+ ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1674
1677
; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
1675
1678
; GFX9-NEXT: s_endpgm
1676
1679
;
1677
1680
; GFX11-LABEL: multiple_uses_fneg_select_f64:
1678
1681
; GFX11: ; %bb.0:
1679
1682
; GFX11-NEXT: s_clause 0x2
1680
- ; GFX11-NEXT: s_load_b32 s6, s[4:5], 0x10
1681
1683
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
1684
+ ; GFX11-NEXT: s_load_b32 s6, s[4:5], 0x10
1682
1685
; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x18
1683
1686
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1684
- ; GFX11-NEXT: s_and_b32 s6, 1, s6
1687
+ ; GFX11-NEXT: v_mov_b32_e32 v0, s1
1688
+ ; GFX11-NEXT: s_bitcmp1_b32 s6, 0
1689
+ ; GFX11-NEXT: s_cselect_b32 vcc_lo, -1, 0
1690
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
1691
+ ; GFX11-NEXT: v_cndmask_b32_e64 v0, -s3, -v0, vcc_lo
1692
+ ; GFX11-NEXT: s_and_b32 s6, vcc_lo, exec_lo
1685
1693
; GFX11-NEXT: s_cselect_b32 s1, s1, s3
1686
- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
1687
- ; GFX11-NEXT: s_xor_b32 s3, s1, 0x80000000
1688
- ; GFX11-NEXT: s_cmp_eq_u32 s6, 1
1689
1694
; GFX11-NEXT: s_cselect_b32 s0, s0, s2
1690
- ; GFX11-NEXT: s_cselect_b32 s1, s3, s1
1691
- ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s1
1695
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1696
+ ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_cndmask_b32 v1, s1, v0
1692
1697
; GFX11-NEXT: v_mov_b32_e32 v0, s0
1693
1698
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
1694
1699
; GFX11-NEXT: s_endpgm
0 commit comments