Skip to content

Commit 0107c88

Browse files
authored
[RISCV][SDAG] Improve codegen of select with constants if zicond is available (#82456)
This patch uses `add + czero.eqz/nez` to lower select with constants if zicond is available.
```
(select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
(select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
```
The above code sequence is suggested by [RISCV Optimization Guide](https://riscv-optimization-guide-riseproject-c94355ae3e6872252baa952524.gitlab.io/riscv-optimization-guide.html#_avoid_branches_using_conditional_moves).
1 parent 9eb5f94 commit 0107c88

File tree

2 files changed

+262
-10
lines changed

2 files changed

+262
-10
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7379,6 +7379,26 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
73797379
if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
73807380
return V;
73817381

7382+
// (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
7383+
// (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
7384+
if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
7385+
const APInt &TrueVal = TrueV->getAsAPIntVal();
7386+
const APInt &FalseVal = FalseV->getAsAPIntVal();
7387+
const int TrueValCost = RISCVMatInt::getIntMatCost(
7388+
TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
7389+
const int FalseValCost = RISCVMatInt::getIntMatCost(
7390+
FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
7391+
bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
7392+
SDValue LHSVal = DAG.getConstant(
7393+
IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
7394+
SDValue RHSVal =
7395+
DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
7396+
SDValue CMOV =
7397+
DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
7398+
DL, VT, LHSVal, CondV);
7399+
return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal);
7400+
}
7401+
73827402
// (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
73837403
// Unless we have the short forward branch optimization.
73847404
if (!Subtarget.hasConditionalMoveFusion())

llvm/test/CodeGen/RISCV/select.ll

Lines changed: 242 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1606,23 +1606,255 @@ define i32 @select_cst_unknown(i32 signext %a, i32 signext %b) {
16061606
; RV64IMXVTCONDOPS-LABEL: select_cst_unknown:
16071607
; RV64IMXVTCONDOPS: # %bb.0:
16081608
; RV64IMXVTCONDOPS-NEXT: slt a0, a0, a1
1609-
; RV64IMXVTCONDOPS-NEXT: li a1, -7
1610-
; RV64IMXVTCONDOPS-NEXT: vt.maskcn a1, a1, a0
1611-
; RV64IMXVTCONDOPS-NEXT: li a2, 5
1612-
; RV64IMXVTCONDOPS-NEXT: vt.maskc a0, a2, a0
1613-
; RV64IMXVTCONDOPS-NEXT: or a0, a0, a1
1609+
; RV64IMXVTCONDOPS-NEXT: li a1, -12
1610+
; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0
1611+
; RV64IMXVTCONDOPS-NEXT: addi a0, a0, 5
16141612
; RV64IMXVTCONDOPS-NEXT: ret
16151613
;
16161614
; CHECKZICOND-LABEL: select_cst_unknown:
16171615
; CHECKZICOND: # %bb.0:
16181616
; CHECKZICOND-NEXT: slt a0, a0, a1
1619-
; CHECKZICOND-NEXT: li a1, -7
1620-
; CHECKZICOND-NEXT: czero.nez a1, a1, a0
1621-
; CHECKZICOND-NEXT: li a2, 5
1622-
; CHECKZICOND-NEXT: czero.eqz a0, a2, a0
1623-
; CHECKZICOND-NEXT: or a0, a0, a1
1617+
; CHECKZICOND-NEXT: li a1, -12
1618+
; CHECKZICOND-NEXT: czero.nez a0, a1, a0
1619+
; CHECKZICOND-NEXT: addi a0, a0, 5
16241620
; CHECKZICOND-NEXT: ret
16251621
%cond = icmp slt i32 %a, %b
16261622
%ret = select i1 %cond, i32 5, i32 -7
16271623
ret i32 %ret
16281624
}
1625+
1626+
define i32 @select_cst1(i1 zeroext %cond) {
1627+
; RV32IM-LABEL: select_cst1:
1628+
; RV32IM: # %bb.0:
1629+
; RV32IM-NEXT: mv a1, a0
1630+
; RV32IM-NEXT: li a0, 10
1631+
; RV32IM-NEXT: bnez a1, .LBB43_2
1632+
; RV32IM-NEXT: # %bb.1:
1633+
; RV32IM-NEXT: li a0, 20
1634+
; RV32IM-NEXT: .LBB43_2:
1635+
; RV32IM-NEXT: ret
1636+
;
1637+
; RV64IM-LABEL: select_cst1:
1638+
; RV64IM: # %bb.0:
1639+
; RV64IM-NEXT: mv a1, a0
1640+
; RV64IM-NEXT: li a0, 10
1641+
; RV64IM-NEXT: bnez a1, .LBB43_2
1642+
; RV64IM-NEXT: # %bb.1:
1643+
; RV64IM-NEXT: li a0, 20
1644+
; RV64IM-NEXT: .LBB43_2:
1645+
; RV64IM-NEXT: ret
1646+
;
1647+
; RV64IMXVTCONDOPS-LABEL: select_cst1:
1648+
; RV64IMXVTCONDOPS: # %bb.0:
1649+
; RV64IMXVTCONDOPS-NEXT: li a1, 10
1650+
; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0
1651+
; RV64IMXVTCONDOPS-NEXT: addi a0, a0, 10
1652+
; RV64IMXVTCONDOPS-NEXT: ret
1653+
;
1654+
; CHECKZICOND-LABEL: select_cst1:
1655+
; CHECKZICOND: # %bb.0:
1656+
; CHECKZICOND-NEXT: li a1, 10
1657+
; CHECKZICOND-NEXT: czero.nez a0, a1, a0
1658+
; CHECKZICOND-NEXT: addi a0, a0, 10
1659+
; CHECKZICOND-NEXT: ret
1660+
%ret = select i1 %cond, i32 10, i32 20
1661+
ret i32 %ret
1662+
}
1663+
1664+
define i32 @select_cst2(i1 zeroext %cond) {
1665+
; RV32IM-LABEL: select_cst2:
1666+
; RV32IM: # %bb.0:
1667+
; RV32IM-NEXT: mv a1, a0
1668+
; RV32IM-NEXT: li a0, 10
1669+
; RV32IM-NEXT: bnez a1, .LBB44_2
1670+
; RV32IM-NEXT: # %bb.1:
1671+
; RV32IM-NEXT: lui a0, 5
1672+
; RV32IM-NEXT: addi a0, a0, -480
1673+
; RV32IM-NEXT: .LBB44_2:
1674+
; RV32IM-NEXT: ret
1675+
;
1676+
; RV64IM-LABEL: select_cst2:
1677+
; RV64IM: # %bb.0:
1678+
; RV64IM-NEXT: mv a1, a0
1679+
; RV64IM-NEXT: li a0, 10
1680+
; RV64IM-NEXT: bnez a1, .LBB44_2
1681+
; RV64IM-NEXT: # %bb.1:
1682+
; RV64IM-NEXT: lui a0, 5
1683+
; RV64IM-NEXT: addiw a0, a0, -480
1684+
; RV64IM-NEXT: .LBB44_2:
1685+
; RV64IM-NEXT: ret
1686+
;
1687+
; RV64IMXVTCONDOPS-LABEL: select_cst2:
1688+
; RV64IMXVTCONDOPS: # %bb.0:
1689+
; RV64IMXVTCONDOPS-NEXT: lui a1, 5
1690+
; RV64IMXVTCONDOPS-NEXT: addiw a1, a1, -490
1691+
; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0
1692+
; RV64IMXVTCONDOPS-NEXT: addi a0, a0, 10
1693+
; RV64IMXVTCONDOPS-NEXT: ret
1694+
;
1695+
; RV32IMZICOND-LABEL: select_cst2:
1696+
; RV32IMZICOND: # %bb.0:
1697+
; RV32IMZICOND-NEXT: lui a1, 5
1698+
; RV32IMZICOND-NEXT: addi a1, a1, -490
1699+
; RV32IMZICOND-NEXT: czero.nez a0, a1, a0
1700+
; RV32IMZICOND-NEXT: addi a0, a0, 10
1701+
; RV32IMZICOND-NEXT: ret
1702+
;
1703+
; RV64IMZICOND-LABEL: select_cst2:
1704+
; RV64IMZICOND: # %bb.0:
1705+
; RV64IMZICOND-NEXT: lui a1, 5
1706+
; RV64IMZICOND-NEXT: addiw a1, a1, -490
1707+
; RV64IMZICOND-NEXT: czero.nez a0, a1, a0
1708+
; RV64IMZICOND-NEXT: addi a0, a0, 10
1709+
; RV64IMZICOND-NEXT: ret
1710+
%ret = select i1 %cond, i32 10, i32 20000
1711+
ret i32 %ret
1712+
}
1713+
1714+
define i32 @select_cst3(i1 zeroext %cond) {
1715+
; RV32IM-LABEL: select_cst3:
1716+
; RV32IM: # %bb.0:
1717+
; RV32IM-NEXT: bnez a0, .LBB45_2
1718+
; RV32IM-NEXT: # %bb.1:
1719+
; RV32IM-NEXT: lui a0, 5
1720+
; RV32IM-NEXT: addi a0, a0, -480
1721+
; RV32IM-NEXT: ret
1722+
; RV32IM-NEXT: .LBB45_2:
1723+
; RV32IM-NEXT: lui a0, 7
1724+
; RV32IM-NEXT: addi a0, a0, 1328
1725+
; RV32IM-NEXT: ret
1726+
;
1727+
; RV64IM-LABEL: select_cst3:
1728+
; RV64IM: # %bb.0:
1729+
; RV64IM-NEXT: bnez a0, .LBB45_2
1730+
; RV64IM-NEXT: # %bb.1:
1731+
; RV64IM-NEXT: lui a0, 5
1732+
; RV64IM-NEXT: addiw a0, a0, -480
1733+
; RV64IM-NEXT: ret
1734+
; RV64IM-NEXT: .LBB45_2:
1735+
; RV64IM-NEXT: lui a0, 7
1736+
; RV64IM-NEXT: addiw a0, a0, 1328
1737+
; RV64IM-NEXT: ret
1738+
;
1739+
; RV64IMXVTCONDOPS-LABEL: select_cst3:
1740+
; RV64IMXVTCONDOPS: # %bb.0:
1741+
; RV64IMXVTCONDOPS-NEXT: lui a1, 1048574
1742+
; RV64IMXVTCONDOPS-NEXT: addiw a1, a1, -1808
1743+
; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0
1744+
; RV64IMXVTCONDOPS-NEXT: lui a1, 7
1745+
; RV64IMXVTCONDOPS-NEXT: addiw a1, a1, 1328
1746+
; RV64IMXVTCONDOPS-NEXT: add a0, a0, a1
1747+
; RV64IMXVTCONDOPS-NEXT: ret
1748+
;
1749+
; RV32IMZICOND-LABEL: select_cst3:
1750+
; RV32IMZICOND: # %bb.0:
1751+
; RV32IMZICOND-NEXT: lui a1, 1048574
1752+
; RV32IMZICOND-NEXT: addi a1, a1, -1808
1753+
; RV32IMZICOND-NEXT: czero.nez a0, a1, a0
1754+
; RV32IMZICOND-NEXT: lui a1, 7
1755+
; RV32IMZICOND-NEXT: addi a1, a1, 1328
1756+
; RV32IMZICOND-NEXT: add a0, a0, a1
1757+
; RV32IMZICOND-NEXT: ret
1758+
;
1759+
; RV64IMZICOND-LABEL: select_cst3:
1760+
; RV64IMZICOND: # %bb.0:
1761+
; RV64IMZICOND-NEXT: lui a1, 1048574
1762+
; RV64IMZICOND-NEXT: addiw a1, a1, -1808
1763+
; RV64IMZICOND-NEXT: czero.nez a0, a1, a0
1764+
; RV64IMZICOND-NEXT: lui a1, 7
1765+
; RV64IMZICOND-NEXT: addiw a1, a1, 1328
1766+
; RV64IMZICOND-NEXT: add a0, a0, a1
1767+
; RV64IMZICOND-NEXT: ret
1768+
%ret = select i1 %cond, i32 30000, i32 20000
1769+
ret i32 %ret
1770+
}
1771+
1772+
define i32 @select_cst4(i1 zeroext %cond) {
1773+
; CHECK-LABEL: select_cst4:
1774+
; CHECK: # %bb.0:
1775+
; CHECK-NEXT: neg a0, a0
1776+
; CHECK-NEXT: xori a0, a0, 2047
1777+
; CHECK-NEXT: ret
1778+
%ret = select i1 %cond, i32 -2048, i32 2047
1779+
ret i32 %ret
1780+
}
1781+
1782+
define i32 @select_cst5(i1 zeroext %cond) {
1783+
; RV32IM-LABEL: select_cst5:
1784+
; RV32IM: # %bb.0:
1785+
; RV32IM-NEXT: mv a1, a0
1786+
; RV32IM-NEXT: li a0, 2047
1787+
; RV32IM-NEXT: bnez a1, .LBB47_2
1788+
; RV32IM-NEXT: # %bb.1:
1789+
; RV32IM-NEXT: lui a0, 1
1790+
; RV32IM-NEXT: addi a0, a0, -2047
1791+
; RV32IM-NEXT: .LBB47_2:
1792+
; RV32IM-NEXT: ret
1793+
;
1794+
; RV64IM-LABEL: select_cst5:
1795+
; RV64IM: # %bb.0:
1796+
; RV64IM-NEXT: mv a1, a0
1797+
; RV64IM-NEXT: li a0, 2047
1798+
; RV64IM-NEXT: bnez a1, .LBB47_2
1799+
; RV64IM-NEXT: # %bb.1:
1800+
; RV64IM-NEXT: lui a0, 1
1801+
; RV64IM-NEXT: addiw a0, a0, -2047
1802+
; RV64IM-NEXT: .LBB47_2:
1803+
; RV64IM-NEXT: ret
1804+
;
1805+
; RV64IMXVTCONDOPS-LABEL: select_cst5:
1806+
; RV64IMXVTCONDOPS: # %bb.0:
1807+
; RV64IMXVTCONDOPS-NEXT: li a1, 2
1808+
; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0
1809+
; RV64IMXVTCONDOPS-NEXT: addi a0, a0, 2047
1810+
; RV64IMXVTCONDOPS-NEXT: ret
1811+
;
1812+
; CHECKZICOND-LABEL: select_cst5:
1813+
; CHECKZICOND: # %bb.0:
1814+
; CHECKZICOND-NEXT: li a1, 2
1815+
; CHECKZICOND-NEXT: czero.nez a0, a1, a0
1816+
; CHECKZICOND-NEXT: addi a0, a0, 2047
1817+
; CHECKZICOND-NEXT: ret
1818+
%ret = select i1 %cond, i32 2047, i32 2049
1819+
ret i32 %ret
1820+
}
1821+
1822+
define i32 @select_cst6(i1 zeroext %cond) {
1823+
; RV32IM-LABEL: select_cst6:
1824+
; RV32IM: # %bb.0:
1825+
; RV32IM-NEXT: bnez a0, .LBB48_2
1826+
; RV32IM-NEXT: # %bb.1:
1827+
; RV32IM-NEXT: li a0, 2047
1828+
; RV32IM-NEXT: ret
1829+
; RV32IM-NEXT: .LBB48_2:
1830+
; RV32IM-NEXT: lui a0, 1
1831+
; RV32IM-NEXT: addi a0, a0, -2047
1832+
; RV32IM-NEXT: ret
1833+
;
1834+
; RV64IM-LABEL: select_cst6:
1835+
; RV64IM: # %bb.0:
1836+
; RV64IM-NEXT: bnez a0, .LBB48_2
1837+
; RV64IM-NEXT: # %bb.1:
1838+
; RV64IM-NEXT: li a0, 2047
1839+
; RV64IM-NEXT: ret
1840+
; RV64IM-NEXT: .LBB48_2:
1841+
; RV64IM-NEXT: lui a0, 1
1842+
; RV64IM-NEXT: addiw a0, a0, -2047
1843+
; RV64IM-NEXT: ret
1844+
;
1845+
; RV64IMXVTCONDOPS-LABEL: select_cst6:
1846+
; RV64IMXVTCONDOPS: # %bb.0:
1847+
; RV64IMXVTCONDOPS-NEXT: li a1, 2
1848+
; RV64IMXVTCONDOPS-NEXT: vt.maskc a0, a1, a0
1849+
; RV64IMXVTCONDOPS-NEXT: addi a0, a0, 2047
1850+
; RV64IMXVTCONDOPS-NEXT: ret
1851+
;
1852+
; CHECKZICOND-LABEL: select_cst6:
1853+
; CHECKZICOND: # %bb.0:
1854+
; CHECKZICOND-NEXT: li a1, 2
1855+
; CHECKZICOND-NEXT: czero.eqz a0, a1, a0
1856+
; CHECKZICOND-NEXT: addi a0, a0, 2047
1857+
; CHECKZICOND-NEXT: ret
1858+
%ret = select i1 %cond, i32 2049, i32 2047
1859+
ret i32 %ret
1860+
}

0 commit comments

Comments
 (0)