Skip to content

Commit e100d2b

Browse files
authored
[DAGCombiner] Fold subtraction if above a constant threshold to umin (#135194)
Like #134235, but with a constant. This pattern appears in the Adler-32 checksum calculation in zlib.

Example:

    unsigned adler32_mod(unsigned x) { return x >= 65521u ? x - 65521u : x; }

Before, on RISC-V:

    lui a1, 16
    lui a2, 1048560
    addiw a1, a1, -16
    sltu a1, a1, a0
    negw a1, a1
    addi a2, a2, 15
    and a1, a1, a2
    addw a0, a0, a1

Or, with Zicond:

    lui a1, 16
    lui a2, 1048560
    addiw a1, a1, -16
    sltu a1, a1, a0
    addi a2, a2, 15
    czero.eqz a1, a2, a1
    addw a0, a0, a1

After, with Zbb:

    lui a1, 1048560
    addi a1, a1, 15
    addw a1, a0, a1
    minu a0, a1, a0
1 parent c12cb0c commit e100d2b

File tree

3 files changed

+268
-142
lines changed

3 files changed

+268
-142
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -845,6 +845,13 @@ namespace {
845845
return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
846846
}
847847

848+
/// Returns true if a fold that produces ISD::UMIN on values of type \p VT is
/// worthwhile, i.e. UMIN will be legal once \p VT has been legalized.
///
/// The type conversion for \p VT is queried from the target lowering info.
/// The result is true only when the conversion action is TypeLegal or
/// TypePromoteInteger and ISD::UMIN is reported legal (isOperationLegal) for
/// the type that conversion yields. Any other conversion action returns false.
bool hasUMin(EVT VT) const {
849+
auto LK = TLI.getTypeConversion(*DAG.getContext(), VT);
850+
return (LK.first == TargetLoweringBase::TypeLegal ||
851+
LK.first == TargetLoweringBase::TypePromoteInteger) &&
852+
TLI.isOperationLegal(ISD::UMIN, LK.second);
853+
}
854+
848855
public:
849856
/// Runs the dag combiner on all nodes in the work list
850857
void Run(CombineLevel AtLevel);
@@ -4253,10 +4260,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
42534260

42544261
// (sub x, (select (ult x, y), 0, y)) -> (umin x, (sub x, y))
42554262
// (sub x, (select (uge x, y), y, 0)) -> (umin x, (sub x, y))
4256-
auto LK = TLI.getTypeConversion(*DAG.getContext(), VT);
4257-
if ((LK.first == TargetLoweringBase::TypeLegal ||
4258-
LK.first == TargetLoweringBase::TypePromoteInteger) &&
4259-
TLI.isOperationLegal(ISD::UMIN, LK.second)) {
4263+
if (hasUMin(VT)) {
42604264
SDValue Y;
42614265
if (sd_match(N1, m_OneUse(m_Select(m_SetCC(m_Specific(N0), m_Value(Y),
42624266
m_SpecificCondCode(ISD::SETULT)),
@@ -12074,6 +12078,17 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
1207412078

1207512079
if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
1207612080
return NewSel;
12081+
12082+
// (select (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
12083+
// (select (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
12084+
APInt C;
12085+
if (sd_match(Cond1, m_ConstInt(C)) && hasUMin(VT)) {
12086+
if ((CC == ISD::SETUGT && Cond0 == N2 &&
12087+
sd_match(N1, m_Add(m_Specific(N2), m_SpecificInt(~C)))) ||
12088+
(CC == ISD::SETULT && Cond0 == N1 &&
12089+
sd_match(N2, m_Add(m_Specific(N1), m_SpecificInt(-C)))))
12090+
return DAG.getNode(ISD::UMIN, DL, VT, N1, N2);
12091+
}
1207712092
}
1207812093

1207912094
if (!VT.isVector())

llvm/test/CodeGen/RISCV/rv32zbb.ll

Lines changed: 113 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1720,45 +1720,67 @@ define i32 @sub_if_uge_multiuse_cmp_store_i32(i32 %x, i32 %y, ptr %z) {
17201720
}
17211721

17221722
; Conditional subtraction of a constant on i8: yields %x - 13 when %x is
; unsigned-greater-than 12, otherwise %x unchanged. The base-ISA expectations
; keep the sltiu/addi/andi/add mask sequence; the Zbb expectations fold the
; select into addi + zext.b + minu.
define i8 @sub_if_uge_C_i8(i8 zeroext %x) {
1723-
; CHECK-LABEL: sub_if_uge_C_i8:
1724-
; CHECK: # %bb.0:
1725-
; CHECK-NEXT: sltiu a1, a0, 13
1726-
; CHECK-NEXT: addi a1, a1, -1
1727-
; CHECK-NEXT: andi a1, a1, -13
1728-
; CHECK-NEXT: add a0, a0, a1
1729-
; CHECK-NEXT: ret
1723+
; RV32I-LABEL: sub_if_uge_C_i8:
1724+
; RV32I: # %bb.0:
1725+
; RV32I-NEXT: sltiu a1, a0, 13
1726+
; RV32I-NEXT: addi a1, a1, -1
1727+
; RV32I-NEXT: andi a1, a1, -13
1728+
; RV32I-NEXT: add a0, a0, a1
1729+
; RV32I-NEXT: ret
1730+
;
1731+
; RV32ZBB-LABEL: sub_if_uge_C_i8:
1732+
; RV32ZBB: # %bb.0:
1733+
; RV32ZBB-NEXT: addi a1, a0, -13
1734+
; RV32ZBB-NEXT: zext.b a1, a1
1735+
; RV32ZBB-NEXT: minu a0, a1, a0
1736+
; RV32ZBB-NEXT: ret
17301737
%cmp = icmp ugt i8 %x, 12
17311738
%sub = add i8 %x, -13
17321739
%conv4 = select i1 %cmp, i8 %sub, i8 %x
17331740
ret i8 %conv4
17341741
}
17351742

17361743
; Conditional subtraction of a constant on i16: yields %x - 251 when %x is
; unsigned-greater-than 250, otherwise %x unchanged. The base-ISA expectations
; keep the sltiu/addi/andi/add mask sequence; the Zbb expectations fold the
; select into addi + zext.h + minu.
define i16 @sub_if_uge_C_i16(i16 zeroext %x) {
1737-
; CHECK-LABEL: sub_if_uge_C_i16:
1738-
; CHECK: # %bb.0:
1739-
; CHECK-NEXT: sltiu a1, a0, 251
1740-
; CHECK-NEXT: addi a1, a1, -1
1741-
; CHECK-NEXT: andi a1, a1, -251
1742-
; CHECK-NEXT: add a0, a0, a1
1743-
; CHECK-NEXT: ret
1744+
; RV32I-LABEL: sub_if_uge_C_i16:
1745+
; RV32I: # %bb.0:
1746+
; RV32I-NEXT: sltiu a1, a0, 251
1747+
; RV32I-NEXT: addi a1, a1, -1
1748+
; RV32I-NEXT: andi a1, a1, -251
1749+
; RV32I-NEXT: add a0, a0, a1
1750+
; RV32I-NEXT: ret
1751+
;
1752+
; RV32ZBB-LABEL: sub_if_uge_C_i16:
1753+
; RV32ZBB: # %bb.0:
1754+
; RV32ZBB-NEXT: addi a1, a0, -251
1755+
; RV32ZBB-NEXT: zext.h a1, a1
1756+
; RV32ZBB-NEXT: minu a0, a1, a0
1757+
; RV32ZBB-NEXT: ret
17441758
%cmp = icmp ugt i16 %x, 250
17451759
%sub = add i16 %x, -251
17461760
%conv4 = select i1 %cmp, i16 %sub, i16 %x
17471761
ret i16 %conv4
17481762
}
17491763

17501764
define i32 @sub_if_uge_C_i32(i32 signext %x) {
1751-
; CHECK-LABEL: sub_if_uge_C_i32:
1752-
; CHECK: # %bb.0:
1753-
; CHECK-NEXT: lui a1, 16
1754-
; CHECK-NEXT: lui a2, 1048560
1755-
; CHECK-NEXT: addi a1, a1, -16
1756-
; CHECK-NEXT: sltu a1, a1, a0
1757-
; CHECK-NEXT: neg a1, a1
1758-
; CHECK-NEXT: addi a2, a2, 15
1759-
; CHECK-NEXT: and a1, a1, a2
1760-
; CHECK-NEXT: add a0, a0, a1
1761-
; CHECK-NEXT: ret
1765+
; RV32I-LABEL: sub_if_uge_C_i32:
1766+
; RV32I: # %bb.0:
1767+
; RV32I-NEXT: lui a1, 16
1768+
; RV32I-NEXT: lui a2, 1048560
1769+
; RV32I-NEXT: addi a1, a1, -16
1770+
; RV32I-NEXT: sltu a1, a1, a0
1771+
; RV32I-NEXT: neg a1, a1
1772+
; RV32I-NEXT: addi a2, a2, 15
1773+
; RV32I-NEXT: and a1, a1, a2
1774+
; RV32I-NEXT: add a0, a0, a1
1775+
; RV32I-NEXT: ret
1776+
;
1777+
; RV32ZBB-LABEL: sub_if_uge_C_i32:
1778+
; RV32ZBB: # %bb.0:
1779+
; RV32ZBB-NEXT: lui a1, 1048560
1780+
; RV32ZBB-NEXT: addi a1, a1, 15
1781+
; RV32ZBB-NEXT: add a1, a0, a1
1782+
; RV32ZBB-NEXT: minu a0, a1, a0
1783+
; RV32ZBB-NEXT: ret
17621784
%cmp = icmp ugt i32 %x, 65520
17631785
%sub = add i32 %x, -65521
17641786
%cond = select i1 %cmp, i32 %sub, i32 %x
@@ -1797,18 +1819,30 @@ define i64 @sub_if_uge_C_i64(i64 %x) {
17971819
}
17981820

17991821
define i32 @sub_if_uge_C_multiuse_cmp_i32(i32 signext %x, ptr %z) {
1800-
; CHECK-LABEL: sub_if_uge_C_multiuse_cmp_i32:
1801-
; CHECK: # %bb.0:
1802-
; CHECK-NEXT: lui a2, 16
1803-
; CHECK-NEXT: lui a3, 1048560
1804-
; CHECK-NEXT: addi a2, a2, -16
1805-
; CHECK-NEXT: sltu a2, a2, a0
1806-
; CHECK-NEXT: neg a4, a2
1807-
; CHECK-NEXT: addi a3, a3, 15
1808-
; CHECK-NEXT: and a3, a4, a3
1809-
; CHECK-NEXT: add a0, a0, a3
1810-
; CHECK-NEXT: sw a2, 0(a1)
1811-
; CHECK-NEXT: ret
1822+
; RV32I-LABEL: sub_if_uge_C_multiuse_cmp_i32:
1823+
; RV32I: # %bb.0:
1824+
; RV32I-NEXT: lui a2, 16
1825+
; RV32I-NEXT: lui a3, 1048560
1826+
; RV32I-NEXT: addi a2, a2, -16
1827+
; RV32I-NEXT: sltu a2, a2, a0
1828+
; RV32I-NEXT: neg a4, a2
1829+
; RV32I-NEXT: addi a3, a3, 15
1830+
; RV32I-NEXT: and a3, a4, a3
1831+
; RV32I-NEXT: add a0, a0, a3
1832+
; RV32I-NEXT: sw a2, 0(a1)
1833+
; RV32I-NEXT: ret
1834+
;
1835+
; RV32ZBB-LABEL: sub_if_uge_C_multiuse_cmp_i32:
1836+
; RV32ZBB: # %bb.0:
1837+
; RV32ZBB-NEXT: lui a2, 16
1838+
; RV32ZBB-NEXT: lui a3, 1048560
1839+
; RV32ZBB-NEXT: addi a2, a2, -16
1840+
; RV32ZBB-NEXT: addi a3, a3, 15
1841+
; RV32ZBB-NEXT: sltu a2, a2, a0
1842+
; RV32ZBB-NEXT: add a3, a0, a3
1843+
; RV32ZBB-NEXT: minu a0, a3, a0
1844+
; RV32ZBB-NEXT: sw a2, 0(a1)
1845+
; RV32ZBB-NEXT: ret
18121846
%cmp = icmp ugt i32 %x, 65520
18131847
%conv = zext i1 %cmp to i32
18141848
store i32 %conv, ptr %z, align 4
@@ -1818,20 +1852,29 @@ define i32 @sub_if_uge_C_multiuse_cmp_i32(i32 signext %x, ptr %z) {
18181852
}
18191853

18201854
define i32 @sub_if_uge_C_multiuse_sub_i32(i32 signext %x, ptr %z) {
1821-
; CHECK-LABEL: sub_if_uge_C_multiuse_sub_i32:
1822-
; CHECK: # %bb.0:
1823-
; CHECK-NEXT: lui a2, 1048560
1824-
; CHECK-NEXT: lui a3, 16
1825-
; CHECK-NEXT: addi a2, a2, 15
1826-
; CHECK-NEXT: add a2, a0, a2
1827-
; CHECK-NEXT: addi a3, a3, -16
1828-
; CHECK-NEXT: sw a2, 0(a1)
1829-
; CHECK-NEXT: bltu a3, a0, .LBB62_2
1830-
; CHECK-NEXT: # %bb.1:
1831-
; CHECK-NEXT: mv a2, a0
1832-
; CHECK-NEXT: .LBB62_2:
1833-
; CHECK-NEXT: mv a0, a2
1834-
; CHECK-NEXT: ret
1855+
; RV32I-LABEL: sub_if_uge_C_multiuse_sub_i32:
1856+
; RV32I: # %bb.0:
1857+
; RV32I-NEXT: lui a2, 1048560
1858+
; RV32I-NEXT: lui a3, 16
1859+
; RV32I-NEXT: addi a2, a2, 15
1860+
; RV32I-NEXT: add a2, a0, a2
1861+
; RV32I-NEXT: addi a3, a3, -16
1862+
; RV32I-NEXT: sw a2, 0(a1)
1863+
; RV32I-NEXT: bltu a3, a0, .LBB62_2
1864+
; RV32I-NEXT: # %bb.1:
1865+
; RV32I-NEXT: mv a2, a0
1866+
; RV32I-NEXT: .LBB62_2:
1867+
; RV32I-NEXT: mv a0, a2
1868+
; RV32I-NEXT: ret
1869+
;
1870+
; RV32ZBB-LABEL: sub_if_uge_C_multiuse_sub_i32:
1871+
; RV32ZBB: # %bb.0:
1872+
; RV32ZBB-NEXT: lui a2, 1048560
1873+
; RV32ZBB-NEXT: addi a2, a2, 15
1874+
; RV32ZBB-NEXT: add a2, a0, a2
1875+
; RV32ZBB-NEXT: minu a0, a2, a0
1876+
; RV32ZBB-NEXT: sw a2, 0(a1)
1877+
; RV32ZBB-NEXT: ret
18351878
%sub = add i32 %x, -65521
18361879
store i32 %sub, ptr %z, align 4
18371880
%cmp = icmp ugt i32 %x, 65520
@@ -1840,17 +1883,25 @@ define i32 @sub_if_uge_C_multiuse_sub_i32(i32 signext %x, ptr %z) {
18401883
}
18411884

18421885
define i32 @sub_if_uge_C_swapped_i32(i32 %x) {
1843-
; CHECK-LABEL: sub_if_uge_C_swapped_i32:
1844-
; CHECK: # %bb.0:
1845-
; CHECK-NEXT: lui a1, 16
1846-
; CHECK-NEXT: lui a2, 1048560
1847-
; CHECK-NEXT: addi a1, a1, -15
1848-
; CHECK-NEXT: sltu a1, a0, a1
1849-
; CHECK-NEXT: addi a1, a1, -1
1850-
; CHECK-NEXT: addi a2, a2, 15
1851-
; CHECK-NEXT: and a1, a1, a2
1852-
; CHECK-NEXT: add a0, a0, a1
1853-
; CHECK-NEXT: ret
1886+
; RV32I-LABEL: sub_if_uge_C_swapped_i32:
1887+
; RV32I: # %bb.0:
1888+
; RV32I-NEXT: lui a1, 16
1889+
; RV32I-NEXT: lui a2, 1048560
1890+
; RV32I-NEXT: addi a1, a1, -15
1891+
; RV32I-NEXT: sltu a1, a0, a1
1892+
; RV32I-NEXT: addi a1, a1, -1
1893+
; RV32I-NEXT: addi a2, a2, 15
1894+
; RV32I-NEXT: and a1, a1, a2
1895+
; RV32I-NEXT: add a0, a0, a1
1896+
; RV32I-NEXT: ret
1897+
;
1898+
; RV32ZBB-LABEL: sub_if_uge_C_swapped_i32:
1899+
; RV32ZBB: # %bb.0:
1900+
; RV32ZBB-NEXT: lui a1, 1048560
1901+
; RV32ZBB-NEXT: addi a1, a1, 15
1902+
; RV32ZBB-NEXT: add a1, a0, a1
1903+
; RV32ZBB-NEXT: minu a0, a0, a1
1904+
; RV32ZBB-NEXT: ret
18541905
%cmp = icmp ult i32 %x, 65521
18551906
%sub = add i32 %x, -65521
18561907
%cond = select i1 %cmp, i32 %x, i32 %sub

0 commit comments

Comments
 (0)