Skip to content

Commit a5d5588

Browse files
committed
[PowerPC] Check value uses in ValueBit tracking
1 parent 1eec357 commit a5d5588

File tree

5 files changed

+163
-78
lines changed

5 files changed

+163
-78
lines changed

llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp

Lines changed: 99 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -1629,30 +1629,41 @@ class BitPermutationSelector {
16291629
bool &Interesting = ValueEntry->first;
16301630
SmallVector<ValueBit, 64> &Bits = ValueEntry->second;
16311631
Bits.resize(NumBits);
1632+
SDValue LHS = V.getNumOperands() > 0 ? V.getOperand(0) : SDValue();
1633+
SDValue RHS = V.getNumOperands() > 1 ? V.getOperand(1) : SDValue();
16321634

16331635
switch (V.getOpcode()) {
16341636
default: break;
16351637
case ISD::ROTL:
1636-
if (isa<ConstantSDNode>(V.getOperand(1))) {
1638+
if (isa<ConstantSDNode>(RHS)) {
16371639
unsigned RotAmt = V.getConstantOperandVal(1);
16381640

1639-
const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1640-
1641-
for (unsigned i = 0; i < NumBits; ++i)
1642-
Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];
1641+
if (LHS.hasOneUse()) {
1642+
const auto &LHSBits = *getValueBits(LHS, NumBits).second;
1643+
for (unsigned i = 0; i < NumBits; ++i)
1644+
Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];
1645+
} else {
1646+
for (unsigned i = 0; i < NumBits; ++i)
1647+
Bits[i] =
1648+
ValueBit(LHS, i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt);
1649+
}
16431650

16441651
return std::make_pair(Interesting = true, &Bits);
16451652
}
16461653
break;
16471654
case ISD::SHL:
16481655
case PPCISD::SHL:
1649-
if (isa<ConstantSDNode>(V.getOperand(1))) {
1656+
if (isa<ConstantSDNode>(RHS)) {
16501657
unsigned ShiftAmt = V.getConstantOperandVal(1);
16511658

1652-
const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1653-
1654-
for (unsigned i = ShiftAmt; i < NumBits; ++i)
1655-
Bits[i] = LHSBits[i - ShiftAmt];
1659+
if (LHS.hasOneUse()) {
1660+
const auto &LHSBits = *getValueBits(LHS, NumBits).second;
1661+
for (unsigned i = ShiftAmt; i < NumBits; ++i)
1662+
Bits[i] = LHSBits[i - ShiftAmt];
1663+
} else {
1664+
for (unsigned i = ShiftAmt; i < NumBits; ++i)
1665+
Bits[i] = ValueBit(LHS, i - ShiftAmt);
1666+
}
16561667

16571668
for (unsigned i = 0; i < ShiftAmt; ++i)
16581669
Bits[i] = ValueBit(ValueBit::ConstZero);
@@ -1662,13 +1673,17 @@ class BitPermutationSelector {
16621673
break;
16631674
case ISD::SRL:
16641675
case PPCISD::SRL:
1665-
if (isa<ConstantSDNode>(V.getOperand(1))) {
1676+
if (isa<ConstantSDNode>(RHS)) {
16661677
unsigned ShiftAmt = V.getConstantOperandVal(1);
16671678

1668-
const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1669-
1670-
for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
1671-
Bits[i] = LHSBits[i + ShiftAmt];
1679+
if (LHS.hasOneUse()) {
1680+
const auto &LHSBits = *getValueBits(LHS, NumBits).second;
1681+
for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
1682+
Bits[i] = LHSBits[i + ShiftAmt];
1683+
} else {
1684+
for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
1685+
Bits[i] = ValueBit(LHS, i + ShiftAmt);
1686+
}
16721687

16731688
for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
16741689
Bits[i] = ValueBit(ValueBit::ConstZero);
@@ -1677,23 +1692,27 @@ class BitPermutationSelector {
16771692
}
16781693
break;
16791694
case ISD::AND:
1680-
if (isa<ConstantSDNode>(V.getOperand(1))) {
1695+
if (isa<ConstantSDNode>(RHS)) {
16811696
uint64_t Mask = V.getConstantOperandVal(1);
16821697

1683-
const SmallVector<ValueBit, 64> *LHSBits;
1698+
const SmallVector<ValueBit, 64> *LHSBits = nullptr;
16841699
// Mark this as interesting, only if the LHS was also interesting. This
16851700
// prevents the overall procedure from matching a single immediate 'and'
16861701
// (which is non-optimal because such an and might be folded with other
16871702
// things if we don't select it here).
1688-
std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits);
1703+
if (LHS.hasOneUse())
1704+
std::tie(Interesting, LHSBits) = getValueBits(LHS, NumBits);
16891705

16901706
for (unsigned i = 0; i < NumBits; ++i)
1691-
if (((Mask >> i) & 1) == 1)
1692-
Bits[i] = (*LHSBits)[i];
1693-
else {
1707+
if (((Mask >> i) & 1) == 1) {
1708+
if (LHS.hasOneUse())
1709+
Bits[i] = (*LHSBits)[i];
1710+
else
1711+
Bits[i] = ValueBit(LHS, i);
1712+
} else {
16941713
// AND instruction masks this bit. If the input is already zero,
16951714
// we have nothing to do here. Otherwise, make the bit ConstZero.
1696-
if ((*LHSBits)[i].isZero())
1715+
if (LHS.hasOneUse() && (*LHSBits)[i].isZero())
16971716
Bits[i] = (*LHSBits)[i];
16981717
else
16991718
Bits[i] = ValueBit(ValueBit::ConstZero);
@@ -1703,44 +1722,54 @@ class BitPermutationSelector {
17031722
}
17041723
break;
17051724
case ISD::OR: {
1706-
const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1707-
const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;
1725+
const auto *LHSBits =
1726+
LHS.hasOneUse() ? getValueBits(LHS, NumBits).second : nullptr;
1727+
const auto *RHSBits =
1728+
RHS.hasOneUse() ? getValueBits(RHS, NumBits).second : nullptr;
17081729

17091730
bool AllDisjoint = true;
17101731
SDValue LastVal = SDValue();
17111732
unsigned LastIdx = 0;
17121733
for (unsigned i = 0; i < NumBits; ++i) {
1713-
if (LHSBits[i].isZero() && RHSBits[i].isZero()) {
1734+
if (LHSBits && RHSBits && (*LHSBits)[i].isZero() &&
1735+
(*RHSBits)[i].isZero()) {
17141736
// If both inputs are known to be zero and one is ConstZero and
17151737
// another is VariableKnownToBeZero, we can select whichever
17161738
// we like. To minimize the number of bit groups, we select
17171739
// VariableKnownToBeZero if this bit is the next bit of the same
17181740
// input variable from the previous bit. Otherwise, we select
17191741
// ConstZero.
1720-
if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal &&
1721-
LHSBits[i].getValueBitIndex() == LastIdx + 1)
1722-
Bits[i] = LHSBits[i];
1723-
else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal &&
1724-
RHSBits[i].getValueBitIndex() == LastIdx + 1)
1725-
Bits[i] = RHSBits[i];
1742+
const auto &LBits = *LHSBits;
1743+
const auto &RBits = *RHSBits;
1744+
if (LBits[i].hasValue() && LBits[i].getValue() == LastVal &&
1745+
LBits[i].getValueBitIndex() == LastIdx + 1)
1746+
Bits[i] = LBits[i];
1747+
else if (RBits[i].hasValue() && RBits[i].getValue() == LastVal &&
1748+
RBits[i].getValueBitIndex() == LastIdx + 1)
1749+
Bits[i] = RBits[i];
17261750
else
17271751
Bits[i] = ValueBit(ValueBit::ConstZero);
1728-
}
1729-
else if (LHSBits[i].isZero())
1730-
Bits[i] = RHSBits[i];
1731-
else if (RHSBits[i].isZero())
1732-
Bits[i] = LHSBits[i];
1733-
else {
1752+
} else if (LHSBits && (*LHSBits)[i].isZero()) {
1753+
if (RHSBits)
1754+
Bits[i] = (*RHSBits)[i];
1755+
else
1756+
Bits[i] = ValueBit(RHS, i);
1757+
} else if (RHSBits && (*RHSBits)[i].isZero()) {
1758+
if (LHSBits)
1759+
Bits[i] = (*LHSBits)[i];
1760+
else
1761+
Bits[i] = ValueBit(LHS, i);
1762+
} else {
17341763
AllDisjoint = false;
17351764
break;
17361765
}
17371766
// We remember the value and bit index of this bit.
17381767
if (Bits[i].hasValue()) {
17391768
LastVal = Bits[i].getValue();
17401769
LastIdx = Bits[i].getValueBitIndex();
1741-
}
1742-
else {
1743-
if (LastVal) LastVal = SDValue();
1770+
} else {
1771+
if (LastVal)
1772+
LastVal = SDValue();
17441773
LastIdx = 0;
17451774
}
17461775
}
@@ -1752,33 +1781,34 @@ class BitPermutationSelector {
17521781
}
17531782
case ISD::ZERO_EXTEND: {
17541783
// We support only the case with zero extension from i32 to i64 so far.
1755-
if (V.getValueType() != MVT::i64 ||
1756-
V.getOperand(0).getValueType() != MVT::i32)
1784+
if (V.getValueType() != MVT::i64 || LHS.getValueType() != MVT::i32)
17571785
break;
17581786

1759-
const SmallVector<ValueBit, 64> *LHSBits;
17601787
const unsigned NumOperandBits = 32;
1761-
std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
1762-
NumOperandBits);
1763-
1764-
for (unsigned i = 0; i < NumOperandBits; ++i)
1765-
Bits[i] = (*LHSBits)[i];
1788+
if (LHS.hasOneUse()) {
1789+
const SmallVector<ValueBit, 64> *LHSBits;
1790+
std::tie(Interesting, LHSBits) = getValueBits(LHS, NumOperandBits);
1791+
for (unsigned i = 0; i < NumOperandBits; ++i)
1792+
Bits[i] = (*LHSBits)[i];
1793+
} else {
1794+
for (unsigned i = 0; i < NumOperandBits; ++i)
1795+
Bits[i] = ValueBit(LHS, i);
1796+
}
17661797

17671798
for (unsigned i = NumOperandBits; i < NumBits; ++i)
17681799
Bits[i] = ValueBit(ValueBit::ConstZero);
17691800

17701801
return std::make_pair(Interesting, &Bits);
17711802
}
17721803
case ISD::TRUNCATE: {
1773-
EVT FromType = V.getOperand(0).getValueType();
1804+
EVT FromType = LHS.getValueType();
17741805
EVT ToType = V.getValueType();
17751806
// We support only the case with truncate from i64 to i32.
1776-
if (FromType != MVT::i64 || ToType != MVT::i32)
1807+
if (FromType != MVT::i64 || ToType != MVT::i32 || !LHS.hasOneUse())
17771808
break;
17781809
const unsigned NumAllBits = FromType.getSizeInBits();
17791810
SmallVector<ValueBit, 64> *InBits;
1780-
std::tie(Interesting, InBits) = getValueBits(V.getOperand(0),
1781-
NumAllBits);
1811+
std::tie(Interesting, InBits) = getValueBits(LHS, NumAllBits);
17821812
const unsigned NumValidBits = ToType.getSizeInBits();
17831813

17841814
// A 32-bit instruction cannot touch upper 32-bit part of 64-bit value.
@@ -1801,22 +1831,28 @@ class BitPermutationSelector {
18011831
// For AssertZext, we look through the operand and
18021832
// mark the bits known to be zero.
18031833
const SmallVector<ValueBit, 64> *LHSBits;
1804-
std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
1805-
NumBits);
18061834

1807-
EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT();
1835+
EVT FromType = cast<VTSDNode>(RHS)->getVT();
18081836
const unsigned NumValidBits = FromType.getSizeInBits();
1809-
for (unsigned i = 0; i < NumValidBits; ++i)
1810-
Bits[i] = (*LHSBits)[i];
18111837

18121838
// These bits are known to be zero but the AssertZext may be from a value
18131839
// that already has some constant zero bits (i.e. from a masking and).
1814-
for (unsigned i = NumValidBits; i < NumBits; ++i)
1815-
Bits[i] = (*LHSBits)[i].hasValue()
1816-
? ValueBit((*LHSBits)[i].getValue(),
1817-
(*LHSBits)[i].getValueBitIndex(),
1818-
ValueBit::VariableKnownToBeZero)
1819-
: ValueBit(ValueBit::ConstZero);
1840+
if (LHS.hasOneUse()) {
1841+
std::tie(Interesting, LHSBits) = getValueBits(LHS, NumBits);
1842+
for (unsigned i = 0; i < NumValidBits; ++i)
1843+
Bits[i] = (*LHSBits)[i];
1844+
for (unsigned i = NumValidBits; i < NumBits; ++i)
1845+
Bits[i] = (*LHSBits)[i].hasValue()
1846+
? ValueBit((*LHSBits)[i].getValue(),
1847+
(*LHSBits)[i].getValueBitIndex(),
1848+
ValueBit::VariableKnownToBeZero)
1849+
: ValueBit(ValueBit::ConstZero);
1850+
} else {
1851+
for (unsigned i = 0; i < NumValidBits; ++i)
1852+
Bits[i] = ValueBit(LHS, i);
1853+
for (unsigned i = NumValidBits; i < NumBits; ++i)
1854+
Bits[i] = ValueBit(LHS, i, ValueBit::VariableKnownToBeZero);
1855+
}
18201856

18211857
return std::make_pair(Interesting, &Bits);
18221858
}

llvm/test/CodeGen/PowerPC/int128_ldst.ll

Lines changed: 8 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -208,11 +208,10 @@ entry:
208208
define dso_local i128 @ld_or2___int128___int128(i64 %ptr, i8 zeroext %off) {
209209
; CHECK-LABEL: ld_or2___int128___int128:
210210
; CHECK: # %bb.0: # %entry
211-
; CHECK-NEXT: rldicr 5, 3, 0, 51
212-
; CHECK-NEXT: rotldi 6, 3, 52
213-
; CHECK-NEXT: ldx 3, 5, 4
214-
; CHECK-NEXT: rldimi 4, 6, 12, 0
215-
; CHECK-NEXT: ld 4, 8(4)
211+
; CHECK-NEXT: rldicr 3, 3, 0, 51
212+
; CHECK-NEXT: or 5, 3, 4
213+
; CHECK-NEXT: ldx 3, 3, 4
214+
; CHECK-NEXT: ld 4, 8(5)
216215
; CHECK-NEXT: blr
217216
entry:
218217
%and = and i64 %ptr, -4096
@@ -740,11 +739,10 @@ entry:
740739
define dso_local void @st_or2__int128___int128(i64 %ptr, i8 zeroext %off, i128 %str) {
741740
; CHECK-LABEL: st_or2__int128___int128:
742741
; CHECK: # %bb.0: # %entry
743-
; CHECK-NEXT: rldicr 7, 3, 0, 51
744-
; CHECK-NEXT: rotldi 3, 3, 52
745-
; CHECK-NEXT: stdx 5, 7, 4
746-
; CHECK-NEXT: rldimi 4, 3, 12, 0
747-
; CHECK-NEXT: std 6, 8(4)
742+
; CHECK-NEXT: rldicr 3, 3, 0, 51
743+
; CHECK-NEXT: or 7, 3, 4
744+
; CHECK-NEXT: stdx 5, 3, 4
745+
; CHECK-NEXT: std 6, 8(7)
748746
; CHECK-NEXT: blr
749747
entry:
750748
%and = and i64 %ptr, -4096

llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -639,9 +639,9 @@ define i64 @test_ds_cross_basic_blocks(ptr %arg, i32 signext %arg1) {
639639
; CHECK-NEXT: #
640640
; CHECK-NEXT: lbzu r0, 1(r5)
641641
; CHECK-NEXT: mulli r29, r0, 171
642-
; CHECK-NEXT: rlwinm r28, r29, 24, 8, 30
643-
; CHECK-NEXT: srwi r29, r29, 9
644-
; CHECK-NEXT: add r29, r29, r28
642+
; CHECK-NEXT: srwi r28, r29, 9
643+
; CHECK-NEXT: rlwinm r29, r29, 24, 8, 30
644+
; CHECK-NEXT: add r29, r28, r29
645645
; CHECK-NEXT: sub r0, r0, r29
646646
; CHECK-NEXT: clrlwi r0, r0, 24
647647
; CHECK-NEXT: cmplwi r0, 1

llvm/test/CodeGen/PowerPC/prefer-dqform.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ define void @test(ptr dereferenceable(4) %.ial, ptr noalias dereferenceable(4) %
3535
; CHECK-P9-NEXT: addi r8, r5, -8
3636
; CHECK-P9-NEXT: lwz r5, 0(r7)
3737
; CHECK-P9-NEXT: extsw r7, r4
38-
; CHECK-P9-NEXT: rldic r4, r3, 3, 29
38+
; CHECK-P9-NEXT: sldi r4, r3, 3
3939
; CHECK-P9-NEXT: sub r3, r7, r3
4040
; CHECK-P9-NEXT: addi r10, r4, 8
4141
; CHECK-P9-NEXT: lxvdsx vs0, 0, r8
@@ -87,7 +87,7 @@ define void @test(ptr dereferenceable(4) %.ial, ptr noalias dereferenceable(4) %
8787
; CHECK-P10-NEXT: addi r8, r5, -8
8888
; CHECK-P10-NEXT: lwz r5, 0(r7)
8989
; CHECK-P10-NEXT: extsw r7, r4
90-
; CHECK-P10-NEXT: rldic r4, r3, 3, 29
90+
; CHECK-P10-NEXT: sldi r4, r3, 3
9191
; CHECK-P10-NEXT: addi r10, r4, 8
9292
; CHECK-P10-NEXT: sub r3, r7, r3
9393
; CHECK-P10-NEXT: lxvdsx vs0, 0, r8

llvm/test/CodeGen/PowerPC/rldimi.ll

Lines changed: 51 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,51 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2+
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s
3+
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-ibm-aix -mcpu=pwr8 | FileCheck %s
4+
5+
define i64 @rldimi1(i64 %a) {
6+
; CHECK-LABEL: rldimi1:
7+
; CHECK: # %bb.0: # %entry
8+
; CHECK-NEXT: rldimi 3, 3, 8, 0
9+
; CHECK-NEXT: blr
10+
entry:
11+
%x0 = shl i64 %a, 8
12+
%x1 = and i64 %a, 255
13+
%x2 = or i64 %x0, %x1
14+
ret i64 %x2
15+
}
16+
17+
define i64 @rldimi2(i64 %a) {
18+
; CHECK-LABEL: rldimi2:
19+
; CHECK: # %bb.0: # %entry
20+
; CHECK-NEXT: rldimi 3, 3, 8, 0
21+
; CHECK-NEXT: rldimi 3, 3, 16, 0
22+
; CHECK-NEXT: blr
23+
entry:
24+
%x0 = shl i64 %a, 8
25+
%x1 = and i64 %a, 255
26+
%x2 = or i64 %x0, %x1
27+
%x3 = shl i64 %x2, 16
28+
%x4 = and i64 %x2, 65535
29+
%x5 = or i64 %x3, %x4
30+
ret i64 %x5
31+
}
32+
33+
define i64 @rldimi3(i64 %a) {
34+
; CHECK-LABEL: rldimi3:
35+
; CHECK: # %bb.0: # %entry
36+
; CHECK-NEXT: rldimi 3, 3, 8, 0
37+
; CHECK-NEXT: rldimi 3, 3, 16, 0
38+
; CHECK-NEXT: rlwinm 3, 3, 0, 1, 0
39+
; CHECK-NEXT: blr
40+
entry:
41+
%0 = shl i64 %a, 8
42+
%1 = and i64 %a, 255
43+
%2 = or i64 %0, %1
44+
%3 = shl i64 %2, 16
45+
%4 = and i64 %2, 65535
46+
%5 = or i64 %3, %4
47+
%6 = shl i64 %5, 32
48+
%7 = and i64 %5, 4294967295
49+
%8 = or i64 %6, %7
50+
ret i64 %8
51+
}

0 commit comments

Comments
 (0)