Skip to content

Commit ebaafdd

Browse files
committed
[PowerPC] Check value uses in ValueBit tracking
1 parent cc62782 commit ebaafdd

File tree

5 files changed

+117
-92
lines changed

5 files changed

+117
-92
lines changed

llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp

Lines changed: 99 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -1630,30 +1630,41 @@ class BitPermutationSelector {
16301630
bool &Interesting = ValueEntry->first;
16311631
SmallVector<ValueBit, 64> &Bits = ValueEntry->second;
16321632
Bits.resize(NumBits);
1633+
SDValue LHS = V.getNumOperands() > 0 ? V.getOperand(0) : SDValue();
1634+
SDValue RHS = V.getNumOperands() > 1 ? V.getOperand(1) : SDValue();
16331635

16341636
switch (V.getOpcode()) {
16351637
default: break;
16361638
case ISD::ROTL:
1637-
if (isa<ConstantSDNode>(V.getOperand(1))) {
1639+
if (isa<ConstantSDNode>(RHS)) {
16381640
unsigned RotAmt = V.getConstantOperandVal(1);
16391641

1640-
const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1641-
1642-
for (unsigned i = 0; i < NumBits; ++i)
1643-
Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];
1642+
if (LHS.hasOneUse()) {
1643+
const auto &LHSBits = *getValueBits(LHS, NumBits).second;
1644+
for (unsigned i = 0; i < NumBits; ++i)
1645+
Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];
1646+
} else {
1647+
for (unsigned i = 0; i < NumBits; ++i)
1648+
Bits[i] =
1649+
ValueBit(LHS, i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt);
1650+
}
16441651

16451652
return std::make_pair(Interesting = true, &Bits);
16461653
}
16471654
break;
16481655
case ISD::SHL:
16491656
case PPCISD::SHL:
1650-
if (isa<ConstantSDNode>(V.getOperand(1))) {
1657+
if (isa<ConstantSDNode>(RHS)) {
16511658
unsigned ShiftAmt = V.getConstantOperandVal(1);
16521659

1653-
const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1654-
1655-
for (unsigned i = ShiftAmt; i < NumBits; ++i)
1656-
Bits[i] = LHSBits[i - ShiftAmt];
1660+
if (LHS.hasOneUse()) {
1661+
const auto &LHSBits = *getValueBits(LHS, NumBits).second;
1662+
for (unsigned i = ShiftAmt; i < NumBits; ++i)
1663+
Bits[i] = LHSBits[i - ShiftAmt];
1664+
} else {
1665+
for (unsigned i = ShiftAmt; i < NumBits; ++i)
1666+
Bits[i] = ValueBit(LHS, i - ShiftAmt);
1667+
}
16571668

16581669
for (unsigned i = 0; i < ShiftAmt; ++i)
16591670
Bits[i] = ValueBit(ValueBit::ConstZero);
@@ -1663,13 +1674,17 @@ class BitPermutationSelector {
16631674
break;
16641675
case ISD::SRL:
16651676
case PPCISD::SRL:
1666-
if (isa<ConstantSDNode>(V.getOperand(1))) {
1677+
if (isa<ConstantSDNode>(RHS)) {
16671678
unsigned ShiftAmt = V.getConstantOperandVal(1);
16681679

1669-
const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1670-
1671-
for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
1672-
Bits[i] = LHSBits[i + ShiftAmt];
1680+
if (LHS.hasOneUse()) {
1681+
const auto &LHSBits = *getValueBits(LHS, NumBits).second;
1682+
for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
1683+
Bits[i] = LHSBits[i + ShiftAmt];
1684+
} else {
1685+
for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
1686+
Bits[i] = ValueBit(LHS, i + ShiftAmt);
1687+
}
16731688

16741689
for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
16751690
Bits[i] = ValueBit(ValueBit::ConstZero);
@@ -1678,23 +1693,27 @@ class BitPermutationSelector {
16781693
}
16791694
break;
16801695
case ISD::AND:
1681-
if (isa<ConstantSDNode>(V.getOperand(1))) {
1696+
if (isa<ConstantSDNode>(RHS)) {
16821697
uint64_t Mask = V.getConstantOperandVal(1);
16831698

1684-
const SmallVector<ValueBit, 64> *LHSBits;
1699+
const SmallVector<ValueBit, 64> *LHSBits = nullptr;
16851700
// Mark this as interesting, only if the LHS was also interesting. This
16861701
// prevents the overall procedure from matching a single immediate 'and'
16871702
// (which is non-optimal because such an and might be folded with other
16881703
// things if we don't select it here).
1689-
std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits);
1704+
if (LHS.hasOneUse())
1705+
std::tie(Interesting, LHSBits) = getValueBits(LHS, NumBits);
16901706

16911707
for (unsigned i = 0; i < NumBits; ++i)
1692-
if (((Mask >> i) & 1) == 1)
1693-
Bits[i] = (*LHSBits)[i];
1694-
else {
1708+
if (((Mask >> i) & 1) == 1) {
1709+
if (LHS.hasOneUse())
1710+
Bits[i] = (*LHSBits)[i];
1711+
else
1712+
Bits[i] = ValueBit(LHS, i);
1713+
} else {
16951714
// AND instruction masks this bit. If the input is already zero,
16961715
// we have nothing to do here. Otherwise, make the bit ConstZero.
1697-
if ((*LHSBits)[i].isZero())
1716+
if (LHS.hasOneUse() && (*LHSBits)[i].isZero())
16981717
Bits[i] = (*LHSBits)[i];
16991718
else
17001719
Bits[i] = ValueBit(ValueBit::ConstZero);
@@ -1704,44 +1723,54 @@ class BitPermutationSelector {
17041723
}
17051724
break;
17061725
case ISD::OR: {
1707-
const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1708-
const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;
1726+
const auto *LHSBits =
1727+
LHS.hasOneUse() ? getValueBits(LHS, NumBits).second : nullptr;
1728+
const auto *RHSBits =
1729+
RHS.hasOneUse() ? getValueBits(RHS, NumBits).second : nullptr;
17091730

17101731
bool AllDisjoint = true;
17111732
SDValue LastVal = SDValue();
17121733
unsigned LastIdx = 0;
17131734
for (unsigned i = 0; i < NumBits; ++i) {
1714-
if (LHSBits[i].isZero() && RHSBits[i].isZero()) {
1735+
if (LHSBits && RHSBits && (*LHSBits)[i].isZero() &&
1736+
(*RHSBits)[i].isZero()) {
17151737
// If both inputs are known to be zero and one is ConstZero and
17161738
// another is VariableKnownToBeZero, we can select whichever
17171739
// we like. To minimize the number of bit groups, we select
17181740
// VariableKnownToBeZero if this bit is the next bit of the same
17191741
// input variable from the previous bit. Otherwise, we select
17201742
// ConstZero.
1721-
if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal &&
1722-
LHSBits[i].getValueBitIndex() == LastIdx + 1)
1723-
Bits[i] = LHSBits[i];
1724-
else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal &&
1725-
RHSBits[i].getValueBitIndex() == LastIdx + 1)
1726-
Bits[i] = RHSBits[i];
1743+
const auto &LBits = *LHSBits;
1744+
const auto &RBits = *RHSBits;
1745+
if (LBits[i].hasValue() && LBits[i].getValue() == LastVal &&
1746+
LBits[i].getValueBitIndex() == LastIdx + 1)
1747+
Bits[i] = LBits[i];
1748+
else if (RBits[i].hasValue() && RBits[i].getValue() == LastVal &&
1749+
RBits[i].getValueBitIndex() == LastIdx + 1)
1750+
Bits[i] = RBits[i];
17271751
else
17281752
Bits[i] = ValueBit(ValueBit::ConstZero);
1729-
}
1730-
else if (LHSBits[i].isZero())
1731-
Bits[i] = RHSBits[i];
1732-
else if (RHSBits[i].isZero())
1733-
Bits[i] = LHSBits[i];
1734-
else {
1753+
} else if (LHSBits && (*LHSBits)[i].isZero()) {
1754+
if (RHSBits)
1755+
Bits[i] = (*RHSBits)[i];
1756+
else
1757+
Bits[i] = ValueBit(RHS, i);
1758+
} else if (RHSBits && (*RHSBits)[i].isZero()) {
1759+
if (LHSBits)
1760+
Bits[i] = (*LHSBits)[i];
1761+
else
1762+
Bits[i] = ValueBit(LHS, i);
1763+
} else {
17351764
AllDisjoint = false;
17361765
break;
17371766
}
17381767
// We remember the value and bit index of this bit.
17391768
if (Bits[i].hasValue()) {
17401769
LastVal = Bits[i].getValue();
17411770
LastIdx = Bits[i].getValueBitIndex();
1742-
}
1743-
else {
1744-
if (LastVal) LastVal = SDValue();
1771+
} else {
1772+
if (LastVal)
1773+
LastVal = SDValue();
17451774
LastIdx = 0;
17461775
}
17471776
}
@@ -1753,33 +1782,34 @@ class BitPermutationSelector {
17531782
}
17541783
case ISD::ZERO_EXTEND: {
17551784
// We support only the case with zero extension from i32 to i64 so far.
1756-
if (V.getValueType() != MVT::i64 ||
1757-
V.getOperand(0).getValueType() != MVT::i32)
1785+
if (V.getValueType() != MVT::i64 || LHS.getValueType() != MVT::i32)
17581786
break;
17591787

1760-
const SmallVector<ValueBit, 64> *LHSBits;
17611788
const unsigned NumOperandBits = 32;
1762-
std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
1763-
NumOperandBits);
1764-
1765-
for (unsigned i = 0; i < NumOperandBits; ++i)
1766-
Bits[i] = (*LHSBits)[i];
1789+
if (LHS.hasOneUse()) {
1790+
const SmallVector<ValueBit, 64> *LHSBits;
1791+
std::tie(Interesting, LHSBits) = getValueBits(LHS, NumOperandBits);
1792+
for (unsigned i = 0; i < NumOperandBits; ++i)
1793+
Bits[i] = (*LHSBits)[i];
1794+
} else {
1795+
for (unsigned i = 0; i < NumOperandBits; ++i)
1796+
Bits[i] = ValueBit(LHS, i);
1797+
}
17671798

17681799
for (unsigned i = NumOperandBits; i < NumBits; ++i)
17691800
Bits[i] = ValueBit(ValueBit::ConstZero);
17701801

17711802
return std::make_pair(Interesting, &Bits);
17721803
}
17731804
case ISD::TRUNCATE: {
1774-
EVT FromType = V.getOperand(0).getValueType();
1805+
EVT FromType = LHS.getValueType();
17751806
EVT ToType = V.getValueType();
17761807
// We support only the case with truncate from i64 to i32.
1777-
if (FromType != MVT::i64 || ToType != MVT::i32)
1808+
if (FromType != MVT::i64 || ToType != MVT::i32 || !LHS.hasOneUse())
17781809
break;
17791810
const unsigned NumAllBits = FromType.getSizeInBits();
17801811
SmallVector<ValueBit, 64> *InBits;
1781-
std::tie(Interesting, InBits) = getValueBits(V.getOperand(0),
1782-
NumAllBits);
1812+
std::tie(Interesting, InBits) = getValueBits(LHS, NumAllBits);
17831813
const unsigned NumValidBits = ToType.getSizeInBits();
17841814

17851815
// A 32-bit instruction cannot touch upper 32-bit part of 64-bit value.
@@ -1802,22 +1832,28 @@ class BitPermutationSelector {
18021832
// For AssertZext, we look through the operand and
18031833
// mark the bits known to be zero.
18041834
const SmallVector<ValueBit, 64> *LHSBits;
1805-
std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
1806-
NumBits);
18071835

1808-
EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT();
1836+
EVT FromType = cast<VTSDNode>(RHS)->getVT();
18091837
const unsigned NumValidBits = FromType.getSizeInBits();
1810-
for (unsigned i = 0; i < NumValidBits; ++i)
1811-
Bits[i] = (*LHSBits)[i];
18121838

18131839
// These bits are known to be zero but the AssertZext may be from a value
18141840
// that already has some constant zero bits (i.e. from a masking and).
1815-
for (unsigned i = NumValidBits; i < NumBits; ++i)
1816-
Bits[i] = (*LHSBits)[i].hasValue()
1817-
? ValueBit((*LHSBits)[i].getValue(),
1818-
(*LHSBits)[i].getValueBitIndex(),
1819-
ValueBit::VariableKnownToBeZero)
1820-
: ValueBit(ValueBit::ConstZero);
1841+
if (LHS.hasOneUse()) {
1842+
std::tie(Interesting, LHSBits) = getValueBits(LHS, NumBits);
1843+
for (unsigned i = 0; i < NumValidBits; ++i)
1844+
Bits[i] = (*LHSBits)[i];
1845+
for (unsigned i = NumValidBits; i < NumBits; ++i)
1846+
Bits[i] = (*LHSBits)[i].hasValue()
1847+
? ValueBit((*LHSBits)[i].getValue(),
1848+
(*LHSBits)[i].getValueBitIndex(),
1849+
ValueBit::VariableKnownToBeZero)
1850+
: ValueBit(ValueBit::ConstZero);
1851+
} else {
1852+
for (unsigned i = 0; i < NumValidBits; ++i)
1853+
Bits[i] = ValueBit(LHS, i);
1854+
for (unsigned i = NumValidBits; i < NumBits; ++i)
1855+
Bits[i] = ValueBit(LHS, i, ValueBit::VariableKnownToBeZero);
1856+
}
18211857

18221858
return std::make_pair(Interesting, &Bits);
18231859
}

llvm/test/CodeGen/PowerPC/int128_ldst.ll

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -208,11 +208,10 @@ entry:
208208
define dso_local i128 @ld_or2___int128___int128(i64 %ptr, i8 zeroext %off) {
209209
; CHECK-LABEL: ld_or2___int128___int128:
210210
; CHECK: # %bb.0: # %entry
211-
; CHECK-NEXT: rldicr 5, 3, 0, 51
212-
; CHECK-NEXT: rotldi 6, 3, 52
213-
; CHECK-NEXT: ldx 3, 5, 4
214-
; CHECK-NEXT: rldimi 4, 6, 12, 0
215-
; CHECK-NEXT: ld 4, 8(4)
211+
; CHECK-NEXT: rldicr 3, 3, 0, 51
212+
; CHECK-NEXT: or 5, 3, 4
213+
; CHECK-NEXT: ldx 3, 3, 4
214+
; CHECK-NEXT: ld 4, 8(5)
216215
; CHECK-NEXT: blr
217216
entry:
218217
%and = and i64 %ptr, -4096
@@ -740,11 +739,10 @@ entry:
740739
define dso_local void @st_or2__int128___int128(i64 %ptr, i8 zeroext %off, i128 %str) {
741740
; CHECK-LABEL: st_or2__int128___int128:
742741
; CHECK: # %bb.0: # %entry
743-
; CHECK-NEXT: rldicr 7, 3, 0, 51
744-
; CHECK-NEXT: rotldi 3, 3, 52
745-
; CHECK-NEXT: stdx 5, 7, 4
746-
; CHECK-NEXT: rldimi 4, 3, 12, 0
747-
; CHECK-NEXT: std 6, 8(4)
742+
; CHECK-NEXT: rldicr 3, 3, 0, 51
743+
; CHECK-NEXT: or 7, 3, 4
744+
; CHECK-NEXT: stdx 5, 3, 4
745+
; CHECK-NEXT: std 6, 8(7)
748746
; CHECK-NEXT: blr
749747
entry:
750748
%and = and i64 %ptr, -4096

llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -639,9 +639,9 @@ define i64 @test_ds_cross_basic_blocks(ptr %arg, i32 signext %arg1) {
639639
; CHECK-NEXT: #
640640
; CHECK-NEXT: lbzu r0, 1(r5)
641641
; CHECK-NEXT: mulli r29, r0, 171
642-
; CHECK-NEXT: rlwinm r28, r29, 24, 8, 30
643-
; CHECK-NEXT: srwi r29, r29, 9
644-
; CHECK-NEXT: add r29, r29, r28
642+
; CHECK-NEXT: srwi r28, r29, 9
643+
; CHECK-NEXT: rlwinm r29, r29, 24, 8, 30
644+
; CHECK-NEXT: add r29, r28, r29
645645
; CHECK-NEXT: sub r0, r0, r29
646646
; CHECK-NEXT: clrlwi r0, r0, 24
647647
; CHECK-NEXT: cmplwi r0, 1

llvm/test/CodeGen/PowerPC/prefer-dqform.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ define void @test(ptr dereferenceable(4) %.ial, ptr noalias dereferenceable(4) %
3535
; CHECK-P9-NEXT: addi r8, r5, -8
3636
; CHECK-P9-NEXT: lwz r5, 0(r7)
3737
; CHECK-P9-NEXT: extsw r7, r4
38-
; CHECK-P9-NEXT: rldic r4, r3, 3, 29
38+
; CHECK-P9-NEXT: sldi r4, r3, 3
3939
; CHECK-P9-NEXT: sub r3, r7, r3
4040
; CHECK-P9-NEXT: addi r10, r4, 8
4141
; CHECK-P9-NEXT: lxvdsx vs0, 0, r8
@@ -87,7 +87,7 @@ define void @test(ptr dereferenceable(4) %.ial, ptr noalias dereferenceable(4) %
8787
; CHECK-P10-NEXT: addi r8, r5, -8
8888
; CHECK-P10-NEXT: lwz r5, 0(r7)
8989
; CHECK-P10-NEXT: extsw r7, r4
90-
; CHECK-P10-NEXT: rldic r4, r3, 3, 29
90+
; CHECK-P10-NEXT: sldi r4, r3, 3
9191
; CHECK-P10-NEXT: addi r10, r4, 8
9292
; CHECK-P10-NEXT: sub r3, r7, r3
9393
; CHECK-P10-NEXT: lxvdsx vs0, 0, r8

llvm/test/CodeGen/PowerPC/rldimi.ll

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,8 @@ entry:
1717
define i64 @rldimi2(i64 %a) {
1818
; CHECK-LABEL: rldimi2:
1919
; CHECK: # %bb.0: # %entry
20-
; CHECK-NEXT: mr 4, 3
21-
; CHECK-NEXT: rlwimi 4, 3, 8, 16, 23
22-
; CHECK-NEXT: rlwimi 4, 3, 16, 8, 15
23-
; CHECK-NEXT: rldimi 4, 3, 24, 0
24-
; CHECK-NEXT: mr 3, 4
20+
; CHECK-NEXT: rldimi 3, 3, 8, 0
21+
; CHECK-NEXT: rldimi 3, 3, 16, 0
2522
; CHECK-NEXT: blr
2623
entry:
2724
%x0 = shl i64 %a, 8
@@ -36,15 +33,9 @@ entry:
3633
define i64 @rldimi3(i64 %a) {
3734
; CHECK-LABEL: rldimi3:
3835
; CHECK: # %bb.0: # %entry
39-
; CHECK-NEXT: rotldi 4, 3, 32
40-
; CHECK-NEXT: rlwimi 4, 3, 0, 24, 31
41-
; CHECK-NEXT: rlwimi 4, 3, 8, 16, 23
42-
; CHECK-NEXT: rlwimi 4, 3, 16, 8, 15
43-
; CHECK-NEXT: rlwimi 4, 3, 24, 0, 7
44-
; CHECK-NEXT: rldimi 4, 3, 40, 16
45-
; CHECK-NEXT: rldimi 4, 3, 48, 8
46-
; CHECK-NEXT: rldimi 4, 3, 56, 0
47-
; CHECK-NEXT: mr 3, 4
36+
; CHECK-NEXT: rldimi 3, 3, 8, 0
37+
; CHECK-NEXT: rldimi 3, 3, 16, 0
38+
; CHECK-NEXT: rlwinm 3, 3, 0, 1, 0
4839
; CHECK-NEXT: blr
4940
entry:
5041
%0 = shl i64 %a, 8

0 commit comments

Comments
 (0)