Skip to content

Commit 3aacf81

Browse files
committed
[VectorCombine] Add foldShuffleToIdentity
This patch adds a basic version of a combine that attempts to fold away shuffles that, when combined, simplify away to an identity shuffle. For example:

  %ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %at = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
  %abt = fneg <4 x half> %at
  %abb = fneg <4 x half> %ab
  %r = shufflevector <4 x half> %abt, <4 x half> %abb, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

By looking through the shuffles, it can be simplified to:

  %r = fneg <8 x half> %a

The code tracks each lane starting from the original shuffle, keeping track of a vector of {src, idx}. As we propagate up through the instructions we will either look through intermediate instructions (binops and unops) or see a collection of lanes that all have the same src and incrementing idx (an identity). We can also see a single value with identical lanes, which we can treat like a splat.

Only the basic version is added here, handling identities, splats, binops and unops. In follow-up patches other instructions can be added, such as constants, intrinsics, cmp/sel and zext/sext/trunc.
1 parent c8dca5b commit 3aacf81

File tree

4 files changed

+168
-118
lines changed

4 files changed

+168
-118
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ class VectorCombine {
113113
bool scalarizeLoadExtract(Instruction &I);
114114
bool foldShuffleOfBinops(Instruction &I);
115115
bool foldShuffleOfCastops(Instruction &I);
116+
bool foldShuffleToIdentity(Instruction &I);
116117
bool foldShuffleFromReductions(Instruction &I);
117118
bool foldTruncFromReductions(Instruction &I);
118119
bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
@@ -1547,6 +1548,148 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
15471548
return true;
15481549
}
15491550

1551+
// Starting from a shuffle, look up through operands tracking the shuffled index
1552+
// of each lane. If we can simplify away the shuffles to identities then
1553+
// do so.
1554+
bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
1555+
FixedVectorType *Ty = dyn_cast<FixedVectorType>(I.getType());
1556+
if (!Ty || !isa<Instruction>(I.getOperand(0)) ||
1557+
!isa<Instruction>(I.getOperand(1)))
1558+
return false;
1559+
1560+
using InstLane = std::pair<Value *, int>;
1561+
1562+
auto LookThroughShuffles = [](Value *V, int Lane) -> InstLane {
1563+
while (auto *SV = dyn_cast<ShuffleVectorInst>(V)) {
1564+
unsigned NumElts =
1565+
cast<FixedVectorType>(SV->getOperand(0)->getType())->getNumElements();
1566+
int M = SV->getMaskValue(Lane);
1567+
if (M < 0)
1568+
return {nullptr, -1};
1569+
else if (M < (int)NumElts) {
1570+
V = SV->getOperand(0);
1571+
Lane = M;
1572+
} else {
1573+
V = SV->getOperand(1);
1574+
Lane = M - NumElts;
1575+
}
1576+
}
1577+
return InstLane{V, Lane};
1578+
};
1579+
1580+
auto GenerateInstLaneVectorFromOperand =
1581+
[&LookThroughShuffles](const SmallVector<InstLane> &Item, int Op) {
1582+
SmallVector<InstLane> NItem;
1583+
for (InstLane V : Item) {
1584+
NItem.emplace_back(
1585+
!V.first
1586+
? InstLane{nullptr, -1}
1587+
: LookThroughShuffles(
1588+
cast<Instruction>(V.first)->getOperand(Op), V.second));
1589+
}
1590+
return NItem;
1591+
};
1592+
1593+
SmallVector<InstLane> Start;
1594+
for (unsigned M = 0; M < Ty->getNumElements(); ++M)
1595+
Start.push_back(LookThroughShuffles(&I, M));
1596+
1597+
SmallVector<SmallVector<InstLane>> Worklist;
1598+
Worklist.push_back(Start);
1599+
SmallPtrSet<Value *, 4> IdentityLeafs, SplatLeafs;
1600+
unsigned NumVisited = 0;
1601+
1602+
while (!Worklist.empty()) {
1603+
SmallVector<InstLane> Item = Worklist.pop_back_val();
1604+
if (++NumVisited > MaxInstrsToScan)
1605+
return false;
1606+
1607+
// If we found an undef first lane then bail out to keep things simple.
1608+
if (!Item[0].first)
1609+
return false;
1610+
1611+
// Look for an identity value.
1612+
if (Item[0].second == 0 && Item[0].first->getType() == Ty &&
1613+
all_of(drop_begin(enumerate(Item)), [&](const auto &E) {
1614+
return !E.value().first || (E.value().first == Item[0].first &&
1615+
E.value().second == (int)E.index());
1616+
})) {
1617+
IdentityLeafs.insert(Item[0].first);
1618+
continue;
1619+
}
1620+
// Look for a splat value.
1621+
if (all_of(drop_begin(Item), [&](InstLane &IL) {
1622+
return !IL.first ||
1623+
(IL.first == Item[0].first && IL.second == Item[0].second);
1624+
})) {
1625+
SplatLeafs.insert(Item[0].first);
1626+
continue;
1627+
}
1628+
1629+
// We need each element to be the same type of value, and check that each
1630+
// element has a single use.
1631+
if (!all_of(drop_begin(Item), [&](InstLane IL) {
1632+
if (!IL.first)
1633+
return true;
1634+
if (isa<Instruction>(IL.first) &&
1635+
!cast<Instruction>(IL.first)->hasOneUse())
1636+
return false;
1637+
return IL.first->getValueID() == Item[0].first->getValueID() &&
1638+
(!isa<IntrinsicInst>(IL.first) ||
1639+
cast<IntrinsicInst>(IL.first)->getIntrinsicID() ==
1640+
cast<IntrinsicInst>(Item[0].first)->getIntrinsicID());
1641+
}))
1642+
return false;
1643+
1644+
// Check the operator is one that we support.
1645+
if (isa<BinaryOperator>(Item[0].first)) {
1646+
Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 0));
1647+
Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 1));
1648+
} else if (isa<UnaryOperator>(Item[0].first)) {
1649+
Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 0));
1650+
} else {
1651+
return false;
1652+
}
1653+
}
1654+
1655+
// If we got this far, we know the shuffles are superfluous and can be
1656+
// removed. Scan through again and generate the new tree of instructions.
1657+
std::function<Value *(const SmallVector<InstLane> &)> generate =
1658+
[&](const SmallVector<InstLane> &Item) -> Value * {
1659+
if (IdentityLeafs.contains(Item[0].first) &&
1660+
all_of(drop_begin(enumerate(Item)), [&](const auto &E) {
1661+
return !E.value().first || (E.value().first == Item[0].first &&
1662+
E.value().second == (int)E.index());
1663+
})) {
1664+
return Item[0].first;
1665+
} else if (SplatLeafs.contains(Item[0].first)) {
1666+
if (auto ILI = dyn_cast<Instruction>(Item[0].first))
1667+
Builder.SetInsertPoint(*ILI->getInsertionPointAfterDef());
1668+
else if (isa<Argument>(Item[0].first))
1669+
Builder.SetInsertPointPastAllocas(I.getParent()->getParent());
1670+
SmallVector<int, 16> Mask(Ty->getNumElements(), Item[0].second);
1671+
return Builder.CreateShuffleVector(Item[0].first, Mask);
1672+
}
1673+
1674+
auto *I = cast<Instruction>(Item[0].first);
1675+
SmallVector<Value *> Ops;
1676+
unsigned E = I->getNumOperands();
1677+
for (unsigned Idx = 0; Idx < E; Idx++)
1678+
Ops.push_back(generate(GenerateInstLaneVectorFromOperand(Item, Idx)));
1679+
Builder.SetInsertPoint(I);
1680+
if (auto BI = dyn_cast<BinaryOperator>(I))
1681+
return Builder.CreateBinOp((Instruction::BinaryOps)BI->getOpcode(),
1682+
Ops[0], Ops[1]);
1683+
if (auto UI = dyn_cast<UnaryOperator>(I))
1684+
return Builder.CreateUnOp((Instruction::UnaryOps)UI->getOpcode(), Ops[0]);
1685+
llvm_unreachable("Unhandled instruction in generate");
1686+
};
1687+
1688+
Value *V = generate(Start);
1689+
replaceValue(I, *V);
1690+
return true;
1691+
}
1692+
15501693
/// Given a commutative reduction, the order of the input lanes does not alter
15511694
/// the results. We can use this to remove certain shuffles feeding the
15521695
/// reduction, removing the need to shuffle at all.
@@ -2103,6 +2246,7 @@ bool VectorCombine::run() {
21032246
MadeChange |= foldShuffleOfBinops(I);
21042247
MadeChange |= foldShuffleOfCastops(I);
21052248
MadeChange |= foldSelectShuffle(I);
2249+
MadeChange |= foldShuffleToIdentity(I);
21062250
break;
21072251
case Instruction::BitCast:
21082252
MadeChange |= foldBitcastShuffle(I);

0 commit comments

Comments
 (0)