@@ -113,6 +113,7 @@ class VectorCombine {
113
113
bool scalarizeLoadExtract (Instruction &I);
114
114
bool foldShuffleOfBinops (Instruction &I);
115
115
bool foldShuffleOfCastops (Instruction &I);
116
+ bool foldShuffleToIdentity (Instruction &I);
116
117
bool foldShuffleFromReductions (Instruction &I);
117
118
bool foldTruncFromReductions (Instruction &I);
118
119
bool foldSelectShuffle (Instruction &I, bool FromReduction = false );
@@ -1547,6 +1548,148 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
1547
1548
return true ;
1548
1549
}
1549
1550
1551
+ // Starting from a shuffle, look up through operands tracking the shuffled index
1552
+ // of each lane. If we can simplify away the shuffles to identities then
1553
+ // do so.
1554
+ bool VectorCombine::foldShuffleToIdentity (Instruction &I) {
1555
+ FixedVectorType *Ty = dyn_cast<FixedVectorType>(I.getType ());
1556
+ if (!Ty || !isa<Instruction>(I.getOperand (0 )) ||
1557
+ !isa<Instruction>(I.getOperand (1 )))
1558
+ return false ;
1559
+
1560
+ using InstLane = std::pair<Value *, int >;
1561
+
1562
+ auto LookThroughShuffles = [](Value *V, int Lane) -> InstLane {
1563
+ while (auto *SV = dyn_cast<ShuffleVectorInst>(V)) {
1564
+ unsigned NumElts =
1565
+ cast<FixedVectorType>(SV->getOperand (0 )->getType ())->getNumElements ();
1566
+ int M = SV->getMaskValue (Lane);
1567
+ if (M < 0 )
1568
+ return {nullptr , -1 };
1569
+ else if (M < (int )NumElts) {
1570
+ V = SV->getOperand (0 );
1571
+ Lane = M;
1572
+ } else {
1573
+ V = SV->getOperand (1 );
1574
+ Lane = M - NumElts;
1575
+ }
1576
+ }
1577
+ return InstLane{V, Lane};
1578
+ };
1579
+
1580
+ auto GenerateInstLaneVectorFromOperand =
1581
+ [&LookThroughShuffles](const SmallVector<InstLane> &Item, int Op) {
1582
+ SmallVector<InstLane> NItem;
1583
+ for (InstLane V : Item) {
1584
+ NItem.emplace_back (
1585
+ !V.first
1586
+ ? InstLane{nullptr , -1 }
1587
+ : LookThroughShuffles (
1588
+ cast<Instruction>(V.first )->getOperand (Op), V.second ));
1589
+ }
1590
+ return NItem;
1591
+ };
1592
+
1593
+ SmallVector<InstLane> Start;
1594
+ for (unsigned M = 0 ; M < Ty->getNumElements (); ++M)
1595
+ Start.push_back (LookThroughShuffles (&I, M));
1596
+
1597
+ SmallVector<SmallVector<InstLane>> Worklist;
1598
+ Worklist.push_back (Start);
1599
+ SmallPtrSet<Value *, 4 > IdentityLeafs, SplatLeafs;
1600
+ unsigned NumVisited = 0 ;
1601
+
1602
+ while (!Worklist.empty ()) {
1603
+ SmallVector<InstLane> Item = Worklist.pop_back_val ();
1604
+ if (++NumVisited > MaxInstrsToScan)
1605
+ return false ;
1606
+
1607
+ // If we found an undef first lane then bail out to keep things simple.
1608
+ if (!Item[0 ].first )
1609
+ return false ;
1610
+
1611
+ // Look for an identity value.
1612
+ if (Item[0 ].second == 0 && Item[0 ].first ->getType () == Ty &&
1613
+ all_of (drop_begin (enumerate(Item)), [&](const auto &E) {
1614
+ return !E.value ().first || (E.value ().first == Item[0 ].first &&
1615
+ E.value ().second == (int )E.index ());
1616
+ })) {
1617
+ IdentityLeafs.insert (Item[0 ].first );
1618
+ continue ;
1619
+ }
1620
+ // Look for a splat value.
1621
+ if (all_of (drop_begin (Item), [&](InstLane &IL) {
1622
+ return !IL.first ||
1623
+ (IL.first == Item[0 ].first && IL.second == Item[0 ].second );
1624
+ })) {
1625
+ SplatLeafs.insert (Item[0 ].first );
1626
+ continue ;
1627
+ }
1628
+
1629
+ // We need each element to be the same type of value, and check that each
1630
+ // element has a single use.
1631
+ if (!all_of (drop_begin (Item), [&](InstLane IL) {
1632
+ if (!IL.first )
1633
+ return true ;
1634
+ if (isa<Instruction>(IL.first ) &&
1635
+ !cast<Instruction>(IL.first )->hasOneUse ())
1636
+ return false ;
1637
+ return IL.first ->getValueID () == Item[0 ].first ->getValueID () &&
1638
+ (!isa<IntrinsicInst>(IL.first ) ||
1639
+ cast<IntrinsicInst>(IL.first )->getIntrinsicID () ==
1640
+ cast<IntrinsicInst>(Item[0 ].first )->getIntrinsicID ());
1641
+ }))
1642
+ return false ;
1643
+
1644
+ // Check the operator is one that we support.
1645
+ if (isa<BinaryOperator>(Item[0 ].first )) {
1646
+ Worklist.push_back (GenerateInstLaneVectorFromOperand (Item, 0 ));
1647
+ Worklist.push_back (GenerateInstLaneVectorFromOperand (Item, 1 ));
1648
+ } else if (isa<UnaryOperator>(Item[0 ].first )) {
1649
+ Worklist.push_back (GenerateInstLaneVectorFromOperand (Item, 0 ));
1650
+ } else {
1651
+ return false ;
1652
+ }
1653
+ }
1654
+
1655
+ // If we got this far, we know the shuffles are superfluous and can be
1656
+ // removed. Scan through again and generate the new tree of instructions.
1657
+ std::function<Value *(const SmallVector<InstLane> &)> generate =
1658
+ [&](const SmallVector<InstLane> &Item) -> Value * {
1659
+ if (IdentityLeafs.contains (Item[0 ].first ) &&
1660
+ all_of (drop_begin (enumerate(Item)), [&](const auto &E) {
1661
+ return !E.value ().first || (E.value ().first == Item[0 ].first &&
1662
+ E.value ().second == (int )E.index ());
1663
+ })) {
1664
+ return Item[0 ].first ;
1665
+ } else if (SplatLeafs.contains (Item[0 ].first )) {
1666
+ if (auto ILI = dyn_cast<Instruction>(Item[0 ].first ))
1667
+ Builder.SetInsertPoint (*ILI->getInsertionPointAfterDef ());
1668
+ else if (isa<Argument>(Item[0 ].first ))
1669
+ Builder.SetInsertPointPastAllocas (I.getParent ()->getParent ());
1670
+ SmallVector<int , 16 > Mask (Ty->getNumElements (), Item[0 ].second );
1671
+ return Builder.CreateShuffleVector (Item[0 ].first , Mask);
1672
+ }
1673
+
1674
+ auto *I = cast<Instruction>(Item[0 ].first );
1675
+ SmallVector<Value *> Ops;
1676
+ unsigned E = I->getNumOperands ();
1677
+ for (unsigned Idx = 0 ; Idx < E; Idx++)
1678
+ Ops.push_back (generate (GenerateInstLaneVectorFromOperand (Item, Idx)));
1679
+ Builder.SetInsertPoint (I);
1680
+ if (auto BI = dyn_cast<BinaryOperator>(I))
1681
+ return Builder.CreateBinOp ((Instruction::BinaryOps)BI->getOpcode (),
1682
+ Ops[0 ], Ops[1 ]);
1683
+ if (auto UI = dyn_cast<UnaryOperator>(I))
1684
+ return Builder.CreateUnOp ((Instruction::UnaryOps)UI->getOpcode (), Ops[0 ]);
1685
+ llvm_unreachable (" Unhandled instruction in generate" );
1686
+ };
1687
+
1688
+ Value *V = generate (Start);
1689
+ replaceValue (I, *V);
1690
+ return true ;
1691
+ }
1692
+
1550
1693
// / Given a commutative reduction, the order of the input lanes does not alter
1551
1694
// / the results. We can use this to remove certain shuffles feeding the
1552
1695
// / reduction, removing the need to shuffle at all.
@@ -2103,6 +2246,7 @@ bool VectorCombine::run() {
2103
2246
MadeChange |= foldShuffleOfBinops (I);
2104
2247
MadeChange |= foldShuffleOfCastops (I);
2105
2248
MadeChange |= foldSelectShuffle (I);
2249
+ MadeChange |= foldShuffleToIdentity (I);
2106
2250
break ;
2107
2251
case Instruction::BitCast:
2108
2252
MadeChange |= foldBitcastShuffle (I);
0 commit comments