@@ -1703,23 +1703,73 @@ generateInstLaneVectorFromOperand(ArrayRef<InstLane> Item, int Op) {
1703
1703
return NItem;
1704
1704
}
1705
1705
1706
+ // / Detect concat of multiple values into a vector
1707
+ static bool isFreeConcat (ArrayRef<InstLane> Item,
1708
+ const TargetTransformInfo &TTI) {
1709
+ auto *Ty = cast<FixedVectorType>(Item.front ().first ->get ()->getType ());
1710
+ unsigned NumElts = Ty->getNumElements ();
1711
+ if (Item.size () == NumElts || NumElts == 1 || Item.size () % NumElts != 0 )
1712
+ return false ;
1713
+
1714
+ // Check that the concat is free, usually meaning that the type will be split
1715
+ // during legalization.
1716
+ SmallVector<int , 16 > ConcatMask (NumElts * 2 );
1717
+ std::iota (ConcatMask.begin (), ConcatMask.end (), 0 );
1718
+ if (TTI.getShuffleCost (TTI::SK_PermuteTwoSrc, Ty, ConcatMask,
1719
+ TTI::TCK_RecipThroughput) != 0 )
1720
+ return false ;
1721
+
1722
+ unsigned NumSlices = Item.size () / NumElts;
1723
+ // Currently we generate a tree of shuffles for the concats, which limits us
1724
+ // to a power2.
1725
+ if (!isPowerOf2_32 (NumSlices))
1726
+ return false ;
1727
+ for (unsigned Slice = 0 ; Slice < NumSlices; ++Slice) {
1728
+ Use *SliceV = Item[Slice * NumElts].first ;
1729
+ if (!SliceV || SliceV->get ()->getType () != Ty)
1730
+ return false ;
1731
+ for (unsigned Elt = 0 ; Elt < NumElts; ++Elt) {
1732
+ auto [V, Lane] = Item[Slice * NumElts + Elt];
1733
+ if (Lane != static_cast <int >(Elt) || SliceV->get () != V->get ())
1734
+ return false ;
1735
+ }
1736
+ }
1737
+ return true ;
1738
+ }
1739
+
1706
1740
static Value *generateNewInstTree (ArrayRef<InstLane> Item, FixedVectorType *Ty,
1707
1741
const SmallPtrSet<Use *, 4 > &IdentityLeafs,
1708
1742
const SmallPtrSet<Use *, 4 > &SplatLeafs,
1743
+ const SmallPtrSet<Use *, 4 > &ConcatLeafs,
1709
1744
IRBuilder<> &Builder) {
1710
1745
auto [FrontU, FrontLane] = Item.front ();
1711
1746
1712
1747
if (IdentityLeafs.contains (FrontU)) {
1713
1748
return FrontU->get ();
1714
1749
}
1715
1750
if (SplatLeafs.contains (FrontU)) {
1716
- if (auto *ILI = dyn_cast<Instruction>(FrontU))
1717
- Builder.SetInsertPoint (*ILI->getInsertionPointAfterDef ());
1718
- else if (auto *Arg = dyn_cast<Argument>(FrontU))
1719
- Builder.SetInsertPointPastAllocas (Arg->getParent ());
1720
1751
SmallVector<int , 16 > Mask (Ty->getNumElements (), FrontLane);
1721
1752
return Builder.CreateShuffleVector (FrontU->get (), Mask);
1722
1753
}
1754
+ if (ConcatLeafs.contains (FrontU)) {
1755
+ unsigned NumElts =
1756
+ cast<FixedVectorType>(FrontU->get ()->getType ())->getNumElements ();
1757
+ SmallVector<Value *> Values (Item.size () / NumElts, nullptr );
1758
+ for (unsigned S = 0 ; S < Values.size (); ++S)
1759
+ Values[S] = Item[S * NumElts].first ->get ();
1760
+
1761
+ while (Values.size () > 1 ) {
1762
+ NumElts *= 2 ;
1763
+ SmallVector<int , 16 > Mask (NumElts, 0 );
1764
+ std::iota (Mask.begin (), Mask.end (), 0 );
1765
+ SmallVector<Value *> NewValues (Values.size () / 2 , nullptr );
1766
+ for (unsigned S = 0 ; S < NewValues.size (); ++S)
1767
+ NewValues[S] =
1768
+ Builder.CreateShuffleVector (Values[S * 2 ], Values[S * 2 + 1 ], Mask);
1769
+ Values = NewValues;
1770
+ }
1771
+ return Values[0 ];
1772
+ }
1723
1773
1724
1774
auto *I = cast<Instruction>(FrontU->get ());
1725
1775
auto *II = dyn_cast<IntrinsicInst>(I);
@@ -1730,16 +1780,16 @@ static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
1730
1780
Ops[Idx] = II->getOperand (Idx);
1731
1781
continue ;
1732
1782
}
1733
- Ops[Idx] = generateNewInstTree (generateInstLaneVectorFromOperand (Item, Idx),
1734
- Ty, IdentityLeafs, SplatLeafs, Builder);
1783
+ Ops[Idx] =
1784
+ generateNewInstTree (generateInstLaneVectorFromOperand (Item, Idx), Ty,
1785
+ IdentityLeafs, SplatLeafs, ConcatLeafs, Builder);
1735
1786
}
1736
1787
1737
1788
SmallVector<Value *, 8 > ValueList;
1738
1789
for (const auto &Lane : Item)
1739
1790
if (Lane.first )
1740
1791
ValueList.push_back (Lane.first ->get ());
1741
1792
1742
- Builder.SetInsertPoint (I);
1743
1793
Type *DstTy =
1744
1794
FixedVectorType::get (I->getType ()->getScalarType (), Ty->getNumElements ());
1745
1795
if (auto *BI = dyn_cast<BinaryOperator>(I)) {
@@ -1790,7 +1840,7 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
1790
1840
1791
1841
SmallVector<SmallVector<InstLane>> Worklist;
1792
1842
Worklist.push_back (Start);
1793
- SmallPtrSet<Use *, 4 > IdentityLeafs, SplatLeafs;
1843
+ SmallPtrSet<Use *, 4 > IdentityLeafs, SplatLeafs, ConcatLeafs ;
1794
1844
unsigned NumVisited = 0 ;
1795
1845
1796
1846
while (!Worklist.empty ()) {
@@ -1839,7 +1889,7 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
1839
1889
1840
1890
// We need each element to be the same type of value, and check that each
1841
1891
// element has a single use.
1842
- if (! all_of (drop_begin (Item), [Item](InstLane IL) {
1892
+ if (all_of (drop_begin (Item), [Item](InstLane IL) {
1843
1893
Value *FrontV = Item.front ().first ->get ();
1844
1894
if (!IL.first )
1845
1895
return true ;
@@ -1860,48 +1910,59 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
1860
1910
return !II || (isa<IntrinsicInst>(FrontV) &&
1861
1911
II->getIntrinsicID () ==
1862
1912
cast<IntrinsicInst>(FrontV)->getIntrinsicID ());
1863
- }))
1864
- return false ;
1865
-
1866
- // Check the operator is one that we support. We exclude div/rem in case
1867
- // they hit UB from poison lanes.
1868
- if ((isa<BinaryOperator>(FrontU) &&
1869
- !cast<BinaryOperator>(FrontU)->isIntDivRem ()) ||
1870
- isa<CmpInst>(FrontU)) {
1871
- Worklist.push_back (generateInstLaneVectorFromOperand (Item, 0 ));
1872
- Worklist.push_back (generateInstLaneVectorFromOperand (Item, 1 ));
1873
- } else if (isa<UnaryOperator, TruncInst, ZExtInst, SExtInst>(FrontU)) {
1874
- Worklist.push_back (generateInstLaneVectorFromOperand (Item, 0 ));
1875
- } else if (isa<SelectInst>(FrontU)) {
1876
- Worklist.push_back (generateInstLaneVectorFromOperand (Item, 0 ));
1877
- Worklist.push_back (generateInstLaneVectorFromOperand (Item, 1 ));
1878
- Worklist.push_back (generateInstLaneVectorFromOperand (Item, 2 ));
1879
- } else if (auto *II = dyn_cast<IntrinsicInst>(FrontU);
1880
- II && isTriviallyVectorizable (II->getIntrinsicID ())) {
1881
- for (unsigned Op = 0 , E = II->getNumOperands () - 1 ; Op < E; Op++) {
1882
- if (isVectorIntrinsicWithScalarOpAtArg (II->getIntrinsicID (), Op)) {
1883
- if (!all_of (drop_begin (Item), [Item, Op](InstLane &IL) {
1884
- Value *FrontV = Item.front ().first ->get ();
1885
- Use *U = IL.first ;
1886
- return !U || (cast<Instruction>(U->get ())->getOperand (Op) ==
1887
- cast<Instruction>(FrontV)->getOperand (Op));
1888
- }))
1889
- return false ;
1890
- continue ;
1913
+ })) {
1914
+ // Check the operator is one that we support.
1915
+ if (isa<BinaryOperator, CmpInst>(FrontU)) {
1916
+ // We exclude div/rem in case they hit UB from poison lanes.
1917
+ if (auto *BO = dyn_cast<BinaryOperator>(FrontU);
1918
+ BO && BO->isIntDivRem ())
1919
+ return false ;
1920
+ Worklist.push_back (generateInstLaneVectorFromOperand (Item, 0 ));
1921
+ Worklist.push_back (generateInstLaneVectorFromOperand (Item, 1 ));
1922
+ continue ;
1923
+ } else if (isa<UnaryOperator, TruncInst, ZExtInst, SExtInst>(FrontU)) {
1924
+ Worklist.push_back (generateInstLaneVectorFromOperand (Item, 0 ));
1925
+ continue ;
1926
+ } else if (isa<SelectInst>(FrontU)) {
1927
+ Worklist.push_back (generateInstLaneVectorFromOperand (Item, 0 ));
1928
+ Worklist.push_back (generateInstLaneVectorFromOperand (Item, 1 ));
1929
+ Worklist.push_back (generateInstLaneVectorFromOperand (Item, 2 ));
1930
+ continue ;
1931
+ } else if (auto *II = dyn_cast<IntrinsicInst>(FrontU);
1932
+ II && isTriviallyVectorizable (II->getIntrinsicID ())) {
1933
+ for (unsigned Op = 0 , E = II->getNumOperands () - 1 ; Op < E; Op++) {
1934
+ if (isVectorIntrinsicWithScalarOpAtArg (II->getIntrinsicID (), Op)) {
1935
+ if (!all_of (drop_begin (Item), [Item, Op](InstLane &IL) {
1936
+ Value *FrontV = Item.front ().first ->get ();
1937
+ Use *U = IL.first ;
1938
+ return !U || (cast<Instruction>(U->get ())->getOperand (Op) ==
1939
+ cast<Instruction>(FrontV)->getOperand (Op));
1940
+ }))
1941
+ return false ;
1942
+ continue ;
1943
+ }
1944
+ Worklist.push_back (generateInstLaneVectorFromOperand (Item, Op));
1891
1945
}
1892
- Worklist. push_back ( generateInstLaneVectorFromOperand (Item, Op)) ;
1946
+ continue ;
1893
1947
}
1894
- } else {
1895
- return false ;
1896
1948
}
1949
+
1950
+ if (isFreeConcat (Item, TTI)) {
1951
+ ConcatLeafs.insert (FrontU);
1952
+ continue ;
1953
+ }
1954
+
1955
+ return false ;
1897
1956
}
1898
1957
1899
1958
if (NumVisited <= 1 )
1900
1959
return false ;
1901
1960
1902
1961
// If we got this far, we know the shuffles are superfluous and can be
1903
1962
// removed. Scan through again and generate the new tree of instructions.
1904
- Value *V = generateNewInstTree (Start, Ty, IdentityLeafs, SplatLeafs, Builder);
1963
+ Builder.SetInsertPoint (&I);
1964
+ Value *V = generateNewInstTree (Start, Ty, IdentityLeafs, SplatLeafs,
1965
+ ConcatLeafs, Builder);
1905
1966
replaceValue (I, *V);
1906
1967
return true ;
1907
1968
}
0 commit comments