@@ -1395,12 +1395,19 @@ class BoUpSLP {
1395
1395
return LookAheadHeuristics::ScoreSplat;
1396
1396
}
1397
1397
1398
+ auto CheckSameEntryOrFail = [&]() {
1399
+ if (const TreeEntry *TE1 = R.getTreeEntry(V1);
1400
+ TE1 && TE1 == R.getTreeEntry(V2))
1401
+ return LookAheadHeuristics::ScoreSplatLoads;
1402
+ return LookAheadHeuristics::ScoreFail;
1403
+ };
1404
+
1398
1405
auto *LI1 = dyn_cast<LoadInst>(V1);
1399
1406
auto *LI2 = dyn_cast<LoadInst>(V2);
1400
1407
if (LI1 && LI2) {
1401
1408
if (LI1->getParent() != LI2->getParent() || !LI1->isSimple() ||
1402
1409
!LI2->isSimple())
1403
- return LookAheadHeuristics::ScoreFail ;
1410
+ return CheckSameEntryOrFail() ;
1404
1411
1405
1412
std::optional<int> Dist = getPointersDiff(
1406
1413
LI1->getType(), LI1->getPointerOperand(), LI2->getType(),
@@ -1412,7 +1419,7 @@ class BoUpSLP {
1412
1419
FixedVectorType::get(LI1->getType(), NumLanes),
1413
1420
LI1->getAlign()))
1414
1421
return LookAheadHeuristics::ScoreMaskedGatherCandidate;
1415
- return LookAheadHeuristics::ScoreFail ;
1422
+ return CheckSameEntryOrFail() ;
1416
1423
}
1417
1424
// The distance is too large - still may be profitable to use masked
1418
1425
// loads/gathers.
@@ -1469,14 +1476,14 @@ class BoUpSLP {
1469
1476
}
1470
1477
return LookAheadHeuristics::ScoreAltOpcodes;
1471
1478
}
1472
- return LookAheadHeuristics::ScoreFail ;
1479
+ return CheckSameEntryOrFail() ;
1473
1480
}
1474
1481
1475
1482
auto *I1 = dyn_cast<Instruction>(V1);
1476
1483
auto *I2 = dyn_cast<Instruction>(V2);
1477
1484
if (I1 && I2) {
1478
1485
if (I1->getParent() != I2->getParent())
1479
- return LookAheadHeuristics::ScoreFail ;
1486
+ return CheckSameEntryOrFail() ;
1480
1487
SmallVector<Value *, 4> Ops(MainAltOps.begin(), MainAltOps.end());
1481
1488
Ops.push_back(I1);
1482
1489
Ops.push_back(I2);
@@ -1497,7 +1504,7 @@ class BoUpSLP {
1497
1504
if (isa<UndefValue>(V2))
1498
1505
return LookAheadHeuristics::ScoreUndef;
1499
1506
1500
- return LookAheadHeuristics::ScoreFail ;
1507
+ return CheckSameEntryOrFail() ;
1501
1508
}
1502
1509
1503
1510
/// Go through the operands of \p LHS and \p RHS recursively until
@@ -1660,6 +1667,7 @@ class BoUpSLP {
1660
1667
const DataLayout &DL;
1661
1668
ScalarEvolution &SE;
1662
1669
const BoUpSLP &R;
1670
+ const Loop *L = nullptr;
1663
1671
1664
1672
/// \returns the operand data at \p OpIdx and \p Lane.
1665
1673
OperandData &getData(unsigned OpIdx, unsigned Lane) {
@@ -1828,8 +1836,9 @@ class BoUpSLP {
1828
1836
// Track if the operand must be marked as used. If the operand is set to
1829
1837
// Score 1 explicitly (because of non power-of-2 unique scalars), we may
1830
1838
// want to reestimate the operands again on the following iterations.
1831
- bool IsUsed =
1832
- RMode == ReorderingMode::Splat || RMode == ReorderingMode::Constant;
1839
+ bool IsUsed = RMode == ReorderingMode::Splat ||
1840
+ RMode == ReorderingMode::Constant ||
1841
+ RMode == ReorderingMode::Load;
1833
1842
// Iterate through all unused operands and look for the best.
1834
1843
for (unsigned Idx = 0; Idx != NumOperands; ++Idx) {
1835
1844
// Get the operand at Idx and Lane.
@@ -1850,23 +1859,44 @@ class BoUpSLP {
1850
1859
// Look for an operand that matches the current mode.
1851
1860
switch (RMode) {
1852
1861
case ReorderingMode::Load:
1853
- case ReorderingMode::Constant:
1854
1862
case ReorderingMode::Opcode: {
1855
1863
bool LeftToRight = Lane > LastLane;
1856
1864
Value *OpLeft = (LeftToRight) ? OpLastLane : Op;
1857
1865
Value *OpRight = (LeftToRight) ? Op : OpLastLane;
1858
1866
int Score = getLookAheadScore(OpLeft, OpRight, MainAltOps, Lane,
1859
1867
OpIdx, Idx, IsUsed);
1860
- if (Score > static_cast<int>(BestOp.Score)) {
1868
+ if (Score > static_cast<int>(BestOp.Score) ||
1869
+ (Score > 0 && Score == static_cast<int>(BestOp.Score) &&
1870
+ Idx == OpIdx)) {
1861
1871
BestOp.Idx = Idx;
1862
1872
BestOp.Score = Score;
1863
1873
BestScoresPerLanes[std::make_pair(OpIdx, Lane)] = Score;
1864
1874
}
1865
1875
break;
1866
1876
}
1877
+ case ReorderingMode::Constant:
1878
+ if (isa<Constant>(Op) ||
1879
+ (!BestOp.Score && L && L->isLoopInvariant(Op))) {
1880
+ BestOp.Idx = Idx;
1881
+ if (isa<Constant>(Op)) {
1882
+ BestOp.Score = LookAheadHeuristics::ScoreConstants;
1883
+ BestScoresPerLanes[std::make_pair(OpIdx, Lane)] =
1884
+ LookAheadHeuristics::ScoreConstants;
1885
+ }
1886
+ if (isa<UndefValue>(Op) || !isa<Constant>(Op))
1887
+ IsUsed = false;
1888
+ }
1889
+ break;
1867
1890
case ReorderingMode::Splat:
1868
- if (Op == OpLastLane)
1891
+ if (Op == OpLastLane || (!BestOp.Score && isa<Constant>(Op))) {
1892
+ IsUsed = Op == OpLastLane;
1893
+ if (Op == OpLastLane) {
1894
+ BestOp.Score = LookAheadHeuristics::ScoreSplat;
1895
+ BestScoresPerLanes[std::make_pair(OpIdx, Lane)] =
1896
+ LookAheadHeuristics::ScoreSplat;
1897
+ }
1869
1898
BestOp.Idx = Idx;
1899
+ }
1870
1900
break;
1871
1901
case ReorderingMode::Failed:
1872
1902
llvm_unreachable("Not expected Failed reordering mode.");
@@ -2059,10 +2089,12 @@ class BoUpSLP {
2059
2089
void clear() { OpsVec.clear(); }
2060
2090
2061
2091
/// \returns true if there are enough operands identical to \p Op to fill
2062
- /// the whole vector.
2092
+ /// the whole vector (which may be mixed with constants or loop invariant values).
2063
2093
/// Note: This modifies the 'IsUsed' flag, so a cleanUsed() must follow.
2064
2094
bool shouldBroadcast(Value *Op, unsigned OpIdx, unsigned Lane) {
2065
2095
bool OpAPO = getData(OpIdx, Lane).APO;
2096
+ bool IsInvariant = L && L->isLoopInvariant(Op);
2097
+ unsigned Cnt = 0;
2066
2098
for (unsigned Ln = 0, Lns = getNumLanes(); Ln != Lns; ++Ln) {
2067
2099
if (Ln == Lane)
2068
2100
continue;
@@ -2072,22 +2104,51 @@ class BoUpSLP {
2072
2104
OperandData &Data = getData(OpI, Ln);
2073
2105
if (Data.APO != OpAPO || Data.IsUsed)
2074
2106
continue;
2075
- if (Data.V == Op) {
2107
+ Value *OpILane = getValue(OpI, Lane);
2108
+ bool IsConstantOp = isa<Constant>(OpILane);
2109
+ // Consider the broadcast candidate if:
2110
+ // 1. Same value is found in one of the operands.
2111
+ if (Data.V == Op ||
2112
+ // 2. The operand in the given lane is not constant but there is a
2113
+ // constant operand in another lane (which can be moved to the
2114
+ // given lane). In this case we can represent it as a simple
2115
+ // permutation of constant and broadcast.
2116
+ (!IsConstantOp &&
2117
+ ((Lns > 2 && isa<Constant>(Data.V)) ||
2118
+ // 2.1. If we have only 2 lanes, need to check that value in the
2119
+ // next lane does not build same opcode sequence.
2120
+ (Lns == 2 &&
2121
+ !getSameOpcode({Op, getValue((OpI + 1) % OpE, Ln)}, TLI)
2122
+ .getOpcode() &&
2123
+ isa<Constant>(Data.V)))) ||
2124
+ // 3. The operand in the current lane is loop invariant (can be
2125
+ // hoisted out) and another operand is also a loop invariant
2126
+ // (though not a constant). In this case the whole vector can be
2127
+ // hoisted out.
2128
+ // FIXME: need to teach the cost model about this case for better
2129
+ // estimation.
2130
+ (IsInvariant && !isa<Constant>(Data.V) &&
2131
+ !getSameOpcode({Op, Data.V}, TLI).getOpcode() &&
2132
+ L->isLoopInvariant(Data.V))) {
2076
2133
FoundCandidate = true;
2077
- Data.IsUsed = true;
2134
+ Data.IsUsed = Data.V == Op;
2135
+ if (Data.V == Op)
2136
+ ++Cnt;
2078
2137
break;
2079
2138
}
2080
2139
}
2081
2140
if (!FoundCandidate)
2082
2141
return false;
2083
2142
}
2084
- return true ;
2143
+ return getNumLanes() == 2 || Cnt > 1 ;
2085
2144
}
2086
2145
2087
2146
public:
2088
2147
/// Initialize with all the operands of the instruction vector \p RootVL.
2089
2148
VLOperands(ArrayRef<Value *> RootVL, const BoUpSLP &R)
2090
- : TLI(*R.TLI), DL(*R.DL), SE(*R.SE), R(R) {
2149
+ : TLI(*R.TLI), DL(*R.DL), SE(*R.SE), R(R),
2150
+ L(R.LI->getLoopFor(
2151
+ (cast<Instruction>(RootVL.front())->getParent()))) {
2091
2152
// Append all the operands of RootVL.
2092
2153
appendOperandsOfVL(RootVL);
2093
2154
}
@@ -2219,8 +2280,6 @@ class BoUpSLP {
2219
2280
// getBestOperand().
2220
2281
swap(OpIdx, *BestIdx, Lane);
2221
2282
} else {
2222
- // We failed to find a best operand, set mode to 'Failed'.
2223
- ReorderingModes[OpIdx] = ReorderingMode::Failed;
2224
2283
// Enable the second pass.
2225
2284
StrategyFailed = true;
2226
2285
}
0 commit comments