Skip to content

Commit 5b303a9

Browse files
committed
Revert "[SLP]Do extra analysis int minbitwidth if some checks return false."
This reverts commit ea429e1 to fix issues revealed in https://lab.llvm.org/buildbot/#/builders/186/builds/15299 and https://lab.llvm.org/buildbot/#/builders/238/builds/8426.
1 parent 70d0ebb commit 5b303a9

File tree

2 files changed

+41
-70
lines changed

2 files changed

+41
-70
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 32 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -10225,11 +10225,9 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1022510225
for (const TreeEntry *TE : ForRemoval)
1022610226
Set.erase(TE);
1022710227
}
10228-
bool NeedToRemapValues = false;
1022910228
for (auto *It = UsedTEs.begin(); It != UsedTEs.end();) {
1023010229
if (It->empty()) {
1023110230
UsedTEs.erase(It);
10232-
NeedToRemapValues = true;
1023310231
continue;
1023410232
}
1023510233
std::advance(It, 1);
@@ -10238,19 +10236,6 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1023810236
Entries.clear();
1023910237
return std::nullopt;
1024010238
}
10241-
// Recalculate the mapping between the values and entries sets.
10242-
if (NeedToRemapValues) {
10243-
DenseMap<Value *, int> PrevUsedValuesEntry;
10244-
PrevUsedValuesEntry.swap(UsedValuesEntry);
10245-
for (auto [Idx, Set] : enumerate(UsedTEs)) {
10246-
DenseSet<Value *> Values;
10247-
for (const TreeEntry *E : Set)
10248-
Values.insert(E->Scalars.begin(), E->Scalars.end());
10249-
for (const auto &P : PrevUsedValuesEntry)
10250-
if (Values.contains(P.first))
10251-
UsedValuesEntry.try_emplace(P.first, Idx);
10252-
}
10253-
}
1025410239
}
1025510240

1025610241
unsigned VF = 0;
@@ -14016,33 +14001,6 @@ bool BoUpSLP::collectValuesToDemote(
1401614001
};
1401714002
unsigned Start = 0;
1401814003
unsigned End = I->getNumOperands();
14019-
14020-
auto FinalAnalysis = [&](const TreeEntry *ITE = nullptr) {
14021-
if (!IsProfitableToDemote)
14022-
return false;
14023-
return (ITE && ITE->UserTreeIndices.size() > 1) ||
14024-
IsPotentiallyTruncated(I, BitWidth);
14025-
};
14026-
auto ProcessOperands = [&](ArrayRef<Value *> Operands, bool &NeedToExit) {
14027-
NeedToExit = false;
14028-
unsigned InitLevel = MaxDepthLevel;
14029-
for (Value *IncValue : Operands) {
14030-
unsigned Level = InitLevel;
14031-
if (!collectValuesToDemote(IncValue, IsProfitableToDemoteRoot, BitWidth,
14032-
ToDemote, DemotedConsts, Visited, Level,
14033-
IsProfitableToDemote, IsTruncRoot)) {
14034-
if (!IsProfitableToDemote)
14035-
return false;
14036-
NeedToExit = true;
14037-
if (!FinalAnalysis(ITE))
14038-
return false;
14039-
continue;
14040-
}
14041-
MaxDepthLevel = std::max(MaxDepthLevel, Level);
14042-
}
14043-
return true;
14044-
};
14045-
bool NeedToExit = false;
1404614004
switch (I->getOpcode()) {
1404714005

1404814006
// We can always demote truncations and extensions. Since truncations can
@@ -14068,21 +14026,35 @@ bool BoUpSLP::collectValuesToDemote(
1406814026
case Instruction::And:
1406914027
case Instruction::Or:
1407014028
case Instruction::Xor: {
14071-
if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
14072-
return false;
14073-
if (!ProcessOperands({I->getOperand(0), I->getOperand(1)}, NeedToExit))
14029+
unsigned Level1, Level2;
14030+
if ((ITE->UserTreeIndices.size() > 1 &&
14031+
!IsPotentiallyTruncated(I, BitWidth)) ||
14032+
!collectValuesToDemote(I->getOperand(0), IsProfitableToDemoteRoot,
14033+
BitWidth, ToDemote, DemotedConsts, Visited,
14034+
Level1, IsProfitableToDemote, IsTruncRoot) ||
14035+
!collectValuesToDemote(I->getOperand(1), IsProfitableToDemoteRoot,
14036+
BitWidth, ToDemote, DemotedConsts, Visited,
14037+
Level2, IsProfitableToDemote, IsTruncRoot))
1407414038
return false;
14039+
MaxDepthLevel = std::max(Level1, Level2);
1407514040
break;
1407614041
}
1407714042

1407814043
// We can demote selects if we can demote their true and false values.
1407914044
case Instruction::Select: {
14080-
if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
14081-
return false;
1408214045
Start = 1;
14083-
auto *SI = cast<SelectInst>(I);
14084-
if (!ProcessOperands({SI->getTrueValue(), SI->getFalseValue()}, NeedToExit))
14046+
unsigned Level1, Level2;
14047+
SelectInst *SI = cast<SelectInst>(I);
14048+
if ((ITE->UserTreeIndices.size() > 1 &&
14049+
!IsPotentiallyTruncated(I, BitWidth)) ||
14050+
!collectValuesToDemote(SI->getTrueValue(), IsProfitableToDemoteRoot,
14051+
BitWidth, ToDemote, DemotedConsts, Visited,
14052+
Level1, IsProfitableToDemote, IsTruncRoot) ||
14053+
!collectValuesToDemote(SI->getFalseValue(), IsProfitableToDemoteRoot,
14054+
BitWidth, ToDemote, DemotedConsts, Visited,
14055+
Level2, IsProfitableToDemote, IsTruncRoot))
1408514056
return false;
14057+
MaxDepthLevel = std::max(Level1, Level2);
1408614058
break;
1408714059
}
1408814060

@@ -14093,20 +14065,22 @@ bool BoUpSLP::collectValuesToDemote(
1409314065
MaxDepthLevel = 0;
1409414066
if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
1409514067
return false;
14096-
SmallVector<Value *> Ops(PN->incoming_values().begin(),
14097-
PN->incoming_values().end());
14098-
if (!ProcessOperands(Ops, NeedToExit))
14099-
return false;
14068+
for (Value *IncValue : PN->incoming_values()) {
14069+
unsigned Level;
14070+
if (!collectValuesToDemote(IncValue, IsProfitableToDemoteRoot, BitWidth,
14071+
ToDemote, DemotedConsts, Visited, Level,
14072+
IsProfitableToDemote, IsTruncRoot))
14073+
return false;
14074+
MaxDepthLevel = std::max(MaxDepthLevel, Level);
14075+
}
1410014076
break;
1410114077
}
1410214078

1410314079
// Otherwise, conservatively give up.
1410414080
default:
1410514081
MaxDepthLevel = 1;
14106-
return FinalAnalysis();
14082+
return IsProfitableToDemote && IsPotentiallyTruncated(I, BitWidth);
1410714083
}
14108-
if (NeedToExit)
14109-
return true;
1411014084

1411114085
++MaxDepthLevel;
1411214086
// Gather demoted constant operands.
@@ -14145,17 +14119,15 @@ void BoUpSLP::computeMinimumValueSizes() {
1414514119

1414614120
// The first value node for store/insertelement is sext/zext/trunc? Skip it,
1414714121
// resize to the final type.
14148-
bool IsTruncRoot = false;
1414914122
bool IsProfitableToDemoteRoot = !IsStoreOrInsertElt;
1415014123
if (NodeIdx != 0 &&
1415114124
VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
1415214125
(VectorizableTree[NodeIdx]->getOpcode() == Instruction::ZExt ||
1415314126
VectorizableTree[NodeIdx]->getOpcode() == Instruction::SExt ||
1415414127
VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc)) {
1415514128
assert(IsStoreOrInsertElt && "Expected store/insertelement seeded graph.");
14156-
IsTruncRoot = VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc;
14157-
IsProfitableToDemoteRoot = true;
1415814129
++NodeIdx;
14130+
IsProfitableToDemoteRoot = true;
1415914131
}
1416014132

1416114133
// Analyzed in reduction already and not profitable - exit.
@@ -14287,6 +14259,7 @@ void BoUpSLP::computeMinimumValueSizes() {
1428714259
ReductionBitWidth = bit_ceil(ReductionBitWidth);
1428814260
}
1428914261
bool IsTopRoot = NodeIdx == 0;
14262+
bool IsTruncRoot = false;
1429014263
while (NodeIdx < VectorizableTree.size() &&
1429114264
VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
1429214265
VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc) {

llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll

Lines changed: 9 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -228,7 +228,7 @@ for.end: ; preds = %for.end.loopexit, %
228228
; YAML-NEXT: Function: test_unrolled_select
229229
; YAML-NEXT: Args:
230230
; YAML-NEXT: - String: 'Vectorized horizontal reduction with cost '
231-
; YAML-NEXT: - Cost: '-40'
231+
; YAML-NEXT: - Cost: '-36'
232232
; YAML-NEXT: - String: ' and with tree size '
233233
; YAML-NEXT: - TreeSize: '10'
234234

@@ -246,17 +246,15 @@ define i32 @test_unrolled_select(ptr noalias nocapture readonly %blk1, ptr noali
246246
; CHECK-NEXT: [[P2_045:%.*]] = phi ptr [ [[BLK2:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR88:%.*]], [[IF_END_86]] ]
247247
; CHECK-NEXT: [[P1_044:%.*]] = phi ptr [ [[BLK1:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR:%.*]], [[IF_END_86]] ]
248248
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[P1_044]], align 1
249-
; CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i16>
249+
; CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i32>
250250
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[P2_045]], align 1
251-
; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
252-
; CHECK-NEXT: [[TMP4:%.*]] = sub <8 x i16> [[TMP1]], [[TMP3]]
253-
; CHECK-NEXT: [[TMP5:%.*]] = trunc <8 x i16> [[TMP4]] to <8 x i1>
254-
; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <8 x i1> [[TMP5]], zeroinitializer
255-
; CHECK-NEXT: [[TMP7:%.*]] = sub <8 x i16> zeroinitializer, [[TMP4]]
256-
; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP6]], <8 x i16> [[TMP7]], <8 x i16> [[TMP4]]
257-
; CHECK-NEXT: [[TMP9:%.*]] = zext <8 x i16> [[TMP8]] to <8 x i32>
258-
; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP9]])
259-
; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP10]], [[S_047]]
251+
; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32>
252+
; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <8 x i32> [[TMP1]], [[TMP3]]
253+
; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <8 x i32> [[TMP4]], zeroinitializer
254+
; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <8 x i32> zeroinitializer, [[TMP4]]
255+
; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP5]], <8 x i32> [[TMP6]], <8 x i32> [[TMP4]]
256+
; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP7]])
257+
; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP8]], [[S_047]]
260258
; CHECK-NEXT: [[CMP83:%.*]] = icmp slt i32 [[OP_RDX]], [[LIM:%.*]]
261259
; CHECK-NEXT: br i1 [[CMP83]], label [[IF_END_86]], label [[FOR_END_LOOPEXIT:%.*]]
262260
; CHECK: if.end.86:

0 commit comments

Comments
 (0)