Skip to content

Commit 7567f5b

Browse files
committed
Revert "[SLP]Do extra analysis int minbitwidth if some checks return false."
This reverts commit ea429e1 to fix issues reported in #84536 (comment).
1 parent d6d3d96 commit 7567f5b

File tree

2 files changed

+41
-70
lines changed

2 files changed

+41
-70
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 32 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -10226,11 +10226,9 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1022610226
for (const TreeEntry *TE : ForRemoval)
1022710227
Set.erase(TE);
1022810228
}
10229-
bool NeedToRemapValues = false;
1023010229
for (auto *It = UsedTEs.begin(); It != UsedTEs.end();) {
1023110230
if (It->empty()) {
1023210231
UsedTEs.erase(It);
10233-
NeedToRemapValues = true;
1023410232
continue;
1023510233
}
1023610234
std::advance(It, 1);
@@ -10239,19 +10237,6 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1023910237
Entries.clear();
1024010238
return std::nullopt;
1024110239
}
10242-
// Recalculate the mapping between the values and entries sets.
10243-
if (NeedToRemapValues) {
10244-
DenseMap<Value *, int> PrevUsedValuesEntry;
10245-
PrevUsedValuesEntry.swap(UsedValuesEntry);
10246-
for (auto [Idx, Set] : enumerate(UsedTEs)) {
10247-
DenseSet<Value *> Values;
10248-
for (const TreeEntry *E : Set)
10249-
Values.insert(E->Scalars.begin(), E->Scalars.end());
10250-
for (const auto &P : PrevUsedValuesEntry)
10251-
if (Values.contains(P.first))
10252-
UsedValuesEntry.try_emplace(P.first, Idx);
10253-
}
10254-
}
1025510240
}
1025610241

1025710242
unsigned VF = 0;
@@ -14022,33 +14007,6 @@ bool BoUpSLP::collectValuesToDemote(
1402214007
};
1402314008
unsigned Start = 0;
1402414009
unsigned End = I->getNumOperands();
14025-
14026-
auto FinalAnalysis = [&](const TreeEntry *ITE = nullptr) {
14027-
if (!IsProfitableToDemote)
14028-
return false;
14029-
return (ITE && ITE->UserTreeIndices.size() > 1) ||
14030-
IsPotentiallyTruncated(I, BitWidth);
14031-
};
14032-
auto ProcessOperands = [&](ArrayRef<Value *> Operands, bool &NeedToExit) {
14033-
NeedToExit = false;
14034-
unsigned InitLevel = MaxDepthLevel;
14035-
for (Value *IncValue : Operands) {
14036-
unsigned Level = InitLevel;
14037-
if (!collectValuesToDemote(IncValue, IsProfitableToDemoteRoot, BitWidth,
14038-
ToDemote, DemotedConsts, Visited, Level,
14039-
IsProfitableToDemote, IsTruncRoot)) {
14040-
if (!IsProfitableToDemote)
14041-
return false;
14042-
NeedToExit = true;
14043-
if (!FinalAnalysis(ITE))
14044-
return false;
14045-
continue;
14046-
}
14047-
MaxDepthLevel = std::max(MaxDepthLevel, Level);
14048-
}
14049-
return true;
14050-
};
14051-
bool NeedToExit = false;
1405214010
switch (I->getOpcode()) {
1405314011

1405414012
// We can always demote truncations and extensions. Since truncations can
@@ -14074,21 +14032,35 @@ bool BoUpSLP::collectValuesToDemote(
1407414032
case Instruction::And:
1407514033
case Instruction::Or:
1407614034
case Instruction::Xor: {
14077-
if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
14078-
return false;
14079-
if (!ProcessOperands({I->getOperand(0), I->getOperand(1)}, NeedToExit))
14035+
unsigned Level1, Level2;
14036+
if ((ITE->UserTreeIndices.size() > 1 &&
14037+
!IsPotentiallyTruncated(I, BitWidth)) ||
14038+
!collectValuesToDemote(I->getOperand(0), IsProfitableToDemoteRoot,
14039+
BitWidth, ToDemote, DemotedConsts, Visited,
14040+
Level1, IsProfitableToDemote, IsTruncRoot) ||
14041+
!collectValuesToDemote(I->getOperand(1), IsProfitableToDemoteRoot,
14042+
BitWidth, ToDemote, DemotedConsts, Visited,
14043+
Level2, IsProfitableToDemote, IsTruncRoot))
1408014044
return false;
14045+
MaxDepthLevel = std::max(Level1, Level2);
1408114046
break;
1408214047
}
1408314048

1408414049
// We can demote selects if we can demote their true and false values.
1408514050
case Instruction::Select: {
14086-
if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
14087-
return false;
1408814051
Start = 1;
14089-
auto *SI = cast<SelectInst>(I);
14090-
if (!ProcessOperands({SI->getTrueValue(), SI->getFalseValue()}, NeedToExit))
14052+
unsigned Level1, Level2;
14053+
SelectInst *SI = cast<SelectInst>(I);
14054+
if ((ITE->UserTreeIndices.size() > 1 &&
14055+
!IsPotentiallyTruncated(I, BitWidth)) ||
14056+
!collectValuesToDemote(SI->getTrueValue(), IsProfitableToDemoteRoot,
14057+
BitWidth, ToDemote, DemotedConsts, Visited,
14058+
Level1, IsProfitableToDemote, IsTruncRoot) ||
14059+
!collectValuesToDemote(SI->getFalseValue(), IsProfitableToDemoteRoot,
14060+
BitWidth, ToDemote, DemotedConsts, Visited,
14061+
Level2, IsProfitableToDemote, IsTruncRoot))
1409114062
return false;
14063+
MaxDepthLevel = std::max(Level1, Level2);
1409214064
break;
1409314065
}
1409414066

@@ -14099,20 +14071,22 @@ bool BoUpSLP::collectValuesToDemote(
1409914071
MaxDepthLevel = 0;
1410014072
if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
1410114073
return false;
14102-
SmallVector<Value *> Ops(PN->incoming_values().begin(),
14103-
PN->incoming_values().end());
14104-
if (!ProcessOperands(Ops, NeedToExit))
14105-
return false;
14074+
for (Value *IncValue : PN->incoming_values()) {
14075+
unsigned Level;
14076+
if (!collectValuesToDemote(IncValue, IsProfitableToDemoteRoot, BitWidth,
14077+
ToDemote, DemotedConsts, Visited, Level,
14078+
IsProfitableToDemote, IsTruncRoot))
14079+
return false;
14080+
MaxDepthLevel = std::max(MaxDepthLevel, Level);
14081+
}
1410614082
break;
1410714083
}
1410814084

1410914085
// Otherwise, conservatively give up.
1411014086
default:
1411114087
MaxDepthLevel = 1;
14112-
return FinalAnalysis();
14088+
return IsProfitableToDemote && IsPotentiallyTruncated(I, BitWidth);
1411314089
}
14114-
if (NeedToExit)
14115-
return true;
1411614090

1411714091
++MaxDepthLevel;
1411814092
// Gather demoted constant operands.
@@ -14151,17 +14125,15 @@ void BoUpSLP::computeMinimumValueSizes() {
1415114125

1415214126
// The first value node for store/insertelement is sext/zext/trunc? Skip it,
1415314127
// resize to the final type.
14154-
bool IsTruncRoot = false;
1415514128
bool IsProfitableToDemoteRoot = !IsStoreOrInsertElt;
1415614129
if (NodeIdx != 0 &&
1415714130
VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
1415814131
(VectorizableTree[NodeIdx]->getOpcode() == Instruction::ZExt ||
1415914132
VectorizableTree[NodeIdx]->getOpcode() == Instruction::SExt ||
1416014133
VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc)) {
1416114134
assert(IsStoreOrInsertElt && "Expected store/insertelement seeded graph.");
14162-
IsTruncRoot = VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc;
14163-
IsProfitableToDemoteRoot = true;
1416414135
++NodeIdx;
14136+
IsProfitableToDemoteRoot = true;
1416514137
}
1416614138

1416714139
// Analyzed in reduction already and not profitable - exit.
@@ -14293,6 +14265,7 @@ void BoUpSLP::computeMinimumValueSizes() {
1429314265
ReductionBitWidth = bit_ceil(ReductionBitWidth);
1429414266
}
1429514267
bool IsTopRoot = NodeIdx == 0;
14268+
bool IsTruncRoot = false;
1429614269
while (NodeIdx < VectorizableTree.size() &&
1429714270
VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
1429814271
VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc) {

llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll

Lines changed: 9 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -228,7 +228,7 @@ for.end: ; preds = %for.end.loopexit, %
228228
; YAML-NEXT: Function: test_unrolled_select
229229
; YAML-NEXT: Args:
230230
; YAML-NEXT: - String: 'Vectorized horizontal reduction with cost '
231-
; YAML-NEXT: - Cost: '-40'
231+
; YAML-NEXT: - Cost: '-36'
232232
; YAML-NEXT: - String: ' and with tree size '
233233
; YAML-NEXT: - TreeSize: '10'
234234

@@ -246,17 +246,15 @@ define i32 @test_unrolled_select(ptr noalias nocapture readonly %blk1, ptr noali
246246
; CHECK-NEXT: [[P2_045:%.*]] = phi ptr [ [[BLK2:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR88:%.*]], [[IF_END_86]] ]
247247
; CHECK-NEXT: [[P1_044:%.*]] = phi ptr [ [[BLK1:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR:%.*]], [[IF_END_86]] ]
248248
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[P1_044]], align 1
249-
; CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i16>
249+
; CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i32>
250250
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[P2_045]], align 1
251-
; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
252-
; CHECK-NEXT: [[TMP4:%.*]] = sub <8 x i16> [[TMP1]], [[TMP3]]
253-
; CHECK-NEXT: [[TMP5:%.*]] = trunc <8 x i16> [[TMP4]] to <8 x i1>
254-
; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <8 x i1> [[TMP5]], zeroinitializer
255-
; CHECK-NEXT: [[TMP7:%.*]] = sub <8 x i16> zeroinitializer, [[TMP4]]
256-
; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP6]], <8 x i16> [[TMP7]], <8 x i16> [[TMP4]]
257-
; CHECK-NEXT: [[TMP9:%.*]] = zext <8 x i16> [[TMP8]] to <8 x i32>
258-
; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP9]])
259-
; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP10]], [[S_047]]
251+
; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32>
252+
; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <8 x i32> [[TMP1]], [[TMP3]]
253+
; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <8 x i32> [[TMP4]], zeroinitializer
254+
; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <8 x i32> zeroinitializer, [[TMP4]]
255+
; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP5]], <8 x i32> [[TMP6]], <8 x i32> [[TMP4]]
256+
; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP7]])
257+
; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP8]], [[S_047]]
260258
; CHECK-NEXT: [[CMP83:%.*]] = icmp slt i32 [[OP_RDX]], [[LIM:%.*]]
261259
; CHECK-NEXT: br i1 [[CMP83]], label [[IF_END_86]], label [[FOR_END_LOOPEXIT:%.*]]
262260
; CHECK: if.end.86:

0 commit comments

Comments
 (0)