Skip to content

Commit 3a90cb4

Browse files
committed
Revert "[SLP]Do extra analysis int minbitwidth if some checks return false."
This reverts commit da118c9 to fix crashes reported in #84363.
1 parent e52a687 commit 3a90cb4

File tree

2 files changed: 51 additions and 64 deletions.

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 42 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -13988,6 +13988,15 @@ bool BoUpSLP::collectValuesToDemote(
1398813988
// If the value is not a vectorized instruction in the expression and not used
1398913989
// by the insertelement instruction and not used in multiple vector nodes, it
1399013990
// cannot be demoted.
13991+
// TODO: improve handling of gathered values and others.
13992+
auto *I = dyn_cast<Instruction>(V);
13993+
const TreeEntry *ITE = I ? getTreeEntry(I) : nullptr;
13994+
if (!ITE || !Visited.insert(I).second || MultiNodeScalars.contains(I) ||
13995+
all_of(I->users(), [&](User *U) {
13996+
return isa<InsertElementInst>(U) && !getTreeEntry(U);
13997+
}))
13998+
return false;
13999+
1399114000
auto IsPotentiallyTruncated = [&](Value *V, unsigned &BitWidth) -> bool {
1399214001
if (MultiNodeScalars.contains(V))
1399314002
return false;
@@ -14002,44 +14011,8 @@ bool BoUpSLP::collectValuesToDemote(
1400214011
BitWidth = std::max(BitWidth, BitWidth1);
1400314012
return BitWidth > 0 && OrigBitWidth >= (BitWidth * 2);
1400414013
};
14005-
auto FinalAnalysis = [&](const TreeEntry *ITE = nullptr) {
14006-
if (!IsProfitableToDemote)
14007-
return false;
14008-
return (ITE && ITE->UserTreeIndices.size() > 1) ||
14009-
IsPotentiallyTruncated(V, BitWidth);
14010-
};
14011-
// TODO: improve handling of gathered values and others.
14012-
auto *I = dyn_cast<Instruction>(V);
14013-
const TreeEntry *ITE = I ? getTreeEntry(I) : nullptr;
14014-
if (!ITE || !Visited.insert(I).second || MultiNodeScalars.contains(I) ||
14015-
all_of(I->users(), [&](User *U) {
14016-
return isa<InsertElementInst>(U) && !getTreeEntry(U);
14017-
}))
14018-
return FinalAnalysis();
14019-
1402014014
unsigned Start = 0;
1402114015
unsigned End = I->getNumOperands();
14022-
14023-
auto ProcessOperands = [&](ArrayRef<Value *> Operands, bool &NeedToExit) {
14024-
NeedToExit = false;
14025-
unsigned InitLevel = MaxDepthLevel;
14026-
for (Value *IncValue : Operands) {
14027-
unsigned Level = InitLevel;
14028-
if (!collectValuesToDemote(IncValue, IsProfitableToDemoteRoot, BitWidth,
14029-
ToDemote, DemotedConsts, Visited, Level,
14030-
IsProfitableToDemote, IsTruncRoot)) {
14031-
if (!IsProfitableToDemote)
14032-
return false;
14033-
NeedToExit = true;
14034-
if (!FinalAnalysis(ITE))
14035-
return false;
14036-
continue;
14037-
}
14038-
MaxDepthLevel = std::max(MaxDepthLevel, Level);
14039-
}
14040-
return true;
14041-
};
14042-
bool NeedToExit = false;
1404314016
switch (I->getOpcode()) {
1404414017

1404514018
// We can always demote truncations and extensions. Since truncations can
@@ -14065,21 +14038,35 @@ bool BoUpSLP::collectValuesToDemote(
1406514038
case Instruction::And:
1406614039
case Instruction::Or:
1406714040
case Instruction::Xor: {
14068-
if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
14069-
return false;
14070-
if (!ProcessOperands({I->getOperand(0), I->getOperand(1)}, NeedToExit))
14041+
unsigned Level1 = MaxDepthLevel, Level2 = MaxDepthLevel;
14042+
if ((ITE->UserTreeIndices.size() > 1 &&
14043+
!IsPotentiallyTruncated(I, BitWidth)) ||
14044+
!collectValuesToDemote(I->getOperand(0), IsProfitableToDemoteRoot,
14045+
BitWidth, ToDemote, DemotedConsts, Visited,
14046+
Level1, IsProfitableToDemote, IsTruncRoot) ||
14047+
!collectValuesToDemote(I->getOperand(1), IsProfitableToDemoteRoot,
14048+
BitWidth, ToDemote, DemotedConsts, Visited,
14049+
Level2, IsProfitableToDemote, IsTruncRoot))
1407114050
return false;
14051+
MaxDepthLevel = std::max(Level1, Level2);
1407214052
break;
1407314053
}
1407414054

1407514055
// We can demote selects if we can demote their true and false values.
1407614056
case Instruction::Select: {
14077-
if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
14078-
return false;
1407914057
Start = 1;
14080-
auto *SI = cast<SelectInst>(I);
14081-
if (!ProcessOperands({SI->getTrueValue(), SI->getFalseValue()}, NeedToExit))
14058+
unsigned Level1 = MaxDepthLevel, Level2 = MaxDepthLevel;
14059+
SelectInst *SI = cast<SelectInst>(I);
14060+
if ((ITE->UserTreeIndices.size() > 1 &&
14061+
!IsPotentiallyTruncated(I, BitWidth)) ||
14062+
!collectValuesToDemote(SI->getTrueValue(), IsProfitableToDemoteRoot,
14063+
BitWidth, ToDemote, DemotedConsts, Visited,
14064+
Level1, IsProfitableToDemote, IsTruncRoot) ||
14065+
!collectValuesToDemote(SI->getFalseValue(), IsProfitableToDemoteRoot,
14066+
BitWidth, ToDemote, DemotedConsts, Visited,
14067+
Level2, IsProfitableToDemote, IsTruncRoot))
1408214068
return false;
14069+
MaxDepthLevel = std::max(Level1, Level2);
1408314070
break;
1408414071
}
1408514072

@@ -14089,20 +14076,23 @@ bool BoUpSLP::collectValuesToDemote(
1408914076
PHINode *PN = cast<PHINode>(I);
1409014077
if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
1409114078
return false;
14092-
SmallVector<Value *> Ops(PN->incoming_values().begin(),
14093-
PN->incoming_values().end());
14094-
if (!ProcessOperands(Ops, NeedToExit))
14095-
return false;
14079+
unsigned InitLevel = MaxDepthLevel;
14080+
for (Value *IncValue : PN->incoming_values()) {
14081+
unsigned Level = InitLevel;
14082+
if (!collectValuesToDemote(IncValue, IsProfitableToDemoteRoot, BitWidth,
14083+
ToDemote, DemotedConsts, Visited, Level,
14084+
IsProfitableToDemote, IsTruncRoot))
14085+
return false;
14086+
MaxDepthLevel = std::max(MaxDepthLevel, Level);
14087+
}
1409614088
break;
1409714089
}
1409814090

1409914091
// Otherwise, conservatively give up.
1410014092
default:
1410114093
MaxDepthLevel = 1;
14102-
return FinalAnalysis();
14094+
return IsProfitableToDemote && IsPotentiallyTruncated(I, BitWidth);
1410314095
}
14104-
if (NeedToExit)
14105-
return true;
1410614096

1410714097
++MaxDepthLevel;
1410814098
// Gather demoted constant operands.
@@ -14141,17 +14131,15 @@ void BoUpSLP::computeMinimumValueSizes() {
1414114131

1414214132
// The first value node for store/insertelement is sext/zext/trunc? Skip it,
1414314133
// resize to the final type.
14144-
bool IsTruncRoot = false;
1414514134
bool IsProfitableToDemoteRoot = !IsStoreOrInsertElt;
1414614135
if (NodeIdx != 0 &&
1414714136
VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
1414814137
(VectorizableTree[NodeIdx]->getOpcode() == Instruction::ZExt ||
1414914138
VectorizableTree[NodeIdx]->getOpcode() == Instruction::SExt ||
1415014139
VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc)) {
1415114140
assert(IsStoreOrInsertElt && "Expected store/insertelement seeded graph.");
14152-
IsTruncRoot = VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc;
14153-
IsProfitableToDemoteRoot = true;
1415414141
++NodeIdx;
14142+
IsProfitableToDemoteRoot = true;
1415514143
}
1415614144

1415714145
// Analyzed in reduction already and not profitable - exit.
@@ -14283,6 +14271,7 @@ void BoUpSLP::computeMinimumValueSizes() {
1428314271
ReductionBitWidth = bit_ceil(ReductionBitWidth);
1428414272
}
1428514273
bool IsTopRoot = NodeIdx == 0;
14274+
bool IsTruncRoot = false;
1428614275
while (NodeIdx < VectorizableTree.size() &&
1428714276
VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
1428814277
VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc) {

llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll

Lines changed: 9 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -228,7 +228,7 @@ for.end: ; preds = %for.end.loopexit, %
228228
; YAML-NEXT: Function: test_unrolled_select
229229
; YAML-NEXT: Args:
230230
; YAML-NEXT: - String: 'Vectorized horizontal reduction with cost '
231-
; YAML-NEXT: - Cost: '-41'
231+
; YAML-NEXT: - Cost: '-36'
232232
; YAML-NEXT: - String: ' and with tree size '
233233
; YAML-NEXT: - TreeSize: '10'
234234

@@ -246,17 +246,15 @@ define i32 @test_unrolled_select(ptr noalias nocapture readonly %blk1, ptr noali
246246
; CHECK-NEXT: [[P2_045:%.*]] = phi ptr [ [[BLK2:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR88:%.*]], [[IF_END_86]] ]
247247
; CHECK-NEXT: [[P1_044:%.*]] = phi ptr [ [[BLK1:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR:%.*]], [[IF_END_86]] ]
248248
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[P1_044]], align 1
249-
; CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i16>
249+
; CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i32>
250250
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[P2_045]], align 1
251-
; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
252-
; CHECK-NEXT: [[TMP4:%.*]] = sub <8 x i16> [[TMP1]], [[TMP3]]
253-
; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i16> [[TMP4]] to <8 x i32>
254-
; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <8 x i32> [[TMP5]], zeroinitializer
255-
; CHECK-NEXT: [[TMP7:%.*]] = sub <8 x i16> zeroinitializer, [[TMP4]]
256-
; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP6]], <8 x i16> [[TMP7]], <8 x i16> [[TMP4]]
257-
; CHECK-NEXT: [[TMP9:%.*]] = sext <8 x i16> [[TMP8]] to <8 x i32>
258-
; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP9]])
259-
; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP10]], [[S_047]]
251+
; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32>
252+
; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <8 x i32> [[TMP1]], [[TMP3]]
253+
; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <8 x i32> [[TMP4]], zeroinitializer
254+
; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <8 x i32> zeroinitializer, [[TMP4]]
255+
; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP5]], <8 x i32> [[TMP6]], <8 x i32> [[TMP4]]
256+
; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP7]])
257+
; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP8]], [[S_047]]
260258
; CHECK-NEXT: [[CMP83:%.*]] = icmp slt i32 [[OP_RDX]], [[LIM:%.*]]
261259
; CHECK-NEXT: br i1 [[CMP83]], label [[IF_END_86]], label [[FOR_END_LOOPEXIT:%.*]]
262260
; CHECK: if.end.86:

0 commit comments

Comments
 (0)