Skip to content

Commit d1a7225

Browse files
committed
[SLP]Check if the node must keep its original bitwidth
Check whether, during a previous analysis, the node was requested to keep its original bitwidth, to avoid incorrect codegen. Fixes #120076
1 parent 2402bcc commit d1a7225

File tree

2 files changed

+24
-18
lines changed

2 files changed

+24
-18
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2931,13 +2931,11 @@ class BoUpSLP {
29312931
/// truncation. We collect the entries that will be demoted in ToDemote.
29322932
/// \param E Node for analysis
29332933
/// \param ToDemote indices of the nodes to be demoted.
2934-
bool collectValuesToDemote(const TreeEntry &E, bool IsProfitableToDemoteRoot,
2935-
unsigned &BitWidth,
2936-
SmallVectorImpl<unsigned> &ToDemote,
2937-
DenseSet<const TreeEntry *> &Visited,
2938-
unsigned &MaxDepthLevel,
2939-
bool &IsProfitableToDemote,
2940-
bool IsTruncRoot) const;
2934+
bool collectValuesToDemote(
2935+
const TreeEntry &E, bool IsProfitableToDemoteRoot, unsigned &BitWidth,
2936+
SmallVectorImpl<unsigned> &ToDemote, DenseSet<const TreeEntry *> &Visited,
2937+
const SmallDenseSet<unsigned, 8> &NodesToKeepBWs, unsigned &MaxDepthLevel,
2938+
bool &IsProfitableToDemote, bool IsTruncRoot) const;
29412939

29422940
/// Check if the operands on the edges \p Edges of the \p UserTE allow
29432941
/// reordering (i.e. the operands can be reordered because they have only one
@@ -17515,8 +17513,8 @@ unsigned BoUpSLP::getVectorElementSize(Value *V) {
1751517513
bool BoUpSLP::collectValuesToDemote(
1751617514
const TreeEntry &E, bool IsProfitableToDemoteRoot, unsigned &BitWidth,
1751717515
SmallVectorImpl<unsigned> &ToDemote, DenseSet<const TreeEntry *> &Visited,
17518-
unsigned &MaxDepthLevel, bool &IsProfitableToDemote,
17519-
bool IsTruncRoot) const {
17516+
const SmallDenseSet<unsigned, 8> &NodesToKeepBWs, unsigned &MaxDepthLevel,
17517+
bool &IsProfitableToDemote, bool IsTruncRoot) const {
1752017518
// We can always demote constants.
1752117519
if (all_of(E.Scalars, IsaPred<Constant>))
1752217520
return true;
@@ -17528,6 +17526,10 @@ bool BoUpSLP::collectValuesToDemote(
1752817526
return true;
1752917527
}
1753017528

17529+
// Check if the node was analyzed already and must keep its original bitwidth.
17530+
if (NodesToKeepBWs.contains(E.Idx))
17531+
return false;
17532+
1753117533
// If the value is not a vectorized instruction in the expression and not used
1753217534
// by the insertelement instruction and not used in multiple vector nodes, it
1753317535
// cannot be demoted.
@@ -17623,8 +17625,8 @@ bool BoUpSLP::collectValuesToDemote(
1762317625
for (const TreeEntry *Op : Operands) {
1762417626
unsigned Level = InitLevel;
1762517627
if (!collectValuesToDemote(*Op, IsProfitableToDemoteRoot, BitWidth,
17626-
ToDemote, Visited, Level, IsProfitableToDemote,
17627-
IsTruncRoot)) {
17628+
ToDemote, Visited, NodesToKeepBWs, Level,
17629+
IsProfitableToDemote, IsTruncRoot)) {
1762817630
if (!IsProfitableToDemote)
1762917631
return false;
1763017632
NeedToExit = true;
@@ -17926,6 +17928,7 @@ void BoUpSLP::computeMinimumValueSizes() {
1792617928
bool IsTruncRoot = false;
1792717929
bool IsProfitableToDemoteRoot = !IsStoreOrInsertElt;
1792817930
SmallVector<unsigned> RootDemotes;
17931+
SmallDenseSet<unsigned, 8> NodesToKeepBWs;
1792917932
if (NodeIdx != 0 &&
1793017933
VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
1793117934
VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc) {
@@ -17949,6 +17952,7 @@ void BoUpSLP::computeMinimumValueSizes() {
1794917952
// Check if the root is trunc and the next node is gather/buildvector, then
1795017953
// keep trunc in scalars, which is free in most cases.
1795117954
if (E.isGather() && IsTruncRoot && E.UserTreeIndices.size() == 1 &&
17955+
!NodesToKeepBWs.contains(E.Idx) &&
1795217956
E.Idx > (IsStoreOrInsertElt ? 2u : 1u) &&
1795317957
all_of(E.Scalars, [&](Value *V) {
1795417958
return V->hasOneUse() || isa<Constant>(V) ||
@@ -18071,8 +18075,8 @@ void BoUpSLP::computeMinimumValueSizes() {
1807118075
bool NeedToDemote = IsProfitableToDemote;
1807218076

1807318077
if (!collectValuesToDemote(E, IsProfitableToDemoteRoot, MaxBitWidth,
18074-
ToDemote, Visited, MaxDepthLevel, NeedToDemote,
18075-
IsTruncRoot) ||
18078+
ToDemote, Visited, NodesToKeepBWs, MaxDepthLevel,
18079+
NeedToDemote, IsTruncRoot) ||
1807618080
(MaxDepthLevel <= Limit &&
1807718081
!(((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
1807818082
(!IsTopRoot || !(IsStoreOrInsertElt || UserIgnoreList) ||
@@ -18206,14 +18210,15 @@ void BoUpSLP::computeMinimumValueSizes() {
1820618210
});
1820718211
}
1820818212

18209-
// If the maximum bit width we compute is less than the with of the roots'
18213+
// If the maximum bit width we compute is less than the width of the roots'
1821018214
// type, we can proceed with the narrowing. Otherwise, do nothing.
1821118215
if (MaxBitWidth == 0 ||
1821218216
MaxBitWidth >=
1821318217
cast<IntegerType>(TreeRoot.front()->getType()->getScalarType())
1821418218
->getBitWidth()) {
1821518219
if (UserIgnoreList)
1821618220
AnalyzedMinBWVals.insert(TreeRoot.begin(), TreeRoot.end());
18221+
NodesToKeepBWs.insert(ToDemote.begin(), ToDemote.end());
1821718222
continue;
1821818223
}
1821918224

llvm/test/Transforms/SLPVectorizer/X86/minbw-node-used-twice.ll

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,11 @@ define i8 @test() {
66
; CHECK-NEXT: [[ENTRY:.*:]]
77
; CHECK-NEXT: [[SUB_I_I79_PEEL_I:%.*]] = sub i16 0, 1
88
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i16> <i16 poison, i16 0>, i16 [[SUB_I_I79_PEEL_I]], i32 0
9-
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i16> zeroinitializer, [[TMP0]]
10-
; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i16>
11-
; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i16> [[TMP2]], [[TMP0]]
12-
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <2 x i16> [[TMP3]], [[TMP0]]
9+
; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i16> [[TMP0]] to <2 x i32>
10+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> zeroinitializer, [[TMP2]]
11+
; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i32>
12+
; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> [[TMP3]], [[TMP2]]
13+
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <2 x i32> [[TMP4]], [[TMP2]]
1314
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
1415
; CHECK-NEXT: [[CONV13_I89_PEEL_I:%.*]] = zext i1 [[TMP5]] to i8
1516
; CHECK-NEXT: ret i8 [[CONV13_I89_PEEL_I]]

0 commit comments

Comments
 (0)