
Commit b74e09c

[SLP]Check for the whole vector vectorization in unique scalars analysis
Need to check that the whole number of registers is attempted to be vectorized before actually trying to build the node, to avoid a compiler crash.
1 parent dd94537 commit b74e09c
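
Roughly, the crash scenario is a node whose unique scalars do not fill a whole number of vector registers; the patch computes that property once (IsFullVectors in the diff below) and gates node creation on it. As a standalone illustration only, with a hypothetical stand-in predicate and a simplified register model (the real check goes through TargetTransformInfo, not this helper):

// Simplified sketch, assuming a register holds RegElems elements of the
// scalar type; fillsWholeRegisters() is a hypothetical stand-in for the
// hasFullVectorsOnly() query used in the patch, not the LLVM implementation.
#include <cassert>

static bool fillsWholeRegisters(unsigned Sz, unsigned RegElems) {
  // True only when Sz scalars split into parts that each occupy a
  // full register.
  return Sz >= RegElems && Sz % RegElems == 0;
}

int main() {
  assert(fillsWholeRegisters(8, 4));  // two full registers: OK to vectorize
  assert(!fillsWholeRegisters(6, 4)); // one and a half registers: bail out
}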

2 files changed: +78 -14 lines


llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 11 additions & 14 deletions
@@ -409,7 +409,7 @@ static bool isVectorLikeInstWithConstOps(Value *V) {
 /// total number of elements \p Size and number of registers (parts) \p
 /// NumParts.
 static unsigned getPartNumElems(unsigned Size, unsigned NumParts) {
-  return PowerOf2Ceil(divideCeil(Size, NumParts));
+  return std::min<unsigned>(Size, PowerOf2Ceil(divideCeil(Size, NumParts)));
 }
 
 /// Returns correct remaining number of elements, considering total amount \p
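
A worked example of why the clamp matters: with Size = 3 and NumParts = 1, PowerOf2Ceil(divideCeil(3, 1)) is 4, i.e. a part nominally wider than the whole sequence. A minimal sketch, reimplementing the two MathExtras.h helpers under their assumed semantics to show the before/after values:

#include <algorithm>
#include <cassert>

// Assumed semantics of llvm/Support/MathExtras.h helpers, reimplemented
// here so the example is self-contained.
static unsigned divideCeil(unsigned N, unsigned D) { return (N + D - 1) / D; }
static unsigned powerOf2Ceil(unsigned N) {
  unsigned P = 1;
  while (P < N)
    P <<= 1;
  return P;
}

int main() {
  // Before the patch: 3 elements, 1 part -> part size 4 (> 3 total elements).
  assert(powerOf2Ceil(divideCeil(3, 1)) == 4);
  // After the patch: clamped to the total element count.
  assert(std::min(3u, powerOf2Ceil(divideCeil(3, 1))) == 3);
  // Even splits are unaffected: 8 elements, 2 parts -> 4 either way.
  assert(std::min(8u, powerOf2Ceil(divideCeil(8, 2))) == 4);
}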
@@ -7022,7 +7022,11 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
       UniqueValues.emplace_back(V);
     }
     size_t NumUniqueScalarValues = UniqueValues.size();
-    if (NumUniqueScalarValues == VL.size()) {
+    bool IsFullVectors =
+        hasFullVectorsOnly(*TTI, UniqueValues.front()->getType(),
+                           NumUniqueScalarValues);
+    if (NumUniqueScalarValues == VL.size() &&
+        (VectorizeNonPowerOf2 || IsFullVectors)) {
       ReuseShuffleIndices.clear();
     } else {
       // FIXME: Reshuffing scalars is not supported yet for non-power-of-2 ops.
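
The effect of the new condition in this hunk: a duplicate-free scalar list is no longer enough on its own; it must also either fill whole registers or be admitted by the non-power-of-2 path. A condensed sketch of that decision, with plain booleans standing in for the real queries (NumUniqueScalarValues == VL.size(), the VectorizeNonPowerOf2 flag referenced in the diff, and hasFullVectorsOnly()):

#include <cassert>

// Sketch of the gating decision only; the surrounding buildTree_rec logic
// is elided.
static bool keepScalarsAsIs(bool AllUnique, bool VectorizeNonPowerOf2,
                            bool IsFullVectors) {
  return AllUnique && (VectorizeNonPowerOf2 || IsFullVectors);
}

int main() {
  assert(keepScalarsAsIs(true, false, true));   // unique + full registers
  assert(!keepScalarsAsIs(true, false, false)); // unique but partial register:
                                                // fall through to reshuffle
  assert(keepScalarsAsIs(true, true, false));   // non-power-of-2 path enabled
}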
@@ -7033,14 +7037,10 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
         return false;
       }
       LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n");
-      if (NumUniqueScalarValues <= 1 ||
-          (UniquePositions.size() == 1 && all_of(UniqueValues,
-                                                 [](Value *V) {
-                                                   return isa<UndefValue>(V) ||
-                                                          !isConstant(V);
-                                                 })) ||
-          !hasFullVectorsOnly(*TTI, UniqueValues.front()->getType(),
-                              NumUniqueScalarValues)) {
+      if (NumUniqueScalarValues <= 1 || !IsFullVectors ||
+          (UniquePositions.size() == 1 && all_of(UniqueValues, [](Value *V) {
+            return isa<UndefValue>(V) || !isConstant(V);
+          }))) {
         if (DoNotFail && UniquePositions.size() > 1 &&
             NumUniqueScalarValues > 1 && S.MainOp->isSafeToRemove() &&
             all_of(UniqueValues, [=](Value *V) {
@@ -9144,9 +9144,6 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
       return nullptr;
     Value *VecBase = nullptr;
     ArrayRef<Value *> VL = E->Scalars;
-    // If the resulting type is scalarized, do not adjust the cost.
-    if (NumParts == VL.size())
-      return nullptr;
     // Check if it can be considered reused if same extractelements were
     // vectorized already.
     bool PrevNodeFound = any_of(
@@ -9799,7 +9796,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
         InsertMask[Idx] = I + 1;
     }
     unsigned VecScalarsSz = PowerOf2Ceil(NumElts);
-    if (NumOfParts > 0)
+    if (NumOfParts > 0 && NumOfParts < NumElts)
       VecScalarsSz = PowerOf2Ceil((NumElts + NumOfParts - 1) / NumOfParts);
     unsigned VecSz = (1 + OffsetEnd / VecScalarsSz - OffsetBeg / VecScalarsSz) *
                      VecScalarsSz;
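
Worked numbers for the added NumOfParts < NumElts guard: with NumElts = 2 scalars split into NumOfParts = 2 parts, the old code recomputed VecScalarsSz = PowerOf2Ceil(1) = 1, a fully scalarized size; the guard now keeps the initial PowerOf2Ceil(NumElts) = 2. A small check of that arithmetic, with the helper reimplemented under the same assumption as above:

#include <cassert>

static unsigned powerOf2Ceil(unsigned N) {
  unsigned P = 1;
  while (P < N)
    P <<= 1;
  return P;
}

int main() {
  unsigned NumElts = 2, NumOfParts = 2;
  unsigned VecScalarsSz = powerOf2Ceil(NumElts); // 2
  if (NumOfParts > 0 && NumOfParts < NumElts)    // false: guard keeps 2
    VecScalarsSz = powerOf2Ceil((NumElts + NumOfParts - 1) / NumOfParts);
  assert(VecScalarsSz == 2); // without the guard it would collapse to 1
}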
Lines changed: 67 additions & 0 deletions
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=riscv64-unknown-linux-gnu -mattr=+v -slp-threshold=-10 < %s | FileCheck %s
+
+define void @test(ptr %agg.result) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr [[AGG_RESULT:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ARRAYIDX_I39_1:%.*]] = getelementptr i8, ptr [[AGG_RESULT]], i64 8
+; CHECK-NEXT: [[ARRAYIDX_I39_2:%.*]] = getelementptr i8, ptr [[AGG_RESULT]], i64 16
+; CHECK-NEXT: [[ADD_PTR_I41_1_1_1:%.*]] = getelementptr i8, ptr null, i64 16
+; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ADD_PTR_I41_1_1_1]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr null, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr null, align 8
+; CHECK-NEXT: [[MUL_1:%.*]] = fmul double [[TMP2]], 0.000000e+00
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> poison, double [[TMP0]], i32 2
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> [[TMP4]], <4 x i32> <i32 poison, i32 poison, i32 2, i32 5>
+; CHECK-NEXT: [[TMP6:%.*]] = call <4 x double> @llvm.vector.insert.v4f64.v2f64(<4 x double> [[TMP5]], <2 x double> [[TMP1]], i64 0)
+; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x double> [[TMP6]], <double 0.000000e+00, double 0.000000e+00, double 1.000000e+00, double 0.000000e+00>
+; CHECK-NEXT: [[TMP8:%.*]] = fmul <4 x double> zeroinitializer, [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x double> [[TMP8]], i32 1
+; CHECK-NEXT: store double [[TMP9]], ptr [[ARRAYIDX_I39_1]], align 8
+; CHECK-NEXT: store <4 x double> [[TMP8]], ptr [[ARRAYIDX_I39_2]], align 8
+; CHECK-NEXT: [[ARRAYIDX_I37_2:%.*]] = getelementptr i8, ptr [[AGG_RESULT]], i64 48
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x double> [[TMP8]], <4 x double> poison, <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: store <2 x double> [[TMP10]], ptr [[ARRAYIDX_I37_2]], align 8
+; CHECK-NEXT: [[ARRAYIDX_I39_2_2:%.*]] = getelementptr i8, ptr [[AGG_RESULT]], i64 64
+; CHECK-NEXT: [[MUL_1_2_2:%.*]] = fmul double 1.000000e+00, 0.000000e+00
+; CHECK-NEXT: [[MUL_2_2_2:%.*]] = fmul double 0.000000e+00, [[MUL_1_2_2]]
+; CHECK-NEXT: store double [[MUL_2_2_2]], ptr [[ARRAYIDX_I39_2_2]], align 8
+; CHECK-NEXT: ret void
+;
+entry:
+  %0 = load double, ptr null, align 8
+  %mul.1 = fmul double %0, 0.000000e+00
+  %arrayidx.i39.1 = getelementptr i8, ptr %agg.result, i64 8
+  %add.ptr.i41.1.1 = getelementptr i8, ptr null, i64 8
+  %1 = load double, ptr %add.ptr.i41.1.1, align 8
+  %mul.1.1 = fmul double %1, 0.000000e+00
+  %mul.2.1 = fmul double 0.000000e+00, %mul.1.1
+  store double %mul.2.1, ptr %arrayidx.i39.1, align 8
+  %arrayidx.i39.2 = getelementptr i8, ptr %agg.result, i64 16
+  %mul.1.2 = fmul double %0, 0.000000e+00
+  %mul.2.2 = fmul double 0.000000e+00, %mul.1.2
+  store double %mul.2.2, ptr %arrayidx.i39.2, align 8
+  %arrayidx.i37.1 = getelementptr i8, ptr %agg.result, i64 24
+  store double %mul.2.1, ptr %arrayidx.i37.1, align 8
+  %arrayidx.i39.1.1 = getelementptr i8, ptr %agg.result, i64 32
+  %add.ptr.i41.1.1.1 = getelementptr i8, ptr null, i64 16
+  %2 = load double, ptr %add.ptr.i41.1.1.1, align 8
+  %mul.1.1.1 = fmul double %2, 1.000000e+00
+  %mul.2.1.1 = fmul double 0.000000e+00, %mul.1.1.1
+  store double %mul.2.1.1, ptr %arrayidx.i39.1.1, align 8
+  %arrayidx.i39.2.1 = getelementptr i8, ptr %agg.result, i64 40
+  %mul.1.2.1 = fmul double %1, 0.000000e+00
+  %mul.2.2.1 = fmul double 0.000000e+00, %mul.1.2.1
+  store double %mul.2.2.1, ptr %arrayidx.i39.2.1, align 8
+  %arrayidx.i37.2 = getelementptr i8, ptr %agg.result, i64 48
+  store double %mul.2.2, ptr %arrayidx.i37.2, align 8
+  %arrayidx.i39.1.2 = getelementptr i8, ptr %agg.result, i64 56
+  store double %mul.2.2.1, ptr %arrayidx.i39.1.2, align 8
+  %arrayidx.i39.2.2 = getelementptr i8, ptr %agg.result, i64 64
+  %mul.1.2.2 = fmul double 1.000000e+00, 0.000000e+00
+  %mul.2.2.2 = fmul double 0.000000e+00, %mul.1.2.2
+  store double %mul.2.2.2, ptr %arrayidx.i39.2.2, align 8
+  ret void
+}
