Skip to content

Commit 036e48e

Browse files
committed
[SLP]Fix PR76850: do the analysis of the submask.
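The transformation of VecMask must be limited to the part of the mask that corresponds to the current slice (SliceSize elements starting at I * SliceSize) instead of rewriting the whole mask; otherwise the compiler crashes during the subsequent cost analysis.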
1 parent 0deb27c commit 036e48e

File tree

2 files changed

+35
-6
lines changed

2 files changed

+35
-6
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10596,7 +10596,8 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
1059610596
inversePermutation(E->ReorderIndices, ReorderMask);
1059710597
if (!ReorderMask.empty())
1059810598
reorderScalars(GatheredScalars, ReorderMask);
10599-
auto FindReusedSplat = [&](MutableArrayRef<int> Mask, unsigned InputVF) {
10599+
auto FindReusedSplat = [&](MutableArrayRef<int> Mask, unsigned InputVF,
10600+
unsigned I, unsigned SliceSize) {
1060010601
if (!isSplat(E->Scalars) || none_of(E->Scalars, [](Value *V) {
1060110602
return isa<UndefValue>(V) && !isa<PoisonValue>(V);
1060210603
}))
@@ -10619,11 +10620,13 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
1061910620
Idx == 0) ||
1062010621
(Mask.size() == InputVF &&
1062110622
ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))) {
10622-
std::iota(Mask.begin(), Mask.end(), 0);
10623+
std::iota(std::next(Mask.begin(), I * SliceSize),
10624+
std::next(Mask.begin(), (I + 1) * SliceSize), 0);
1062310625
} else {
10624-
unsigned I =
10626+
unsigned IVal =
1062510627
*find_if_not(Mask, [](int Idx) { return Idx == PoisonMaskElem; });
10626-
std::fill(Mask.begin(), Mask.end(), I);
10628+
std::fill(std::next(Mask.begin(), I * SliceSize),
10629+
std::next(Mask.begin(), (I + 1) * SliceSize), IVal);
1062710630
}
1062810631
return true;
1062910632
};
@@ -10872,7 +10875,8 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
1087210875
} else if (Vec1) {
1087310876
IsUsedInExpr &= FindReusedSplat(
1087410877
ExtractMask,
10875-
cast<FixedVectorType>(Vec1->getType())->getNumElements());
10878+
cast<FixedVectorType>(Vec1->getType())->getNumElements(), 0,
10879+
ExtractMask.size());
1087610880
ShuffleBuilder.add(Vec1, ExtractMask, /*ForExtracts=*/true);
1087710881
IsNonPoisoned &= isGuaranteedNotToBePoison(Vec1);
1087810882
} else {
@@ -10898,7 +10902,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
1089810902
copy(SubMask, std::next(VecMask.begin(), I * SliceSize));
1089910903
if (TEs.size() == 1) {
1090010904
IsUsedInExpr &=
10901-
FindReusedSplat(VecMask, TEs.front()->getVectorFactor());
10905+
FindReusedSplat(VecMask, TEs.front()->getVectorFactor(), I, SliceSize);
1090210906
ShuffleBuilder.add(*TEs.front(), VecMask);
1090310907
if (TEs.front()->VectorizedValue)
1090410908
IsNonPoisoned &=
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt --passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu %s -o - -slp-threshold=-100 | FileCheck %s
3+
declare i64 @llvm.smax.i64(i64, i64)
4+
5+
define i8 @foo(i64 %val_i64_57) {
6+
; CHECK-LABEL: define i8 @foo(
7+
; CHECK-SAME: i64 [[VAL_I64_57:%.*]]) {
8+
; CHECK-NEXT: entry_1:
9+
; CHECK-NEXT: [[VAL_I64_58:%.*]] = call i64 @llvm.smax.i64(i64 0, i64 1)
10+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> <i64 0, i64 poison, i64 poison, i64 0>, i64 [[VAL_I64_57]], i32 1
11+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> [[TMP0]], i64 [[VAL_I64_58]], i32 2
12+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> <i32 2, i32 2, i32 0, i32 1>
13+
; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <4 x i64> [[TMP1]], [[TMP2]]
14+
; CHECK-NEXT: [[TMP4:%.*]] = icmp sle <4 x i64> [[TMP1]], [[TMP2]]
15+
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
16+
; CHECK-NEXT: ret i8 0
17+
;
18+
entry_1:
19+
%val_i64_58 = call i64 @llvm.smax.i64(i64 0, i64 1)
20+
%val_i1_89 = icmp ule i64 %val_i64_57, %val_i64_58
21+
%val_i1_95 = icmp sle i64 0, undef
22+
%val_i1_98 = icmp uge i64 %val_i64_58, %val_i64_58
23+
%val_i1_99 = icmp ule i64 0, %val_i64_58
24+
ret i8 0
25+
}

0 commit comments

Comments
 (0)