Skip to content

Commit 76f0ff8

Browse files
committed
[SLP] Add an extra check to avoid infinite vectorization attempts
Added an extra check on the cost of the buildvector when the -slp-threshold option is used. This prevents infinite vectorization attempts.
1 parent ece4e12 commit 76f0ff8

File tree

2 files changed

+9
-4
lines changed

2 files changed

+9
-4
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12034,7 +12034,14 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
1203412034
if (VectorizableTree.back()->isGather() &&
1203512035
VectorizableTree.back()->isAltShuffle() &&
1203612036
VectorizableTree.back()->getVectorFactor() > 2 &&
12037-
allSameBlock(VectorizableTree.back()->Scalars))
12037+
allSameBlock(VectorizableTree.back()->Scalars) &&
12038+
!VectorizableTree.back()->Scalars.front()->getType()->isVectorTy() &&
12039+
TTI->getScalarizationOverhead(
12040+
getWidenedType(VectorizableTree.back()->Scalars.front()->getType(),
12041+
VectorizableTree.back()->getVectorFactor()),
12042+
APInt::getAllOnes(VectorizableTree.back()->getVectorFactor()),
12043+
/*Insert=*/true, /*Extract=*/false,
12044+
TTI::TCK_RecipThroughput) > -SLPCostThreshold)
1203812045
return false;
1203912046

1204012047
// Otherwise, we can't vectorize the tree. It is both tiny and not fully

llvm/test/Transforms/SLPVectorizer/X86/buildvector-schedule-for-subvector.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ define void @test() {
77
; CHECK-NEXT: [[ADD:%.*]] = add i32 1, 0
88
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 0, i32 poison>, i32 [[ADD]], i32 3
99
; CHECK-NEXT: [[TMP1:%.*]] = icmp samesign ult <4 x i32> [[TMP0]], zeroinitializer
10-
; CHECK-NEXT: [[ICMP:%.*]] = icmp samesign ult i32 0, 0
10+
; CHECK-NEXT: [[ICMP:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
1111
; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[ICMP]], i32 0, i32 0
1212
; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[SELECT]] to i64
1313
; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr ptr addrspace(1), ptr addrspace(1) null, i64 [[ZEXT]]
@@ -16,8 +16,6 @@ define void @test() {
1616
; CHECK-NEXT: [[CALL:%.*]] = call i32 null(<2 x double> zeroinitializer)
1717
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 0, i32 poison>, i32 [[CALL]], i32 3
1818
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[TMP2]], zeroinitializer
19-
; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v4i1(<8 x i1> poison, <4 x i1> [[TMP3]], i64 0)
20-
; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v4i1(<8 x i1> [[TMP4]], <4 x i1> [[TMP1]], i64 4)
2119
; CHECK-NEXT: ret void
2220
;
2321
bb:

0 commit comments

Comments
 (0)