Skip to content

Commit af15c46

Browse files
committed
[SLP] Do not crash if the number of vector registers does not fit the vector
type. We need to check that the number of vector registers returned by TTI is not zero and not greater than the total number of mask elements before trying to perform any operations. TTI may still return an invalid number of registers.
1 parent fb08c69 commit af15c46

File tree

2 files changed

+57
-4
lines changed

2 files changed

+57
-4
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7580,8 +7580,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
75807580
auto *MaskVecTy =
75817581
FixedVectorType::get(E1.Scalars.front()->getType(), Mask.size());
75827582
unsigned NumParts = TTI.getNumberOfParts(MaskVecTy);
7583-
assert(NumParts > 0 && NumParts < Mask.size() &&
7584-
"Expected positive number of registers.");
7583+
if (NumParts == 0 || NumParts >= Mask.size())
7584+
NumParts = 1;
75857585
unsigned SliceSize = Mask.size() / NumParts;
75867586
const auto *It =
75877587
find_if(Mask, [](int Idx) { return Idx != PoisonMaskElem; });
@@ -7598,8 +7598,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
75987598
auto *MaskVecTy =
75997599
FixedVectorType::get(E1.Scalars.front()->getType(), Mask.size());
76007600
unsigned NumParts = TTI.getNumberOfParts(MaskVecTy);
7601-
assert(NumParts > 0 && NumParts < Mask.size() &&
7602-
"Expected positive number of registers.");
7601+
if (NumParts == 0 || NumParts >= Mask.size())
7602+
NumParts = 1;
76037603
unsigned SliceSize = Mask.size() / NumParts;
76047604
const auto *It =
76057605
find_if(Mask, [](int Idx) { return Idx != PoisonMaskElem; });
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -passes=slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -slp-threshold=-160 | FileCheck %s
3+
4+
define void @test1(i128 %p0, i128 %p1, i128 %p2, i128 %p3, <4 x i128> %vec) {
5+
; CHECK-LABEL: @test1(
6+
; CHECK-NEXT: entry:
7+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i128> poison, i128 [[P0:%.*]], i32 0
8+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i128> [[TMP0]], i128 [[P1:%.*]], i32 1
9+
; CHECK-NEXT: [[TMP2:%.*]] = trunc <2 x i128> [[TMP1]] to <2 x i32>
10+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
11+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i128> poison, i128 [[P2:%.*]], i32 0
12+
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i128> [[TMP4]], i128 [[P3:%.*]], i32 1
13+
; CHECK-NEXT: [[TMP6:%.*]] = trunc <2 x i128> [[TMP5]] to <2 x i32>
14+
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
15+
; CHECK-NEXT: [[T5:%.*]] = trunc i128 [[P1]] to i32
16+
; CHECK-NEXT: [[TMP8:%.*]] = sdiv <4 x i32> [[TMP3]], [[TMP7]]
17+
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i128> [[TMP1]], <2 x i128> [[TMP5]], <4 x i32> <i32 poison, i32 0, i32 3, i32 2>
18+
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i128> [[VEC:%.*]], <4 x i128> [[TMP9]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
19+
; CHECK-NEXT: [[TMP11:%.*]] = trunc <4 x i128> [[TMP10]] to <4 x i32>
20+
; CHECK-NEXT: [[TMP12:%.*]] = sdiv <4 x i32> [[TMP8]], [[TMP11]]
21+
; CHECK-NEXT: br label [[BB:%.*]]
22+
; CHECK: bb:
23+
; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i32> [ [[TMP12]], [[ENTRY:%.*]] ]
24+
; CHECK-NEXT: ret void
25+
;
26+
entry:
27+
%t1 = trunc i128 %p0 to i32
28+
%t2 = trunc i128 %p1 to i32
29+
%t3 = trunc i128 %p2 to i32
30+
%t4 = trunc i128 %p3 to i32
31+
%t5 = trunc i128 %p1 to i32
32+
%t6 = trunc i128 %p0 to i32
33+
%t7 = trunc i128 %p3 to i32
34+
%t8 = trunc i128 %p2 to i32
35+
%m0 = sdiv i32 %t1, %t3
36+
%m1 = sdiv i32 %t2, %t4
37+
%m2 = sdiv i32 %t1, %t3
38+
%m3 = sdiv i32 %t2, %t4
39+
%e0 = extractelement <4 x i128> %vec, i32 0
40+
%t9 = trunc i128 %e0 to i32
41+
%d0 = sdiv i32 %m0, %t9
42+
%d1 = sdiv i32 %m1, %t6
43+
%d2 = sdiv i32 %m2, %t7
44+
%d3 = sdiv i32 %m3, %t8
45+
br label %bb
46+
47+
bb:
48+
%phi0 = phi i32 [ %d0, %entry ]
49+
%phi1 = phi i32 [ %d1, %entry ]
50+
%phi2 = phi i32 [ %d2, %entry ]
51+
%phi3 = phi i32 [ %d3, %entry ]
52+
ret void
53+
}

0 commit comments

Comments
 (0)