Skip to content

Commit d595080

Browse files
committed
[SLP] Fix PR87384: check for fixed vector type before using it.
If the analyzed values are a mix of extractelement instructions, some with fixed vector operands and some with scalable ones, the compiler must check that it only estimates the cost for the fixed vector extractelement instructions, not the scalable ones, to avoid a compiler crash.
1 parent 3d469c0 commit d595080

File tree

2 files changed

+38
-1
lines changed

2 files changed

+38
-1
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7773,7 +7773,9 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
77737773
auto *EE = dyn_cast<ExtractElementInst>(V);
77747774
if (!EE)
77757775
return Sz;
7776-
auto *VecTy = cast<FixedVectorType>(EE->getVectorOperandType());
7776+
auto *VecTy = dyn_cast<FixedVectorType>(EE->getVectorOperandType());
7777+
if (!VecTy)
7778+
return Sz;
77777779
return std::max(Sz, VecTy->getNumElements());
77787780
});
77797781
unsigned NumSrcRegs = TTI.getNumberOfParts(
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=riscv64-unknown-linux-gnu -mattr="+v" < %s | FileCheck %s
3+
4+
define i32 @test() {
5+
; CHECK-LABEL: define i32 @test(
6+
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
7+
; CHECK-NEXT: entry:
8+
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i8> zeroinitializer, i64 0
9+
; CHECK-NEXT: [[CONV5:%.*]] = sext i8 [[VECTOR_RECUR_EXTRACT]] to i32
10+
; CHECK-NEXT: store i32 [[CONV5]], ptr getelementptr ([0 x i32], ptr null, i64 0, i64 -14), align 4
11+
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i8>, ptr getelementptr ([9 x i8], ptr null, i64 -2, i64 5), align 1
12+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i8> [[TMP0]], zeroinitializer
13+
; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i16>
14+
; CHECK-NEXT: store <2 x i16> [[TMP2]], ptr getelementptr ([0 x i16], ptr null, i64 0, i64 -14), align 2
15+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP0]], i32 0
16+
; CHECK-NEXT: [[CONV5_1:%.*]] = sext i8 [[TMP3]] to i32
17+
; CHECK-NEXT: store i32 [[CONV5_1]], ptr getelementptr ([0 x i32], ptr null, i64 0, i64 -13), align 4
18+
; CHECK-NEXT: ret i32 0
19+
;
20+
entry:
21+
%vector.recur.extract = extractelement <vscale x 4 x i8> zeroinitializer, i64 0
22+
%0 = load i8, ptr getelementptr ([9 x i8], ptr null, i64 -2, i64 5), align 1
23+
%tobool1.not = icmp ne i8 %0, 0
24+
%conv2 = zext i1 %tobool1.not to i16
25+
store i16 %conv2, ptr getelementptr ([0 x i16], ptr null, i64 0, i64 -14), align 2
26+
%conv5 = sext i8 %vector.recur.extract to i32
27+
store i32 %conv5, ptr getelementptr ([0 x i32], ptr null, i64 0, i64 -14), align 4
28+
%1 = load i8, ptr getelementptr ([9 x i8], ptr null, i64 -2, i64 6), align 1
29+
%tobool1.not.1 = icmp ne i8 %1, 0
30+
%conv2.1 = zext i1 %tobool1.not.1 to i16
31+
store i16 %conv2.1, ptr getelementptr ([0 x i16], ptr null, i64 0, i64 -13), align 2
32+
%conv5.1 = sext i8 %0 to i32
33+
store i32 %conv5.1, ptr getelementptr ([0 x i32], ptr null, i64 0, i64 -13), align 4
34+
ret i32 0
35+
}

0 commit comments

Comments
 (0)