Skip to content

Commit f953b5e

Browse files
committed
[SLP]Relax assertion about subvectors mask size
SubVectorsMask may be smaller than CommonMask if vectors with a larger number of elements are permuted or if reused elements are present. This must be taken into account when estimating the cost and when building the vector, to avoid a compiler crash. Fixes #117518.
1 parent 3de2147 commit f953b5e

File tree

2 files changed

+104
-3
lines changed

2 files changed

+104
-3
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10875,9 +10875,10 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
1087510875
CommonMask[Idx] = Idx;
1087610876
// Add subvectors permutation cost.
1087710877
if (!SubVectorsMask.empty()) {
10878-
assert(SubVectorsMask.size() == CommonMask.size() &&
10878+
assert(SubVectorsMask.size() <= CommonMask.size() &&
1087910879
"Expected same size of masks for subvectors and common mask.");
10880-
SmallVector<int> SVMask(SubVectorsMask.begin(), SubVectorsMask.end());
10880+
SmallVector<int> SVMask(CommonMask.size(), PoisonMaskElem);
10881+
copy(SubVectorsMask, SVMask.begin());
1088110882
for (auto [I1, I2] : zip(SVMask, CommonMask)) {
1088210883
if (I2 != PoisonMaskElem) {
1088310884
assert(I1 == PoisonMaskElem && "Expected unused subvectors mask");
@@ -14372,7 +14373,8 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
1437214373
if (SubVectorsMask.empty()) {
1437314374
Vec = CreateSubVectors(Vec, CommonMask);
1437414375
} else {
14375-
SmallVector<int> SVMask(SubVectorsMask.begin(), SubVectorsMask.end());
14376+
SmallVector<int> SVMask(CommonMask.size(), PoisonMaskElem);
14377+
copy(SubVectorsMask, SVMask.begin());
1437614378
for (auto [I1, I2] : zip(SVMask, CommonMask)) {
1437714379
if (I2 != PoisonMaskElem) {
1437814380
assert(I1 == PoisonMaskElem && "Expected unused subvectors mask");
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -slp-threshold=-300 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
define void @test() {
5+
; CHECK-LABEL: define void @test() {
6+
; CHECK-NEXT: [[XOR108_I_I_I:%.*]] = xor i64 0, 1
7+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> <i64 0, i64 0, i64 poison, i64 0>, i64 [[XOR108_I_I_I]], i32 2
8+
; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i64> [[TMP1]], zeroinitializer
9+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i64> poison, i64 [[XOR108_I_I_I]], i32 3
10+
; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i64> @llvm.vector.insert.v16i64.v8i64(<16 x i64> poison, <8 x i64> zeroinitializer, i64 0)
11+
; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i64> @llvm.vector.insert.v16i64.v4i64(<16 x i64> [[TMP4]], <4 x i64> [[TMP2]], i64 8)
12+
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i64> [[TMP5]], <16 x i64> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 poison, i32 poison, i32 poison>
13+
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i64> [[TMP6]], <16 x i64> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 3, i32 7, i32 8, i32 9, i32 3, i32 10, i32 11, i32 12, i32 3>
14+
; CHECK-NEXT: [[TMP8:%.*]] = trunc <16 x i64> [[TMP7]] to <16 x i1>
15+
; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i1> [[TMP8]], zeroinitializer
16+
; CHECK-NEXT: [[TMP10:%.*]] = freeze <16 x i1> [[TMP9]]
17+
; CHECK-NEXT: [[TMP11:%.*]] = zext <16 x i1> [[TMP10]] to <16 x i16>
18+
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq <16 x i16> [[TMP11]], zeroinitializer
19+
; CHECK-NEXT: ret void
20+
;
21+
%xor108.i.i.i = xor i64 0, 1
22+
%conv115.i.i.i = trunc i64 %xor108.i.i.i to i16
23+
%add.i.i.i.i = or i16 %conv115.i.i.i, 0
24+
%add.i.frozen.i.i.i = freeze i16 %add.i.i.i.i
25+
%.cmp.not.i.i.i = icmp eq i16 %add.i.frozen.i.i.i, 0
26+
%cond.i1002.1.i.i.i = lshr i64 0, 0
27+
%conv115.1.i.i.i = trunc i64 %cond.i1002.1.i.i.i to i16
28+
%add.i.1.i.i.i = or i16 %conv115.1.i.i.i, 0
29+
%add.i.frozen.1.i.i.i = freeze i16 %add.i.1.i.i.i
30+
%.cmp.not.1.i.i.i = icmp eq i16 %add.i.frozen.1.i.i.i, 0
31+
%cond.i1002.2.i.i.i = lshr i64 %xor108.i.i.i, 0
32+
%conv115.2.i.i.i = trunc i64 %cond.i1002.2.i.i.i to i16
33+
%add.i.2.i.i.i = or i16 %conv115.2.i.i.i, 0
34+
%add.i.frozen.2.i.i.i = freeze i16 %add.i.2.i.i.i
35+
%.cmp.not.2.i.i.i = icmp eq i16 %add.i.frozen.2.i.i.i, 0
36+
%cond.i1002.3.i.i.i = lshr i64 0, 0
37+
%conv115.3.i.i.i = trunc i64 %cond.i1002.3.i.i.i to i16
38+
%add.i.3.i.i.i = or i16 %conv115.3.i.i.i, 0
39+
%add.i.frozen.3.i.i.i = freeze i16 %add.i.3.i.i.i
40+
%.cmp.not.3.i.i.i = icmp eq i16 %add.i.frozen.3.i.i.i, 0
41+
%conv115.i.i.i.1 = trunc i64 %xor108.i.i.i to i16
42+
%add.i.i.i.i.1 = or i16 %conv115.i.i.i.1, 0
43+
%add.i.frozen.i.i.i.1 = freeze i16 %add.i.i.i.i.1
44+
%.cmp.not.i.i.i.1 = icmp eq i16 %add.i.frozen.i.i.i.1, 0
45+
%cond.i1002.1.i.i.i.1 = lshr i64 0, 0
46+
%conv115.1.i.i.i.1 = trunc i64 %cond.i1002.1.i.i.i.1 to i16
47+
%add.i.1.i.i.i.1 = or i16 %conv115.1.i.i.i.1, 0
48+
%add.i.frozen.1.i.i.i.1 = freeze i16 %add.i.1.i.i.i.1
49+
%.cmp.not.1.i.i.i.1 = icmp eq i16 %add.i.frozen.1.i.i.i.1, 0
50+
%cond.i1002.2.i.i.i.1 = lshr i64 0, 0
51+
%conv115.2.i.i.i.1 = trunc i64 %cond.i1002.2.i.i.i.1 to i16
52+
%add.i.2.i.i.i.1 = or i16 %conv115.2.i.i.i.1, 0
53+
%add.i.frozen.2.i.i.i.1 = freeze i16 %add.i.2.i.i.i.1
54+
%.cmp.not.2.i.i.i.1 = icmp eq i16 %add.i.frozen.2.i.i.i.1, 0
55+
%cond.i1002.3.i.i.i.1 = lshr i64 0, 0
56+
%conv115.3.i.i.i.1 = trunc i64 %cond.i1002.3.i.i.i.1 to i16
57+
%add.i.3.i.i.i.1 = or i16 %conv115.3.i.i.i.1, 0
58+
%add.i.frozen.3.i.i.i.1 = freeze i16 %add.i.3.i.i.i.1
59+
%.cmp.not.3.i.i.i.1 = icmp eq i16 %add.i.frozen.3.i.i.i.1, 0
60+
%conv115.i.i.i.2 = trunc i64 %xor108.i.i.i to i16
61+
%add.i.i.i.i.2 = or i16 %conv115.i.i.i.2, 0
62+
%add.i.frozen.i.i.i.2 = freeze i16 %add.i.i.i.i.2
63+
%.cmp.not.i.i.i.2 = icmp eq i16 %add.i.frozen.i.i.i.2, 0
64+
%cond.i1002.1.i.i.i.2 = lshr i64 0, 0
65+
%conv115.1.i.i.i.2 = trunc i64 %cond.i1002.1.i.i.i.2 to i16
66+
%add.i.1.i.i.i.2 = or i16 %conv115.1.i.i.i.2, 0
67+
%add.i.frozen.1.i.i.i.2 = freeze i16 %add.i.1.i.i.i.2
68+
%.cmp.not.1.i.i.i.2 = icmp eq i16 %add.i.frozen.1.i.i.i.2, 0
69+
%cond.i1002.2.i.i.i.2 = lshr i64 0, 0
70+
%conv115.2.i.i.i.2 = trunc i64 %cond.i1002.2.i.i.i.2 to i16
71+
%add.i.2.i.i.i.2 = or i16 %conv115.2.i.i.i.2, 0
72+
%add.i.frozen.2.i.i.i.2 = freeze i16 %add.i.2.i.i.i.2
73+
%.cmp.not.2.i.i.i.2 = icmp eq i16 %add.i.frozen.2.i.i.i.2, 0
74+
%cond.i1002.3.i.i.i.2 = lshr i64 0, 0
75+
%conv115.3.i.i.i.2 = trunc i64 %cond.i1002.3.i.i.i.2 to i16
76+
%add.i.3.i.i.i.2 = or i16 %conv115.3.i.i.i.2, 0
77+
%add.i.frozen.3.i.i.i.2 = freeze i16 %add.i.3.i.i.i.2
78+
%.cmp.not.3.i.i.i.2 = icmp eq i16 %add.i.frozen.3.i.i.i.2, 0
79+
%conv115.i.i.i.3 = trunc i64 %xor108.i.i.i to i16
80+
%add.i.i.i.i.3 = or i16 %conv115.i.i.i.3, 0
81+
%add.i.frozen.i.i.i.3 = freeze i16 %add.i.i.i.i.3
82+
%.cmp.not.i.i.i.3 = icmp eq i16 %add.i.frozen.i.i.i.3, 0
83+
%cond.i1002.1.i.i.i.3 = lshr i64 0, 0
84+
%conv115.1.i.i.i.3 = trunc i64 %cond.i1002.1.i.i.i.3 to i16
85+
%add.i.1.i.i.i.3 = or i16 %conv115.1.i.i.i.3, 0
86+
%add.i.frozen.1.i.i.i.3 = freeze i16 %add.i.1.i.i.i.3
87+
%.cmp.not.1.i.i.i.3 = icmp eq i16 %add.i.frozen.1.i.i.i.3, 0
88+
%cond.i1002.2.i.i.i.3 = lshr i64 0, 0
89+
%conv115.2.i.i.i.3 = trunc i64 %cond.i1002.2.i.i.i.3 to i16
90+
%add.i.2.i.i.i.3 = or i16 %conv115.2.i.i.i.3, 0
91+
%add.i.frozen.2.i.i.i.3 = freeze i16 %add.i.2.i.i.i.3
92+
%.cmp.not.2.i.i.i.3 = icmp eq i16 %add.i.frozen.2.i.i.i.3, 0
93+
%cond.i1002.3.i.i.i.3 = lshr i64 0, 0
94+
%conv115.3.i.i.i.3 = trunc i64 %cond.i1002.3.i.i.i.3 to i16
95+
%add.i.3.i.i.i.3 = or i16 %conv115.3.i.i.i.3, 0
96+
%add.i.frozen.3.i.i.i.3 = freeze i16 %add.i.3.i.i.i.3
97+
%.cmp.not.3.i.i.i.3 = icmp eq i16 %add.i.frozen.3.i.i.i.3, 0
98+
ret void
99+
}

0 commit comments

Comments
 (0)