Skip to content

Commit 8ffdc3b

Browse files
committed
[SLP]Fix a crash when checking a scalar in a reordered buildvector node
The undef check must inspect the reordered scalars (StoredGS[I]), not the original ones (E->Scalars[I]), so that it tests the same scalar that is subsequently cast to ExtractElementInst.
1 parent 92e02ad commit 8ffdc3b

File tree

2 files changed

+185
-1
lines changed

2 files changed

+185
-1
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15304,7 +15304,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
1530415304
for (unsigned I = 0, Sz = ExtractMask.size(); I < Sz; ++I) {
1530515305
if (ExtractMask[I] == PoisonMaskElem)
1530615306
continue;
15307-
if (isa<UndefValue>(E->Scalars[I]))
15307+
if (isa<UndefValue>(StoredGS[I]))
1530815308
continue;
1530915309
auto *EI = cast<ExtractElementInst>(StoredGS[I]);
1531015310
Value *VecOp = EI->getVectorOperand();
Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=riscv64-unknown-linux-gnu -mcpu=sifive-x280 < %s | FileCheck %s
3+
4+
%struct.ImageParameters = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, ptr, ptr, i32, ptr, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [9 x [16 x [16 x i16]]], [5 x [16 x [16 x i16]]], [9 x [8 x [8 x i16]]], [2 x [4 x [16 x [16 x i16]]]], [16 x [16 x i16]], [16 x [16 x i32]], ptr, ptr, ptr, ptr, ptr, [1200 x %struct.syntaxelement], ptr, ptr, i32, i32, i32, i32, [4 x [4 x i32]], i32, i32, i32, i32, i32, double, i32, i32, i32, i32, ptr, ptr, ptr, ptr, [15 x i16], i32, i32, i32, i32, i32, i32, i32, i32, [6 x [15 x i32]], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [1 x i32], i32, i32, [2 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, ptr, i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i32, i32, i32, ptr, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [2 x i32], i32, i32, i32 }
5+
%struct.syntaxelement = type { i32, i32, i32, i32, i32, i32, i32, i32, ptr, ptr }
6+
7+
@images = external global %struct.ImageParameters
8+
9+
; Regression test for an SLP vectorizer crash in BoUpSLP::processBuildVector
; when a scalar of a reordered buildvector node is checked.
;
; The function derives a series of i16 values from %0, %add111.i.i and lane 0
; of the two <4 x i32> arguments, and stores them at constant byte offsets
; into @images. The expected output leaves everything scalar except the two
; adjacent i16 stores at offsets 8180 and 8182, which become one <2 x i16>
; store. Its operand is built from lane 0 of %1 (shufflevector) and
; %LoopArray.sroa.24.0.i.i3 (insertelement into lane 0).
; NOTE(review): presumably this extractelement-fed two-lane buildvector is the
; node that hit the crash; confirm against the original reproducer.
define fastcc i32 @test(i32 %0, i32 %add111.i.i, <4 x i32> %PredPel.i.sroa.86.72.vec.extract, <4 x i32> %1) {
10+
; CHECK-LABEL: define fastcc i32 @test(
11+
; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[ADD111_I_I:%.*]], <4 x i32> [[PREDPEL_I_SROA_86_72_VEC_EXTRACT:%.*]], <4 x i32> [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
12+
; CHECK-NEXT: [[ENTRY:.*:]]
13+
; CHECK-NEXT: [[LOOPARRAY_SROA_24_0_I_I3:%.*]] = ashr i32 [[TMP0]], 1
14+
; CHECK-NEXT: [[SHR143_5_I_I9:%.*]] = ashr i32 [[TMP0]], 1
15+
; CHECK-NEXT: [[ADD1392_I:%.*]] = add i32 [[TMP0]], 1
16+
; CHECK-NEXT: [[PREDPEL_I_SROA_86_80_VEC_EXTRACT59312:%.*]] = extractelement <4 x i32> [[PREDPEL_I_SROA_86_72_VEC_EXTRACT]], i64 0
17+
; CHECK-NEXT: [[MUL1445_I:%.*]] = shl i32 [[TMP0]], 1
18+
; CHECK-NEXT: [[ADD2136_I:%.*]] = or i32 [[LOOPARRAY_SROA_24_0_I_I3]], [[TMP0]]
19+
; CHECK-NEXT: [[SHR2137_I:%.*]] = lshr i32 [[ADD2136_I]], 1
20+
; CHECK-NEXT: [[CONV2138_I:%.*]] = trunc i32 [[SHR2137_I]] to i16
21+
; CHECK-NEXT: [[ADD2157_I:%.*]] = add i32 [[PREDPEL_I_SROA_86_80_VEC_EXTRACT59312]], 1
22+
; CHECK-NEXT: [[SHR2158_I:%.*]] = lshr i32 [[ADD2157_I]], 1
23+
; CHECK-NEXT: [[CONV2159_I:%.*]] = trunc i32 [[SHR2158_I]] to i16
24+
; CHECK-NEXT: [[ADD2174_I:%.*]] = add i32 [[MUL1445_I]], 2
25+
; CHECK-NEXT: [[SHR2175_I:%.*]] = lshr i32 [[ADD2174_I]], 2
26+
; CHECK-NEXT: [[CONV2176_I:%.*]] = trunc i32 [[SHR2175_I]] to i16
27+
; CHECK-NEXT: [[ADD2190_I:%.*]] = or i32 [[ADD1392_I]], 1
28+
; CHECK-NEXT: [[ADD2191_I:%.*]] = add i32 [[ADD2190_I]], [[TMP0]]
29+
; CHECK-NEXT: [[CONV2193_I:%.*]] = trunc i32 [[ADD2191_I]] to i16
30+
; CHECK-NEXT: [[ADD2203_I:%.*]] = or i32 [[TMP0]], 1
31+
; CHECK-NEXT: [[ADD2204_I:%.*]] = add i32 [[ADD2203_I]], [[TMP0]]
32+
; CHECK-NEXT: [[CONV2206_I:%.*]] = trunc i32 [[ADD2204_I]] to i16
33+
; CHECK-NEXT: [[ADD2235_I16:%.*]] = or i32 [[TMP0]], 1
34+
; CHECK-NEXT: [[ADD2236_I:%.*]] = add i32 [[ADD2235_I16]], 1
35+
; CHECK-NEXT: [[SHR2237_I:%.*]] = lshr i32 [[ADD2236_I]], 1
36+
; CHECK-NEXT: [[CONV2238_I:%.*]] = trunc i32 [[SHR2237_I]] to i16
37+
; CHECK-NEXT: store i16 [[CONV2238_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8196), align 4
38+
; CHECK-NEXT: store i16 [[CONV2238_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8176), align 8
39+
; CHECK-NEXT: [[ADD2258_I:%.*]] = or i32 [[ADD111_I_I]], [[TMP0]]
40+
; CHECK-NEXT: [[SHR2259_I:%.*]] = lshr i32 [[ADD2258_I]], 1
41+
; CHECK-NEXT: [[CONV2260_I:%.*]] = trunc i32 [[SHR2259_I]] to i16
42+
; CHECK-NEXT: store i16 [[CONV2260_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8212), align 4
43+
; CHECK-NEXT: store i16 [[CONV2260_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8192), align 8
44+
; CHECK-NEXT: store i16 [[CONV2260_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8172), align 4
45+
; CHECK-NEXT: [[ADD2280_I:%.*]] = add i32 [[ADD111_I_I]], 1
46+
; CHECK-NEXT: [[SHR2281_I:%.*]] = lshr i32 [[ADD2280_I]], 1
47+
; CHECK-NEXT: [[CONV2282_I:%.*]] = trunc i32 [[SHR2281_I]] to i16
48+
; CHECK-NEXT: store i16 [[CONV2282_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8228), align 4
49+
; CHECK-NEXT: store i16 [[CONV2282_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8208), align 8
50+
; CHECK-NEXT: store i16 [[CONV2282_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8188), align 4
51+
; CHECK-NEXT: [[ADD2302_I:%.*]] = add i32 [[TMP0]], 1
52+
; CHECK-NEXT: [[SHR2303_I:%.*]] = lshr i32 [[ADD2302_I]], 1
53+
; CHECK-NEXT: [[CONV2304_I:%.*]] = trunc i32 [[SHR2303_I]] to i16
54+
; CHECK-NEXT: store i16 [[CONV2304_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8224), align 8
55+
; CHECK-NEXT: store i16 [[CONV2304_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8204), align 4
56+
; CHECK-NEXT: store i16 [[CONV2304_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8184), align 8
57+
; CHECK-NEXT: [[ADD2323_I:%.*]] = add i32 [[TMP0]], 1
58+
; CHECK-NEXT: [[ADD2324_I:%.*]] = or i32 [[ADD2323_I]], [[TMP0]]
59+
; CHECK-NEXT: [[SHR2325_I:%.*]] = lshr i32 [[ADD2324_I]], 1
60+
; CHECK-NEXT: [[CONV2326_I:%.*]] = trunc i32 [[SHR2325_I]] to i16
61+
; CHECK-NEXT: store i16 [[CONV2326_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8220), align 4
62+
; CHECK-NEXT: store i16 [[CONV2326_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8200), align 8
63+
; CHECK-NEXT: [[ADD2342_I:%.*]] = add i32 [[SHR143_5_I_I9]], 1
64+
; CHECK-NEXT: [[SHR2343_I:%.*]] = lshr i32 [[ADD2342_I]], 1
65+
; CHECK-NEXT: [[CONV2344_I:%.*]] = trunc i32 [[SHR2343_I]] to i16
66+
; CHECK-NEXT: store i16 [[CONV2344_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8216), align 8
67+
; CHECK-NEXT: [[ADD2355_I:%.*]] = or i32 [[SHR143_5_I_I9]], 1
68+
; CHECK-NEXT: [[ADD2356_I:%.*]] = add i32 [[ADD2355_I]], [[TMP0]]
69+
; CHECK-NEXT: [[CONV2358_I:%.*]] = trunc i32 [[ADD2356_I]] to i16
70+
; CHECK-NEXT: store i16 [[CONV2358_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8232), align 8
71+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <2 x i32> <i32 poison, i32 0>
72+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[LOOPARRAY_SROA_24_0_I_I3]], i32 0
73+
; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP3]], splat (i32 1)
74+
; CHECK-NEXT: [[TMP5:%.*]] = lshr <2 x i32> [[TMP4]], splat (i32 1)
75+
; CHECK-NEXT: [[TMP6:%.*]] = trunc <2 x i32> [[TMP5]] to <2 x i16>
76+
; CHECK-NEXT: store <2 x i16> [[TMP6]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8180), align 4
77+
; CHECK-NEXT: [[ADD2393_I:%.*]] = or i32 [[LOOPARRAY_SROA_24_0_I_I3]], 1
78+
; CHECK-NEXT: [[ADD2394_I:%.*]] = add i32 [[ADD2393_I]], [[TMP0]]
79+
; CHECK-NEXT: [[CONV2396_I:%.*]] = trunc i32 [[ADD2394_I]] to i16
80+
; CHECK-NEXT: store i16 [[CONV2396_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8198), align 2
81+
; CHECK-NEXT: store i16 [[CONV2396_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8178), align 2
82+
; CHECK-NEXT: store i16 [[CONV2138_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8214), align 2
83+
; CHECK-NEXT: store i16 [[CONV2138_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8194), align 2
84+
; CHECK-NEXT: store i16 [[CONV2138_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8174), align 2
85+
; CHECK-NEXT: store i16 [[CONV2159_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8230), align 2
86+
; CHECK-NEXT: store i16 [[CONV2159_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8210), align 2
87+
; CHECK-NEXT: store i16 [[CONV2159_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8190), align 2
88+
; CHECK-NEXT: store i16 [[CONV2159_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8170), align 2
89+
; CHECK-NEXT: store i16 [[CONV2176_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8226), align 2
90+
; CHECK-NEXT: store i16 [[CONV2176_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8206), align 2
91+
; CHECK-NEXT: store i16 [[CONV2176_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8186), align 2
92+
; CHECK-NEXT: store i16 [[CONV2193_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8222), align 2
93+
; CHECK-NEXT: store i16 [[CONV2193_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8202), align 2
94+
; CHECK-NEXT: store i16 [[CONV2206_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8218), align 2
95+
; CHECK-NEXT: ret i32 0
96+
;
97+
entry:
98+
%LoopArray.sroa.24.0.i.i3 = ashr i32 %0, 1
99+
%shr143.5.i.i9 = ashr i32 %0, 1
100+
%add1392.i = add i32 %0, 1
101+
%PredPel.i.sroa.86.80.vec.extract59312 = extractelement <4 x i32> %PredPel.i.sroa.86.72.vec.extract, i64 0
102+
%mul1445.i = shl i32 %0, 1
103+
; Lane 0 of %1 feeds the value stored at offset 8182. In the expected output
; this extractelement is gone and %1 is consumed through a shufflevector.
%PredPel.i.sroa.7.4.vec.extract446 = extractelement <4 x i32> %1, i64 0
104+
%add1571.i = add i32 %PredPel.i.sroa.7.4.vec.extract446, 1
105+
%shr1572.i = lshr i32 %add1571.i, 1
106+
%conv1573.i = trunc i32 %shr1572.i to i16
107+
%add2136.i = or i32 %LoopArray.sroa.24.0.i.i3, %0
108+
%shr2137.i = lshr i32 %add2136.i, 1
109+
%conv2138.i = trunc i32 %shr2137.i to i16
110+
%add2157.i = add i32 %PredPel.i.sroa.86.80.vec.extract59312, 1
111+
%shr2158.i = lshr i32 %add2157.i, 1
112+
%conv2159.i = trunc i32 %shr2158.i to i16
113+
%add2174.i = add i32 %mul1445.i, 2
114+
%shr2175.i = lshr i32 %add2174.i, 2
115+
%conv2176.i = trunc i32 %shr2175.i to i16
116+
%add2190.i = or i32 %add1392.i, 1
117+
%add2191.i = add i32 %add2190.i, %0
118+
%conv2193.i = trunc i32 %add2191.i to i16
119+
%add2203.i = or i32 %0, 1
120+
%add2204.i = add i32 %add2203.i, %0
121+
%conv2206.i = trunc i32 %add2204.i to i16
122+
%add2214.i = add i32 %LoopArray.sroa.24.0.i.i3, 1
123+
%shr2215.i = lshr i32 %add2214.i, 1
124+
%conv2216.i = trunc i32 %shr2215.i to i16
125+
; First half of the pair that is expected to vectorize: the i16 at offset
; 8180 becomes lane 0 of the <2 x i16> store.
store i16 %conv2216.i, ptr getelementptr inbounds nuw (i8, ptr @images, i64 8180), align 4
126+
%add2235.i16 = or i32 %0, 1
127+
%add2236.i = add i32 %add2235.i16, 1
128+
%shr2237.i = lshr i32 %add2236.i, 1
129+
%conv2238.i = trunc i32 %shr2237.i to i16
130+
store i16 %conv2238.i, ptr getelementptr inbounds nuw (i8, ptr @images, i64 8196), align 4
131+
store i16 %conv2238.i, ptr getelementptr inbounds nuw (i8, ptr @images, i64 8176), align 8
132+
%add2258.i = or i32 %add111.i.i, %0
133+
%shr2259.i = lshr i32 %add2258.i, 1
134+
%conv2260.i = trunc i32 %shr2259.i to i16
135+
store i16 %conv2260.i, ptr getelementptr inbounds nuw (i8, ptr @images, i64 8212), align 4
136+
store i16 %conv2260.i, ptr getelementptr inbounds nuw (i8, ptr @images, i64 8192), align 8
137+
store i16 %conv2260.i, ptr getelementptr inbounds nuw (i8, ptr @images, i64 8172), align 4
138+
%add2280.i = add i32 %add111.i.i, 1
139+
%shr2281.i = lshr i32 %add2280.i, 1
140+
%conv2282.i = trunc i32 %shr2281.i to i16
141+
store i16 %conv2282.i, ptr getelementptr inbounds nuw (i8, ptr @images, i64 8228), align 4
142+
store i16 %conv2282.i, ptr getelementptr inbounds nuw (i8, ptr @images, i64 8208), align 8
143+
store i16 %conv2282.i, ptr getelementptr inbounds nuw (i8, ptr @images, i64 8188), align 4
144+
%add2302.i = add i32 %0, 1
145+
%shr2303.i = lshr i32 %add2302.i, 1
146+
%conv2304.i = trunc i32 %shr2303.i to i16
147+
store i16 %conv2304.i, ptr getelementptr inbounds nuw (i8, ptr @images, i64 8224), align 8
148+
store i16 %conv2304.i, ptr getelementptr inbounds nuw (i8, ptr @images, i64 8204), align 4
149+
store i16 %conv2304.i, ptr getelementptr inbounds nuw (i8, ptr @images, i64 8184), align 8
150+
%add2323.i = add i32 %0, 1
151+
%add2324.i = or i32 %add2323.i, %0
152+
%shr2325.i = lshr i32 %add2324.i, 1
153+
%conv2326.i = trunc i32 %shr2325.i to i16
154+
store i16 %conv2326.i, ptr getelementptr inbounds nuw (i8, ptr @images, i64 8220), align 4
155+
store i16 %conv2326.i, ptr getelementptr inbounds nuw (i8, ptr @images, i64 8200), align 8
156+
%add2342.i = add i32 %shr143.5.i.i9, 1
157+
%shr2343.i = lshr i32 %add2342.i, 1
158+
%conv2344.i = trunc i32 %shr2343.i to i16
159+
store i16 %conv2344.i, ptr getelementptr inbounds nuw (i8, ptr @images, i64 8216), align 8
160+
%add2355.i = or i32 %shr143.5.i.i9, 1
161+
%add2356.i = add i32 %add2355.i, %0
162+
%conv2358.i = trunc i32 %add2356.i to i16
163+
store i16 %conv2358.i, ptr getelementptr inbounds nuw (i8, ptr @images, i64 8232), align 8
164+
; Second half of the pair: the i16 at offset 8182, adjacent to the store at
; 8180, becomes lane 1 of the <2 x i16> store.
store i16 %conv1573.i, ptr getelementptr inbounds nuw (i8, ptr @images, i64 8182), align 2
165+
%add2393.i = or i32 %LoopArray.sroa.24.0.i.i3, 1
166+
%add2394.i = add i32 %add2393.i, %0
167+
%conv2396.i = trunc i32 %add2394.i to i16
168+
store i16 %conv2396.i, ptr getelementptr inbounds nuw (i8, ptr @images, i64 8198), align 2
169+
store i16 %conv2396.i, ptr getelementptr inbounds nuw (i8, ptr @images, i64 8178), align 2
170+
store i16 %conv2138.i, ptr getelementptr inbounds nuw (i8, ptr @images, i64 8214), align 2
171+
store i16 %conv2138.i, ptr getelementptr inbounds nuw (i8, ptr @images, i64 8194), align 2
172+
store i16 %conv2138.i, ptr getelementptr inbounds nuw (i8, ptr @images, i64 8174), align 2
173+
store i16 %conv2159.i, ptr getelementptr inbounds nuw (i8, ptr @images, i64 8230), align 2
174+
store i16 %conv2159.i, ptr getelementptr inbounds nuw (i8, ptr @images, i64 8210), align 2
175+
store i16 %conv2159.i, ptr getelementptr inbounds nuw (i8, ptr @images, i64 8190), align 2
176+
store i16 %conv2159.i, ptr getelementptr inbounds nuw (i8, ptr @images, i64 8170), align 2
177+
store i16 %conv2176.i, ptr getelementptr inbounds nuw (i8, ptr @images, i64 8226), align 2
178+
store i16 %conv2176.i, ptr getelementptr inbounds nuw (i8, ptr @images, i64 8206), align 2
179+
store i16 %conv2176.i, ptr getelementptr inbounds nuw (i8, ptr @images, i64 8186), align 2
180+
store i16 %conv2193.i, ptr getelementptr inbounds nuw (i8, ptr @images, i64 8222), align 2
181+
store i16 %conv2193.i, ptr getelementptr inbounds nuw (i8, ptr @images, i64 8202), align 2
182+
store i16 %conv2206.i, ptr getelementptr inbounds nuw (i8, ptr @images, i64 8218), align 2
183+
ret i32 0
184+
}

0 commit comments

Comments
 (0)