Skip to content

Commit d991e05

Browse files
committed
[SLP] Fix compiler crash on vectorizing gathered loads with different types
When trying to build the vectorizable sequences, compatible loads must be checked not only for having the same parent basic block, but also for having the same type. Fixes the crash reported in #107461 (comment).
1 parent b98c405 commit d991e05

File tree

2 files changed

+207
-1
lines changed

2 files changed

+207
-1
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6521,7 +6521,8 @@ static void gatherPossiblyVectorizableLoads(
65216521
if (Idx < Start)
65226522
continue;
65236523
ToAdd.clear();
6524-
if (LI->getParent() != Data.front().first->getParent())
6524+
if (LI->getParent() != Data.front().first->getParent() ||
6525+
LI->getType() != Data.front().first->getType())
65256526
continue;
65266527
std::optional<int> Dist =
65276528
getPointersDiff(LI->getType(), LI->getPointerOperand(),
Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,205 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer < %s | FileCheck %s
3+
4+
define i32 @test(i8 %0) {
5+
; CHECK-LABEL: define i32 @test(
6+
; CHECK-SAME: i8 [[TMP0:%.*]]) {
7+
; CHECK-NEXT: [[ENTRY:.*:]]
8+
; CHECK-NEXT: [[CMP13_NOT_5:%.*]] = icmp eq i64 0, 0
9+
; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(21) getelementptr inbounds (i8, ptr addrspace(21) null, i64 7), align 1
10+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i8> <i8 0, i8 poison>, i8 [[TMP1]], i32 1
11+
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i8> zeroinitializer, [[TMP2]]
12+
; CHECK-NEXT: [[TMP4:%.*]] = load volatile i8, ptr null, align 8
13+
; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i8>, ptr addrspace(21) getelementptr inbounds (i8, ptr addrspace(21) null, i64 8), align 8
14+
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i8> [[TMP5]], <2 x i8> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 poison, i32 poison, i32 1, i32 poison, i32 poison>
15+
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i8> [[TMP6]], <8 x i8> <i8 0, i8 0, i8 poison, i8 0, i8 0, i8 poison, i8 0, i8 0>, <8 x i32> <i32 8, i32 9, i32 0, i32 11, i32 12, i32 1, i32 14, i32 15>
16+
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <8 x i8> zeroinitializer, [[TMP7]]
17+
; CHECK-NEXT: [[TEST_STRUCTCOPY_14_S14_CM_COERCE_SROA_2_0_COPYLOAD:%.*]] = load i48, ptr addrspace(21) getelementptr inbounds (i8, ptr addrspace(21) null, i64 8), align 8
18+
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i48> <i48 poison, i48 0, i48 0, i48 0>, i48 [[TEST_STRUCTCOPY_14_S14_CM_COERCE_SROA_2_0_COPYLOAD]], i32 0
19+
; CHECK-NEXT: [[TMP10:%.*]] = trunc <4 x i48> [[TMP9]] to <4 x i8>
20+
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i8> zeroinitializer, [[TMP10]]
21+
; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr addrspace(21) null, align 2
22+
; CHECK-NEXT: [[TMP13:%.*]] = load volatile i8, ptr null, align 2
23+
; CHECK-NEXT: [[TMP14:%.*]] = load <2 x i8>, ptr addrspace(21) getelementptr inbounds (i8, ptr addrspace(21) null, i64 8), align 8
24+
; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x i8> [[TMP14]], <2 x i8> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 poison, i32 poison, i32 poison>
25+
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <8 x i8> [[TMP15]], <8 x i8> <i8 0, i8 poison, i8 0, i8 poison, i8 poison, i8 0, i8 0, i8 0>, <8 x i32> <i32 8, i32 poison, i32 10, i32 3, i32 4, i32 13, i32 14, i32 15>
26+
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x i8> [[TMP16]], i8 [[TMP12]], i32 1
27+
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <8 x i8> <i8 0, i8 poison, i8 0, i8 poison, i8 0, i8 0, i8 0, i8 0>, i8 [[TMP0]], i32 3
28+
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x i8> [[TMP18]], i8 [[TMP13]], i32 1
29+
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq <8 x i8> [[TMP17]], [[TMP19]]
30+
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <64 x i1> <i1 poison, i1 false, i1 false, i1 false, i1 false, i1 false, i1 poison, i1 poison, i1 poison, i1 poison, i1 poison, i1 poison, i1 poison, i1 poison, i1 poison, i1 poison, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 poison, i1 poison, i1 poison, i1 poison, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 poison, i1 poison, i1 poison, i1 poison, i1 poison, i1 poison, i1 poison, i1 poison>, i1 [[CMP13_NOT_5]], i32 0
31+
; CHECK-NEXT: [[TMP22:%.*]] = call <64 x i1> @llvm.vector.insert.v64i1.v8i1(<64 x i1> [[TMP21]], <8 x i1> [[TMP8]], i64 8)
32+
; CHECK-NEXT: [[TMP23:%.*]] = call <64 x i1> @llvm.vector.insert.v64i1.v8i1(<64 x i1> [[TMP22]], <8 x i1> [[TMP20]], i64 56)
33+
; CHECK-NEXT: [[TMP24:%.*]] = call <64 x i1> @llvm.vector.insert.v64i1.v4i1(<64 x i1> [[TMP23]], <4 x i1> [[TMP11]], i64 32)
34+
; CHECK-NEXT: [[TMP25:%.*]] = call <64 x i1> @llvm.vector.insert.v64i1.v2i1(<64 x i1> [[TMP24]], <2 x i1> [[TMP3]], i64 6)
35+
; CHECK-NEXT: [[TMP26:%.*]] = select <64 x i1> [[TMP25]], <64 x i32> zeroinitializer, <64 x i32> zeroinitializer
36+
; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> [[TMP26]])
37+
; CHECK-NEXT: ret i32 [[TMP27]]
38+
;
39+
entry:
40+
%cmp13.not.5 = icmp eq i64 0, 0
41+
%add15.5 = select i1 %cmp13.not.5, i32 0, i32 0
42+
%add26.5 = select i1 false, i32 0, i32 0
43+
%add27.5 = or i32 %add15.5, %add26.5
44+
%add37.5 = select i1 false, i32 0, i32 0
45+
%add38.5 = or i32 %add27.5, %add37.5
46+
%add15.6 = select i1 false, i32 0, i32 0
47+
%add16.6 = or i32 %add15.6, %add38.5
48+
%add37.6 = select i1 false, i32 0, i32 0
49+
%add38.6 = or i32 %add16.6, %add37.6
50+
%add15.7 = select i1 false, i32 0, i32 0
51+
%add16.7 = or i32 %add15.7, %add38.6
52+
%cmp23.not.7 = icmp eq i8 0, 0
53+
%add26.7 = select i1 %cmp23.not.7, i32 0, i32 0
54+
%add27.7 = or i32 %add16.7, %add26.7
55+
%1 = load i8, ptr addrspace(21) getelementptr inbounds (i8, ptr addrspace(21) null, i64 7), align 1
56+
%cmp34.not.7 = icmp eq i8 %1, 0
57+
%add37.7 = select i1 %cmp34.not.7, i32 0, i32 0
58+
%add38.7 = or i32 %add27.7, %add37.7
59+
%cmp13.not.8 = icmp eq i8 0, 0
60+
%add15.8 = select i1 %cmp13.not.8, i32 0, i32 0
61+
%add16.8 = or i32 %add15.8, %add38.7
62+
%2 = load volatile i8, ptr null, align 8
63+
%cmp23.not.8 = icmp eq i8 0, 0
64+
%add26.8 = select i1 %cmp23.not.8, i32 0, i32 0
65+
%add27.8 = or i32 %add16.8, %add26.8
66+
%3 = load i8, ptr addrspace(21) getelementptr inbounds (i8, ptr addrspace(21) null, i64 8), align 8
67+
%cmp34.not.8 = icmp eq i8 %3, 0
68+
%add37.8 = select i1 %cmp34.not.8, i32 0, i32 0
69+
%add38.8 = or i32 %add27.8, %add37.8
70+
%cmp13.not.9 = icmp eq i8 0, 0
71+
%add15.9 = select i1 %cmp13.not.9, i32 0, i32 0
72+
%add16.9 = or i32 %add15.9, %add38.8
73+
%cmp23.not.9 = icmp eq i8 0, 0
74+
%add26.9 = select i1 %cmp23.not.9, i32 0, i32 0
75+
%add27.9 = or i32 %add16.9, %add26.9
76+
%4 = load i8, ptr addrspace(21) getelementptr inbounds (i8, ptr addrspace(21) null, i64 9), align 1
77+
%cmp34.not.9 = icmp eq i8 %4, 0
78+
%add37.9 = select i1 %cmp34.not.9, i32 0, i32 0
79+
%add38.9 = or i32 %add27.9, %add37.9
80+
%cmp13.not.10 = icmp eq i8 0, 0
81+
%add15.10 = select i1 %cmp13.not.10, i32 0, i32 0
82+
%add16.10 = or i32 %add15.10, %add38.9
83+
%cmp23.not.10 = icmp eq i8 0, 0
84+
%add26.10 = select i1 %cmp23.not.10, i32 0, i32 0
85+
%add27.10 = or i32 %add16.10, %add26.10
86+
%add37.10 = select i1 false, i32 0, i32 0
87+
%add38.10 = or i32 %add27.10, %add37.10
88+
%add15.11 = select i1 false, i32 0, i32 0
89+
%add16.11 = or i32 %add15.11, %add38.10
90+
%add26.11 = select i1 false, i32 0, i32 0
91+
%add27.11 = or i32 %add16.11, %add26.11
92+
%add37.11 = select i1 false, i32 0, i32 0
93+
%add38.11 = or i32 %add27.11, %add37.11
94+
%add15.12 = select i1 false, i32 0, i32 0
95+
%add16.12 = or i32 %add15.12, %add38.11
96+
%add26.12 = select i1 false, i32 0, i32 0
97+
%add27.12 = or i32 %add16.12, %add26.12
98+
%add37.12 = select i1 false, i32 0, i32 0
99+
%add38.12 = or i32 %add27.12, %add37.12
100+
%add15.13 = select i1 false, i32 0, i32 0
101+
%add16.13 = or i32 %add15.13, %add38.12
102+
%add26.13 = select i1 false, i32 0, i32 0
103+
%add27.13 = or i32 %add16.13, %add26.13
104+
%add37.13 = select i1 false, i32 0, i32 0
105+
%add38.13 = or i32 %add27.13, %add37.13
106+
%test_structcopy_14.s14_cm.coerce.sroa.2.0.copyload = load i48, ptr addrspace(21) getelementptr inbounds (i8, ptr addrspace(21) null, i64 8), align 8
107+
%s.sroa.9.8.extract.trunc.i198 = trunc i48 %test_structcopy_14.s14_cm.coerce.sroa.2.0.copyload to i8
108+
%s.sroa.11.8.extract.trunc.i200 = trunc i48 0 to i8
109+
%s.sroa.12.8.extract.trunc.i202 = trunc i48 0 to i8
110+
%s.sroa.13.8.extract.trunc.i204 = trunc i48 0 to i8
111+
%add.i210 = select i1 false, i32 0, i32 0
112+
%add6.i211 = or i32 %add.i210, %add38.13
113+
%add.1.i213 = select i1 false, i32 0, i32 0
114+
%add6.1.i214 = or i32 %add6.i211, %add.1.i213
115+
%add.2.i216 = select i1 false, i32 0, i32 0
116+
%add6.2.i217 = or i32 %add6.1.i214, %add.2.i216
117+
%add.4.i222 = select i1 false, i32 0, i32 0
118+
%add6.4.i223 = or i32 %add6.2.i217, %add.4.i222
119+
%add.5.i225 = select i1 false, i32 0, i32 0
120+
%add6.5.i226 = or i32 %add6.4.i223, %add.5.i225
121+
%add.7.i231 = select i1 false, i32 0, i32 0
122+
%add6.7.i232 = or i32 %add6.5.i226, %add.7.i231
123+
%cmp4.not.8.i233 = icmp eq i8 0, %s.sroa.9.8.extract.trunc.i198
124+
%add.8.i234 = select i1 %cmp4.not.8.i233, i32 0, i32 0
125+
%add6.8.i235 = or i32 %add6.7.i232, %add.8.i234
126+
%cmp4.not.9.i236 = icmp eq i8 0, %s.sroa.11.8.extract.trunc.i200
127+
%add.9.i237 = select i1 %cmp4.not.9.i236, i32 0, i32 0
128+
%add6.9.i238 = or i32 %add6.8.i235, %add.9.i237
129+
%cmp4.not.10.i239 = icmp eq i8 0, %s.sroa.12.8.extract.trunc.i202
130+
%add.10.i240 = select i1 %cmp4.not.10.i239, i32 0, i32 0
131+
%add6.10.i241 = or i32 %add6.9.i238, %add.10.i240
132+
%cmp4.not.11.i242 = icmp eq i8 0, %s.sroa.13.8.extract.trunc.i204
133+
%add.11.i243 = select i1 %cmp4.not.11.i242, i32 0, i32 0
134+
%add6.11.i244 = or i32 %add6.10.i241, %add.11.i243
135+
%add.12.i246 = select i1 false, i32 0, i32 0
136+
%add6.12.i247 = or i32 %add6.11.i244, %add.12.i246
137+
%add.13.i249 = select i1 false, i32 0, i32 0
138+
%add6.13.i250 = or i32 %add6.12.i247, %add.13.i249
139+
%add65 = select i1 false, i32 0, i32 0
140+
%add66 = or i32 %add65, %add6.13.i250
141+
%add65.1 = select i1 false, i32 0, i32 0
142+
%add66.1 = or i32 %add65.1, %add66
143+
%add65.2 = select i1 false, i32 0, i32 0
144+
%add66.2 = or i32 %add65.2, %add66.1
145+
%add65.3 = select i1 false, i32 0, i32 0
146+
%add66.3 = or i32 %add65.3, %add66.2
147+
%add65.4 = select i1 false, i32 0, i32 0
148+
%add66.4 = or i32 %add65.4, %add66.3
149+
%add65.5 = select i1 false, i32 0, i32 0
150+
%add66.5 = or i32 %add65.5, %add66.4
151+
%add65.6 = select i1 false, i32 0, i32 0
152+
%add66.6 = or i32 %add65.6, %add66.5
153+
%add65.7 = select i1 false, i32 0, i32 0
154+
%add66.7 = or i32 %add65.7, %add66.6
155+
%add65.8 = select i1 false, i32 0, i32 0
156+
%add66.8 = or i32 %add65.8, %add66.7
157+
%add65.9 = select i1 false, i32 0, i32 0
158+
%add66.9 = or i32 %add65.9, %add66.8
159+
%add65.10 = select i1 false, i32 0, i32 0
160+
%add66.10 = or i32 %add65.10, %add66.9
161+
%add65.11 = select i1 false, i32 0, i32 0
162+
%add66.11 = or i32 %add65.11, %add66.10
163+
%add65.12 = select i1 false, i32 0, i32 0
164+
%add66.12 = or i32 %add65.12, %add66.11
165+
%add65.13 = select i1 false, i32 0, i32 0
166+
%add66.13 = or i32 %add65.13, %add66.12
167+
%add.i254 = select i1 false, i32 0, i32 0
168+
%add6.i255 = or i32 %add.i254, %add66.13
169+
%add.1.i257 = select i1 false, i32 0, i32 0
170+
%add6.1.i258 = or i32 %add6.i255, %add.1.i257
171+
%add.2.i260 = select i1 false, i32 0, i32 0
172+
%add6.2.i261 = or i32 %add6.1.i258, %add.2.i260
173+
%add.3.i263 = select i1 false, i32 0, i32 0
174+
%add6.3.i264 = or i32 %add6.2.i261, %add.3.i263
175+
%cmp4.not.4.i265 = icmp eq i8 0, 0
176+
%add.4.i266 = select i1 %cmp4.not.4.i265, i32 0, i32 0
177+
%add6.4.i267 = or i32 %add6.3.i264, %add.4.i266
178+
%5 = load i8, ptr addrspace(21) null, align 2
179+
%6 = load volatile i8, ptr null, align 2
180+
%cmp4.not.6.i271 = icmp eq i8 %5, %6
181+
%add.6.i272 = select i1 %cmp4.not.6.i271, i32 0, i32 0
182+
%add6.6.i273 = or i32 %add6.4.i267, %add.6.i272
183+
%cmp4.not.7.i274 = icmp eq i8 0, 0
184+
%add.7.i275 = select i1 %cmp4.not.7.i274, i32 0, i32 0
185+
%add6.7.i276 = or i32 %add6.6.i273, %add.7.i275
186+
%7 = load i8, ptr addrspace(21) getelementptr inbounds (i8, ptr addrspace(21) null, i64 8), align 8
187+
%cmp4.not.8.i277 = icmp eq i8 %7, %0
188+
%add.8.i278 = select i1 %cmp4.not.8.i277, i32 0, i32 0
189+
%add6.8.i279 = or i32 %add6.7.i276, %add.8.i278
190+
%8 = load i8, ptr addrspace(21) getelementptr inbounds (i8, ptr addrspace(21) null, i64 9), align 1
191+
%cmp4.not.9.i280 = icmp eq i8 %8, 0
192+
%add.9.i281 = select i1 %cmp4.not.9.i280, i32 0, i32 0
193+
%add6.9.i282 = or i32 %add6.8.i279, %add.9.i281
194+
%cmp4.not.10.i283 = icmp eq i8 0, 0
195+
%add.10.i284 = select i1 %cmp4.not.10.i283, i32 0, i32 0
196+
%add6.10.i285 = or i32 %add6.9.i282, %add.10.i284
197+
%cmp4.not.12.i289 = icmp eq i8 0, 0
198+
%add.12.i290 = select i1 %cmp4.not.12.i289, i32 0, i32 0
199+
%add6.12.i291 = or i32 %add6.10.i285, %add.12.i290
200+
%cmp4.not.7.i318 = icmp eq i8 0, 0
201+
%add.7.i319 = select i1 %cmp4.not.7.i318, i32 0, i32 0
202+
%add6.7.i320 = or i32 %add6.12.i291, %add.7.i319
203+
ret i32 %add6.7.i320
204+
}
205+

0 commit comments

Comments
 (0)