Skip to content

Commit 1d5fbe8

Browse files
committed
[SLP]Adjust NumberOfParts value for adjusted number of buildvector scalars
The NumParts value must be recomputed when the GatheredScalars list is adjusted (resized) after the extractelements analysis; this fixes a compiler crash.
1 parent e84f6b6 commit 1d5fbe8

File tree

2 files changed

+275
-0
lines changed

2 files changed

+275
-0
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14898,6 +14898,12 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
1489814898
Resized = true;
1489914899
GatheredScalars.append(VF - GatheredScalars.size(),
1490014900
PoisonValue::get(OrigScalarTy));
14901+
NumParts = TTI->getNumberOfParts(getWidenedType(OrigScalarTy, VF));
14902+
if (NumParts == 0 || NumParts >= GatheredScalars.size() ||
14903+
VecTy->getNumElements() % NumParts != 0 ||
14904+
!hasFullVectorsOrPowerOf2(*TTI, VecTy->getElementType(),
14905+
VecTy->getNumElements() / NumParts))
14906+
NumParts = 1;
1490114907
}
1490214908
}
1490314909
}
Lines changed: 269 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,269 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux < %s | FileCheck %s
3+
4+
define <16 x half> @test(i32 %0, float %1, i32 %2) {
5+
; CHECK-LABEL: define <16 x half> @test(
6+
; CHECK-SAME: i32 [[TMP0:%.*]], float [[TMP1:%.*]], i32 [[TMP2:%.*]]) {
7+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float poison, float 0.000000e+00, float 0.000000e+00>, float [[TMP1]], i32 13
8+
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x float> [[TMP4]] to <16 x i32>
9+
; CHECK-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> zeroinitializer, <2 x float> zeroinitializer, <2 x float> zeroinitializer)
10+
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP2]], 0
11+
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP0]], [[TMP0]]
12+
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP6]], i32 0
13+
; CHECK-NEXT: [[TMP10:%.*]] = fcmp ogt float [[TMP9]], 0.000000e+00
14+
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP0]], 0
15+
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP0]], 0
16+
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <16 x i32> [[TMP5]], i32 10
17+
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], 0
18+
; CHECK-NEXT: [[TMP15:%.*]] = bitcast float 0.000000e+00 to i32
19+
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0
20+
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP0]], 0
21+
; CHECK-NEXT: [[TMP18:%.*]] = icmp ult i32 0, 0
22+
; CHECK-NEXT: [[TMP19:%.*]] = icmp ult i32 0, 0
23+
; CHECK-NEXT: [[TMP20:%.*]] = icmp ult i32 0, 0
24+
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 0, 0
25+
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i32> [[TMP5]], i32 4
26+
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i32 [[TMP22]], 0
27+
; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP0]], [[TMP0]]
28+
; CHECK-NEXT: [[TMP25:%.*]] = bitcast float 0.000000e+00 to i32
29+
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[TMP25]], 0
30+
; CHECK-NEXT: [[TMP27:%.*]] = icmp ult <16 x i32> [[TMP5]], zeroinitializer
31+
; CHECK-NEXT: [[TMP28:%.*]] = select <16 x i1> [[TMP27]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
32+
; CHECK-NEXT: [[TMP29:%.*]] = sitofp <16 x i32> [[TMP28]] to <16 x float>
33+
; CHECK-NEXT: [[TMP30:%.*]] = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> [[TMP29]], <16 x float> zeroinitializer, <16 x float> zeroinitializer)
34+
; CHECK-NEXT: [[TMP31:%.*]] = fadd <16 x float> [[TMP30]], zeroinitializer
35+
; CHECK-NEXT: [[TMP32:%.*]] = call <12 x i1> @llvm.vector.insert.v12i1.v2i1(<12 x i1> poison, <2 x i1> zeroinitializer, i64 0)
36+
; CHECK-NEXT: [[TMP33:%.*]] = shufflevector <12 x i1> [[TMP32]], <12 x i1> <i1 poison, i1 false, i1 false, i1 poison, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <12 x i32> <i32 0, i32 13, i32 14, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
37+
; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <12 x i1> [[TMP33]], <12 x i1> poison, <16 x i32> <i32 0, i32 1, i32 0, i32 0, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 0, i32 10, i32 11, i32 0>
38+
; CHECK-NEXT: [[TMP35:%.*]] = select <16 x i1> [[TMP34]], <16 x float> zeroinitializer, <16 x float> [[TMP31]]
39+
; CHECK-NEXT: [[TMP36:%.*]] = bitcast <16 x float> [[TMP35]] to <16 x i32>
40+
; CHECK-NEXT: [[TMP37:%.*]] = and <16 x i32> [[TMP36]], zeroinitializer
41+
; CHECK-NEXT: [[TMP38:%.*]] = bitcast <16 x i32> [[TMP37]] to <16 x float>
42+
; CHECK-NEXT: [[TMP39:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v2f32(<16 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float poison, float poison>, <2 x float> [[TMP6]], i64 14)
43+
; CHECK-NEXT: [[TMP40:%.*]] = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> zeroinitializer, <16 x float> [[TMP38]], <16 x float> [[TMP39]])
44+
; CHECK-NEXT: [[TMP41:%.*]] = extractelement <16 x float> [[TMP29]], i32 0
45+
; CHECK-NEXT: [[TMP42:%.*]] = fcmp olt float [[TMP41]], 0.000000e+00
46+
; CHECK-NEXT: [[TMP43:%.*]] = extractelement <16 x float> [[TMP29]], i32 14
47+
; CHECK-NEXT: [[TMP44:%.*]] = fcmp ogt float [[TMP43]], 0.000000e+00
48+
; CHECK-NEXT: [[TMP45:%.*]] = fcmp olt float [[TMP43]], 0.000000e+00
49+
; CHECK-NEXT: [[TMP46:%.*]] = extractelement <16 x float> [[TMP29]], i32 13
50+
; CHECK-NEXT: [[TMP47:%.*]] = fcmp ogt float [[TMP46]], 0.000000e+00
51+
; CHECK-NEXT: [[TMP48:%.*]] = fcmp olt float [[TMP46]], 0.000000e+00
52+
; CHECK-NEXT: [[TMP49:%.*]] = fcmp olt float [[TMP41]], 0.000000e+00
53+
; CHECK-NEXT: [[TMP50:%.*]] = extractelement <16 x float> [[TMP29]], i32 1
54+
; CHECK-NEXT: [[TMP51:%.*]] = fcmp ogt float [[TMP50]], 0.000000e+00
55+
; CHECK-NEXT: [[TMP52:%.*]] = fcmp oeq <16 x float> [[TMP40]], zeroinitializer
56+
; CHECK-NEXT: ret <16 x half> zeroinitializer
57+
;
58+
%4 = bitcast float 0.000000e+00 to i32
59+
%5 = fcmp olt float 0.000000e+00, 0.000000e+00
60+
%6 = icmp ult i32 %4, 0
61+
%7 = select i1 %6, i32 0, i32 0
62+
%8 = sitofp i32 %7 to float
63+
%9 = tail call float @llvm.fmuladd.f32(float %8, float 0.000000e+00, float 0.000000e+00)
64+
%10 = fadd float %9, 0.000000e+00
65+
%11 = select i1 %5, float 0.000000e+00, float %10
66+
%12 = bitcast float %11 to i32
67+
%13 = and i32 %12, 0
68+
%14 = bitcast i32 %13 to float
69+
%15 = tail call float @llvm.fmuladd.f32(float 0.000000e+00, float 0.000000e+00, float 0.000000e+00)
70+
%16 = tail call float @llvm.fmuladd.f32(float 0.000000e+00, float %14, float %15)
71+
%17 = fcmp oeq float %16, 0.000000e+00
72+
%18 = fcmp olt float %8, 0.000000e+00
73+
%19 = icmp ugt i32 %2, 0
74+
%20 = bitcast float 0.000000e+00 to i32
75+
%21 = icmp eq i32 %0, %0
76+
%22 = icmp ult i32 %20, 0
77+
%23 = select i1 %22, i32 0, i32 0
78+
%24 = sitofp i32 %23 to float
79+
%25 = tail call float @llvm.fmuladd.f32(float %24, float 0.000000e+00, float 0.000000e+00)
80+
%26 = fadd float %25, 0.000000e+00
81+
%27 = select i1 false, float 0.000000e+00, float %26
82+
%28 = bitcast float %27 to i32
83+
%29 = and i32 %28, 0
84+
%30 = bitcast i32 %29 to float
85+
%31 = tail call float @llvm.fmuladd.f32(float 0.000000e+00, float 0.000000e+00, float 0.000000e+00)
86+
%32 = tail call float @llvm.fmuladd.f32(float 0.000000e+00, float %30, float %31)
87+
%33 = fcmp ogt float %24, 0.000000e+00
88+
%34 = fcmp oeq float %32, 0.000000e+00
89+
%35 = fcmp ogt float %31, 0.000000e+00
90+
%36 = fcmp olt float %24, 0.000000e+00
91+
%37 = bitcast float %1 to i32
92+
%38 = icmp ult i32 %37, 0
93+
%39 = select i1 %38, i32 0, i32 0
94+
%40 = sitofp i32 %39 to float
95+
%41 = tail call float @llvm.fmuladd.f32(float %40, float 0.000000e+00, float 0.000000e+00)
96+
%42 = fadd float %41, 0.000000e+00
97+
%43 = select i1 false, float 0.000000e+00, float %42
98+
%44 = bitcast float %43 to i32
99+
%45 = and i32 %44, 0
100+
%46 = bitcast i32 %45 to float
101+
%47 = tail call float @llvm.fmuladd.f32(float 0.000000e+00, float %46, float 0.000000e+00)
102+
%48 = fadd float 0.000000e+00, 0.000000e+00
103+
%49 = fcmp ogt float %40, 0.000000e+00
104+
%50 = fcmp oeq float %47, 0.000000e+00
105+
%51 = fcmp ogt float %48, 0.000000e+00
106+
%52 = fcmp olt float %40, 0.000000e+00
107+
%53 = icmp eq i32 0, 0
108+
%54 = bitcast float 0.000000e+00 to i32
109+
%55 = icmp eq i32 0, 0
110+
%56 = icmp ult i32 %54, 0
111+
%57 = select i1 %56, i32 0, i32 0
112+
%58 = sitofp i32 %57 to float
113+
%59 = tail call float @llvm.fmuladd.f32(float %58, float 0.000000e+00, float 0.000000e+00)
114+
%60 = fadd float %59, 0.000000e+00
115+
%61 = select i1 %5, float 0.000000e+00, float %60
116+
%62 = bitcast float %61 to i32
117+
%63 = and i32 %62, 0
118+
%64 = bitcast i32 %63 to float
119+
%65 = tail call float @llvm.fmuladd.f32(float 0.000000e+00, float %64, float 0.000000e+00)
120+
%66 = fcmp oeq float %65, 0.000000e+00
121+
%67 = bitcast float 0.000000e+00 to i32
122+
%68 = icmp eq i32 %0, 0
123+
%69 = icmp ult i32 %67, 0
124+
%70 = select i1 %69, i32 0, i32 0
125+
%71 = sitofp i32 %70 to float
126+
%72 = tail call float @llvm.fmuladd.f32(float %71, float 0.000000e+00, float 0.000000e+00)
127+
%73 = fadd float %72, 0.000000e+00
128+
%74 = select i1 false, float 0.000000e+00, float %73
129+
%75 = bitcast float %74 to i32
130+
%76 = and i32 %75, 0
131+
%77 = bitcast i32 %76 to float
132+
%78 = tail call float @llvm.fmuladd.f32(float 0.000000e+00, float %77, float 0.000000e+00)
133+
%79 = fadd float 0.000000e+00, 0.000000e+00
134+
%80 = fcmp oeq float %78, 0.000000e+00
135+
%81 = fcmp ogt float %79, 0.000000e+00
136+
%82 = icmp eq i32 %0, 0
137+
%83 = bitcast float 0.000000e+00 to i32
138+
%84 = icmp eq i32 %83, 0
139+
%85 = icmp ult i32 %83, 0
140+
%86 = select i1 %85, i32 0, i32 0
141+
%87 = sitofp i32 %86 to float
142+
%88 = tail call float @llvm.fmuladd.f32(float %87, float 0.000000e+00, float 0.000000e+00)
143+
%89 = fadd float %88, 0.000000e+00
144+
%90 = select i1 false, float 0.000000e+00, float %89
145+
%91 = bitcast float %90 to i32
146+
%92 = and i32 %91, 0
147+
%93 = bitcast i32 %92 to float
148+
%94 = tail call float @llvm.fmuladd.f32(float 0.000000e+00, float %93, float 0.000000e+00)
149+
%95 = fcmp oeq float %94, 0.000000e+00
150+
%96 = bitcast float 0.000000e+00 to i32
151+
%97 = bitcast float 0.000000e+00 to i32
152+
%98 = icmp ult i32 %97, 0
153+
%99 = select i1 %98, i32 0, i32 0
154+
%100 = sitofp i32 %99 to float
155+
%101 = tail call float @llvm.fmuladd.f32(float %100, float 0.000000e+00, float 0.000000e+00)
156+
%102 = fadd float %101, 0.000000e+00
157+
%103 = select i1 false, float 0.000000e+00, float %102
158+
%104 = bitcast float %103 to i32
159+
%105 = and i32 %104, 0
160+
%106 = bitcast i32 %105 to float
161+
%107 = tail call float @llvm.fmuladd.f32(float 0.000000e+00, float %106, float 0.000000e+00)
162+
%108 = fcmp oeq float %107, 0.000000e+00
163+
%109 = icmp eq i32 %96, 0
164+
%110 = icmp eq i32 %0, 0
165+
%111 = icmp ult i32 0, 0
166+
%112 = bitcast float 0.000000e+00 to i32
167+
%113 = icmp ult i32 %112, 0
168+
%114 = select i1 %113, i32 0, i32 0
169+
%115 = sitofp i32 %114 to float
170+
%116 = tail call float @llvm.fmuladd.f32(float %115, float 0.000000e+00, float 0.000000e+00)
171+
%117 = fadd float %116, 0.000000e+00
172+
%118 = select i1 false, float 0.000000e+00, float %117
173+
%119 = bitcast float %118 to i32
174+
%120 = and i32 %119, 0
175+
%121 = bitcast i32 %120 to float
176+
%122 = tail call float @llvm.fmuladd.f32(float 0.000000e+00, float %121, float 0.000000e+00)
177+
%123 = fadd float 0.000000e+00, 0.000000e+00
178+
%124 = fcmp oeq float %122, 0.000000e+00
179+
%125 = fcmp ogt float %123, 0.000000e+00
180+
%126 = icmp ult i32 0, 0
181+
%127 = bitcast float 0.000000e+00 to i32
182+
%128 = icmp ult i32 %127, 0
183+
%129 = select i1 %128, i32 0, i32 0
184+
%130 = sitofp i32 %129 to float
185+
%131 = tail call float @llvm.fmuladd.f32(float %130, float 0.000000e+00, float 0.000000e+00)
186+
%132 = fadd float %131, 0.000000e+00
187+
%133 = select i1 false, float 0.000000e+00, float %132
188+
%134 = bitcast float %133 to i32
189+
%135 = and i32 %134, 0
190+
%136 = bitcast i32 %135 to float
191+
%137 = tail call float @llvm.fmuladd.f32(float 0.000000e+00, float %136, float 0.000000e+00)
192+
%138 = fcmp oeq float %137, 0.000000e+00
193+
%139 = icmp ult i32 0, 0
194+
%140 = bitcast float 0.000000e+00 to i32
195+
%141 = icmp eq i32 0, 0
196+
%142 = icmp ult i32 %140, 0
197+
%143 = select i1 %142, i32 0, i32 0
198+
%144 = sitofp i32 %143 to float
199+
%145 = tail call float @llvm.fmuladd.f32(float %144, float 0.000000e+00, float 0.000000e+00)
200+
%146 = fadd float %145, 0.000000e+00
201+
%147 = select i1 false, float 0.000000e+00, float %146
202+
%148 = bitcast float %147 to i32
203+
%149 = and i32 %148, 0
204+
%150 = bitcast i32 %149 to float
205+
%151 = tail call float @llvm.fmuladd.f32(float 0.000000e+00, float %150, float 0.000000e+00)
206+
%152 = fcmp oeq float %151, 0.000000e+00
207+
%153 = fcmp olt float 0.000000e+00, 0.000000e+00
208+
%154 = select i1 %153, float 0.000000e+00, float %10
209+
%155 = bitcast float %154 to i32
210+
%156 = and i32 %155, 0
211+
%157 = bitcast i32 %156 to float
212+
%158 = tail call float @llvm.fmuladd.f32(float 0.000000e+00, float %157, float 0.000000e+00)
213+
%159 = fcmp oeq float %158, 0.000000e+00
214+
%160 = bitcast float 0.000000e+00 to i32
215+
%161 = icmp eq i32 %160, 0
216+
%162 = icmp ult i32 %160, 0
217+
%163 = select i1 %162, i32 0, i32 0
218+
%164 = sitofp i32 %163 to float
219+
%165 = tail call float @llvm.fmuladd.f32(float %164, float 0.000000e+00, float 0.000000e+00)
220+
%166 = fadd float %165, 0.000000e+00
221+
%167 = select i1 false, float 0.000000e+00, float %166
222+
%168 = bitcast float %167 to i32
223+
%169 = and i32 %168, 0
224+
%170 = bitcast i32 %169 to float
225+
%171 = tail call float @llvm.fmuladd.f32(float 0.000000e+00, float %170, float 0.000000e+00)
226+
%172 = fcmp oeq float %171, 0.000000e+00
227+
%173 = tail call float @llvm.fmuladd.f32(float %8, float 0.000000e+00, float 0.000000e+00)
228+
%174 = fadd float %173, 0.000000e+00
229+
%175 = select i1 %5, float 0.000000e+00, float %174
230+
%176 = bitcast float %175 to i32
231+
%177 = and i32 %176, 0
232+
%178 = bitcast i32 %177 to float
233+
%179 = tail call float @llvm.fmuladd.f32(float 0.000000e+00, float %178, float 0.000000e+00)
234+
%180 = fadd float 0.000000e+00, 0.000000e+00
235+
%181 = fcmp oeq float %179, 0.000000e+00
236+
%182 = fcmp ogt float %180, 0.000000e+00
237+
%183 = fcmp olt float %8, 0.000000e+00
238+
%184 = bitcast float 0.000000e+00 to i32
239+
%185 = icmp eq i32 %0, %0
240+
%186 = icmp ult i32 %184, 0
241+
%187 = select i1 %186, i32 0, i32 0
242+
%188 = sitofp i32 %187 to float
243+
%189 = tail call float @llvm.fmuladd.f32(float %188, float 0.000000e+00, float 0.000000e+00)
244+
%190 = fadd float %189, 0.000000e+00
245+
%191 = select i1 %5, float 0.000000e+00, float %190
246+
%192 = bitcast float %191 to i32
247+
%193 = and i32 %192, 0
248+
%194 = bitcast i32 %193 to float
249+
%195 = tail call float @llvm.fmuladd.f32(float 0.000000e+00, float %194, float 0.000000e+00)
250+
%196 = fcmp oeq float %195, 0.000000e+00
251+
%197 = bitcast float 0.000000e+00 to i32
252+
%198 = icmp eq i32 %197, 0
253+
%199 = icmp ult i32 %197, 0
254+
%200 = select i1 %199, i32 0, i32 0
255+
%201 = sitofp i32 %200 to float
256+
%202 = tail call float @llvm.fmuladd.f32(float %201, float 0.000000e+00, float 0.000000e+00)
257+
%203 = fadd float %202, 0.000000e+00
258+
%204 = select i1 false, float 0.000000e+00, float %203
259+
%205 = bitcast float %204 to i32
260+
%206 = and i32 %205, 0
261+
%207 = bitcast i32 %206 to float
262+
%208 = tail call float @llvm.fmuladd.f32(float 0.000000e+00, float %207, float 0.000000e+00)
263+
%209 = fcmp oeq float %208, 0.000000e+00
264+
%210 = fcmp ogt float %201, 0.000000e+00
265+
%211 = tail call float @llvm.fmuladd.f32(float 0.000000e+00, float %14, float 0.000000e+00)
266+
%212 = fcmp oeq float %211, 0.000000e+00
267+
ret <16 x half> zeroinitializer
268+
}
269+

0 commit comments

Comments
 (0)