Skip to content

Commit 192c23b

Browse files
committed
[SLP] Add X86 version of non-power-of-2 vectorization tests.
Extra X86 tests for #77790.
1 parent 4ad9f5b commit 192c23b

File tree

4 files changed

+956
-0
lines changed

4 files changed

+956
-0
lines changed
Lines changed: 317 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,317 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -mtriple=x86_64-apple-macosx -S %s | FileCheck %s

; 3-element chain: load i32, mul by constant 10, store. The autogenerated
; CHECK lines show fully scalar output, i.e. SLP currently does not vectorize
; this 3-wide (non-power-of-2) pattern on x86.
define void @v3_load_i32_mul_by_constant_store(ptr %src, ptr %dst) {
; CHECK-LABEL: @v3_load_i32_mul_by_constant_store(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0
; CHECK-NEXT: [[L_SRC_0:%.*]] = load i32, ptr [[GEP_SRC_0]], align 4
; CHECK-NEXT: [[MUL_0:%.*]] = mul nsw i32 [[L_SRC_0]], 10
; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 1
; CHECK-NEXT: [[L_SRC_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 4
; CHECK-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[L_SRC_1]], 10
; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 2
; CHECK-NEXT: [[L_SRC_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4
; CHECK-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_2]], 10
; CHECK-NEXT: store i32 [[MUL_0]], ptr [[DST:%.*]], align 4
; CHECK-NEXT: [[DST_1:%.*]] = getelementptr i32, ptr [[DST]], i32 1
; CHECK-NEXT: store i32 [[MUL_1]], ptr [[DST_1]], align 4
; CHECK-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
; CHECK-NEXT: store i32 [[MUL_2]], ptr [[DST_2]], align 4
; CHECK-NEXT: ret void
;
entry:
  %gep.src.0 = getelementptr inbounds i32, ptr %src, i32 0
  %l.src.0 = load i32, ptr %gep.src.0, align 4
  %mul.0 = mul nsw i32 %l.src.0, 10

  %gep.src.1 = getelementptr inbounds i32, ptr %src, i32 1
  %l.src.1 = load i32, ptr %gep.src.1, align 4
  %mul.1 = mul nsw i32 %l.src.1, 10

  %gep.src.2 = getelementptr inbounds i32, ptr %src, i32 2
  %l.src.2 = load i32, ptr %gep.src.2, align 4
  %mul.2 = mul nsw i32 %l.src.2, 10

  store i32 %mul.0, ptr %dst

  %dst.1 = getelementptr i32, ptr %dst, i32 1
  store i32 %mul.1, ptr %dst.1

  %dst.2 = getelementptr i32, ptr %dst, i32 2
  store i32 %mul.2, ptr %dst.2

  ret void
}

; 3-element chain: load two i32 operands, mul, store. CHECK lines show fully
; scalar output — SLP currently does not vectorize this 3-wide pattern.
define void @v3_load_i32_mul_store(ptr %src.1, ptr %src.2, ptr %dst) {
; CHECK-LABEL: @v3_load_i32_mul_store(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[GEP_SRC_1_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_1:%.*]], i32 0
; CHECK-NEXT: [[L_SRC_1_0:%.*]] = load i32, ptr [[GEP_SRC_1_0]], align 4
; CHECK-NEXT: [[GEP_SRC_2_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_2:%.*]], i32 0
; CHECK-NEXT: [[L_SRC_2_0:%.*]] = load i32, ptr [[GEP_SRC_2_0]], align 4
; CHECK-NEXT: [[MUL_0:%.*]] = mul nsw i32 [[L_SRC_1_0]], [[L_SRC_2_0]]
; CHECK-NEXT: [[GEP_SRC_1_1:%.*]] = getelementptr inbounds i32, ptr [[SRC_1]], i32 1
; CHECK-NEXT: [[L_SRC_1_1:%.*]] = load i32, ptr [[GEP_SRC_1_1]], align 4
; CHECK-NEXT: [[GEP_SRC_2_1:%.*]] = getelementptr inbounds i32, ptr [[SRC_2]], i32 1
; CHECK-NEXT: [[L_SRC_2_1:%.*]] = load i32, ptr [[GEP_SRC_2_1]], align 4
; CHECK-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[L_SRC_1_1]], [[L_SRC_2_1]]
; CHECK-NEXT: [[GEP_SRC_1_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_1]], i32 2
; CHECK-NEXT: [[L_SRC_1_2:%.*]] = load i32, ptr [[GEP_SRC_1_2]], align 4
; CHECK-NEXT: [[GEP_SRC_2_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_2]], i32 2
; CHECK-NEXT: [[L_SRC_2_2:%.*]] = load i32, ptr [[GEP_SRC_2_2]], align 4
; CHECK-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_1_2]], [[L_SRC_2_2]]
; CHECK-NEXT: store i32 [[MUL_0]], ptr [[DST:%.*]], align 4
; CHECK-NEXT: [[DST_1:%.*]] = getelementptr i32, ptr [[DST]], i32 1
; CHECK-NEXT: store i32 [[MUL_1]], ptr [[DST_1]], align 4
; CHECK-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
; CHECK-NEXT: store i32 [[MUL_2]], ptr [[DST_2]], align 4
; CHECK-NEXT: ret void
;
entry:
  %gep.src.1.0 = getelementptr inbounds i32, ptr %src.1, i32 0
  %l.src.1.0 = load i32, ptr %gep.src.1.0, align 4
  %gep.src.2.0 = getelementptr inbounds i32, ptr %src.2, i32 0
  %l.src.2.0 = load i32, ptr %gep.src.2.0, align 4
  %mul.0 = mul nsw i32 %l.src.1.0, %l.src.2.0

  %gep.src.1.1 = getelementptr inbounds i32, ptr %src.1, i32 1
  %l.src.1.1 = load i32, ptr %gep.src.1.1, align 4
  %gep.src.2.1 = getelementptr inbounds i32, ptr %src.2, i32 1
  %l.src.2.1 = load i32, ptr %gep.src.2.1, align 4
  %mul.1 = mul nsw i32 %l.src.1.1, %l.src.2.1

  %gep.src.1.2 = getelementptr inbounds i32, ptr %src.1, i32 2
  %l.src.1.2 = load i32, ptr %gep.src.1.2, align 4
  %gep.src.2.2 = getelementptr inbounds i32, ptr %src.2, i32 2
  %l.src.2.2 = load i32, ptr %gep.src.2.2, align 4
  %mul.2 = mul nsw i32 %l.src.1.2, %l.src.2.2

  store i32 %mul.0, ptr %dst

  %dst.1 = getelementptr i32, ptr %dst, i32 1
  store i32 %mul.1, ptr %dst.1

  %dst.2 = getelementptr i32, ptr %dst, i32 2
  store i32 %mul.2, ptr %dst.2

  ret void
}

; 3-element chain: load, mul, add constant 9, store. CHECK lines show fully
; scalar output — SLP currently does not vectorize this 3-wide pattern.
define void @v3_load_i32_mul_add_const_store(ptr %src.1, ptr %src.2, ptr %dst) {
; CHECK-LABEL: @v3_load_i32_mul_add_const_store(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[GEP_SRC_1_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_1:%.*]], i32 0
; CHECK-NEXT: [[L_SRC_1_0:%.*]] = load i32, ptr [[GEP_SRC_1_0]], align 4
; CHECK-NEXT: [[GEP_SRC_2_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_2:%.*]], i32 0
; CHECK-NEXT: [[L_SRC_2_0:%.*]] = load i32, ptr [[GEP_SRC_2_0]], align 4
; CHECK-NEXT: [[MUL_0:%.*]] = mul nsw i32 [[L_SRC_1_0]], [[L_SRC_2_0]]
; CHECK-NEXT: [[ADD_0:%.*]] = add i32 [[MUL_0]], 9
; CHECK-NEXT: [[GEP_SRC_1_1:%.*]] = getelementptr inbounds i32, ptr [[SRC_1]], i32 1
; CHECK-NEXT: [[L_SRC_1_1:%.*]] = load i32, ptr [[GEP_SRC_1_1]], align 4
; CHECK-NEXT: [[GEP_SRC_2_1:%.*]] = getelementptr inbounds i32, ptr [[SRC_2]], i32 1
; CHECK-NEXT: [[L_SRC_2_1:%.*]] = load i32, ptr [[GEP_SRC_2_1]], align 4
; CHECK-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[L_SRC_1_1]], [[L_SRC_2_1]]
; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[MUL_1]], 9
; CHECK-NEXT: [[GEP_SRC_1_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_1]], i32 2
; CHECK-NEXT: [[L_SRC_1_2:%.*]] = load i32, ptr [[GEP_SRC_1_2]], align 4
; CHECK-NEXT: [[GEP_SRC_2_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_2]], i32 2
; CHECK-NEXT: [[L_SRC_2_2:%.*]] = load i32, ptr [[GEP_SRC_2_2]], align 4
; CHECK-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_1_2]], [[L_SRC_2_2]]
; CHECK-NEXT: [[ADD_2:%.*]] = add i32 [[MUL_2]], 9
; CHECK-NEXT: store i32 [[ADD_0]], ptr [[DST:%.*]], align 4
; CHECK-NEXT: [[DST_1:%.*]] = getelementptr i32, ptr [[DST]], i32 1
; CHECK-NEXT: store i32 [[ADD_1]], ptr [[DST_1]], align 4
; CHECK-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
; CHECK-NEXT: store i32 [[ADD_2]], ptr [[DST_2]], align 4
; CHECK-NEXT: ret void
;
entry:
  %gep.src.1.0 = getelementptr inbounds i32, ptr %src.1, i32 0
  %l.src.1.0 = load i32, ptr %gep.src.1.0, align 4
  %gep.src.2.0 = getelementptr inbounds i32, ptr %src.2, i32 0
  %l.src.2.0 = load i32, ptr %gep.src.2.0, align 4
  %mul.0 = mul nsw i32 %l.src.1.0, %l.src.2.0
  %add.0 = add i32 %mul.0, 9

  %gep.src.1.1 = getelementptr inbounds i32, ptr %src.1, i32 1
  %l.src.1.1 = load i32, ptr %gep.src.1.1, align 4
  %gep.src.2.1 = getelementptr inbounds i32, ptr %src.2, i32 1
  %l.src.2.1 = load i32, ptr %gep.src.2.1, align 4
  %mul.1 = mul nsw i32 %l.src.1.1, %l.src.2.1
  %add.1 = add i32 %mul.1, 9

  %gep.src.1.2 = getelementptr inbounds i32, ptr %src.1, i32 2
  %l.src.1.2 = load i32, ptr %gep.src.1.2, align 4
  %gep.src.2.2 = getelementptr inbounds i32, ptr %src.2, i32 2
  %l.src.2.2 = load i32, ptr %gep.src.2.2, align 4
  %mul.2 = mul nsw i32 %l.src.1.2, %l.src.2.2
  %add.2 = add i32 %mul.2, 9

  store i32 %add.0, ptr %dst

  %dst.1 = getelementptr i32, ptr %dst, i32 1
  store i32 %add.1, ptr %dst.1

  %dst.2 = getelementptr i32, ptr %dst, i32 2
  store i32 %add.2, ptr %dst.2

  ret void
}

; 3-element float fadd-by-constant chain. CHECK lines show partial
; vectorization: elements 0-1 become a <2 x float> load/fadd/store, element 2
; stays scalar.
define void @v3_load_f32_fadd_fadd_by_constant_store(ptr %src, ptr %dst) {
; CHECK-LABEL: @v3_load_f32_fadd_fadd_by_constant_store(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0
; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds float, ptr [[SRC]], i32 2
; CHECK-NEXT: [[L_SRC_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 4
; CHECK-NEXT: [[FADD_2:%.*]] = fadd float [[L_SRC_2]], 1.000000e+01
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[GEP_SRC_0]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[TMP0]], <float 1.000000e+01, float 1.000000e+01>
; CHECK-NEXT: store <2 x float> [[TMP1]], ptr [[DST:%.*]], align 4
; CHECK-NEXT: [[DST_2:%.*]] = getelementptr float, ptr [[DST]], i32 2
; CHECK-NEXT: store float [[FADD_2]], ptr [[DST_2]], align 4
; CHECK-NEXT: ret void
;
entry:
  %gep.src.0 = getelementptr inbounds float, ptr %src, i32 0
  %l.src.0 = load float , ptr %gep.src.0, align 4
  %fadd.0 = fadd float %l.src.0, 10.0

  %gep.src.1 = getelementptr inbounds float , ptr %src, i32 1
  %l.src.1 = load float, ptr %gep.src.1, align 4
  %fadd.1 = fadd float %l.src.1, 10.0

  %gep.src.2 = getelementptr inbounds float, ptr %src, i32 2
  %l.src.2 = load float, ptr %gep.src.2, align 4
  %fadd.2 = fadd float %l.src.2, 10.0

  store float %fadd.0, ptr %dst

  %dst.1 = getelementptr float, ptr %dst, i32 1
  store float %fadd.1, ptr %dst.1

  %dst.2 = getelementptr float, ptr %dst, i32 2
  store float %fadd.2, ptr %dst.2

  ret void
}

; Three i32 phis stored contiguously. CHECK lines show partial vectorization:
; p.0/p.1 merge into a <2 x i32> phi + vector store, p.2 stays scalar.
define void @phi_store3(ptr %dst) {
; CHECK-LABEL: @phi_store3(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: invoke.cont8.loopexit:
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: [[P_2:%.*]] = phi i32 [ 3, [[ENTRY:%.*]] ], [ 0, [[INVOKE_CONT8_LOOPEXIT:%.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[ENTRY]] ], [ poison, [[INVOKE_CONT8_LOOPEXIT]] ]
; CHECK-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 2
; CHECK-NEXT: store <2 x i32> [[TMP0]], ptr [[DST]], align 4
; CHECK-NEXT: store i32 [[P_2]], ptr [[DST_2]], align 4
; CHECK-NEXT: ret void
;
entry:
  br label %exit

invoke.cont8.loopexit: ; No predecessors!
  br label %exit

exit:
  %p.0 = phi i32 [ 1, %entry ], [ 0, %invoke.cont8.loopexit ]
  %p.1 = phi i32 [ 2, %entry ], [ 0, %invoke.cont8.loopexit ]
  %p.2 = phi i32 [ 3, %entry ], [ 0, %invoke.cont8.loopexit ]

  %dst.1 = getelementptr i32, ptr %dst, i32 1
  %dst.2 = getelementptr i32, ptr %dst, i32 2

  store i32 %p.0, ptr %dst, align 4
  store i32 %p.1, ptr %dst.1, align 4
  store i32 %p.2, ptr %dst.2, align 4
  ret void
}

; Three constant-foldable stores. CHECK lines show the last two stores merged
; into one <2 x i32> zeroinitializer store; the first store stays scalar.
define void @store_try_reorder(ptr %dst) {
; CHECK-LABEL: @store_try_reorder(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD:%.*]] = add i32 0, 0
; CHECK-NEXT: store i32 [[ADD]], ptr [[DST:%.*]], align 4
; CHECK-NEXT: [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1
; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4
; CHECK-NEXT: ret void
;
entry:
  %add = add i32 0, 0
  store i32 %add, ptr %dst, align 4
  %add207 = sub i32 0, 0
  %arrayidx.i1887 = getelementptr i32, ptr %dst, i64 1
  store i32 %add207, ptr %arrayidx.i1887, align 4
  %add216 = sub i32 0, 0
  %arrayidx.i1891 = getelementptr i32, ptr %dst, i64 2
  store i32 %add216, ptr %arrayidx.i1891, align 4
  ret void
}

; 3-wide fpext/fmuladd/fptrunc chain. CHECK lines show partial vectorization:
; the first two lanes become <2 x double> @llvm.fmuladd.v2f64, the third lane
; stays scalar.
define void @vec3_fpext_cost(ptr %Colour, float %0) {
; CHECK-LABEL: @vec3_fpext_cost(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX80:%.*]] = getelementptr float, ptr [[COLOUR:%.*]], i64 2
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[TMP0:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = fpext <2 x float> [[TMP2]] to <2 x double>
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP3]], <2 x double> zeroinitializer, <2 x double> zeroinitializer)
; CHECK-NEXT: [[TMP5:%.*]] = fptrunc <2 x double> [[TMP4]] to <2 x float>
; CHECK-NEXT: store <2 x float> [[TMP5]], ptr [[COLOUR]], align 4
; CHECK-NEXT: [[CONV78:%.*]] = fpext float [[TMP0]] to double
; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fmuladd.f64(double [[CONV78]], double 0.000000e+00, double 0.000000e+00)
; CHECK-NEXT: [[CONV82:%.*]] = fptrunc double [[TMP6]] to float
; CHECK-NEXT: store float [[CONV82]], ptr [[ARRAYIDX80]], align 4
; CHECK-NEXT: ret void
;
entry:
  %arrayidx72 = getelementptr float, ptr %Colour, i64 1
  %arrayidx80 = getelementptr float, ptr %Colour, i64 2
  %conv62 = fpext float %0 to double
  %1 = call double @llvm.fmuladd.f64(double %conv62, double 0.000000e+00, double 0.000000e+00)
  %conv66 = fptrunc double %1 to float
  store float %conv66, ptr %Colour, align 4
  %conv70 = fpext float %0 to double
  %2 = call double @llvm.fmuladd.f64(double %conv70, double 0.000000e+00, double 0.000000e+00)
  %conv74 = fptrunc double %2 to float
  store float %conv74, ptr %arrayidx72, align 4
  %conv78 = fpext float %0 to double
  %3 = call double @llvm.fmuladd.f64(double %conv78, double 0.000000e+00, double 0.000000e+00)
  %conv82 = fptrunc double %3 to float
  store float %conv82, ptr %arrayidx80, align 4
  ret void
}

; Repeated fptrunc of the same double stored to 3 consecutive floats. CHECK
; lines show a gathered <2 x float> store at offset 1 plus one scalar store.
define void @fpext_gather(ptr %dst, double %conv) {
; CHECK-LABEL: @fpext_gather(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[CONV:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = fptrunc <2 x double> [[TMP1]] to <2 x float>
; CHECK-NEXT: [[LENGTHS:%.*]] = getelementptr float, ptr [[DST:%.*]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; CHECK-NEXT: store float [[TMP3]], ptr [[LENGTHS]], align 4
; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr float, ptr [[DST]], i64 1
; CHECK-NEXT: store <2 x float> [[TMP2]], ptr [[ARRAYIDX32]], align 4
; CHECK-NEXT: ret void
;
entry:
  %conv25 = fptrunc double %conv to float
  %Lengths = getelementptr float, ptr %dst, i64 0
  store float %conv25, ptr %Lengths, align 4
  %arrayidx32 = getelementptr float, ptr %dst, i64 1
  store float %conv25, ptr %arrayidx32, align 4
  %conv34 = fptrunc double %conv to float
  %arrayidx37 = getelementptr float, ptr %dst, i64 2
  store float %conv34, ptr %arrayidx37, align 4
  ret void
}

; Intrinsic declarations used by the tests above.
declare float @llvm.fmuladd.f32(float, float, float)
declare double @llvm.fmuladd.f64(double, double, double)
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -mtriple=x86_64-apple-macosx -S %s | FileCheck %s

; 3-wide fmuladd over consecutive floats, third lane fed by an argument.
; CHECK lines show lanes 0-1 vectorized to @llvm.fmuladd.v2f32; lane 2 scalar.
define void @vec3_vectorize_call(ptr %Colour, float %0) {
; CHECK-LABEL: @vec3_vectorize_call(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[COLOUR:%.*]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP1]], <2 x float> zeroinitializer, <2 x float> zeroinitializer)
; CHECK-NEXT: store <2 x float> [[TMP2]], ptr [[COLOUR]], align 4
; CHECK-NEXT: [[ARRAYIDX99_I1:%.*]] = getelementptr float, ptr [[COLOUR]], i64 2
; CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.fmuladd.f32(float [[TMP0:%.*]], float 0.000000e+00, float 0.000000e+00)
; CHECK-NEXT: store float [[TMP3]], ptr [[ARRAYIDX99_I1]], align 4
; CHECK-NEXT: ret void
;
entry:
  %1 = load float, ptr %Colour, align 4
  %2 = call float @llvm.fmuladd.f32(float %1, float 0.000000e+00, float 0.000000e+00)
  store float %2, ptr %Colour, align 4
  %arrayidx91.i = getelementptr float, ptr %Colour, i64 1
  %3 = load float, ptr %arrayidx91.i, align 4
  %4 = call float @llvm.fmuladd.f32(float %3, float 0.000000e+00, float 0.000000e+00)
  store float %4, ptr %arrayidx91.i, align 4
  %arrayidx99.i1 = getelementptr float, ptr %Colour, i64 2
  %5 = call float @llvm.fmuladd.f32(float %0, float 0.000000e+00, float 0.000000e+00)
  store float %5, ptr %arrayidx99.i1, align 4
  ret void
}

; 3-wide f64 fmuladd + fptrunc chain. CHECK lines show lanes 0-1 vectorized
; to @llvm.fmuladd.v2f64 with a <2 x float> truncated store; lane 2 scalar.
define void @vec3_fmuladd_64(ptr %Colour, double %0) {
; CHECK-LABEL: @vec3_fmuladd_64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX80:%.*]] = getelementptr float, ptr [[COLOUR:%.*]], i64 2
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[TMP0:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP2]], <2 x double> zeroinitializer, <2 x double> zeroinitializer)
; CHECK-NEXT: [[TMP4:%.*]] = fptrunc <2 x double> [[TMP3]] to <2 x float>
; CHECK-NEXT: store <2 x float> [[TMP4]], ptr [[COLOUR]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.fmuladd.f64(double [[TMP0]], double 0.000000e+00, double 0.000000e+00)
; CHECK-NEXT: [[CONV82:%.*]] = fptrunc double [[TMP5]] to float
; CHECK-NEXT: store float [[CONV82]], ptr [[ARRAYIDX80]], align 4
; CHECK-NEXT: ret void
;
entry:
  %arrayidx72 = getelementptr float, ptr %Colour, i64 1
  %arrayidx80 = getelementptr float, ptr %Colour, i64 2
  %1 = call double @llvm.fmuladd.f64(double %0, double 0.000000e+00, double 0.000000e+00)
  %conv66 = fptrunc double %1 to float
  store float %conv66, ptr %Colour, align 4
  %2 = call double @llvm.fmuladd.f64(double %0, double 0.000000e+00, double 0.000000e+00)
  %conv74 = fptrunc double %2 to float
  store float %conv74, ptr %arrayidx72, align 4
  %3 = call double @llvm.fmuladd.f64(double %0, double 0.000000e+00, double 0.000000e+00)
  %conv82 = fptrunc double %3 to float
  store float %conv82, ptr %arrayidx80, align 4
  ret void
}

; Intrinsic declarations used by the tests above.
declare float @llvm.fmuladd.f32(float, float, float)
declare double @llvm.fmuladd.f64(double, double, double)

0 commit comments

Comments
 (0)