Skip to content

Commit aa30018

Browse files
authored
SLP/RISCV: add negative test for llrint, increase coverage (#69940)
To follow up on a06be8a (SLP/RISCV: add negative test for lrint), add a negative test for llvm.llrint as well, and increase the coverage to cover vectors of length 2, 4, and 8, and the i32 variant of lrint, in preparation to get SLPVectorizer to vectorize both lrint and llrint. This is now possible with the recent change 98c90a1 (ISel: introduce vector ISD::LRINT, ISD::LLRINT; custom RISCV lowering).
1 parent e696379 commit aa30018

File tree

1 file changed

+326
-0
lines changed
  • llvm/test/Transforms/SLPVectorizer/RISCV

1 file changed

+326
-0
lines changed

llvm/test/Transforms/SLPVectorizer/RISCV/fround.ll

Lines changed: 326 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,150 @@ entry:
2929
ret <4 x float> %vecins.3
3030
}
3131

32+
; Negative test: each of the 2 lanes of a loaded <2 x float> is extracted,
; converted with a scalar llvm.lrint.i32.f32 call, and inserted into a
; <2 x i32> result. The CHECK lines expect these scalar calls to remain in the
; output, i.e. no vectorization of lrint is expected here.
define <2 x i32> @lrint_v2i32f32(ptr %a) {
33+
; CHECK-LABEL: define <2 x i32> @lrint_v2i32f32(
34+
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
35+
; CHECK-NEXT: entry:
36+
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[A]], align 8
37+
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
38+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT]])
39+
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <2 x i32> undef, i32 [[TMP1]], i32 0
40+
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <2 x float> [[TMP0]], i32 1
41+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT_1]])
42+
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <2 x i32> [[VECINS]], i32 [[TMP2]], i32 1
43+
; CHECK-NEXT: ret <2 x i32> [[VECINS_1]]
44+
;
45+
entry:
46+
%0 = load <2 x float>, ptr %a
47+
%vecext = extractelement <2 x float> %0, i32 0
48+
%1 = call i32 @llvm.lrint.i32.f32(float %vecext)
49+
%vecins = insertelement <2 x i32> undef, i32 %1, i32 0
50+
%vecext.1 = extractelement <2 x float> %0, i32 1
51+
%2 = call i32 @llvm.lrint.i32.f32(float %vecext.1)
52+
%vecins.1 = insertelement <2 x i32> %vecins, i32 %2, i32 1
53+
ret <2 x i32> %vecins.1
54+
}
55+
56+
; Negative test: each of the 4 lanes of a loaded <4 x float> is extracted,
; converted with a scalar llvm.lrint.i32.f32 call, and inserted into a
; <4 x i32> result. The CHECK lines expect these scalar calls to remain in the
; output, i.e. no vectorization of lrint is expected here.
define <4 x i32> @lrint_v4i32f32(ptr %a) {
57+
; CHECK-LABEL: define <4 x i32> @lrint_v4i32f32(
58+
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
59+
; CHECK-NEXT: entry:
60+
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
61+
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
62+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT]])
63+
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i32 0
64+
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
65+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT_1]])
66+
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x i32> [[VECINS]], i32 [[TMP2]], i32 1
67+
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
68+
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT_2]])
69+
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x i32> [[VECINS_1]], i32 [[TMP3]], i32 2
70+
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
71+
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT_3]])
72+
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x i32> [[VECINS_2]], i32 [[TMP4]], i32 3
73+
; CHECK-NEXT: ret <4 x i32> [[VECINS_3]]
74+
;
75+
entry:
76+
%0 = load <4 x float>, ptr %a
77+
%vecext = extractelement <4 x float> %0, i32 0
78+
%1 = call i32 @llvm.lrint.i32.f32(float %vecext)
79+
%vecins = insertelement <4 x i32> undef, i32 %1, i32 0
80+
%vecext.1 = extractelement <4 x float> %0, i32 1
81+
%2 = call i32 @llvm.lrint.i32.f32(float %vecext.1)
82+
%vecins.1 = insertelement <4 x i32> %vecins, i32 %2, i32 1
83+
%vecext.2 = extractelement <4 x float> %0, i32 2
84+
%3 = call i32 @llvm.lrint.i32.f32(float %vecext.2)
85+
%vecins.2 = insertelement <4 x i32> %vecins.1, i32 %3, i32 2
86+
%vecext.3 = extractelement <4 x float> %0, i32 3
87+
%4 = call i32 @llvm.lrint.i32.f32(float %vecext.3)
88+
%vecins.3 = insertelement <4 x i32> %vecins.2, i32 %4, i32 3
89+
ret <4 x i32> %vecins.3
90+
}
91+
92+
; Negative test: each of the 8 lanes of a loaded <8 x float> is extracted,
; converted with a scalar llvm.lrint.i32.f32 call, and inserted into an
; <8 x i32> result. The CHECK lines expect these scalar calls to remain in the
; output, i.e. no vectorization of lrint is expected here.
define <8 x i32> @lrint_v8i32f32(ptr %a) {
93+
; CHECK-LABEL: define <8 x i32> @lrint_v8i32f32(
94+
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
95+
; CHECK-NEXT: entry:
96+
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[A]], align 32
97+
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <8 x float> [[TMP0]], i32 0
98+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT]])
99+
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <8 x i32> undef, i32 [[TMP1]], i32 0
100+
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <8 x float> [[TMP0]], i32 1
101+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT_1]])
102+
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <8 x i32> [[VECINS]], i32 [[TMP2]], i32 1
103+
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <8 x float> [[TMP0]], i32 2
104+
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT_2]])
105+
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <8 x i32> [[VECINS_1]], i32 [[TMP3]], i32 2
106+
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <8 x float> [[TMP0]], i32 3
107+
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT_3]])
108+
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <8 x i32> [[VECINS_2]], i32 [[TMP4]], i32 3
109+
; CHECK-NEXT: [[VECEXT_4:%.*]] = extractelement <8 x float> [[TMP0]], i32 4
110+
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT_4]])
111+
; CHECK-NEXT: [[VECINS_4:%.*]] = insertelement <8 x i32> [[VECINS_3]], i32 [[TMP5]], i32 4
112+
; CHECK-NEXT: [[VECEXT_5:%.*]] = extractelement <8 x float> [[TMP0]], i32 5
113+
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT_5]])
114+
; CHECK-NEXT: [[VECINS_5:%.*]] = insertelement <8 x i32> [[VECINS_4]], i32 [[TMP6]], i32 5
115+
; CHECK-NEXT: [[VECEXT_6:%.*]] = extractelement <8 x float> [[TMP0]], i32 6
116+
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT_6]])
117+
; CHECK-NEXT: [[VECINS_6:%.*]] = insertelement <8 x i32> [[VECINS_5]], i32 [[TMP7]], i32 6
118+
; CHECK-NEXT: [[VECEXT_7:%.*]] = extractelement <8 x float> [[TMP0]], i32 7
119+
; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT_7]])
120+
; CHECK-NEXT: [[VECINS_7:%.*]] = insertelement <8 x i32> [[VECINS_6]], i32 [[TMP8]], i32 7
121+
; CHECK-NEXT: ret <8 x i32> [[VECINS_7]]
122+
;
123+
entry:
124+
%0 = load <8 x float>, ptr %a
125+
%vecext = extractelement <8 x float> %0, i32 0
126+
%1 = call i32 @llvm.lrint.i32.f32(float %vecext)
127+
%vecins = insertelement <8 x i32> undef, i32 %1, i32 0
128+
%vecext.1 = extractelement <8 x float> %0, i32 1
129+
%2 = call i32 @llvm.lrint.i32.f32(float %vecext.1)
130+
%vecins.1 = insertelement <8 x i32> %vecins, i32 %2, i32 1
131+
%vecext.2 = extractelement <8 x float> %0, i32 2
132+
%3 = call i32 @llvm.lrint.i32.f32(float %vecext.2)
133+
%vecins.2 = insertelement <8 x i32> %vecins.1, i32 %3, i32 2
134+
%vecext.3 = extractelement <8 x float> %0, i32 3
135+
%4 = call i32 @llvm.lrint.i32.f32(float %vecext.3)
136+
%vecins.3 = insertelement <8 x i32> %vecins.2, i32 %4, i32 3
137+
%vecext.4 = extractelement <8 x float> %0, i32 4
138+
%5 = call i32 @llvm.lrint.i32.f32(float %vecext.4)
139+
%vecins.4 = insertelement <8 x i32> %vecins.3, i32 %5, i32 4
140+
%vecext.5 = extractelement <8 x float> %0, i32 5
141+
%6 = call i32 @llvm.lrint.i32.f32(float %vecext.5)
142+
%vecins.5 = insertelement <8 x i32> %vecins.4, i32 %6, i32 5
143+
%vecext.6 = extractelement <8 x float> %0, i32 6
144+
%7 = call i32 @llvm.lrint.i32.f32(float %vecext.6)
145+
%vecins.6 = insertelement <8 x i32> %vecins.5, i32 %7, i32 6
146+
%vecext.7 = extractelement <8 x float> %0, i32 7
147+
%8 = call i32 @llvm.lrint.i32.f32(float %vecext.7)
148+
%vecins.7 = insertelement <8 x i32> %vecins.6, i32 %8, i32 7
149+
ret <8 x i32> %vecins.7
150+
}
151+
152+
; Negative test: each of the 2 lanes of a loaded <2 x float> is extracted,
; converted with a scalar llvm.lrint.i64.f32 call, and inserted into a
; <2 x i64> result. The CHECK lines expect these scalar calls to remain in the
; output, i.e. no vectorization of lrint is expected here.
define <2 x i64> @lrint_v2i64f32(ptr %a) {
153+
; CHECK-LABEL: define <2 x i64> @lrint_v2i64f32(
154+
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
155+
; CHECK-NEXT: entry:
156+
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[A]], align 8
157+
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i64 0
158+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT]])
159+
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <2 x i64> undef, i64 [[TMP1]], i64 0
160+
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <2 x float> [[TMP0]], i64 1
161+
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_1]])
162+
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <2 x i64> [[VECINS]], i64 [[TMP2]], i64 1
163+
; CHECK-NEXT: ret <2 x i64> [[VECINS_1]]
164+
;
165+
entry:
166+
%0 = load <2 x float>, ptr %a
167+
%vecext = extractelement <2 x float> %0, i64 0
168+
%1 = call i64 @llvm.lrint.i64.f32(float %vecext)
169+
%vecins = insertelement <2 x i64> undef, i64 %1, i64 0
170+
%vecext.1 = extractelement <2 x float> %0, i64 1
171+
%2 = call i64 @llvm.lrint.i64.f32(float %vecext.1)
172+
%vecins.1 = insertelement <2 x i64> %vecins, i64 %2, i64 1
173+
ret <2 x i64> %vecins.1
174+
}
175+
32176
define <4 x i64> @lrint_v4i64f32(ptr %a) {
33177
; CHECK-LABEL: define <4 x i64> @lrint_v4i64f32(
34178
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
@@ -65,5 +209,187 @@ entry:
65209
ret <4 x i64> %vecins.3
66210
}
67211

212+
; Negative test: each of the 8 lanes of a loaded <8 x float> is extracted,
; converted with a scalar llvm.lrint.i64.f32 call, and inserted into an
; <8 x i64> result. The CHECK lines expect these scalar calls to remain in the
; output, i.e. no vectorization of lrint is expected here.
define <8 x i64> @lrint_v8i64f32(ptr %a) {
213+
; CHECK-LABEL: define <8 x i64> @lrint_v8i64f32(
214+
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
215+
; CHECK-NEXT: entry:
216+
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[A]], align 32
217+
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <8 x float> [[TMP0]], i64 0
218+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT]])
219+
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <8 x i64> undef, i64 [[TMP1]], i64 0
220+
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <8 x float> [[TMP0]], i64 1
221+
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_1]])
222+
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <8 x i64> [[VECINS]], i64 [[TMP2]], i64 1
223+
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <8 x float> [[TMP0]], i64 2
224+
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_2]])
225+
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <8 x i64> [[VECINS_1]], i64 [[TMP3]], i64 2
226+
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <8 x float> [[TMP0]], i64 3
227+
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_3]])
228+
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <8 x i64> [[VECINS_2]], i64 [[TMP4]], i64 3
229+
; CHECK-NEXT: [[VECEXT_4:%.*]] = extractelement <8 x float> [[TMP0]], i64 4
230+
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_4]])
231+
; CHECK-NEXT: [[VECINS_4:%.*]] = insertelement <8 x i64> [[VECINS_3]], i64 [[TMP5]], i64 4
232+
; CHECK-NEXT: [[VECEXT_5:%.*]] = extractelement <8 x float> [[TMP0]], i64 5
233+
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_5]])
234+
; CHECK-NEXT: [[VECINS_5:%.*]] = insertelement <8 x i64> [[VECINS_4]], i64 [[TMP6]], i64 5
235+
; CHECK-NEXT: [[VECEXT_6:%.*]] = extractelement <8 x float> [[TMP0]], i64 6
236+
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_6]])
237+
; CHECK-NEXT: [[VECINS_6:%.*]] = insertelement <8 x i64> [[VECINS_5]], i64 [[TMP7]], i64 6
238+
; CHECK-NEXT: [[VECEXT_7:%.*]] = extractelement <8 x float> [[TMP0]], i64 7
239+
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_7]])
240+
; CHECK-NEXT: [[VECINS_7:%.*]] = insertelement <8 x i64> [[VECINS_6]], i64 [[TMP8]], i64 7
241+
; CHECK-NEXT: ret <8 x i64> [[VECINS_7]]
242+
;
243+
entry:
244+
%0 = load <8 x float>, ptr %a
245+
%vecext = extractelement <8 x float> %0, i64 0
246+
%1 = call i64 @llvm.lrint.i64.f32(float %vecext)
247+
%vecins = insertelement <8 x i64> undef, i64 %1, i64 0
248+
%vecext.1 = extractelement <8 x float> %0, i64 1
249+
%2 = call i64 @llvm.lrint.i64.f32(float %vecext.1)
250+
%vecins.1 = insertelement <8 x i64> %vecins, i64 %2, i64 1
251+
%vecext.2 = extractelement <8 x float> %0, i64 2
252+
%3 = call i64 @llvm.lrint.i64.f32(float %vecext.2)
253+
%vecins.2 = insertelement <8 x i64> %vecins.1, i64 %3, i64 2
254+
%vecext.3 = extractelement <8 x float> %0, i64 3
255+
%4 = call i64 @llvm.lrint.i64.f32(float %vecext.3)
256+
%vecins.3 = insertelement <8 x i64> %vecins.2, i64 %4, i64 3
257+
%vecext.4 = extractelement <8 x float> %0, i64 4
258+
%5 = call i64 @llvm.lrint.i64.f32(float %vecext.4)
259+
%vecins.4 = insertelement <8 x i64> %vecins.3, i64 %5, i64 4
260+
%vecext.5 = extractelement <8 x float> %0, i64 5
261+
%6 = call i64 @llvm.lrint.i64.f32(float %vecext.5)
262+
%vecins.5 = insertelement <8 x i64> %vecins.4, i64 %6, i64 5
263+
%vecext.6 = extractelement <8 x float> %0, i64 6
264+
%7 = call i64 @llvm.lrint.i64.f32(float %vecext.6)
265+
%vecins.6 = insertelement <8 x i64> %vecins.5, i64 %7, i64 6
266+
%vecext.7 = extractelement <8 x float> %0, i64 7
267+
%8 = call i64 @llvm.lrint.i64.f32(float %vecext.7)
268+
%vecins.7 = insertelement <8 x i64> %vecins.6, i64 %8, i64 7
269+
ret <8 x i64> %vecins.7
270+
}
271+
272+
; Negative test: each of the 2 lanes of a loaded <2 x float> is extracted,
; converted with a scalar llvm.llrint.i64.f32 call, and inserted into a
; <2 x i64> result. The CHECK lines expect these scalar calls to remain in the
; output, i.e. no vectorization of llrint is expected here.
define <2 x i64> @llrint_v2i64f32(ptr %a) {
273+
; CHECK-LABEL: define <2 x i64> @llrint_v2i64f32(
274+
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
275+
; CHECK-NEXT: entry:
276+
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[A]], align 8
277+
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i64 0
278+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT]])
279+
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <2 x i64> undef, i64 [[TMP1]], i64 0
280+
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <2 x float> [[TMP0]], i64 1
281+
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_1]])
282+
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <2 x i64> [[VECINS]], i64 [[TMP2]], i64 1
283+
; CHECK-NEXT: ret <2 x i64> [[VECINS_1]]
284+
;
285+
entry:
286+
%0 = load <2 x float>, ptr %a
287+
%vecext = extractelement <2 x float> %0, i64 0
288+
%1 = call i64 @llvm.llrint.i64.f32(float %vecext)
289+
%vecins = insertelement <2 x i64> undef, i64 %1, i64 0
290+
%vecext.1 = extractelement <2 x float> %0, i64 1
291+
%2 = call i64 @llvm.llrint.i64.f32(float %vecext.1)
292+
%vecins.1 = insertelement <2 x i64> %vecins, i64 %2, i64 1
293+
ret <2 x i64> %vecins.1
294+
}
295+
296+
; Negative test: each of the 4 lanes of a loaded <4 x float> is extracted,
; converted with a scalar llvm.llrint.i64.f32 call, and inserted into a
; <4 x i64> result. The CHECK lines expect these scalar calls to remain in the
; output, i.e. no vectorization of llrint is expected here.
define <4 x i64> @llrint_v4i64f32(ptr %a) {
297+
; CHECK-LABEL: define <4 x i64> @llrint_v4i64f32(
298+
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
299+
; CHECK-NEXT: entry:
300+
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
301+
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
302+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT]])
303+
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x i64> undef, i64 [[TMP1]], i64 0
304+
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
305+
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_1]])
306+
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x i64> [[VECINS]], i64 [[TMP2]], i64 1
307+
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
308+
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_2]])
309+
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x i64> [[VECINS_1]], i64 [[TMP3]], i64 2
310+
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
311+
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_3]])
312+
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x i64> [[VECINS_2]], i64 [[TMP4]], i64 3
313+
; CHECK-NEXT: ret <4 x i64> [[VECINS_3]]
314+
;
315+
entry:
316+
%0 = load <4 x float>, ptr %a
317+
%vecext = extractelement <4 x float> %0, i64 0
318+
%1 = call i64 @llvm.llrint.i64.f32(float %vecext)
319+
%vecins = insertelement <4 x i64> undef, i64 %1, i64 0
320+
%vecext.1 = extractelement <4 x float> %0, i64 1
321+
%2 = call i64 @llvm.llrint.i64.f32(float %vecext.1)
322+
%vecins.1 = insertelement <4 x i64> %vecins, i64 %2, i64 1
323+
%vecext.2 = extractelement <4 x float> %0, i64 2
324+
%3 = call i64 @llvm.llrint.i64.f32(float %vecext.2)
325+
%vecins.2 = insertelement <4 x i64> %vecins.1, i64 %3, i64 2
326+
%vecext.3 = extractelement <4 x float> %0, i64 3
327+
%4 = call i64 @llvm.llrint.i64.f32(float %vecext.3)
328+
%vecins.3 = insertelement <4 x i64> %vecins.2, i64 %4, i64 3
329+
ret <4 x i64> %vecins.3
330+
}
331+
332+
; Negative test: each of the 8 lanes of a loaded <8 x float> is extracted,
; converted with a scalar llvm.llrint.i64.f32 call, and inserted into an
; <8 x i64> result. The CHECK lines expect these scalar calls to remain in the
; output, i.e. no vectorization of llrint is expected here.
define <8 x i64> @llrint_v8i64f32(ptr %a) {
333+
; CHECK-LABEL: define <8 x i64> @llrint_v8i64f32(
334+
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
335+
; CHECK-NEXT: entry:
336+
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[A]], align 32
337+
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <8 x float> [[TMP0]], i64 0
338+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT]])
339+
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <8 x i64> undef, i64 [[TMP1]], i64 0
340+
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <8 x float> [[TMP0]], i64 1
341+
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_1]])
342+
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <8 x i64> [[VECINS]], i64 [[TMP2]], i64 1
343+
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <8 x float> [[TMP0]], i64 2
344+
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_2]])
345+
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <8 x i64> [[VECINS_1]], i64 [[TMP3]], i64 2
346+
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <8 x float> [[TMP0]], i64 3
347+
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_3]])
348+
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <8 x i64> [[VECINS_2]], i64 [[TMP4]], i64 3
349+
; CHECK-NEXT: [[VECEXT_4:%.*]] = extractelement <8 x float> [[TMP0]], i64 4
350+
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_4]])
351+
; CHECK-NEXT: [[VECINS_4:%.*]] = insertelement <8 x i64> [[VECINS_3]], i64 [[TMP5]], i64 4
352+
; CHECK-NEXT: [[VECEXT_5:%.*]] = extractelement <8 x float> [[TMP0]], i64 5
353+
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_5]])
354+
; CHECK-NEXT: [[VECINS_5:%.*]] = insertelement <8 x i64> [[VECINS_4]], i64 [[TMP6]], i64 5
355+
; CHECK-NEXT: [[VECEXT_6:%.*]] = extractelement <8 x float> [[TMP0]], i64 6
356+
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_6]])
357+
; CHECK-NEXT: [[VECINS_6:%.*]] = insertelement <8 x i64> [[VECINS_5]], i64 [[TMP7]], i64 6
358+
; CHECK-NEXT: [[VECEXT_7:%.*]] = extractelement <8 x float> [[TMP0]], i64 7
359+
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_7]])
360+
; CHECK-NEXT: [[VECINS_7:%.*]] = insertelement <8 x i64> [[VECINS_6]], i64 [[TMP8]], i64 7
361+
; CHECK-NEXT: ret <8 x i64> [[VECINS_7]]
362+
;
363+
entry:
364+
%0 = load <8 x float>, ptr %a
365+
%vecext = extractelement <8 x float> %0, i64 0
366+
%1 = call i64 @llvm.llrint.i64.f32(float %vecext)
367+
%vecins = insertelement <8 x i64> undef, i64 %1, i64 0
368+
%vecext.1 = extractelement <8 x float> %0, i64 1
369+
%2 = call i64 @llvm.llrint.i64.f32(float %vecext.1)
370+
%vecins.1 = insertelement <8 x i64> %vecins, i64 %2, i64 1
371+
%vecext.2 = extractelement <8 x float> %0, i64 2
372+
%3 = call i64 @llvm.llrint.i64.f32(float %vecext.2)
373+
%vecins.2 = insertelement <8 x i64> %vecins.1, i64 %3, i64 2
374+
%vecext.3 = extractelement <8 x float> %0, i64 3
375+
%4 = call i64 @llvm.llrint.i64.f32(float %vecext.3)
376+
%vecins.3 = insertelement <8 x i64> %vecins.2, i64 %4, i64 3
377+
%vecext.4 = extractelement <8 x float> %0, i64 4
378+
%5 = call i64 @llvm.llrint.i64.f32(float %vecext.4)
379+
%vecins.4 = insertelement <8 x i64> %vecins.3, i64 %5, i64 4
380+
%vecext.5 = extractelement <8 x float> %0, i64 5
381+
%6 = call i64 @llvm.llrint.i64.f32(float %vecext.5)
382+
%vecins.5 = insertelement <8 x i64> %vecins.4, i64 %6, i64 5
383+
%vecext.6 = extractelement <8 x float> %0, i64 6
384+
%7 = call i64 @llvm.llrint.i64.f32(float %vecext.6)
385+
%vecins.6 = insertelement <8 x i64> %vecins.5, i64 %7, i64 6
386+
%vecext.7 = extractelement <8 x float> %0, i64 7
387+
%8 = call i64 @llvm.llrint.i64.f32(float %vecext.7)
388+
%vecins.7 = insertelement <8 x i64> %vecins.6, i64 %8, i64 7
389+
ret <8 x i64> %vecins.7
390+
}
391+
68392
declare float @llvm.rint.f32(float)
393+
declare i32 @llvm.lrint.i32.f32(float)
69394
declare i64 @llvm.lrint.i64.f32(float)
395+
declare i64 @llvm.llrint.i64.f32(float)

0 commit comments

Comments
 (0)