Skip to content

Commit ac7c816

Browse files
committed
Revert "VectorUtils: mark lrint, llrint as trivially vectorizable (#69945)"
This reverts commit 5bfd89b. It was causing build failures on ffmpeg on i686.
1 parent b6b31e7 commit ac7c816

File tree

4 files changed

+139
-48
lines changed

4 files changed

+139
-48
lines changed

llvm/lib/Analysis/VectorUtils.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,6 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
9191
case Intrinsic::canonicalize:
9292
case Intrinsic::fptosi_sat:
9393
case Intrinsic::fptoui_sat:
94-
case Intrinsic::lrint:
95-
case Intrinsic::llrint:
9694
return true;
9795
default:
9896
return false;
@@ -124,8 +122,6 @@ bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
124122
switch (ID) {
125123
case Intrinsic::fptosi_sat:
126124
case Intrinsic::fptoui_sat:
127-
case Intrinsic::lrint:
128-
case Intrinsic::llrint:
129125
return OpdIdx == -1 || OpdIdx == 0;
130126
case Intrinsic::is_fpclass:
131127
return OpdIdx == 0;

llvm/test/Transforms/LoopVectorize/intrinsic.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1602,7 +1602,7 @@ declare i32 @llvm.lrint.i32.f32(float)
16021602

16031603
define void @lrint_i32_f32(ptr %x, ptr %y, i64 %n) {
16041604
; CHECK-LABEL: @lrint_i32_f32(
1605-
; CHECK: llvm.lrint.v4i32.v4f32
1605+
; CHECK-NOT: llvm.lrint.v4i32.v4f32
16061606
; CHECK: ret void
16071607
;
16081608
entry:
@@ -1628,7 +1628,7 @@ declare i64 @llvm.llrint.i64.f32(float)
16281628

16291629
define void @llrint_i64_f32(ptr %x, ptr %y, i64 %n) {
16301630
; CHECK-LABEL: @llrint_i64_f32(
1631-
; CHECK: llvm.llrint.v4i64.v4f32
1631+
; CHECK-NOT: llvm.llrint.v4i32.v4f32
16321632
; CHECK: ret void
16331633
;
16341634
entry:

llvm/test/Transforms/SLPVectorizer/RISCV/fround.ll

Lines changed: 135 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,13 @@ define <2 x i32> @lrint_v2i32f32(ptr %a) {
3434
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
3535
; CHECK-NEXT: entry:
3636
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[A]], align 8
37-
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.lrint.v2i32.v2f32(<2 x float> [[TMP0]])
38-
; CHECK-NEXT: ret <2 x i32> [[TMP1]]
37+
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
38+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT]])
39+
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <2 x i32> undef, i32 [[TMP1]], i32 0
40+
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <2 x float> [[TMP0]], i32 1
41+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT_1]])
42+
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <2 x i32> [[VECINS]], i32 [[TMP2]], i32 1
43+
; CHECK-NEXT: ret <2 x i32> [[VECINS_1]]
3944
;
4045
entry:
4146
%0 = load <2 x float>, ptr %a
@@ -53,8 +58,19 @@ define <4 x i32> @lrint_v4i32f32(ptr %a) {
5358
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
5459
; CHECK-NEXT: entry:
5560
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
56-
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> [[TMP0]])
57-
; CHECK-NEXT: ret <4 x i32> [[TMP1]]
61+
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
62+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT]])
63+
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i32 0
64+
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
65+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT_1]])
66+
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x i32> [[VECINS]], i32 [[TMP2]], i32 1
67+
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
68+
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT_2]])
69+
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x i32> [[VECINS_1]], i32 [[TMP3]], i32 2
70+
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
71+
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT_3]])
72+
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x i32> [[VECINS_2]], i32 [[TMP4]], i32 3
73+
; CHECK-NEXT: ret <4 x i32> [[VECINS_3]]
5874
;
5975
entry:
6076
%0 = load <4 x float>, ptr %a
@@ -78,8 +94,31 @@ define <8 x i32> @lrint_v8i32f32(ptr %a) {
7894
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
7995
; CHECK-NEXT: entry:
8096
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[A]], align 32
81-
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> [[TMP0]])
82-
; CHECK-NEXT: ret <8 x i32> [[TMP1]]
97+
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <8 x float> [[TMP0]], i32 0
98+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT]])
99+
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <8 x i32> undef, i32 [[TMP1]], i32 0
100+
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <8 x float> [[TMP0]], i32 1
101+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT_1]])
102+
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <8 x i32> [[VECINS]], i32 [[TMP2]], i32 1
103+
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <8 x float> [[TMP0]], i32 2
104+
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT_2]])
105+
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <8 x i32> [[VECINS_1]], i32 [[TMP3]], i32 2
106+
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <8 x float> [[TMP0]], i32 3
107+
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT_3]])
108+
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <8 x i32> [[VECINS_2]], i32 [[TMP4]], i32 3
109+
; CHECK-NEXT: [[VECEXT_4:%.*]] = extractelement <8 x float> [[TMP0]], i32 4
110+
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT_4]])
111+
; CHECK-NEXT: [[VECINS_4:%.*]] = insertelement <8 x i32> [[VECINS_3]], i32 [[TMP5]], i32 4
112+
; CHECK-NEXT: [[VECEXT_5:%.*]] = extractelement <8 x float> [[TMP0]], i32 5
113+
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT_5]])
114+
; CHECK-NEXT: [[VECINS_5:%.*]] = insertelement <8 x i32> [[VECINS_4]], i32 [[TMP6]], i32 5
115+
; CHECK-NEXT: [[VECEXT_6:%.*]] = extractelement <8 x float> [[TMP0]], i32 6
116+
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT_6]])
117+
; CHECK-NEXT: [[VECINS_6:%.*]] = insertelement <8 x i32> [[VECINS_5]], i32 [[TMP7]], i32 6
118+
; CHECK-NEXT: [[VECEXT_7:%.*]] = extractelement <8 x float> [[TMP0]], i32 7
119+
; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT_7]])
120+
; CHECK-NEXT: [[VECINS_7:%.*]] = insertelement <8 x i32> [[VECINS_6]], i32 [[TMP8]], i32 7
121+
; CHECK-NEXT: ret <8 x i32> [[VECINS_7]]
83122
;
84123
entry:
85124
%0 = load <8 x float>, ptr %a
@@ -115,8 +154,13 @@ define <2 x i64> @lrint_v2i64f32(ptr %a) {
115154
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
116155
; CHECK-NEXT: entry:
117156
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[A]], align 8
118-
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> [[TMP0]])
119-
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
157+
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i64 0
158+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT]])
159+
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <2 x i64> undef, i64 [[TMP1]], i64 0
160+
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <2 x float> [[TMP0]], i64 1
161+
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_1]])
162+
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <2 x i64> [[VECINS]], i64 [[TMP2]], i64 1
163+
; CHECK-NEXT: ret <2 x i64> [[VECINS_1]]
120164
;
121165
entry:
122166
%0 = load <2 x float>, ptr %a
@@ -134,8 +178,19 @@ define <4 x i64> @lrint_v4i64f32(ptr %a) {
134178
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
135179
; CHECK-NEXT: entry:
136180
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
137-
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float> [[TMP0]])
138-
; CHECK-NEXT: ret <4 x i64> [[TMP1]]
181+
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
182+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT]])
183+
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x i64> undef, i64 [[TMP1]], i64 0
184+
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
185+
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_1]])
186+
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x i64> [[VECINS]], i64 [[TMP2]], i64 1
187+
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
188+
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_2]])
189+
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x i64> [[VECINS_1]], i64 [[TMP3]], i64 2
190+
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
191+
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_3]])
192+
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x i64> [[VECINS_2]], i64 [[TMP4]], i64 3
193+
; CHECK-NEXT: ret <4 x i64> [[VECINS_3]]
139194
;
140195
entry:
141196
%0 = load <4 x float>, ptr %a
@@ -159,14 +214,31 @@ define <8 x i64> @lrint_v8i64f32(ptr %a) {
159214
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
160215
; CHECK-NEXT: entry:
161216
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[A]], align 32
162-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
163-
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float> [[TMP1]])
164-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
165-
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
166-
; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float> [[TMP4]])
167-
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
168-
; CHECK-NEXT: [[VECINS_71:%.*]] = shufflevector <8 x i64> [[TMP3]], <8 x i64> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
169-
; CHECK-NEXT: ret <8 x i64> [[VECINS_71]]
217+
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <8 x float> [[TMP0]], i64 0
218+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT]])
219+
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <8 x i64> undef, i64 [[TMP1]], i64 0
220+
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <8 x float> [[TMP0]], i64 1
221+
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_1]])
222+
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <8 x i64> [[VECINS]], i64 [[TMP2]], i64 1
223+
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <8 x float> [[TMP0]], i64 2
224+
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_2]])
225+
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <8 x i64> [[VECINS_1]], i64 [[TMP3]], i64 2
226+
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <8 x float> [[TMP0]], i64 3
227+
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_3]])
228+
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <8 x i64> [[VECINS_2]], i64 [[TMP4]], i64 3
229+
; CHECK-NEXT: [[VECEXT_4:%.*]] = extractelement <8 x float> [[TMP0]], i64 4
230+
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_4]])
231+
; CHECK-NEXT: [[VECINS_4:%.*]] = insertelement <8 x i64> [[VECINS_3]], i64 [[TMP5]], i64 4
232+
; CHECK-NEXT: [[VECEXT_5:%.*]] = extractelement <8 x float> [[TMP0]], i64 5
233+
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_5]])
234+
; CHECK-NEXT: [[VECINS_5:%.*]] = insertelement <8 x i64> [[VECINS_4]], i64 [[TMP6]], i64 5
235+
; CHECK-NEXT: [[VECEXT_6:%.*]] = extractelement <8 x float> [[TMP0]], i64 6
236+
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_6]])
237+
; CHECK-NEXT: [[VECINS_6:%.*]] = insertelement <8 x i64> [[VECINS_5]], i64 [[TMP7]], i64 6
238+
; CHECK-NEXT: [[VECEXT_7:%.*]] = extractelement <8 x float> [[TMP0]], i64 7
239+
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_7]])
240+
; CHECK-NEXT: [[VECINS_7:%.*]] = insertelement <8 x i64> [[VECINS_6]], i64 [[TMP8]], i64 7
241+
; CHECK-NEXT: ret <8 x i64> [[VECINS_7]]
170242
;
171243
entry:
172244
%0 = load <8 x float>, ptr %a
@@ -202,8 +274,13 @@ define <2 x i64> @llrint_v2i64f32(ptr %a) {
202274
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
203275
; CHECK-NEXT: entry:
204276
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[A]], align 8
205-
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> [[TMP0]])
206-
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
277+
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i64 0
278+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT]])
279+
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <2 x i64> undef, i64 [[TMP1]], i64 0
280+
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <2 x float> [[TMP0]], i64 1
281+
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_1]])
282+
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <2 x i64> [[VECINS]], i64 [[TMP2]], i64 1
283+
; CHECK-NEXT: ret <2 x i64> [[VECINS_1]]
207284
;
208285
entry:
209286
%0 = load <2 x float>, ptr %a
@@ -221,8 +298,19 @@ define <4 x i64> @llrint_v4i64f32(ptr %a) {
221298
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
222299
; CHECK-NEXT: entry:
223300
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
224-
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> [[TMP0]])
225-
; CHECK-NEXT: ret <4 x i64> [[TMP1]]
301+
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
302+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT]])
303+
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x i64> undef, i64 [[TMP1]], i64 0
304+
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
305+
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_1]])
306+
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x i64> [[VECINS]], i64 [[TMP2]], i64 1
307+
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
308+
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_2]])
309+
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x i64> [[VECINS_1]], i64 [[TMP3]], i64 2
310+
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
311+
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_3]])
312+
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x i64> [[VECINS_2]], i64 [[TMP4]], i64 3
313+
; CHECK-NEXT: ret <4 x i64> [[VECINS_3]]
226314
;
227315
entry:
228316
%0 = load <4 x float>, ptr %a
@@ -246,14 +334,31 @@ define <8 x i64> @llrint_v8i64f32(ptr %a) {
246334
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
247335
; CHECK-NEXT: entry:
248336
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[A]], align 32
249-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
250-
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> [[TMP1]])
251-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
252-
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
253-
; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> [[TMP4]])
254-
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
255-
; CHECK-NEXT: [[VECINS_71:%.*]] = shufflevector <8 x i64> [[TMP3]], <8 x i64> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
256-
; CHECK-NEXT: ret <8 x i64> [[VECINS_71]]
337+
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <8 x float> [[TMP0]], i64 0
338+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT]])
339+
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <8 x i64> undef, i64 [[TMP1]], i64 0
340+
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <8 x float> [[TMP0]], i64 1
341+
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_1]])
342+
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <8 x i64> [[VECINS]], i64 [[TMP2]], i64 1
343+
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <8 x float> [[TMP0]], i64 2
344+
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_2]])
345+
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <8 x i64> [[VECINS_1]], i64 [[TMP3]], i64 2
346+
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <8 x float> [[TMP0]], i64 3
347+
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_3]])
348+
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <8 x i64> [[VECINS_2]], i64 [[TMP4]], i64 3
349+
; CHECK-NEXT: [[VECEXT_4:%.*]] = extractelement <8 x float> [[TMP0]], i64 4
350+
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_4]])
351+
; CHECK-NEXT: [[VECINS_4:%.*]] = insertelement <8 x i64> [[VECINS_3]], i64 [[TMP5]], i64 4
352+
; CHECK-NEXT: [[VECEXT_5:%.*]] = extractelement <8 x float> [[TMP0]], i64 5
353+
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_5]])
354+
; CHECK-NEXT: [[VECINS_5:%.*]] = insertelement <8 x i64> [[VECINS_4]], i64 [[TMP6]], i64 5
355+
; CHECK-NEXT: [[VECEXT_6:%.*]] = extractelement <8 x float> [[TMP0]], i64 6
356+
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_6]])
357+
; CHECK-NEXT: [[VECINS_6:%.*]] = insertelement <8 x i64> [[VECINS_5]], i64 [[TMP7]], i64 6
358+
; CHECK-NEXT: [[VECEXT_7:%.*]] = extractelement <8 x float> [[TMP0]], i64 7
359+
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_7]])
360+
; CHECK-NEXT: [[VECINS_7:%.*]] = insertelement <8 x i64> [[VECINS_6]], i64 [[TMP8]], i64 7
361+
; CHECK-NEXT: ret <8 x i64> [[VECINS_7]]
257362
;
258363
entry:
259364
%0 = load <8 x float>, ptr %a

llvm/test/Transforms/Scalarizer/intrinsics.ll

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -217,12 +217,7 @@ define <2 x i32> @scalarize_fptoui_sat(<2 x float> %x) #0 {
217217

218218
define <2 x i32> @scalarize_lrint(<2 x float> %x) #0 {
219219
; CHECK-LABEL: @scalarize_lrint(
220-
; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
221-
; CHECK-NEXT: [[RND_I0:%.*]] = call i32 @llvm.lrint.i32.f32(float [[X_I0]])
222-
; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1
223-
; CHECK-NEXT: [[RND_I1:%.*]] = call i32 @llvm.lrint.i32.f32(float [[X_I1]])
224-
; CHECK-NEXT: [[RND_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RND_I0]], i64 0
225-
; CHECK-NEXT: [[RND:%.*]] = insertelement <2 x i32> [[RND_UPTO0]], i32 [[RND_I1]], i64 1
220+
; CHECK-NEXT: [[RND:%.*]] = call <2 x i32> @llvm.lrint.v2i32.v2f32(<2 x float> [[X:%.*]])
226221
; CHECK-NEXT: ret <2 x i32> [[RND]]
227222
;
228223
%rnd = call <2 x i32> @llvm.lrint.v2i32.v2f32(<2 x float> %x)
@@ -231,12 +226,7 @@ define <2 x i32> @scalarize_lrint(<2 x float> %x) #0 {
231226

232227
define <2 x i32> @scalarize_llrint(<2 x float> %x) #0 {
233228
; CHECK-LABEL: @scalarize_llrint(
234-
; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
235-
; CHECK-NEXT: [[RND_I0:%.*]] = call i32 @llvm.llrint.i32.f32(float [[X_I0]])
236-
; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1
237-
; CHECK-NEXT: [[RND_I1:%.*]] = call i32 @llvm.llrint.i32.f32(float [[X_I1]])
238-
; CHECK-NEXT: [[RND_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RND_I0]], i64 0
239-
; CHECK-NEXT: [[RND:%.*]] = insertelement <2 x i32> [[RND_UPTO0]], i32 [[RND_I1]], i64 1
229+
; CHECK-NEXT: [[RND:%.*]] = call <2 x i32> @llvm.llrint.v2i32.v2f32(<2 x float> [[X:%.*]])
240230
; CHECK-NEXT: ret <2 x i32> [[RND]]
241231
;
242232
%rnd = call <2 x i32> @llvm.llrint.v2i32.v2f32(<2 x float> %x)

0 commit comments

Comments
 (0)