Skip to content

Commit f4f4a8b

Browse files
committed
[InstCombine][X86] Add repeated ops demanded elts tests for SSE intrinsics (PR24523)
1 parent ec3bb6c commit f4f4a8b

File tree

3 files changed

+192 additions, -0 deletions

llvm/test/Transforms/InstCombine/X86/x86-sse.ll

Lines changed: 83 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -119,6 +119,17 @@ define float @test_add_ss_1(float %a, float %b) {
119119
ret float %7
120120
}
121121

122+
; Repeated-operand case for @llvm.x86.sse.add.ss: the same vector (%a inserted
; into lane 0 of zeroinitializer) is passed as both operands, and only lane 0
; of the result is extracted. The expected output below is a single scalar
; fadd of %a with itself, with no vector ops or intrinsic call left.
define float @test_add_ss_2(float %a) {
123+
; CHECK-LABEL: @test_add_ss_2(
124+
; CHECK-NEXT: [[TMP1:%.*]] = fadd float [[A:%.*]], [[A]]
125+
; CHECK-NEXT: ret float [[TMP1]]
126+
;
127+
%1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
128+
%2 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %1, <4 x float> %1)
129+
%3 = extractelement <4 x float> %2, i32 0
130+
ret float %3
131+
}
132+
122133
define float @test_sub_ss_0(float %a, float %b) {
123134
; CHECK-LABEL: @test_sub_ss_0(
124135
; CHECK-NEXT: [[TMP1:%.*]] = fsub float [[A:%.*]], [[B:%.*]]
@@ -151,6 +162,17 @@ define float @test_sub_ss_2(float %a, float %b) {
151162
ret float %7
152163
}
153164

165+
; Repeated-operand case for @llvm.x86.sse.sub.ss: the same vector (%a inserted
; into lane 0 of zeroinitializer) is passed as both operands, and only lane 0
; of the result is extracted. The expected output below is a scalar fsub of %a
; from itself; note it is kept as an fsub rather than folded to a constant.
define float @test_sub_ss_3(float %a) {
166+
; CHECK-LABEL: @test_sub_ss_3(
167+
; CHECK-NEXT: [[TMP1:%.*]] = fsub float [[A:%.*]], [[A]]
168+
; CHECK-NEXT: ret float [[TMP1]]
169+
;
170+
%1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
171+
%2 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %1, <4 x float> %1)
172+
%3 = extractelement <4 x float> %2, i32 0
173+
ret float %3
174+
}
175+
154176
define float @test_mul_ss_0(float %a, float %b) {
155177
; CHECK-LABEL: @test_mul_ss_0(
156178
; CHECK-NEXT: [[TMP1:%.*]] = fmul float [[A:%.*]], [[B:%.*]]
@@ -183,6 +205,17 @@ define float @test_mul_ss_3(float %a, float %b) {
183205
ret float %7
184206
}
185207

208+
; Repeated-operand case for @llvm.x86.sse.mul.ss: the same vector (%a inserted
; into lane 0 of zeroinitializer) is passed as both operands, and only lane 0
; of the result is extracted. The expected output below is a single scalar
; fmul of %a with itself.
define float @test_mul_ss_4(float %a) {
209+
; CHECK-LABEL: @test_mul_ss_4(
210+
; CHECK-NEXT: [[TMP1:%.*]] = fmul float [[A:%.*]], [[A]]
211+
; CHECK-NEXT: ret float [[TMP1]]
212+
;
213+
%1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
214+
%2 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %1, <4 x float> %1)
215+
%3 = extractelement <4 x float> %2, i32 0
216+
ret float %3
217+
}
218+
186219
define float @test_div_ss_0(float %a, float %b) {
187220
; CHECK-LABEL: @test_div_ss_0(
188221
; CHECK-NEXT: [[TMP1:%.*]] = fdiv float [[A:%.*]], [[B:%.*]]
@@ -215,6 +248,17 @@ define float @test_div_ss_1(float %a, float %b) {
215248
ret float %7
216249
}
217250

251+
; Repeated-operand case for @llvm.x86.sse.div.ss: the same vector (%a inserted
; into lane 0 of zeroinitializer) is passed as both operands, and only lane 0
; of the result is extracted. The expected output below is a scalar fdiv of %a
; by itself; note it is kept as an fdiv rather than folded to a constant.
define float @test_div_ss_2(float %a) {
252+
; CHECK-LABEL: @test_div_ss_2(
253+
; CHECK-NEXT: [[TMP1:%.*]] = fdiv float [[A:%.*]], [[A]]
254+
; CHECK-NEXT: ret float [[TMP1]]
255+
;
256+
%1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
257+
%2 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %1, <4 x float> %1)
258+
%3 = extractelement <4 x float> %2, i32 0
259+
ret float %3
260+
}
261+
218262
define <4 x float> @test_min_ss(<4 x float> %a, <4 x float> %b) {
219263
; CHECK-LABEL: @test_min_ss(
220264
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]])
@@ -262,6 +306,19 @@ define float @test_min_ss_2(float %a, float %b) {
262306
ret float %7
263307
}
264308

309+
; Repeated-operand case for @llvm.x86.sse.min.ss: the same vector (%a inserted
; into lane 0 of zeroinitializer) is passed as both operands, and only lane 0
; of the result is extracted. Unlike the add/sub/mul/div variants, the expected
; output below still contains the intrinsic call with the shared operand used
; twice; the only visible change is that lane 0 of the insertelement base
; constant becomes undef while lanes 1-3 stay 0.0.
define float @test_min_ss_3(float %a) {
310+
; CHECK-LABEL: @test_min_ss_3(
311+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> <float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float [[A:%.*]], i32 0
312+
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[TMP1]], <4 x float> [[TMP1]])
313+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
314+
; CHECK-NEXT: ret float [[TMP3]]
315+
;
316+
%1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
317+
%2 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %1, <4 x float> %1)
318+
%3 = extractelement <4 x float> %2, i32 0
319+
ret float %3
320+
}
321+
265322
define <4 x float> @test_max_ss(<4 x float> %a, <4 x float> %b) {
266323
; CHECK-LABEL: @test_max_ss(
267324
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]])
@@ -309,6 +366,19 @@ define float @test_max_ss_3(float %a, float %b) {
309366
ret float %7
310367
}
311368

369+
; Repeated-operand case for @llvm.x86.sse.max.ss: the same vector (%a inserted
; into lane 0 of zeroinitializer) is passed as both operands, and only lane 0
; of the result is extracted. The expected output below still contains the
; intrinsic call with the shared operand used twice; the only visible change
; is that lane 0 of the insertelement base constant becomes undef while
; lanes 1-3 stay 0.0.
define float @test_max_ss_4(float %a) {
370+
; CHECK-LABEL: @test_max_ss_4(
371+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> <float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float [[A:%.*]], i32 0
372+
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[TMP1]], <4 x float> [[TMP1]])
373+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
374+
; CHECK-NEXT: ret float [[TMP3]]
375+
;
376+
%1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
377+
%2 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %1, <4 x float> %1)
378+
%3 = extractelement <4 x float> %2, i32 0
379+
ret float %3
380+
}
381+
312382
define <4 x float> @test_cmp_ss(<4 x float> %a, <4 x float> %b) {
313383
; CHECK-LABEL: @test_cmp_ss(
314384
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i8 0)
@@ -356,6 +426,19 @@ define float @test_cmp_ss_1(float %a, float %b) {
356426
ret float %7
357427
}
358428

429+
; Repeated-operand case for @llvm.x86.sse.cmp.ss with immediate i8 3: the same
; vector (%a inserted into lane 0 of zeroinitializer) is passed as both vector
; operands, and only lane 0 of the result is extracted. The expected output
; below still contains the intrinsic call with the shared operand used twice
; and the same immediate; the only visible change is that lane 0 of the
; insertelement base constant becomes undef while lanes 1-3 stay 0.0.
define float @test_cmp_ss_2(float %a) {
430+
; CHECK-LABEL: @test_cmp_ss_2(
431+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> <float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float [[A:%.*]], i32 0
432+
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[TMP1]], <4 x float> [[TMP1]], i8 3)
433+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
434+
; CHECK-NEXT: ret float [[TMP3]]
435+
;
436+
%1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
437+
%2 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %1, <4 x float> %1, i8 3)
438+
%3 = extractelement <4 x float> %2, i32 0
439+
ret float %3
440+
}
441+
359442
define i32 @test_comieq_ss_0(float %a, float %b) {
360443
; CHECK-LABEL: @test_comieq_ss_0(
361444
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0

llvm/test/Transforms/InstCombine/X86/x86-sse2.ll

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,17 @@ define double @test_add_sd_1(double %a, double %b) {
5252
ret double %6
5353
}
5454

55+
; Repeated-operand case for @llvm.x86.sse2.add.sd: the same vector (%a inserted
; into lane 0 of zeroinitializer) is passed as both operands, and only lane 0
; of the result is extracted. The expected output below is a single scalar
; fadd of %a with itself, with no vector ops or intrinsic call left.
define double @test_add_sd_2(double %a) {
56+
; CHECK-LABEL: @test_add_sd_2(
57+
; CHECK-NEXT: [[TMP1:%.*]] = fadd double [[A:%.*]], [[A]]
58+
; CHECK-NEXT: ret double [[TMP1]]
59+
;
60+
%1 = insertelement <2 x double> zeroinitializer, double %a, i32 0
61+
%2 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %1, <2 x double> %1)
62+
%3 = extractelement <2 x double> %2, i32 0
63+
ret double %3
64+
}
65+
5566
define double @test_sub_sd_0(double %a, double %b) {
5667
; CHECK-LABEL: @test_sub_sd_0(
5768
; CHECK-NEXT: [[TMP1:%.*]] = fsub double [[A:%.*]], [[B:%.*]]
@@ -79,6 +90,17 @@ define double @test_sub_sd_1(double %a, double %b) {
7990
ret double %6
8091
}
8192

93+
; Repeated-operand case for @llvm.x86.sse2.sub.sd: the same vector (%a inserted
; into lane 0 of zeroinitializer) is passed as both operands, and only lane 0
; of the result is extracted. The expected output below is a scalar fsub of %a
; from itself; note it is kept as an fsub rather than folded to a constant.
define double @test_sub_sd_2(double %a) {
94+
; CHECK-LABEL: @test_sub_sd_2(
95+
; CHECK-NEXT: [[TMP1:%.*]] = fsub double [[A:%.*]], [[A]]
96+
; CHECK-NEXT: ret double [[TMP1]]
97+
;
98+
%1 = insertelement <2 x double> zeroinitializer, double %a, i32 0
99+
%2 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %1, <2 x double> %1)
100+
%3 = extractelement <2 x double> %2, i32 0
101+
ret double %3
102+
}
103+
82104
define double @test_mul_sd_0(double %a, double %b) {
83105
; CHECK-LABEL: @test_mul_sd_0(
84106
; CHECK-NEXT: [[TMP1:%.*]] = fmul double [[A:%.*]], [[B:%.*]]
@@ -106,6 +128,17 @@ define double @test_mul_sd_1(double %a, double %b) {
106128
ret double %6
107129
}
108130

131+
; Repeated-operand case for @llvm.x86.sse2.mul.sd: the same vector (%a inserted
; into lane 0 of zeroinitializer) is passed as both operands, and only lane 0
; of the result is extracted. The expected output below is a single scalar
; fmul of %a with itself.
define double @test_mul_sd_2(double %a) {
132+
; CHECK-LABEL: @test_mul_sd_2(
133+
; CHECK-NEXT: [[TMP1:%.*]] = fmul double [[A:%.*]], [[A]]
134+
; CHECK-NEXT: ret double [[TMP1]]
135+
;
136+
%1 = insertelement <2 x double> zeroinitializer, double %a, i32 0
137+
%2 = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %1, <2 x double> %1)
138+
%3 = extractelement <2 x double> %2, i32 0
139+
ret double %3
140+
}
141+
109142
define double @test_div_sd_0(double %a, double %b) {
110143
; CHECK-LABEL: @test_div_sd_0(
111144
; CHECK-NEXT: [[TMP1:%.*]] = fdiv double [[A:%.*]], [[B:%.*]]
@@ -133,6 +166,17 @@ define double @test_div_sd_1(double %a, double %b) {
133166
ret double %6
134167
}
135168

169+
; Repeated-operand case for @llvm.x86.sse2.div.sd: the same vector (%a inserted
; into lane 0 of zeroinitializer) is passed as both operands, and only lane 0
; of the result is extracted. The expected output below is a scalar fdiv of %a
; by itself; note it is kept as an fdiv rather than folded to a constant.
define double @test_div_sd_2(double %a) {
170+
; CHECK-LABEL: @test_div_sd_2(
171+
; CHECK-NEXT: [[TMP1:%.*]] = fdiv double [[A:%.*]], [[A]]
172+
; CHECK-NEXT: ret double [[TMP1]]
173+
;
174+
%1 = insertelement <2 x double> zeroinitializer, double %a, i32 0
175+
%2 = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %1, <2 x double> %1)
176+
%3 = extractelement <2 x double> %2, i32 0
177+
ret double %3
178+
}
179+
136180
define <2 x double> @test_min_sd(<2 x double> %a, <2 x double> %b) {
137181
; CHECK-LABEL: @test_min_sd(
138182
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]])
@@ -173,6 +217,19 @@ define double @test_min_sd_1(double %a, double %b) {
173217
ret double %6
174218
}
175219

220+
; Repeated-operand case for @llvm.x86.sse2.min.sd: the same vector (%a inserted
; into lane 0 of zeroinitializer) is passed as both operands, and only lane 0
; of the result is extracted. Unlike the add/sub/mul/div variants, the expected
; output below still contains the intrinsic call with the shared operand used
; twice; the only visible change is that lane 0 of the insertelement base
; constant becomes undef while lane 1 stays 0.0.
define double @test_min_sd_2(double %a) {
221+
; CHECK-LABEL: @test_min_sd_2(
222+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> <double undef, double 0.000000e+00>, double [[A:%.*]], i32 0
223+
; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> [[TMP1]], <2 x double> [[TMP1]])
224+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
225+
; CHECK-NEXT: ret double [[TMP3]]
226+
;
227+
%1 = insertelement <2 x double> zeroinitializer, double %a, i32 0
228+
%2 = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %1, <2 x double> %1)
229+
%3 = extractelement <2 x double> %2, i32 0
230+
ret double %3
231+
}
232+
176233
define <2 x double> @test_max_sd(<2 x double> %a, <2 x double> %b) {
177234
; CHECK-LABEL: @test_max_sd(
178235
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]])
@@ -213,6 +270,19 @@ define double @test_max_sd_1(double %a, double %b) {
213270
ret double %6
214271
}
215272

273+
; Repeated-operand case for @llvm.x86.sse2.max.sd: the same vector (%a inserted
; into lane 0 of zeroinitializer) is passed as both operands, and only lane 0
; of the result is extracted. The expected output below still contains the
; intrinsic call with the shared operand used twice; the only visible change
; is that lane 0 of the insertelement base constant becomes undef while
; lane 1 stays 0.0.
define double @test_max_sd_2(double %a) {
274+
; CHECK-LABEL: @test_max_sd_2(
275+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> <double undef, double 0.000000e+00>, double [[A:%.*]], i32 0
276+
; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> [[TMP1]], <2 x double> [[TMP1]])
277+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
278+
; CHECK-NEXT: ret double [[TMP3]]
279+
;
280+
%1 = insertelement <2 x double> zeroinitializer, double %a, i32 0
281+
%2 = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %1, <2 x double> %1)
282+
%3 = extractelement <2 x double> %2, i32 0
283+
ret double %3
284+
}
285+
216286
define <2 x double> @test_cmp_sd(<2 x double> %a, <2 x double> %b) {
217287
; CHECK-LABEL: @test_cmp_sd(
218288
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], i8 0)
@@ -253,6 +323,19 @@ define double @test_cmp_sd_1(double %a, double %b) {
253323
ret double %6
254324
}
255325

326+
; Repeated-operand case for @llvm.x86.sse2.cmp.sd with immediate i8 3: the same
; vector (%a inserted into lane 0 of zeroinitializer) is passed as both vector
; operands, and only lane 0 of the result is extracted. The expected output
; below still contains the intrinsic call with the shared operand used twice
; and the same immediate; the only visible change is that lane 0 of the
; insertelement base constant becomes undef while lane 1 stays 0.0.
define double @test_cmp_sd_2(double %a) {
327+
; CHECK-LABEL: @test_cmp_sd_2(
328+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> <double undef, double 0.000000e+00>, double [[A:%.*]], i32 0
329+
; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[TMP1]], <2 x double> [[TMP1]], i8 3)
330+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
331+
; CHECK-NEXT: ret double [[TMP3]]
332+
;
333+
%1 = insertelement <2 x double> zeroinitializer, double %a, i32 0
334+
%2 = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %1, <2 x double> %1, i8 3)
335+
%3 = extractelement <2 x double> %2, i32 0
336+
ret double %3
337+
}
338+
256339
define i32 @test_comieq_sd_0(double %a, double %b) {
257340
; CHECK-LABEL: @test_comieq_sd_0(
258341
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0

llvm/test/Transforms/InstCombine/X86/x86-sse41.ll

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,19 @@ define double @test_round_sd_1(double %a, double %b) {
4242
ret double %6
4343
}
4444

45+
; Repeated-operand case for @llvm.x86.sse41.round.sd with immediate i32 10: the
; same vector (%a inserted into lane 0 of zeroinitializer) is passed as both
; vector operands, and only lane 0 of the result is extracted. In the expected
; output below the intrinsic call remains, but its first operand is replaced by
; undef and the second operand is %a inserted into an undef base vector.
define double @test_round_sd_2(double %a) {
46+
; CHECK-LABEL: @test_round_sd_2(
47+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
48+
; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> undef, <2 x double> [[TMP1]], i32 10)
49+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
50+
; CHECK-NEXT: ret double [[TMP3]]
51+
;
52+
%1 = insertelement <2 x double> zeroinitializer, double %a, i32 0
53+
%2 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %1, <2 x double> %1, i32 10)
54+
%3 = extractelement <2 x double> %2, i32 0
55+
ret double %3
56+
}
57+
4558
define <4 x float> @test_round_ss(<4 x float> %a, <4 x float> %b) {
4659
; CHECK-LABEL: @test_round_ss(
4760
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x float> [[B:%.*]], i32 10)
@@ -94,5 +107,18 @@ define float @test_round_ss_2(float %a, float %b) {
94107
ret float %r
95108
}
96109

110+
; Repeated-operand case for @llvm.x86.sse41.round.ss with immediate i32 10: the
; same vector (%a inserted into lane 0 of zeroinitializer) is passed as both
; vector operands, and only lane 0 of the result is extracted. In the expected
; output below the intrinsic call remains, but its first operand is replaced by
; undef and the second operand is %a inserted into an undef base vector.
define float @test_round_ss_3(float %a) {
111+
; CHECK-LABEL: @test_round_ss_3(
112+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0
113+
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> undef, <4 x float> [[TMP1]], i32 10)
114+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
115+
; CHECK-NEXT: ret float [[TMP3]]
116+
;
117+
%1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
118+
%2 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %1, <4 x float> %1, i32 10)
119+
%3 = extractelement <4 x float> %2, i32 0
120+
ret float %3
121+
}
122+
97123
declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
98124
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone

0 commit comments

Comments
 (0)