Skip to content

Commit be851c6

Browse files
committed
[X86] Add SSE/AVX1/AVX2 + f16/f32 test coverage to splat(fpext) tests
As discussed on D141657
1 parent 51911a6 commit be851c6

File tree

1 file changed

+241
-9
lines changed

1 file changed

+241
-9
lines changed
Lines changed: 241 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,249 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s
2+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=SSE
3+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
4+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
5+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512
36

4-
define <4 x double> @prefer(float* %p) {
5-
; CHECK-LABEL: prefer:
6-
; CHECK: # %bb.0: # %entry
7-
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
8-
; CHECK-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
9-
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
10-
; CHECK-NEXT: retq
7+
; Splat-of-fpext test, f32 -> <2 x double>. A float is loaded from %p, placed
; in lane 0, splatted across a <2 x float>, and the vector is then extended to
; <2 x double>. The assertions below expect the conversion to be done on the
; scalar (cvtss2sd) with the duplicate (movlhps / vmovddup) applied afterwards.
; The AVX1, AVX2 and AVX512 run lines share the common AVX prefix for this
; function.
define <2 x double> @prefer_f32_v2f64(ptr %p) nounwind {
8+
; SSE-LABEL: prefer_f32_v2f64:
9+
; SSE: # %bb.0: # %entry
10+
; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
11+
; SSE-NEXT: cvtss2sd %xmm0, %xmm0
12+
; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
13+
; SSE-NEXT: retq
14+
;
15+
; AVX-LABEL: prefer_f32_v2f64:
16+
; AVX: # %bb.0: # %entry
17+
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
18+
; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
19+
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
20+
; AVX-NEXT: retq
1121
entry:
12-
%0 = load float, float* %p, align 4
22+
%0 = load float, ptr %p, align 4
23+
%vecinit.i = insertelement <2 x float> undef, float %0, i64 0
24+
; An all-zero shuffle mask picks lane 0 for every result element (a splat).
%vecinit3.i = shufflevector <2 x float> %vecinit.i, <2 x float> poison, <2 x i32> zeroinitializer
25+
%conv.i = fpext <2 x float> %vecinit3.i to <2 x double>
26+
ret <2 x double> %conv.i
27+
}
28+
29+
; Splat-of-fpext test, f32 -> <4 x double>. A float is loaded from %p, splatted
; across a <4 x float>, and the vector is extended to <4 x double>. The
; assertions below expect a scalar cvtss2sd followed by a broadcast of the
; double:
;   - SSE run: movlhps, then a copy into xmm1 for the second half of the result
;   - AVX1 run: vmovddup plus vinsertf128 to fill the upper 128 bits
;   - AVX2 and AVX512 runs: a single vbroadcastsd into ymm0
define <4 x double> @prefer_f32_v4f64(ptr %p) nounwind {
30+
; SSE-LABEL: prefer_f32_v4f64:
31+
; SSE: # %bb.0: # %entry
32+
; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
33+
; SSE-NEXT: cvtss2sd %xmm0, %xmm0
34+
; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
35+
; SSE-NEXT: movaps %xmm0, %xmm1
36+
; SSE-NEXT: retq
37+
;
38+
; AVX1-LABEL: prefer_f32_v4f64:
39+
; AVX1: # %bb.0: # %entry
40+
; AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
41+
; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
42+
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
43+
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
44+
; AVX1-NEXT: retq
45+
;
46+
; AVX2-LABEL: prefer_f32_v4f64:
47+
; AVX2: # %bb.0: # %entry
48+
; AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
49+
; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
50+
; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
51+
; AVX2-NEXT: retq
52+
;
53+
; AVX512-LABEL: prefer_f32_v4f64:
54+
; AVX512: # %bb.0: # %entry
55+
; AVX512-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
56+
; AVX512-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
57+
; AVX512-NEXT: vbroadcastsd %xmm0, %ymm0
58+
; AVX512-NEXT: retq
59+
entry:
60+
%0 = load float, ptr %p, align 4
1361
%vecinit.i = insertelement <4 x float> undef, float %0, i64 0
1462
; An all-zero shuffle mask picks lane 0 for every result element (a splat).
%vecinit3.i = shufflevector <4 x float> %vecinit.i, <4 x float> poison, <4 x i32> zeroinitializer
1563
%conv.i = fpext <4 x float> %vecinit3.i to <4 x double>
1664
ret <4 x double> %conv.i
1765
}
66+
67+
; Splat-of-fpext test, f16 -> <4 x float>. A half is loaded from %p, splatted
; across a <4 x half>, and the vector is extended to <4 x float>. The
; assertions below expect a single scalar half -> float conversion followed by
; a broadcast of the float:
;   - SSE, AVX1 and AVX2 runs: one call to __extendhfsf2, then a shuffle or
;     broadcast of lane 0
;   - AVX512 run: inline vcvtph2ps followed by vbroadcastss, with no call
define <4 x float> @prefer_f16_v4f32(ptr %p) nounwind {
68+
; SSE-LABEL: prefer_f16_v4f32:
69+
; SSE: # %bb.0: # %entry
70+
; SSE-NEXT: pushq %rax
71+
; SSE-NEXT: pinsrw $0, (%rdi), %xmm0
72+
; SSE-NEXT: callq __extendhfsf2@PLT
73+
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
74+
; SSE-NEXT: popq %rax
75+
; SSE-NEXT: retq
76+
;
77+
; AVX1-LABEL: prefer_f16_v4f32:
78+
; AVX1: # %bb.0: # %entry
79+
; AVX1-NEXT: pushq %rax
80+
; AVX1-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0
81+
; AVX1-NEXT: callq __extendhfsf2@PLT
82+
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
83+
; AVX1-NEXT: popq %rax
84+
; AVX1-NEXT: retq
85+
;
86+
; AVX2-LABEL: prefer_f16_v4f32:
87+
; AVX2: # %bb.0: # %entry
88+
; AVX2-NEXT: pushq %rax
89+
; AVX2-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0
90+
; AVX2-NEXT: callq __extendhfsf2@PLT
91+
; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
92+
; AVX2-NEXT: popq %rax
93+
; AVX2-NEXT: retq
94+
;
95+
; AVX512-LABEL: prefer_f16_v4f32:
96+
; AVX512: # %bb.0: # %entry
97+
; AVX512-NEXT: movzwl (%rdi), %eax
98+
; AVX512-NEXT: vmovd %eax, %xmm0
99+
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
100+
; AVX512-NEXT: vbroadcastss %xmm0, %xmm0
101+
; AVX512-NEXT: retq
102+
entry:
103+
; NOTE(review): align 4 on a half load looks carried over from the f32 tests;
; confirm it is intentional.
%0 = load half, ptr %p, align 4
104+
%vecinit.i = insertelement <4 x half> undef, half %0, i64 0
105+
; An all-zero shuffle mask picks lane 0 for every result element (a splat).
%vecinit3.i = shufflevector <4 x half> %vecinit.i, <4 x half> poison, <4 x i32> zeroinitializer
106+
%conv.i = fpext <4 x half> %vecinit3.i to <4 x float>
107+
ret <4 x float> %conv.i
108+
}
109+
110+
; Splat-of-fpext test, f16 -> <8 x float>. A half is loaded from %p, splatted
; across an <8 x half>, and the vector is extended to <8 x float>. The
; assertions below expect a single scalar half -> float conversion followed by
; a broadcast of the float:
;   - SSE run: __extendhfsf2 call, shufps, then a copy into xmm1 for the second
;     half of the result
;   - AVX1 run: __extendhfsf2 call, vpshufd, then vinsertf128 for the upper
;     128 bits
;   - AVX2 run: __extendhfsf2 call, then vpbroadcastd into ymm0
;   - AVX512 run: inline vcvtph2ps, then vbroadcastss into ymm0, with no call
define <8 x float> @prefer_f16_v8f32(ptr %p) nounwind {
111+
; SSE-LABEL: prefer_f16_v8f32:
112+
; SSE: # %bb.0: # %entry
113+
; SSE-NEXT: pushq %rax
114+
; SSE-NEXT: pinsrw $0, (%rdi), %xmm0
115+
; SSE-NEXT: callq __extendhfsf2@PLT
116+
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
117+
; SSE-NEXT: movaps %xmm0, %xmm1
118+
; SSE-NEXT: popq %rax
119+
; SSE-NEXT: retq
120+
;
121+
; AVX1-LABEL: prefer_f16_v8f32:
122+
; AVX1: # %bb.0: # %entry
123+
; AVX1-NEXT: pushq %rax
124+
; AVX1-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0
125+
; AVX1-NEXT: callq __extendhfsf2@PLT
126+
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
127+
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
128+
; AVX1-NEXT: popq %rax
129+
; AVX1-NEXT: retq
130+
;
131+
; AVX2-LABEL: prefer_f16_v8f32:
132+
; AVX2: # %bb.0: # %entry
133+
; AVX2-NEXT: pushq %rax
134+
; AVX2-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0
135+
; AVX2-NEXT: callq __extendhfsf2@PLT
136+
; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
137+
; AVX2-NEXT: popq %rax
138+
; AVX2-NEXT: retq
139+
;
140+
; AVX512-LABEL: prefer_f16_v8f32:
141+
; AVX512: # %bb.0: # %entry
142+
; AVX512-NEXT: movzwl (%rdi), %eax
143+
; AVX512-NEXT: vmovd %eax, %xmm0
144+
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
145+
; AVX512-NEXT: vbroadcastss %xmm0, %ymm0
146+
; AVX512-NEXT: retq
147+
entry:
148+
; NOTE(review): align 4 on a half load looks carried over from the f32 tests;
; confirm it is intentional.
%0 = load half, ptr %p, align 4
149+
%vecinit.i = insertelement <8 x half> undef, half %0, i64 0
150+
; An all-zero shuffle mask picks lane 0 for every result element (a splat).
%vecinit3.i = shufflevector <8 x half> %vecinit.i, <8 x half> poison, <8 x i32> zeroinitializer
151+
%conv.i = fpext <8 x half> %vecinit3.i to <8 x float>
152+
ret <8 x float> %conv.i
153+
}
154+
155+
; Splat-of-fpext test, f16 -> <2 x double>. A half is loaded from %p, splatted
; across a <2 x half>, and the vector is extended directly to <2 x double>.
; The assertions below expect the extension to be done on the scalar in two
; steps (half -> float, then cvtss2sd) with the duplicate applied last:
;   - SSE, AVX1 and AVX2 runs: __extendhfsf2 call for the half -> float step
;   - AVX512 run: inline vcvtph2ps for the half -> float step, with no call
define <2 x double> @prefer_f16_v2f64(ptr %p) nounwind {
156+
; SSE-LABEL: prefer_f16_v2f64:
157+
; SSE: # %bb.0: # %entry
158+
; SSE-NEXT: pushq %rax
159+
; SSE-NEXT: pinsrw $0, (%rdi), %xmm0
160+
; SSE-NEXT: callq __extendhfsf2@PLT
161+
; SSE-NEXT: cvtss2sd %xmm0, %xmm0
162+
; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
163+
; SSE-NEXT: popq %rax
164+
; SSE-NEXT: retq
165+
;
166+
; AVX1-LABEL: prefer_f16_v2f64:
167+
; AVX1: # %bb.0: # %entry
168+
; AVX1-NEXT: pushq %rax
169+
; AVX1-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0
170+
; AVX1-NEXT: callq __extendhfsf2@PLT
171+
; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
172+
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
173+
; AVX1-NEXT: popq %rax
174+
; AVX1-NEXT: retq
175+
;
176+
; AVX2-LABEL: prefer_f16_v2f64:
177+
; AVX2: # %bb.0: # %entry
178+
; AVX2-NEXT: pushq %rax
179+
; AVX2-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0
180+
; AVX2-NEXT: callq __extendhfsf2@PLT
181+
; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
182+
; AVX2-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
183+
; AVX2-NEXT: popq %rax
184+
; AVX2-NEXT: retq
185+
;
186+
; AVX512-LABEL: prefer_f16_v2f64:
187+
; AVX512: # %bb.0: # %entry
188+
; AVX512-NEXT: movzwl (%rdi), %eax
189+
; AVX512-NEXT: vmovd %eax, %xmm0
190+
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
191+
; AVX512-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
192+
; AVX512-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
193+
; AVX512-NEXT: retq
194+
entry:
195+
; NOTE(review): align 4 on a half load looks carried over from the f32 tests;
; confirm it is intentional.
%0 = load half, ptr %p, align 4
196+
%vecinit.i = insertelement <2 x half> undef, half %0, i64 0
197+
; An all-zero shuffle mask picks lane 0 for every result element (a splat).
%vecinit3.i = shufflevector <2 x half> %vecinit.i, <2 x half> poison, <2 x i32> zeroinitializer
198+
%conv.i = fpext <2 x half> %vecinit3.i to <2 x double>
199+
ret <2 x double> %conv.i
200+
}
201+
202+
; Splat-of-fpext test, f16 -> <4 x double>. A half is loaded from %p, splatted
; across a <4 x half>, and the vector is extended directly to <4 x double>.
; The assertions below expect the extension to be done on the scalar in two
; steps (half -> float, then cvtss2sd) with the broadcast applied last:
;   - SSE run: __extendhfsf2 call, movlhps, then a copy into xmm1 for the
;     second half of the result
;   - AVX1 run: __extendhfsf2 call, vmovddup, then vinsertf128 for the upper
;     128 bits
;   - AVX2 run: __extendhfsf2 call, then vbroadcastsd into ymm0
;   - AVX512 run: inline vcvtph2ps, then vbroadcastsd into ymm0, with no call
define <4 x double> @prefer_f16_v4f64(ptr %p) nounwind {
203+
; SSE-LABEL: prefer_f16_v4f64:
204+
; SSE: # %bb.0: # %entry
205+
; SSE-NEXT: pushq %rax
206+
; SSE-NEXT: pinsrw $0, (%rdi), %xmm0
207+
; SSE-NEXT: callq __extendhfsf2@PLT
208+
; SSE-NEXT: cvtss2sd %xmm0, %xmm0
209+
; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
210+
; SSE-NEXT: movaps %xmm0, %xmm1
211+
; SSE-NEXT: popq %rax
212+
; SSE-NEXT: retq
213+
;
214+
; AVX1-LABEL: prefer_f16_v4f64:
215+
; AVX1: # %bb.0: # %entry
216+
; AVX1-NEXT: pushq %rax
217+
; AVX1-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0
218+
; AVX1-NEXT: callq __extendhfsf2@PLT
219+
; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
220+
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
221+
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
222+
; AVX1-NEXT: popq %rax
223+
; AVX1-NEXT: retq
224+
;
225+
; AVX2-LABEL: prefer_f16_v4f64:
226+
; AVX2: # %bb.0: # %entry
227+
; AVX2-NEXT: pushq %rax
228+
; AVX2-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0
229+
; AVX2-NEXT: callq __extendhfsf2@PLT
230+
; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
231+
; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
232+
; AVX2-NEXT: popq %rax
233+
; AVX2-NEXT: retq
234+
;
235+
; AVX512-LABEL: prefer_f16_v4f64:
236+
; AVX512: # %bb.0: # %entry
237+
; AVX512-NEXT: movzwl (%rdi), %eax
238+
; AVX512-NEXT: vmovd %eax, %xmm0
239+
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
240+
; AVX512-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
241+
; AVX512-NEXT: vbroadcastsd %xmm0, %ymm0
242+
; AVX512-NEXT: retq
243+
entry:
244+
; NOTE(review): align 4 on a half load looks carried over from the f32 tests;
; confirm it is intentional.
%0 = load half, ptr %p, align 4
245+
%vecinit.i = insertelement <4 x half> undef, half %0, i64 0
246+
; An all-zero shuffle mask picks lane 0 for every result element (a splat).
%vecinit3.i = shufflevector <4 x half> %vecinit.i, <4 x half> poison, <4 x i32> zeroinitializer
247+
%conv.i = fpext <4 x half> %vecinit3.i to <4 x double>
248+
ret <4 x double> %conv.i
249+
}

0 commit comments

Comments
 (0)