 define <2 x double> @mask_sitofp_2i64_2f64(<2 x i64> %a) nounwind {
 ; X32-SSE-LABEL: mask_sitofp_2i64_2f64:
 ; X32-SSE: # BB#0:
-; X32-SSE-NEXT: pushl %ebp
-; X32-SSE-NEXT: movl %esp, %ebp
-; X32-SSE-NEXT: andl $-8, %esp
-; X32-SSE-NEXT: subl $32, %esp
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT: movq {{.*#+}} xmm1 = xmm0[0],zero
-; X32-SSE-NEXT: movq %xmm1, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; X32-SSE-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: fildll {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: fstpl {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: fildll {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: fstpl (%esp)
-; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X32-SSE-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
-; X32-SSE-NEXT: movl %ebp, %esp
-; X32-SSE-NEXT: popl %ebp
+; X32-SSE-NEXT: cvtdq2pd %xmm0, %xmm0
 ; X32-SSE-NEXT: retl
 ;
 ; X32-AVX-LABEL: mask_sitofp_2i64_2f64:
 ; X32-AVX: # BB#0:
-; X32-AVX-NEXT: pushl %ebp
-; X32-AVX-NEXT: movl %esp, %ebp
-; X32-AVX-NEXT: andl $-8, %esp
-; X32-AVX-NEXT: subl $32, %esp
-; X32-AVX-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-AVX-NEXT: vmovq {{.*#+}} xmm1 = xmm0[0],zero
-; X32-AVX-NEXT: vmovq %xmm1, {{[0-9]+}}(%esp)
-; X32-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; X32-AVX-NEXT: vmovq %xmm0, {{[0-9]+}}(%esp)
-; X32-AVX-NEXT: fildll {{[0-9]+}}(%esp)
-; X32-AVX-NEXT: fstpl {{[0-9]+}}(%esp)
-; X32-AVX-NEXT: fildll {{[0-9]+}}(%esp)
-; X32-AVX-NEXT: fstpl (%esp)
-; X32-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X32-AVX-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
-; X32-AVX-NEXT: movl %ebp, %esp
-; X32-AVX-NEXT: popl %ebp
+; X32-AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[8,9],zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; X32-AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
 ; X32-AVX-NEXT: retl
 ;
 ; X64-SSE-LABEL: mask_sitofp_2i64_2f64:
 ; X64-SSE: # BB#0:
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; X64-SSE-NEXT: pand {{.*}}(%rip), %xmm0
-; X64-SSE-NEXT: movd %xmm0, %rax
-; X64-SSE-NEXT: cvtsi2sdq %rax, %xmm1
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; X64-SSE-NEXT: movd %xmm0, %rax
-; X64-SSE-NEXT: xorps %xmm0, %xmm0
-; X64-SSE-NEXT: cvtsi2sdq %rax, %xmm0
-; X64-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; X64-SSE-NEXT: movapd %xmm1, %xmm0
+; X64-SSE-NEXT: cvtdq2pd %xmm0, %xmm0
 ; X64-SSE-NEXT: retq
 ;
 ; X64-AVX-LABEL: mask_sitofp_2i64_2f64:
 ; X64-AVX: # BB#0:
-; X64-AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT: vpextrq $1, %xmm0, %rax
-; X64-AVX-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1
-; X64-AVX-NEXT: vmovq %xmm0, %rax
-; X64-AVX-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0
-; X64-AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[8,9],zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; X64-AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
 ; X64-AVX-NEXT: retq
   %and = and <2 x i64> %a, <i64 255, i64 65535>
   %cvt = sitofp <2 x i64> %and to <2 x double>
@@ -148,113 +109,43 @@ define <2 x double> @mask_uitofp_2i64_2f64(<2 x i64> %a) nounwind {
 define <4 x float> @mask_sitofp_4i64_4f32(<4 x i64> %a) nounwind {
 ; X32-SSE-LABEL: mask_sitofp_4i64_4f32:
 ; X32-SSE: # BB#0:
-; X32-SSE-NEXT: pushl %ebp
-; X32-SSE-NEXT: movl %esp, %ebp
-; X32-SSE-NEXT: andl $-8, %esp
-; X32-SSE-NEXT: subl $48, %esp
 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm1
-; X32-SSE-NEXT: movq %xmm1, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; X32-SSE-NEXT: movq %xmm1, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; X32-SSE-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: fildll {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: fstps {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: fildll {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: fstps (%esp)
-; X32-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X32-SSE-NEXT: fildll {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: fstps {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: fildll {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: fstps {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X32-SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; X32-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-SSE-NEXT: movl %ebp, %esp
-; X32-SSE-NEXT: popl %ebp
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X32-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE-NEXT: cvtdq2ps %xmm0, %xmm0
 ; X32-SSE-NEXT: retl
 ;
 ; X32-AVX-LABEL: mask_sitofp_4i64_4f32:
 ; X32-AVX: # BB#0:
-; X32-AVX-NEXT: pushl %ebp
-; X32-AVX-NEXT: movl %esp, %ebp
-; X32-AVX-NEXT: andl $-8, %esp
-; X32-AVX-NEXT: subl $48, %esp
 ; X32-AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
-; X32-AVX-NEXT: vpextrd $1, %xmm0, %eax
-; X32-AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm1
-; X32-AVX-NEXT: vmovq %xmm1, {{[0-9]+}}(%esp)
-; X32-AVX-NEXT: vpextrd $3, %xmm0, %eax
-; X32-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X32-AVX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
-; X32-AVX-NEXT: vmovq %xmm1, {{[0-9]+}}(%esp)
-; X32-AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
-; X32-AVX-NEXT: vpextrd $1, %xmm0, %eax
-; X32-AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm1
-; X32-AVX-NEXT: vmovq %xmm1, {{[0-9]+}}(%esp)
-; X32-AVX-NEXT: vpextrd $3, %xmm0, %eax
-; X32-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; X32-AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
-; X32-AVX-NEXT: vmovq %xmm0, {{[0-9]+}}(%esp)
-; X32-AVX-NEXT: fildll {{[0-9]+}}(%esp)
-; X32-AVX-NEXT: fstps {{[0-9]+}}(%esp)
-; X32-AVX-NEXT: fildll {{[0-9]+}}(%esp)
-; X32-AVX-NEXT: fstps {{[0-9]+}}(%esp)
-; X32-AVX-NEXT: fildll {{[0-9]+}}(%esp)
-; X32-AVX-NEXT: fstps {{[0-9]+}}(%esp)
-; X32-AVX-NEXT: fildll {{[0-9]+}}(%esp)
-; X32-AVX-NEXT: fstps (%esp)
-; X32-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
-; X32-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
-; X32-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
-; X32-AVX-NEXT: movl %ebp, %esp
-; X32-AVX-NEXT: popl %ebp
+; X32-AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
+; X32-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
+; X32-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X32-AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; X32-AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
 ; X32-AVX-NEXT: vzeroupper
 ; X32-AVX-NEXT: retl
 ;
 ; X64-SSE-LABEL: mask_sitofp_4i64_4f32:
 ; X64-SSE: # BB#0:
 ; X64-SSE-NEXT: pand {{.*}}(%rip), %xmm0
 ; X64-SSE-NEXT: pand {{.*}}(%rip), %xmm1
-; X64-SSE-NEXT: movd %xmm1, %rax
-; X64-SSE-NEXT: cvtsi2ssq %rax, %xmm3
-; X64-SSE-NEXT: movd %xmm0, %rax
-; X64-SSE-NEXT: cvtsi2ssq %rax, %xmm2
-; X64-SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; X64-SSE-NEXT: movd %xmm1, %rax
-; X64-SSE-NEXT: xorps %xmm1, %xmm1
-; X64-SSE-NEXT: cvtsi2ssq %rax, %xmm1
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; X64-SSE-NEXT: movd %xmm0, %rax
-; X64-SSE-NEXT: xorps %xmm0, %xmm0
-; X64-SSE-NEXT: cvtsi2ssq %rax, %xmm0
-; X64-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X64-SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; X64-SSE-NEXT: movaps %xmm2, %xmm0
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-SSE-NEXT: cvtdq2ps %xmm0, %xmm0
 ; X64-SSE-NEXT: retq
 ;
 ; X64-AVX-LABEL: mask_sitofp_4i64_4f32:
 ; X64-AVX: # BB#0:
 ; X64-AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
-; X64-AVX-NEXT: vpextrq $1, %xmm0, %rax
-; X64-AVX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
-; X64-AVX-NEXT: vmovq %xmm0, %rax
-; X64-AVX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
-; X64-AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
-; X64-AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
-; X64-AVX-NEXT: vmovq %xmm0, %rax
-; X64-AVX-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
-; X64-AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
-; X64-AVX-NEXT: vpextrq $1, %xmm0, %rax
-; X64-AVX-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
-; X64-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; X64-AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
+; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
+; X64-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X64-AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; X64-AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
 ; X64-AVX-NEXT: vzeroupper
 ; X64-AVX-NEXT: retq
   %and = and <4 x i64> %a, <i64 127, i64 255, i64 4095, i64 65535>