
Commit c348bc4
[X86][SSE] Improve recognition of i64 sitofp conversions that can be performed as i32 (PR29078)
Until AVX512DQ we only support i64/vXi64 sitofp conversions as scalars. This patch checks whether the sign bit extends far enough that we can truncate the input to an i32 and then perform the sitofp without loss of precision.

Differential Revision: https://reviews.llvm.org/D24345

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@281502 91177308-0d34-0410-b5e6-96231b3b80d8
Parent: 1e06f6c

3 files changed: +59, -151 lines


lib/Target/X86/X86ISelLowering.cpp

Lines changed: 17 additions & 0 deletions
@@ -31234,6 +31234,23 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
     return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P);
   }

+  // Without AVX512DQ we only support i64 to float scalar conversion. For both
+  // vectors and scalars, see if we know that the upper bits are all the sign
+  // bit, in which case we can truncate the input to i32 and convert from that.
+  if (InVT.getScalarSizeInBits() > 32 && !Subtarget.hasDQI()) {
+    unsigned BitWidth = InVT.getScalarSizeInBits();
+    unsigned NumSignBits = DAG.ComputeNumSignBits(Op0);
+    if (NumSignBits >= (BitWidth - 31)) {
+      EVT TruncVT = EVT::getIntegerVT(*DAG.getContext(), 32);
+      if (InVT.isVector())
+        TruncVT = EVT::getVectorVT(*DAG.getContext(), TruncVT,
+                                   InVT.getVectorNumElements());
+      SDLoc dl(N);
+      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Op0);
+      return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Trunc);
+    }
+  }
+
   // Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
   // a 32-bit target where SSE doesn't support i64->FP operations.
   if (!Subtarget.useSoftFloat() && Op0.getOpcode() == ISD::LOAD) {
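The guard `NumSignBits >= (BitWidth - 31)` is exactly the condition under which a wider signed integer fits in an i32: at most 31 value bits plus the sign remain, so the truncation preserves the value and the narrower sitofp gives the same result. A minimal standalone sketch of that argument (plain C++, not LLVM code; `numSignBits` and the sample values are hypothetical stand-ins for what `DAG.ComputeNumSignBits` proves about an SDValue):

```cpp
#include <cassert>
#include <cstdint>
#include <initializer_list>

// Hypothetical stand-in for DAG.ComputeNumSignBits: counts how many
// high-order bits of V (including the sign bit itself) are copies of
// the sign bit.
static unsigned numSignBits(int64_t V) {
  uint64_t U = (uint64_t)V;
  unsigned N = 1;
  while (N < 64 && ((U >> (63 - N)) & 1) == (U >> 63))
    ++N;
  return N;
}

int main() {
  const unsigned BitWidth = 64;
  for (int64_t V : {int64_t(0), int64_t(-1), int64_t(123456),
                    int64_t(INT32_MIN), int64_t(INT32_MAX)}) {
    if (numSignBits(V) >= BitWidth - 31) {  // the patch's guard
      double Direct = (double)V;            // direct i64 sitofp
      double ViaI32 = (double)(int32_t)V;   // truncate to i32, then sitofp
      assert(Direct == ViaI32);             // no precision is lost
    }
  }
  return 0;
}
```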

test/CodeGen/X86/i64-to-float.ll

Lines changed: 26 additions & 135 deletions
@@ -9,67 +9,28 @@
 define <2 x double> @mask_sitofp_2i64_2f64(<2 x i64> %a) nounwind {
 ; X32-SSE-LABEL: mask_sitofp_2i64_2f64:
 ; X32-SSE:       # BB#0:
-; X32-SSE-NEXT:    pushl %ebp
-; X32-SSE-NEXT:    movl %esp, %ebp
-; X32-SSE-NEXT:    andl $-8, %esp
-; X32-SSE-NEXT:    subl $32, %esp
+; X32-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    movq {{.*#+}} xmm1 = xmm0[0],zero
-; X32-SSE-NEXT:    movq %xmm1, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; X32-SSE-NEXT:    movq %xmm0, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT:    fildll {{[0-9]+}}(%esp)
-; X32-SSE-NEXT:    fstpl {{[0-9]+}}(%esp)
-; X32-SSE-NEXT:    fildll {{[0-9]+}}(%esp)
-; X32-SSE-NEXT:    fstpl (%esp)
-; X32-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; X32-SSE-NEXT:    movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
-; X32-SSE-NEXT:    movl %ebp, %esp
-; X32-SSE-NEXT:    popl %ebp
+; X32-SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
 ; X32-SSE-NEXT:    retl
 ;
 ; X32-AVX-LABEL: mask_sitofp_2i64_2f64:
 ; X32-AVX:       # BB#0:
-; X32-AVX-NEXT:    pushl %ebp
-; X32-AVX-NEXT:    movl %esp, %ebp
-; X32-AVX-NEXT:    andl $-8, %esp
-; X32-AVX-NEXT:    subl $32, %esp
-; X32-AVX-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-AVX-NEXT:    vmovq {{.*#+}} xmm1 = xmm0[0],zero
-; X32-AVX-NEXT:    vmovq %xmm1, {{[0-9]+}}(%esp)
-; X32-AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; X32-AVX-NEXT:    vmovq %xmm0, {{[0-9]+}}(%esp)
-; X32-AVX-NEXT:    fildll {{[0-9]+}}(%esp)
-; X32-AVX-NEXT:    fstpl {{[0-9]+}}(%esp)
-; X32-AVX-NEXT:    fildll {{[0-9]+}}(%esp)
-; X32-AVX-NEXT:    fstpl (%esp)
-; X32-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X32-AVX-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
-; X32-AVX-NEXT:    movl %ebp, %esp
-; X32-AVX-NEXT:    popl %ebp
+; X32-AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[8,9],zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; X32-AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
 ; X32-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mask_sitofp_2i64_2f64:
 ; X64-SSE:       # BB#0:
+; X64-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; X64-SSE-NEXT:    pand {{.*}}(%rip), %xmm0
-; X64-SSE-NEXT:    movd %xmm0, %rax
-; X64-SSE-NEXT:    cvtsi2sdq %rax, %xmm1
-; X64-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; X64-SSE-NEXT:    movd %xmm0, %rax
-; X64-SSE-NEXT:    xorps %xmm0, %xmm0
-; X64-SSE-NEXT:    cvtsi2sdq %rax, %xmm0
-; X64-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; X64-SSE-NEXT:    movapd %xmm1, %xmm0
+; X64-SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
 ; X64-SSE-NEXT:    retq
 ;
 ; X64-AVX-LABEL: mask_sitofp_2i64_2f64:
 ; X64-AVX:       # BB#0:
-; X64-AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT:    vpextrq $1, %xmm0, %rax
-; X64-AVX-NEXT:    vcvtsi2sdq %rax, %xmm1, %xmm1
-; X64-AVX-NEXT:    vmovq %xmm0, %rax
-; X64-AVX-NEXT:    vcvtsi2sdq %rax, %xmm2, %xmm0
-; X64-AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[8,9],zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; X64-AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
 ; X64-AVX-NEXT:    retq
   %and = and <2 x i64> %a, <i64 255, i64 65535>
   %cvt = sitofp <2 x i64> %and to <2 x double>
@@ -148,113 +109,43 @@ define <2 x double> @mask_uitofp_2i64_2f64(<2 x i64> %a) nounwind {
 define <4 x float> @mask_sitofp_4i64_4f32(<4 x i64> %a) nounwind {
 ; X32-SSE-LABEL: mask_sitofp_4i64_4f32:
 ; X32-SSE:       # BB#0:
-; X32-SSE-NEXT:    pushl %ebp
-; X32-SSE-NEXT:    movl %esp, %ebp
-; X32-SSE-NEXT:    andl $-8, %esp
-; X32-SSE-NEXT:    subl $48, %esp
 ; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
 ; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm1
-; X32-SSE-NEXT:    movq %xmm1, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT:    movq %xmm0, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; X32-SSE-NEXT:    movq %xmm1, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; X32-SSE-NEXT:    movq %xmm0, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT:    fildll {{[0-9]+}}(%esp)
-; X32-SSE-NEXT:    fstps {{[0-9]+}}(%esp)
-; X32-SSE-NEXT:    fildll {{[0-9]+}}(%esp)
-; X32-SSE-NEXT:    fstps (%esp)
-; X32-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X32-SSE-NEXT:    fildll {{[0-9]+}}(%esp)
-; X32-SSE-NEXT:    fstps {{[0-9]+}}(%esp)
-; X32-SSE-NEXT:    fildll {{[0-9]+}}(%esp)
-; X32-SSE-NEXT:    fstps {{[0-9]+}}(%esp)
-; X32-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X32-SSE-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; X32-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-SSE-NEXT:    movl %ebp, %esp
-; X32-SSE-NEXT:    popl %ebp
+; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X32-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X32-SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
 ; X32-SSE-NEXT:    retl
 ;
 ; X32-AVX-LABEL: mask_sitofp_4i64_4f32:
 ; X32-AVX:       # BB#0:
-; X32-AVX-NEXT:    pushl %ebp
-; X32-AVX-NEXT:    movl %esp, %ebp
-; X32-AVX-NEXT:    andl $-8, %esp
-; X32-AVX-NEXT:    subl $48, %esp
 ; X32-AVX-NEXT:    vandps {{\.LCPI.*}}, %ymm0, %ymm0
-; X32-AVX-NEXT:    vpextrd $1, %xmm0, %eax
-; X32-AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm1
-; X32-AVX-NEXT:    vmovq %xmm1, {{[0-9]+}}(%esp)
-; X32-AVX-NEXT:    vpextrd $3, %xmm0, %eax
-; X32-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X32-AVX-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
-; X32-AVX-NEXT:    vmovq %xmm1, {{[0-9]+}}(%esp)
-; X32-AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; X32-AVX-NEXT:    vpextrd $1, %xmm0, %eax
-; X32-AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm1
-; X32-AVX-NEXT:    vmovq %xmm1, {{[0-9]+}}(%esp)
-; X32-AVX-NEXT:    vpextrd $3, %xmm0, %eax
-; X32-AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; X32-AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
-; X32-AVX-NEXT:    vmovq %xmm0, {{[0-9]+}}(%esp)
-; X32-AVX-NEXT:    fildll {{[0-9]+}}(%esp)
-; X32-AVX-NEXT:    fstps {{[0-9]+}}(%esp)
-; X32-AVX-NEXT:    fildll {{[0-9]+}}(%esp)
-; X32-AVX-NEXT:    fstps {{[0-9]+}}(%esp)
-; X32-AVX-NEXT:    fildll {{[0-9]+}}(%esp)
-; X32-AVX-NEXT:    fstps {{[0-9]+}}(%esp)
-; X32-AVX-NEXT:    fildll {{[0-9]+}}(%esp)
-; X32-AVX-NEXT:    fstps (%esp)
-; X32-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
-; X32-AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
-; X32-AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
-; X32-AVX-NEXT:    movl %ebp, %esp
-; X32-AVX-NEXT:    popl %ebp
+; X32-AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; X32-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
+; X32-AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X32-AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; X32-AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
 ; X32-AVX-NEXT:    vzeroupper
 ; X32-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mask_sitofp_4i64_4f32:
 ; X64-SSE:       # BB#0:
 ; X64-SSE-NEXT:    pand {{.*}}(%rip), %xmm0
 ; X64-SSE-NEXT:    pand {{.*}}(%rip), %xmm1
-; X64-SSE-NEXT:    movd %xmm1, %rax
-; X64-SSE-NEXT:    cvtsi2ssq %rax, %xmm3
-; X64-SSE-NEXT:    movd %xmm0, %rax
-; X64-SSE-NEXT:    cvtsi2ssq %rax, %xmm2
-; X64-SSE-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; X64-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; X64-SSE-NEXT:    movd %xmm1, %rax
-; X64-SSE-NEXT:    xorps %xmm1, %xmm1
-; X64-SSE-NEXT:    cvtsi2ssq %rax, %xmm1
-; X64-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; X64-SSE-NEXT:    movd %xmm0, %rax
-; X64-SSE-NEXT:    xorps %xmm0, %xmm0
-; X64-SSE-NEXT:    cvtsi2ssq %rax, %xmm0
-; X64-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X64-SSE-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; X64-SSE-NEXT:    movaps %xmm2, %xmm0
+; X64-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X64-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X64-SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
 ; X64-SSE-NEXT:    retq
 ;
 ; X64-AVX-LABEL: mask_sitofp_4i64_4f32:
 ; X64-AVX:       # BB#0:
 ; X64-AVX-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
-; X64-AVX-NEXT:    vpextrq $1, %xmm0, %rax
-; X64-AVX-NEXT:    vcvtsi2ssq %rax, %xmm1, %xmm1
-; X64-AVX-NEXT:    vmovq %xmm0, %rax
-; X64-AVX-NEXT:    vcvtsi2ssq %rax, %xmm2, %xmm2
-; X64-AVX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
-; X64-AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; X64-AVX-NEXT:    vmovq %xmm0, %rax
-; X64-AVX-NEXT:    vcvtsi2ssq %rax, %xmm3, %xmm2
-; X64-AVX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
-; X64-AVX-NEXT:    vpextrq $1, %xmm0, %rax
-; X64-AVX-NEXT:    vcvtsi2ssq %rax, %xmm3, %xmm0
-; X64-AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; X64-AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
+; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X64-AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; X64-AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
 ; X64-AVX-NEXT:    vzeroupper
 ; X64-AVX-NEXT:    retq
   %and = and <4 x i64> %a, <i64 127, i64 255, i64 4095, i64 65535>
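The tightened checks follow directly from the new fold: every lane is ANDed with a constant of at most 16 bits, so the DAG can prove at least 48 sign bits per i64 lane, comfortably above the 33 required, and the per-element cvtsi2ss sequences collapse into lane shuffles plus a single packed cvtdq2ps. A rough lane-by-lane emulation of the new X64-SSE sequence (illustrative C++ only, not SSE intrinsics; the sample lane values are hypothetical but respect the masks <127, 255, 4095, 65535> from the test):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // pshufd [0,2,2,3] gathers the low i32 of each i64 lane, punpcklqdq
  // concatenates the two halves, and cvtdq2ps converts four i32s at once.
  const int64_t Lanes[4] = {100, 255, 4000, 65535};
  float Out[4];
  for (int I = 0; I != 4; ++I)
    Out[I] = (float)(int32_t)Lanes[I];   // truncate lane to i32, then convert
  for (int I = 0; I != 4; ++I)
    assert(Out[I] == (float)Lanes[I]);   // same as converting the full i64
  return 0;
}
```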

test/CodeGen/X86/sse-fsignum.ll

Lines changed: 16 additions & 16 deletions
@@ -39,16 +39,16 @@ define void @signum64a(<2 x double>*) {
 ; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT:    vcmpltpd %xmm1, %xmm0, %xmm2
 ; AVX1-NEXT:    vpextrq $1, %xmm2, %rax
-; AVX1-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm3
-; AVX1-NEXT:    vmovq %xmm2, %rax
-; AVX1-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm2
-; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX1-NEXT:    vmovq %xmm2, %rcx
+; AVX1-NEXT:    vmovd %ecx, %xmm2
+; AVX1-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
+; AVX1-NEXT:    vcvtdq2pd %xmm2, %xmm2
 ; AVX1-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
-; AVX1-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm1
-; AVX1-NEXT:    vmovq %xmm0, %rax
-; AVX1-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm0
-; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT:    vmovq %xmm0, %rcx
+; AVX1-NEXT:    vmovd %ecx, %xmm0
+; AVX1-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX1-NEXT:    vcvtdq2pd %xmm0, %xmm0
 ; AVX1-NEXT:    vsubpd %xmm0, %xmm2, %xmm0
 ; AVX1-NEXT:    vmovapd %xmm0, (%rdi)
 ; AVX1-NEXT:    retq
@@ -59,16 +59,16 @@ define void @signum64a(<2 x double>*) {
 ; AVX2-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vcmpltpd %xmm1, %xmm0, %xmm2
 ; AVX2-NEXT:    vpextrq $1, %xmm2, %rax
-; AVX2-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm3
-; AVX2-NEXT:    vmovq %xmm2, %rax
-; AVX2-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm2
-; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX2-NEXT:    vmovq %xmm2, %rcx
+; AVX2-NEXT:    vmovd %ecx, %xmm2
+; AVX2-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
+; AVX2-NEXT:    vcvtdq2pd %xmm2, %xmm2
 ; AVX2-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    vpextrq $1, %xmm0, %rax
-; AVX2-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm1
-; AVX2-NEXT:    vmovq %xmm0, %rax
-; AVX2-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm0
-; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX2-NEXT:    vmovq %xmm0, %rcx
+; AVX2-NEXT:    vmovd %ecx, %xmm0
+; AVX2-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX2-NEXT:    vcvtdq2pd %xmm0, %xmm0
 ; AVX2-NEXT:    vsubpd %xmm0, %xmm2, %xmm0
 ; AVX2-NEXT:    vmovapd %xmm0, (%rdi)
 ; AVX2-NEXT:    retq
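The signum change relies on the same sign-bit reasoning from a different source: vcmpltpd writes each lane as either all-zeros (0) or all-ones (-1), so every bit of a lane equals its sign bit and converting only the low 32 bits is exact. A tiny sketch of that property (plain C++; the scalar compare stands in for one vcmpltpd lane):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  double A = -3.5, B = 0.0;
  // One emulated vcmpltpd lane: all-ones on true, all-zeros on false.
  int64_t Lane = (A < B) ? int64_t(-1) : int64_t(0);
  // Every bit is a sign bit, so truncating to i32 before the convert
  // loses nothing.
  assert((double)Lane == (double)(int32_t)Lane);
  return 0;
}
```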
