Skip to content

Commit 0a4fb2b

Browse files
committed
Copy over the X86ISelLowering changes and x86 test cases
from @junaire's https://reviews.llvm.org/D146905
1 parent 30f1017 commit 0a4fb2b

File tree

4 files changed

+273
-2
lines changed

4 files changed

+273
-2
lines changed

llvm/include/llvm/Analysis/VecFuncs.def

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ TLI_DEFINE_VECFUNC("llvm.sin.f32", "vsinf", FIXED(4), "_ZGV_LLVM_N4v")
5454
TLI_DEFINE_VECFUNC("cosf", "vcosf", FIXED(4), "_ZGV_LLVM_N4v")
5555
TLI_DEFINE_VECFUNC("llvm.cos.f32", "vcosf", FIXED(4), "_ZGV_LLVM_N4v")
5656
TLI_DEFINE_VECFUNC("tanf", "vtanf", FIXED(4), "_ZGV_LLVM_N4v")
57-
TLI_DEFINE_VECFUNC("llvm.tan.f32", "vtanf", FIXED(4))
57+
TLI_DEFINE_VECFUNC("llvm.tan.f32", "vtanf", FIXED(4), "_ZGV_LLVM_N4v")
5858
TLI_DEFINE_VECFUNC("asinf", "vasinf", FIXED(4), "_ZGV_LLVM_N4v")
5959
TLI_DEFINE_VECFUNC("acosf", "vacosf", FIXED(4), "_ZGV_LLVM_N4v")
6060
TLI_DEFINE_VECFUNC("atanf", "vatanf", FIXED(4), "_ZGV_LLVM_N4v")

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -655,6 +655,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
655655
setOperationAction(ISD::FSIN , VT, Expand);
656656
setOperationAction(ISD::FCOS , VT, Expand);
657657
setOperationAction(ISD::FSINCOS, VT, Expand);
658+
setOperationAction(ISD::FTAN , VT, Expand);
658659
}
659660

660661
// Half type will be promoted by default.
@@ -730,12 +731,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
730731
setOperationAction(ISD::FSIN , MVT::f32, Expand);
731732
setOperationAction(ISD::FCOS , MVT::f32, Expand);
732733
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
734+
setOperationAction(ISD::FTAN , MVT::f32, Expand);
733735

734736
if (UseX87) {
735737
// Always expand sin/cos/tan functions even though x87 has instructions for them.
736738
setOperationAction(ISD::FSIN, MVT::f64, Expand);
737739
setOperationAction(ISD::FCOS, MVT::f64, Expand);
738740
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
741+
setOperationAction(ISD::FTAN, MVT::f64, Expand);
739742
}
740743
} else if (UseX87) {
741744
// f32 and f64 in x87.
@@ -751,6 +754,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
751754
setOperationAction(ISD::FSIN , VT, Expand);
752755
setOperationAction(ISD::FCOS , VT, Expand);
753756
setOperationAction(ISD::FSINCOS, VT, Expand);
757+
setOperationAction(ISD::FTAN , VT, Expand);
754758
}
755759
}
756760

@@ -820,6 +824,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
820824
setOperationAction(ISD::FSIN , MVT::f80, Expand);
821825
setOperationAction(ISD::FCOS , MVT::f80, Expand);
822826
setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
827+
setOperationAction(ISD::FTAN , MVT::f80, Expand);
823828

824829
setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
825830
setOperationAction(ISD::FCEIL, MVT::f80, Expand);
@@ -877,6 +882,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
877882
setOperationAction(ISD::FCOS, MVT::f128, LibCall);
878883
setOperationAction(ISD::STRICT_FCOS, MVT::f128, LibCall);
879884
setOperationAction(ISD::FSINCOS, MVT::f128, LibCall);
885+
setOperationAction(ISD::FTAN, MVT::f128, LibCall);
886+
setOperationAction(ISD::STRICT_FTAN, MVT::f128, LibCall);
880887
// No STRICT_FSINCOS
881888
setOperationAction(ISD::FSQRT, MVT::f128, LibCall);
882889
setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall);
@@ -931,6 +938,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
931938
setOperationAction(ISD::FSIN, VT, Expand);
932939
setOperationAction(ISD::FSINCOS, VT, Expand);
933940
setOperationAction(ISD::FCOS, VT, Expand);
941+
setOperationAction(ISD::FTAN, VT, Expand);
934942
setOperationAction(ISD::FREM, VT, Expand);
935943
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
936944
setOperationAction(ISD::FPOW, VT, Expand);
@@ -2475,7 +2483,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
24752483
ISD::FLOG, ISD::STRICT_FLOG,
24762484
ISD::FLOG10, ISD::STRICT_FLOG10,
24772485
ISD::FPOW, ISD::STRICT_FPOW,
2478-
ISD::FSIN, ISD::STRICT_FSIN})
2486+
ISD::FSIN, ISD::STRICT_FSIN,
2487+
ISD::FTAN, ISD::STRICT_FTAN})
24792488
if (isOperationExpand(Op, MVT::f32))
24802489
setOperationAction(Op, MVT::f32, Promote);
24812490

llvm/test/CodeGen/X86/llvm.tan.ll

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2+
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
3+
4+
define half @use_tanf16(half %a) {
5+
; CHECK-LABEL: use_tanf16:
6+
; CHECK: # %bb.0:
7+
; CHECK-NEXT: pushq %rax
8+
; CHECK-NEXT: .cfi_def_cfa_offset 16
9+
; CHECK-NEXT: callq __extendhfsf2@PLT
10+
; CHECK-NEXT: callq tanf@PLT
11+
; CHECK-NEXT: callq __truncsfhf2@PLT
12+
; CHECK-NEXT: popq %rax
13+
; CHECK-NEXT: .cfi_def_cfa_offset 8
14+
; CHECK-NEXT: retq
15+
%x = call half @llvm.tan.f16(half %a)
16+
ret half %x
17+
}
18+
19+
define float @use_tanf32(float %a) {
20+
; CHECK-LABEL: use_tanf32:
21+
; CHECK: # %bb.0:
22+
; CHECK-NEXT: jmp tanf@PLT # TAILCALL
23+
%x = call float @llvm.tan.f32(float %a)
24+
ret float %x
25+
}
26+
27+
define double @use_tanf64(double %a) {
28+
; CHECK-LABEL: use_tanf64:
29+
; CHECK: # %bb.0:
30+
; CHECK-NEXT: jmp tan@PLT # TAILCALL
31+
%x = call double @llvm.tan.f64(double %a)
32+
ret double %x
33+
}
34+
35+
define fp128 @use_tanfp128(fp128 %a) {
36+
; CHECK-LABEL: use_tanfp128:
37+
; CHECK: # %bb.0:
38+
; CHECK-NEXT: jmp tanl@PLT # TAILCALL
39+
%x = call fp128 @llvm.tan.f128(fp128 %a)
40+
ret fp128 %x
41+
}
42+
43+
define ppc_fp128 @use_tanppc_fp128(ppc_fp128 %a) {
44+
; CHECK-LABEL: use_tanppc_fp128:
45+
; CHECK: # %bb.0:
46+
; CHECK-NEXT: pushq %rax
47+
; CHECK-NEXT: .cfi_def_cfa_offset 16
48+
; CHECK-NEXT: callq tanl@PLT
49+
; CHECK-NEXT: popq %rax
50+
; CHECK-NEXT: .cfi_def_cfa_offset 8
51+
; CHECK-NEXT: retq
52+
%x = call ppc_fp128 @llvm.tan.ppcf128(ppc_fp128 %a)
53+
ret ppc_fp128 %x
54+
}
55+
56+
declare half @llvm.tan.f16(half)
57+
declare float @llvm.tan.f32(float)
58+
declare double @llvm.tan.f64(double)
59+
declare fp128 @llvm.tan.f128(fp128)
60+
declare ppc_fp128 @llvm.tan.ppcf128(ppc_fp128)

llvm/test/CodeGen/X86/vec-libcalls.ll

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,14 @@ declare <5 x float> @llvm.sin.v5f32(<5 x float>)
1717
declare <6 x float> @llvm.sin.v6f32(<6 x float>)
1818
declare <3 x double> @llvm.sin.v3f64(<3 x double>)
1919

20+
declare <1 x float> @llvm.tan.v1f32(<1 x float>)
21+
declare <2 x float> @llvm.tan.v2f32(<2 x float>)
22+
declare <3 x float> @llvm.tan.v3f32(<3 x float>)
23+
declare <4 x float> @llvm.tan.v4f32(<4 x float>)
24+
declare <5 x float> @llvm.tan.v5f32(<5 x float>)
25+
declare <6 x float> @llvm.tan.v6f32(<6 x float>)
26+
declare <3 x double> @llvm.tan.v3f64(<3 x double>)
27+
2028
; Verify that all of the potential libcall candidates are handled.
2129
; Some of these have custom lowering, so those cases won't have
2230
; libcalls.
@@ -230,6 +238,200 @@ define <3 x double> @sin_v3f64(<3 x double> %x) nounwind {
230238
ret <3 x double> %r
231239
}
232240

241+
define <1 x float> @tan_v1f32(<1 x float> %x) nounwind {
242+
; CHECK-LABEL: tan_v1f32:
243+
; CHECK: # %bb.0:
244+
; CHECK-NEXT: pushq %rax
245+
; CHECK-NEXT: callq tanf@PLT
246+
; CHECK-NEXT: popq %rax
247+
; CHECK-NEXT: retq
248+
%r = call <1 x float> @llvm.tan.v1f32(<1 x float> %x)
249+
ret <1 x float> %r
250+
}
251+
252+
define <2 x float> @tan_v2f32(<2 x float> %x) nounwind {
253+
; CHECK-LABEL: tan_v2f32:
254+
; CHECK: # %bb.0:
255+
; CHECK-NEXT: subq $40, %rsp
256+
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
257+
; CHECK-NEXT: callq tanf@PLT
258+
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
259+
; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
260+
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
261+
; CHECK-NEXT: callq tanf@PLT
262+
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
263+
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
264+
; CHECK-NEXT: addq $40, %rsp
265+
; CHECK-NEXT: retq
266+
%r = call <2 x float> @llvm.tan.v2f32(<2 x float> %x)
267+
ret <2 x float> %r
268+
}
269+
270+
define <3 x float> @tan_v3f32(<3 x float> %x) nounwind {
271+
; CHECK-LABEL: tan_v3f32:
272+
; CHECK: # %bb.0:
273+
; CHECK-NEXT: subq $40, %rsp
274+
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
275+
; CHECK-NEXT: callq tanf@PLT
276+
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
277+
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
278+
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
279+
; CHECK-NEXT: callq tanf@PLT
280+
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
281+
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
282+
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
283+
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
284+
; CHECK-NEXT: # xmm0 = mem[1,0]
285+
; CHECK-NEXT: callq tanf@PLT
286+
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
287+
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
288+
; CHECK-NEXT: addq $40, %rsp
289+
; CHECK-NEXT: retq
290+
%r = call <3 x float> @llvm.tan.v3f32(<3 x float> %x)
291+
ret <3 x float> %r
292+
}
293+
294+
define <4 x float> @tan_v4f32(<4 x float> %x) nounwind {
295+
; CHECK-LABEL: tan_v4f32:
296+
; CHECK: # %bb.0:
297+
; CHECK-NEXT: subq $40, %rsp
298+
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
299+
; CHECK-NEXT: callq tanf@PLT
300+
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
301+
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
302+
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
303+
; CHECK-NEXT: callq tanf@PLT
304+
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
305+
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
306+
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
307+
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
308+
; CHECK-NEXT: # xmm0 = mem[1,0]
309+
; CHECK-NEXT: callq tanf@PLT
310+
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
311+
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
312+
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
313+
; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
314+
; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
315+
; CHECK-NEXT: callq tanf@PLT
316+
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
317+
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
318+
; CHECK-NEXT: addq $40, %rsp
319+
; CHECK-NEXT: retq
320+
%r = call <4 x float> @llvm.tan.v4f32(<4 x float> %x)
321+
ret <4 x float> %r
322+
}
323+
324+
define <5 x float> @tan_v5f32(<5 x float> %x) nounwind {
325+
; CHECK-LABEL: tan_v5f32:
326+
; CHECK: # %bb.0:
327+
; CHECK-NEXT: subq $72, %rsp
328+
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
329+
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
330+
; CHECK-NEXT: vzeroupper
331+
; CHECK-NEXT: callq tanf@PLT
332+
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
333+
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
334+
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
335+
; CHECK-NEXT: callq tanf@PLT
336+
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
337+
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
338+
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
339+
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
340+
; CHECK-NEXT: # xmm0 = mem[1,0]
341+
; CHECK-NEXT: callq tanf@PLT
342+
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
343+
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
344+
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
345+
; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
346+
; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
347+
; CHECK-NEXT: callq tanf@PLT
348+
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
349+
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
350+
; CHECK-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
351+
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
352+
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
353+
; CHECK-NEXT: vzeroupper
354+
; CHECK-NEXT: callq tanf@PLT
355+
; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
356+
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
357+
; CHECK-NEXT: addq $72, %rsp
358+
; CHECK-NEXT: retq
359+
%r = call <5 x float> @llvm.tan.v5f32(<5 x float> %x)
360+
ret <5 x float> %r
361+
}
362+
363+
define <6 x float> @tan_v6f32(<6 x float> %x) nounwind {
364+
; CHECK-LABEL: tan_v6f32:
365+
; CHECK: # %bb.0:
366+
; CHECK-NEXT: subq $72, %rsp
367+
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
368+
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
369+
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
370+
; CHECK-NEXT: vzeroupper
371+
; CHECK-NEXT: callq tanf@PLT
372+
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
373+
; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
374+
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
375+
; CHECK-NEXT: callq tanf@PLT
376+
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
377+
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
378+
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
379+
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
380+
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
381+
; CHECK-NEXT: vzeroupper
382+
; CHECK-NEXT: callq tanf@PLT
383+
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
384+
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
385+
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
386+
; CHECK-NEXT: callq tanf@PLT
387+
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
388+
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
389+
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
390+
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
391+
; CHECK-NEXT: # xmm0 = mem[1,0]
392+
; CHECK-NEXT: callq tanf@PLT
393+
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
394+
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
395+
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
396+
; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
397+
; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
398+
; CHECK-NEXT: callq tanf@PLT
399+
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
400+
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
401+
; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
402+
; CHECK-NEXT: addq $72, %rsp
403+
; CHECK-NEXT: retq
404+
%r = call <6 x float> @llvm.tan.v6f32(<6 x float> %x)
405+
ret <6 x float> %r
406+
}
407+
408+
define <3 x double> @tan_v3f64(<3 x double> %x) nounwind {
409+
; CHECK-LABEL: tan_v3f64:
410+
; CHECK: # %bb.0:
411+
; CHECK-NEXT: subq $72, %rsp
412+
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
413+
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
414+
; CHECK-NEXT: vzeroupper
415+
; CHECK-NEXT: callq tan@PLT
416+
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
417+
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
418+
; CHECK-NEXT: # xmm0 = mem[1,0]
419+
; CHECK-NEXT: callq tan@PLT
420+
; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload
421+
; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
422+
; CHECK-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
423+
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
424+
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
425+
; CHECK-NEXT: vzeroupper
426+
; CHECK-NEXT: callq tan@PLT
427+
; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
428+
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
429+
; CHECK-NEXT: addq $72, %rsp
430+
; CHECK-NEXT: retq
431+
%r = call <3 x double> @llvm.tan.v3f64(<3 x double> %x)
432+
ret <3 x double> %r
433+
}
434+
233435
define <2 x float> @fabs_v2f32(<2 x float> %x) nounwind {
234436
; CHECK-LABEL: fabs_v2f32:
235437
; CHECK: # %bb.0:

0 commit comments

Comments
 (0)