Skip to content

Commit 45cf88f

Browse files
committed
X86: Support FCANONICALIZE on f64 for i686 with SSE2 or AVX
1 parent 76b0187 commit 45cf88f

File tree

2 files changed

+144
-1
lines changed

2 files changed

+144
-1
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -334,10 +334,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
334334
setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
335335
}
336336
setOperationAction(ISD::FCANONICALIZE, MVT::f32, Custom);
337+
setOperationAction(ISD::FCANONICALIZE, MVT::f64, Custom);
337338
if (Subtarget.is64Bit()) {
338339
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
339340
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
340-
setOperationAction(ISD::FCANONICALIZE, MVT::f64, Custom);
341341
}
342342
}
343343
if (Subtarget.hasAVX10_2()) {
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --default-march x86_64-unknown-linux-gnu --version 5
; RUN: llc -mattr=+sse2 -mtriple=i686-- < %s | FileCheck %s -check-prefixes=SSE2
; RUN: llc -mattr=+avx -mtriple=i686-- < %s | FileCheck %s -check-prefixes=AVX
; Scalar f64 canonicalize fed by a max-style select; checks the i686 (retl)
; lowering for both SSE2 and AVX.
; NOTE(review): attribute groups #0 and #2 are referenced but their
; `attributes #N = { ... }` definitions are not in the visible chunk — confirm
; they exist at the end of the file.
define double @canonicalize_fp64(double %a, double %b) unnamed_addr #0 {
; SSE2-LABEL: canonicalize_fp64:
; SSE2:       # %bb.0: # %start
; SSE2-NEXT:    pushl %ebp
; SSE2-NEXT:    .cfi_def_cfa_offset 8
; SSE2-NEXT:    .cfi_offset %ebp, -8
; SSE2-NEXT:    movl %esp, %ebp
; SSE2-NEXT:    .cfi_def_cfa_register %ebp
; SSE2-NEXT:    andl $-8, %esp
; SSE2-NEXT:    subl $8, %esp
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT:    movapd %xmm0, %xmm2
; SSE2-NEXT:    cmpunordsd %xmm0, %xmm2
; SSE2-NEXT:    movapd %xmm2, %xmm3
; SSE2-NEXT:    andpd %xmm1, %xmm3
; SSE2-NEXT:    maxsd %xmm0, %xmm1
; SSE2-NEXT:    andnpd %xmm1, %xmm2
; SSE2-NEXT:    orpd %xmm3, %xmm2
; SSE2-NEXT:    mulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
; SSE2-NEXT:    movsd %xmm2, (%esp)
; SSE2-NEXT:    fldl (%esp)
; SSE2-NEXT:    movl %ebp, %esp
; SSE2-NEXT:    popl %ebp
; SSE2-NEXT:    .cfi_def_cfa %esp, 4
; SSE2-NEXT:    retl
;
; AVX-LABEL: canonicalize_fp64:
; AVX:       # %bb.0: # %start
; AVX-NEXT:    pushl %ebp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    .cfi_offset %ebp, -8
; AVX-NEXT:    movl %esp, %ebp
; AVX-NEXT:    .cfi_def_cfa_register %ebp
; AVX-NEXT:    andl $-8, %esp
; AVX-NEXT:    subl $8, %esp
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vmaxsd %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    vmulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX-NEXT:    vmovsd %xmm0, (%esp)
; AVX-NEXT:    fldl (%esp)
; AVX-NEXT:    movl %ebp, %esp
; AVX-NEXT:    popl %ebp
; AVX-NEXT:    .cfi_def_cfa %esp, 4
; AVX-NEXT:    retl
start:
  %c = fcmp olt double %a, %b
  %d = fcmp uno double %a, 0.000000e+00
  %or.cond.i.i = or i1 %d, %c
  %e = select i1 %or.cond.i.i, double %b, double %a
  %f = tail call double @llvm.canonicalize.f64(double %e) #2
  ret double %f
}
61+
62+
; Canonicalize a loaded f64 and store it back; lowers to a multiply by a
; constant-pool value on both SSE2 and AVX.
; NOTE(review): attribute group #1 is referenced but not defined in the
; visible chunk — confirm its definition exists at the end of the file.
define void @v_test_canonicalize_var_f64(double addrspace(1)* %out) #1 {
; SSE2-LABEL: v_test_canonicalize_var_f64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    mulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; SSE2-NEXT:    movsd %xmm0, (%eax)
; SSE2-NEXT:    retl
;
; AVX-LABEL: v_test_canonicalize_var_f64:
; AVX:       # %bb.0:
; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX-NEXT:    vmovsd %xmm0, (%eax)
; AVX-NEXT:    retl
  %val = load double, double addrspace(1)* %out
  %canonicalized = call double @llvm.canonicalize.f64(double %val)
  store double %canonicalized, double addrspace(1)* %out
  ret void
}
83+
84+
; Canonicalize of undef folds to a quiet-NaN store (0x7FF8000000000000,
; written as two 32-bit halves on i686).
define void @canonicalize_undef(double addrspace(1)* %out) {
; SSE2-LABEL: canonicalize_undef:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SSE2-NEXT:    movl $2146959360, 4(%eax) # imm = 0x7FF80000
; SSE2-NEXT:    movl $0, (%eax)
; SSE2-NEXT:    retl
;
; AVX-LABEL: canonicalize_undef:
; AVX:       # %bb.0:
; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX-NEXT:    movl $2146959360, 4(%eax) # imm = 0x7FF80000
; AVX-NEXT:    movl $0, (%eax)
; AVX-NEXT:    retl
  %canonicalized = call double @llvm.canonicalize.f64(double undef)
  store double %canonicalized, double addrspace(1)* %out
  ret void
}
102+
103+
; Vector v4f64 canonicalize by value; lowers to mulpd with a splat-1.0
; constant (two xmm multiplies on SSE2, one ymm multiply on AVX).
; FIX(review): intrinsic suffix corrected from .v4f32 to .v4f64 so the mangled
; name matches the <4 x double> operand/return type instead of relying on the
; parser to remangle it.
define <4 x double> @canon_fp64_varargsv4f64(<4 x double> %a) {
; SSE2-LABEL: canon_fp64_varargsv4f64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movapd {{.*#+}} xmm2 = [1.0E+0,1.0E+0]
; SSE2-NEXT:    mulpd %xmm2, %xmm0
; SSE2-NEXT:    mulpd %xmm2, %xmm1
; SSE2-NEXT:    retl
;
; AVX-LABEL: canon_fp64_varargsv4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulpd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; AVX-NEXT:    retl
  %canonicalized = call <4 x double> @llvm.canonicalize.v4f64(<4 x double> %a)
  ret <4 x double> %canonicalized
}
118+
119+
; Vector v4f64 canonicalize through memory: load, multiply by splat 1.0,
; store back.
; FIX(review): intrinsic suffix corrected from .v4f32 to .v4f64 so the mangled
; name matches the <4 x double> operand/return type instead of relying on the
; parser to remangle it.
; NOTE(review): attribute group #1 is referenced but not defined in the
; visible chunk — confirm its definition exists at the end of the file.
define void @vec_canonicalize_var_v4f64(<4 x double> addrspace(1)* %out) #1 {
; SSE2-LABEL: vec_canonicalize_var_v4f64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SSE2-NEXT:    movapd {{.*#+}} xmm0 = [1.0E+0,1.0E+0]
; SSE2-NEXT:    movapd 16(%eax), %xmm1
; SSE2-NEXT:    mulpd %xmm0, %xmm1
; SSE2-NEXT:    mulpd (%eax), %xmm0
; SSE2-NEXT:    movapd %xmm0, (%eax)
; SSE2-NEXT:    movapd %xmm1, 16(%eax)
; SSE2-NEXT:    retl
;
; AVX-LABEL: vec_canonicalize_var_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX-NEXT:    vmovapd (%eax), %ymm0
; AVX-NEXT:    vmulpd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; AVX-NEXT:    vmovapd %ymm0, (%eax)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retl
  %val = load <4 x double>, <4 x double> addrspace(1)* %out
  %canonicalized = call <4 x double> @llvm.canonicalize.v4f64(<4 x double> %val)
  store <4 x double> %canonicalized, <4 x double> addrspace(1)* %out
  ret void
}

0 commit comments

Comments
 (0)