Skip to content

Commit 410e5b1

Browse files
committed
[VE] Support fabs/fcos/fsin/fsqrt math functions
VE doesn't have instructions for fabs/fcos/fsin/fsqrt, so expand them. Also add regression tests for them and update the fcopysign regression test. Reviewed By: simoll Differential Revision: https://reviews.llvm.org/D89457
1 parent e384e94 commit 410e5b1

File tree

6 files changed

+605
-9
lines changed

6 files changed

+605
-9
lines changed

llvm/lib/Target/VE/VEISelLowering.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -808,7 +808,11 @@ VETargetLowering::VETargetLowering(const TargetMachine &TM,
808808

809809
// VE doesn't have the following floating-point math functions.
810810
for (MVT VT : MVT::fp_valuetypes()) {
811+
setOperationAction(ISD::FABS, VT, Expand);
811812
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
813+
setOperationAction(ISD::FCOS, VT, Expand);
814+
setOperationAction(ISD::FSIN, VT, Expand);
815+
setOperationAction(ISD::FSQRT, VT, Expand);
812816
}
813817

814818
/// } Floating-point math functions

llvm/test/CodeGen/VE/fabs.ll

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
; RUN: llc < %s -mtriple=ve | FileCheck %s
2+
3+
;;; Test ‘llvm.fabs.*’ Intrinsic
4+
;;;
5+
;;; Syntax:
6+
;;; This is an overloaded intrinsic. You can use llvm.fabs on any
7+
;;; floating-point or vector of floating-point type. Not all targets
8+
;;; support all types however.
9+
;;;
10+
;;; declare float @llvm.fabs.f32(float %Val)
11+
;;; declare double @llvm.fabs.f64(double %Val)
12+
;;; declare x86_fp80 @llvm.fabs.f80(x86_fp80 %Val)
13+
;;; declare fp128 @llvm.fabs.f128(fp128 %Val)
14+
;;; declare ppc_fp128 @llvm.fabs.ppcf128(ppc_fp128 %Val)
15+
;;;
16+
;;; Overview:
17+
;;; The ‘llvm.fabs.*’ intrinsics return the absolute value of the operand.
18+
;;;
19+
;;; Arguments:
20+
;;; The argument and return value are floating-point numbers of the same
21+
;;; type.
22+
;;;
23+
;;; Semantics:
24+
;;; This function returns the same values as the libm fabs functions would,
25+
;;; and handles error conditions in the same way.
26+
;;;
27+
;;; Note:
28+
;;; We test only float/double/fp128.
29+
30+
; Function Attrs: nounwind readnone
31+
define float @fabs_float_var(float %0) {
32+
; CHECK-LABEL: fabs_float_var:
33+
; CHECK: .LBB{{[0-9]+}}_2:
34+
; CHECK-NEXT: sra.l %s0, %s0, 32
35+
; CHECK-NEXT: and %s0, %s0, (33)0
36+
; CHECK-NEXT: sll %s0, %s0, 32
37+
; CHECK-NEXT: or %s11, 0, %s9
38+
%2 = tail call fast float @llvm.fabs.f32(float %0)
39+
ret float %2
40+
}
41+
42+
; Function Attrs: nounwind readnone speculatable willreturn
43+
declare float @llvm.fabs.f32(float)
44+
45+
; Function Attrs: nounwind readnone
46+
define double @fabs_double_var(double %0) {
47+
; CHECK-LABEL: fabs_double_var:
48+
; CHECK: .LBB{{[0-9]+}}_2:
49+
; CHECK-NEXT: and %s0, %s0, (1)0
50+
; CHECK-NEXT: or %s11, 0, %s9
51+
%2 = tail call fast double @llvm.fabs.f64(double %0)
52+
ret double %2
53+
}
54+
55+
; Function Attrs: nounwind readnone speculatable willreturn
56+
declare double @llvm.fabs.f64(double)
57+
58+
; Function Attrs: nounwind readnone
59+
define fp128 @fabs_quad_var(fp128 %0) {
60+
; CHECK-LABEL: fabs_quad_var:
61+
; CHECK: .LBB{{[0-9]+}}_2:
62+
; CHECK-NEXT: st %s1, 176(, %s11)
63+
; CHECK-NEXT: st %s0, 184(, %s11)
64+
; CHECK-NEXT: ld1b.zx %s0, 191(, %s11)
65+
; CHECK-NEXT: and %s0, %s0, (57)0
66+
; CHECK-NEXT: st1b %s0, 191(, %s11)
67+
; CHECK-NEXT: ld %s1, 176(, %s11)
68+
; CHECK-NEXT: ld %s0, 184(, %s11)
69+
; CHECK-NEXT: or %s11, 0, %s9
70+
%2 = tail call fast fp128 @llvm.fabs.f128(fp128 %0)
71+
ret fp128 %2
72+
}
73+
74+
; Function Attrs: nounwind readnone speculatable willreturn
75+
declare fp128 @llvm.fabs.f128(fp128)
76+
77+
; Function Attrs: norecurse nounwind readnone
78+
define float @fabs_float_zero() {
79+
; CHECK-LABEL: fabs_float_zero:
80+
; CHECK: .LBB{{[0-9]+}}_2:
81+
; CHECK-NEXT: lea.sl %s0, 0
82+
; CHECK-NEXT: or %s11, 0, %s9
83+
ret float 0.000000e+00
84+
}
85+
86+
; Function Attrs: norecurse nounwind readnone
87+
define double @fabs_double_zero() {
88+
; CHECK-LABEL: fabs_double_zero:
89+
; CHECK: .LBB{{[0-9]+}}_2:
90+
; CHECK-NEXT: lea.sl %s0, 0
91+
; CHECK-NEXT: or %s11, 0, %s9
92+
ret double 0.000000e+00
93+
}
94+
95+
; Function Attrs: norecurse nounwind readnone
96+
define fp128 @fabs_quad_zero() {
97+
; CHECK-LABEL: fabs_quad_zero:
98+
; CHECK: .LBB{{[0-9]+}}_2:
99+
; CHECK-NEXT: lea %s0, .LCPI{{[0-9]+}}_0@lo
100+
; CHECK-NEXT: and %s0, %s0, (32)0
101+
; CHECK-NEXT: lea.sl %s2, .LCPI{{[0-9]+}}_0@hi(, %s0)
102+
; CHECK-NEXT: ld %s0, 8(, %s2)
103+
; CHECK-NEXT: ld %s1, (, %s2)
104+
; CHECK-NEXT: or %s11, 0, %s9
105+
ret fp128 0xL00000000000000000000000000000000
106+
}
107+
108+
; Function Attrs: norecurse nounwind readnone
109+
define float @fabs_float_const() {
110+
; CHECK-LABEL: fabs_float_const:
111+
; CHECK: .LBB{{[0-9]+}}_2:
112+
; CHECK-NEXT: lea.sl %s0, 1073741824
113+
; CHECK-NEXT: or %s11, 0, %s9
114+
ret float 2.000000e+00
115+
}
116+
117+
; Function Attrs: norecurse nounwind readnone
118+
define double @fabs_double_const() {
119+
; CHECK-LABEL: fabs_double_const:
120+
; CHECK: .LBB{{[0-9]+}}_2:
121+
; CHECK-NEXT: lea.sl %s0, 1073741824
122+
; CHECK-NEXT: or %s11, 0, %s9
123+
ret double 2.000000e+00
124+
}
125+
126+
; Function Attrs: nounwind readnone
127+
define fp128 @fabs_quad_const() {
128+
; CHECK-LABEL: fabs_quad_const:
129+
; CHECK: .LBB{{[0-9]+}}_2:
130+
; CHECK-NEXT: lea %s0, .LCPI{{[0-9]+}}_0@lo
131+
; CHECK-NEXT: and %s0, %s0, (32)0
132+
; CHECK-NEXT: lea.sl %s2, .LCPI{{[0-9]+}}_0@hi(, %s0)
133+
; CHECK-NEXT: ld %s0, 8(, %s2)
134+
; CHECK-NEXT: ld %s1, (, %s2)
135+
; CHECK-NEXT: or %s11, 0, %s9
136+
%1 = tail call fast fp128 @llvm.fabs.f128(fp128 0xL0000000000000000C000000000000000)
137+
ret fp128 %1
138+
}

llvm/test/CodeGen/VE/fp_copysign.ll renamed to llvm/test/CodeGen/VE/fcopysign.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ define float @copysign_float_var(float %0, float %1) {
4141
; CHECK-NEXT: or %s0, %s0, %s1
4242
; CHECK-NEXT: sll %s0, %s0, 32
4343
; CHECK-NEXT: or %s11, 0, %s9
44-
%3 = tail call float @llvm.copysign.f32(float %0, float %1)
44+
%3 = tail call fast float @llvm.copysign.f32(float %0, float %1)
4545
ret float %3
4646
}
4747

@@ -56,7 +56,7 @@ define double @copysign_double_var(double %0, double %1) {
5656
; CHECK-NEXT: and %s0, %s0, (1)0
5757
; CHECK-NEXT: or %s0, %s0, %s1
5858
; CHECK-NEXT: or %s11, 0, %s9
59-
%3 = tail call double @llvm.copysign.f64(double %0, double %1)
59+
%3 = tail call fast double @llvm.copysign.f64(double %0, double %1)
6060
ret double %3
6161
}
6262

@@ -81,7 +81,7 @@ define fp128 @copysign_quad_var(fp128 %0, fp128 %1) {
8181
; CHECK-NEXT: ld %s1, 176(, %s11)
8282
; CHECK-NEXT: ld %s0, 184(, %s11)
8383
; CHECK-NEXT: or %s11, 0, %s9
84-
%3 = tail call fp128 @llvm.copysign.f128(fp128 %0, fp128 %1)
84+
%3 = tail call fast fp128 @llvm.copysign.f128(fp128 %0, fp128 %1)
8585
ret fp128 %3
8686
}
8787

@@ -98,7 +98,7 @@ define float @copysign_float_zero(float %0) {
9898
; CHECK-NEXT: and %s0, %s0, %s1
9999
; CHECK-NEXT: sll %s0, %s0, 32
100100
; CHECK-NEXT: or %s11, 0, %s9
101-
%2 = tail call float @llvm.copysign.f32(float 0.000000e+00, float %0)
101+
%2 = tail call fast float @llvm.copysign.f32(float 0.000000e+00, float %0)
102102
ret float %2
103103
}
104104

@@ -108,7 +108,7 @@ define double @copysign_double_zero(double %0) {
108108
; CHECK: .LBB{{[0-9]+}}_2:
109109
; CHECK-NEXT: and %s0, %s0, (1)1
110110
; CHECK-NEXT: or %s11, 0, %s9
111-
%2 = tail call double @llvm.copysign.f64(double 0.000000e+00, double %0)
111+
%2 = tail call fast double @llvm.copysign.f64(double 0.000000e+00, double %0)
112112
ret double %2
113113
}
114114

@@ -135,7 +135,7 @@ define fp128 @copysign_quad_zero(fp128 %0) {
135135
; CHECK-NEXT: ld %s1, 176(, %s11)
136136
; CHECK-NEXT: ld %s0, 184(, %s11)
137137
; CHECK-NEXT: or %s11, 0, %s9
138-
%2 = tail call fp128 @llvm.copysign.f128(fp128 0xL00000000000000000000000000000000, fp128 %0)
138+
%2 = tail call fast fp128 @llvm.copysign.f128(fp128 0xL00000000000000000000000000000000, fp128 %0)
139139
ret fp128 %2
140140
}
141141

@@ -151,7 +151,7 @@ define float @copysign_float_const(float %0) {
151151
; CHECK-NEXT: or %s0, %s0, %s1
152152
; CHECK-NEXT: sll %s0, %s0, 32
153153
; CHECK-NEXT: or %s11, 0, %s9
154-
%2 = tail call float @llvm.copysign.f32(float -2.000000e+00, float %0)
154+
%2 = tail call fast float @llvm.copysign.f32(float -2.000000e+00, float %0)
155155
ret float %2
156156
}
157157

@@ -163,7 +163,7 @@ define double @copysign_double_const(double %0) {
163163
; CHECK-NEXT: lea.sl %s1, 1073741824
164164
; CHECK-NEXT: or %s0, %s0, %s1
165165
; CHECK-NEXT: or %s11, 0, %s9
166-
%2 = tail call double @llvm.copysign.f64(double -2.000000e+00, double %0)
166+
%2 = tail call fast double @llvm.copysign.f64(double -2.000000e+00, double %0)
167167
ret double %2
168168
}
169169

@@ -190,6 +190,6 @@ define fp128 @copysign_quad_const(fp128 %0) {
190190
; CHECK-NEXT: ld %s1, 176(, %s11)
191191
; CHECK-NEXT: ld %s0, 184(, %s11)
192192
; CHECK-NEXT: or %s11, 0, %s9
193-
%2 = tail call fp128 @llvm.copysign.f128(fp128 0xL0000000000000000C000000000000000, fp128 %0)
193+
%2 = tail call fast fp128 @llvm.copysign.f128(fp128 0xL0000000000000000C000000000000000, fp128 %0)
194194
ret fp128 %2
195195
}

llvm/test/CodeGen/VE/fcos.ll

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
; RUN: llc < %s -mtriple=ve | FileCheck %s
2+
3+
;;; Test ‘llvm.cos.*’ intrinsic
4+
;;;
5+
;;; Syntax:
6+
;;; This is an overloaded intrinsic. You can use llvm.cos on any
7+
;;; floating-point or vector of floating-point type. Not all targets
8+
;;; support all types however.
9+
;;;
10+
;;; declare float @llvm.cos.f32(float %Val)
11+
;;; declare double @llvm.cos.f64(double %Val)
12+
;;; declare x86_fp80 @llvm.cos.f80(x86_fp80 %Val)
13+
;;; declare fp128 @llvm.cos.f128(fp128 %Val)
14+
;;; declare ppc_fp128 @llvm.cos.ppcf128(ppc_fp128 %Val)
15+
;;;
16+
;;; Overview:
17+
;;; The ‘llvm.cos.*’ intrinsics return the cosine of the operand.
18+
;;;
19+
;;; Arguments:
20+
;;; The argument and return value are floating-point numbers of the same type.
21+
;;;
22+
;;; Semantics:
23+
;;; Return the same value as a corresponding libm ‘cos’ function but without
24+
;;; trapping or setting errno.
25+
;;;
26+
;;; When specified with the fast-math-flag ‘afn’, the result may be
27+
;;; approximated using a less accurate calculation.
28+
;;;
29+
;;; Note:
30+
;;; We test only float/double/fp128.
31+
32+
; Function Attrs: nounwind readnone
33+
define float @fcos_float_var(float %0) {
34+
; CHECK-LABEL: fcos_float_var:
35+
; CHECK: .LBB{{[0-9]+}}_2:
36+
; CHECK-NEXT: lea %s1, cosf@lo
37+
; CHECK-NEXT: and %s1, %s1, (32)0
38+
; CHECK-NEXT: lea.sl %s12, cosf@hi(, %s1)
39+
; CHECK-NEXT: bsic %s10, (, %s12)
40+
; CHECK-NEXT: or %s11, 0, %s9
41+
%2 = tail call fast float @llvm.cos.f32(float %0)
42+
ret float %2
43+
}
44+
45+
; Function Attrs: nounwind readnone speculatable willreturn
46+
declare float @llvm.cos.f32(float)
47+
48+
; Function Attrs: nounwind readnone
49+
define double @fcos_double_var(double %0) {
50+
; CHECK-LABEL: fcos_double_var:
51+
; CHECK: .LBB{{[0-9]+}}_2:
52+
; CHECK-NEXT: lea %s1, cos@lo
53+
; CHECK-NEXT: and %s1, %s1, (32)0
54+
; CHECK-NEXT: lea.sl %s12, cos@hi(, %s1)
55+
; CHECK-NEXT: bsic %s10, (, %s12)
56+
; CHECK-NEXT: or %s11, 0, %s9
57+
%2 = tail call fast double @llvm.cos.f64(double %0)
58+
ret double %2
59+
}
60+
61+
; Function Attrs: nounwind readnone speculatable willreturn
62+
declare double @llvm.cos.f64(double)
63+
64+
; Function Attrs: nounwind readnone
65+
define fp128 @fcos_quad_var(fp128 %0) {
66+
; CHECK-LABEL: fcos_quad_var:
67+
; CHECK: .LBB{{[0-9]+}}_2:
68+
; CHECK-NEXT: lea %s2, cosl@lo
69+
; CHECK-NEXT: and %s2, %s2, (32)0
70+
; CHECK-NEXT: lea.sl %s12, cosl@hi(, %s2)
71+
; CHECK-NEXT: bsic %s10, (, %s12)
72+
; CHECK-NEXT: or %s11, 0, %s9
73+
%2 = tail call fast fp128 @llvm.cos.f128(fp128 %0)
74+
ret fp128 %2
75+
}
76+
77+
; Function Attrs: nounwind readnone speculatable willreturn
78+
declare fp128 @llvm.cos.f128(fp128)
79+
80+
; Function Attrs: norecurse nounwind readnone
81+
define float @fcos_float_zero() {
82+
; CHECK-LABEL: fcos_float_zero:
83+
; CHECK: .LBB{{[0-9]+}}_2:
84+
; CHECK-NEXT: lea.sl %s0, 1065353216
85+
; CHECK-NEXT: or %s11, 0, %s9
86+
ret float 1.000000e+00
87+
}
88+
89+
; Function Attrs: norecurse nounwind readnone
90+
define double @fcos_double_zero() {
91+
; CHECK-LABEL: fcos_double_zero:
92+
; CHECK: .LBB{{[0-9]+}}_2:
93+
; CHECK-NEXT: lea.sl %s0, 1072693248
94+
; CHECK-NEXT: or %s11, 0, %s9
95+
ret double 1.000000e+00
96+
}
97+
98+
; Function Attrs: nounwind readnone
99+
define fp128 @fcos_quad_zero() {
100+
; CHECK-LABEL: fcos_quad_zero:
101+
; CHECK: .LBB{{[0-9]+}}_2:
102+
; CHECK-NEXT: lea %s0, .LCPI{{[0-9]+}}_0@lo
103+
; CHECK-NEXT: and %s0, %s0, (32)0
104+
; CHECK-NEXT: lea.sl %s2, .LCPI{{[0-9]+}}_0@hi(, %s0)
105+
; CHECK-NEXT: ld %s0, 8(, %s2)
106+
; CHECK-NEXT: ld %s1, (, %s2)
107+
; CHECK-NEXT: lea %s2, cosl@lo
108+
; CHECK-NEXT: and %s2, %s2, (32)0
109+
; CHECK-NEXT: lea.sl %s12, cosl@hi(, %s2)
110+
; CHECK-NEXT: bsic %s10, (, %s12)
111+
; CHECK-NEXT: or %s11, 0, %s9
112+
%1 = tail call fast fp128 @llvm.cos.f128(fp128 0xL00000000000000000000000000000000)
113+
ret fp128 %1
114+
}
115+
116+
; Function Attrs: norecurse nounwind readnone
117+
define float @fcos_float_const() {
118+
; CHECK-LABEL: fcos_float_const:
119+
; CHECK: .LBB{{[0-9]+}}_2:
120+
; CHECK-NEXT: lea.sl %s0, -1093332685
121+
; CHECK-NEXT: or %s11, 0, %s9
122+
ret float 0xBFDAA22660000000
123+
}
124+
125+
; Function Attrs: norecurse nounwind readnone
126+
define double @fcos_double_const() {
127+
; CHECK-LABEL: fcos_double_const:
128+
; CHECK: .LBB{{[0-9]+}}_2:
129+
; CHECK-NEXT: lea %s0, 1465086469
130+
; CHECK-NEXT: lea.sl %s0, -1076190682(, %s0)
131+
; CHECK-NEXT: or %s11, 0, %s9
132+
ret double 0xBFDAA22657537205
133+
}
134+
135+
; Function Attrs: nounwind readnone
136+
define fp128 @fcos_quad_const() {
137+
; CHECK-LABEL: fcos_quad_const:
138+
; CHECK: .LBB{{[0-9]+}}_2:
139+
; CHECK-NEXT: lea %s0, .LCPI{{[0-9]+}}_0@lo
140+
; CHECK-NEXT: and %s0, %s0, (32)0
141+
; CHECK-NEXT: lea.sl %s2, .LCPI{{[0-9]+}}_0@hi(, %s0)
142+
; CHECK-NEXT: ld %s0, 8(, %s2)
143+
; CHECK-NEXT: ld %s1, (, %s2)
144+
; CHECK-NEXT: lea %s2, cosl@lo
145+
; CHECK-NEXT: and %s2, %s2, (32)0
146+
; CHECK-NEXT: lea.sl %s12, cosl@hi(, %s2)
147+
; CHECK-NEXT: bsic %s10, (, %s12)
148+
; CHECK-NEXT: or %s11, 0, %s9
149+
%1 = tail call fast fp128 @llvm.cos.f128(fp128 0xL0000000000000000C000000000000000)
150+
ret fp128 %1
151+
}

0 commit comments

Comments
 (0)