Skip to content

Commit edd7b19

Browse files
committed
AArch64: Add FCANONICALIZE
The AArch64 FMINNM/FMAXNM instructions follow IEEE 754-2008, so applying one of them to a value and itself (e.g. FMINNM x, x) canonicalizes a floating-point number.
1 parent 8d35ab8 commit edd7b19

File tree

3 files changed

+745
-0
lines changed

3 files changed

+745
-0
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5063,6 +5063,12 @@ def : Pat<(fmaxnum_ieee (f32 FPR32:$a), (f32 FPR32:$b)),
50635063
def : Pat<(fmaxnum_ieee (f16 FPR16:$a), (f16 FPR16:$b)),
50645064
(FMAXNMHrr FPR16:$a, FPR16:$b)>;
50655065

5066+
// Scalar fcanonicalize: select FMINNM with the input register used as both
// source operands, for f16, f32 and f64 respectively.
def : Pat<(f16 (fcanonicalize f16:$a)),
5067+
(FMINNMHrr f16:$a, f16:$a)>;
5068+
def : Pat<(f32 (fcanonicalize f32:$a)),
5069+
(FMINNMSrr f32:$a, f32:$a)>;
5070+
def : Pat<(f64 (fcanonicalize f64:$a)),
5071+
(FMINNMDrr f64:$a, f64:$a)>;
50665072
//===----------------------------------------------------------------------===//
50675073
// Floating point three operand instructions.
50685074
//===----------------------------------------------------------------------===//
@@ -5588,6 +5594,17 @@ def : Pat<(v2f32 (fmaxnum_ieee (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
55885594
def : Pat<(v4f16 (fmaxnum_ieee (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
55895595
(v4f16 (FMAXNMv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm)))>;
55905596

5597+
// Vector fcanonicalize: select the vector FMINNM with $Rn used as both source
// operands. The 128-bit types (v2f64, v4f32, v8f16) take V128 operands and
// the 64-bit types (v2f32, v4f16) take V64 operands.
def : Pat<(v2f64 (fcanonicalize (v2f64 V128:$Rn))),
5598+
(v2f64 (FMINNMv2f64 (v2f64 V128:$Rn), (v2f64 V128:$Rn)))>;
5599+
def : Pat<(v4f32 (fcanonicalize (v4f32 V128:$Rn))),
5600+
(v4f32 (FMINNMv4f32 (v4f32 V128:$Rn), (v4f32 V128:$Rn)))>;
5601+
def : Pat<(v8f16 (fcanonicalize (v8f16 V128:$Rn))),
5602+
(v8f16 (FMINNMv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rn)))>;
5603+
def : Pat<(v2f32 (fcanonicalize (v2f32 V64:$Rn))),
5604+
(v2f32 (FMINNMv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rn)))>;
5605+
def : Pat<(v4f16 (fcanonicalize (v4f16 V64:$Rn))),
5606+
(v4f16 (FMINNMv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rn)))>;
5607+
55915608
// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
55925609
// instruction expects the addend first, while the fma intrinsic puts it last.
55935610
defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla",
Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc --mtriple=aarch64 --mattr=+fullfp16 < %s | FileCheck %s --check-prefix=AARCH64
3+
4+
; Scalar declarations of the intrinsic under test. The test functions in this
; file call @llvm.canonicalize.*, so the declarations use that same name.
declare half @llvm.canonicalize.f16(half)
5+
declare float @llvm.canonicalize.f32(float)
6+
declare double @llvm.canonicalize.f64(double)
7+
8+
define half @fcanonicalize_half(half %x) {
9+
; AARCH64-LABEL: fcanonicalize_half:
10+
; AARCH64: // %bb.0:
11+
; AARCH64-NEXT: fminnm h0, h0, h0
12+
; AARCH64-NEXT: ret
13+
%z = call half @llvm.canonicalize.f16(half %x)
14+
ret half %z
15+
}
16+
17+
define half @fcanonicalize_half_nnan(half %x) {
18+
; AARCH64-LABEL: fcanonicalize_half_nnan:
19+
; AARCH64: // %bb.0:
20+
; AARCH64-NEXT: fminnm h0, h0, h0
21+
; AARCH64-NEXT: ret
22+
%z = call nnan half @llvm.canonicalize.f16(half %x)
23+
ret half %z
24+
}
25+
26+
define <2 x half> @fcanonicalize_v2f16(<2 x half> %x) {
27+
; AARCH64-LABEL: fcanonicalize_v2f16:
28+
; AARCH64: // %bb.0:
29+
; AARCH64-NEXT: fminnm v0.4h, v0.4h, v0.4h
30+
; AARCH64-NEXT: ret
31+
%z = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %x)
32+
ret <2 x half> %z
33+
}
34+
35+
define <2 x half> @fcanonicalize_v2f16_nnan(<2 x half> %x) {
36+
; AARCH64-LABEL: fcanonicalize_v2f16_nnan:
37+
; AARCH64: // %bb.0:
38+
; AARCH64-NEXT: fminnm v0.4h, v0.4h, v0.4h
39+
; AARCH64-NEXT: ret
40+
%z = call nnan <2 x half> @llvm.canonicalize.v2f16(<2 x half> %x)
41+
ret <2 x half> %z
42+
}
43+
44+
define <4 x half> @fcanonicalize_v4f16(<4 x half> %x) {
45+
; AARCH64-LABEL: fcanonicalize_v4f16:
46+
; AARCH64: // %bb.0:
47+
; AARCH64-NEXT: fminnm v0.4h, v0.4h, v0.4h
48+
; AARCH64-NEXT: ret
49+
%z = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> %x)
50+
ret <4 x half> %z
51+
}
52+
53+
define <4 x half> @fcanonicalize_v4f16_nnan(<4 x half> %x) {
54+
; AARCH64-LABEL: fcanonicalize_v4f16_nnan:
55+
; AARCH64: // %bb.0:
56+
; AARCH64-NEXT: fminnm v0.4h, v0.4h, v0.4h
57+
; AARCH64-NEXT: ret
58+
%z = call nnan <4 x half> @llvm.canonicalize.v4f16(<4 x half> %x)
59+
ret <4 x half> %z
60+
}
61+
62+
define <8 x half> @fcanonicalize_v8f16(<8 x half> %x) {
63+
; AARCH64-LABEL: fcanonicalize_v8f16:
64+
; AARCH64: // %bb.0:
65+
; AARCH64-NEXT: fminnm v0.8h, v0.8h, v0.8h
66+
; AARCH64-NEXT: ret
67+
%z = call <8 x half> @llvm.canonicalize.v8f16(<8 x half> %x)
68+
ret <8 x half> %z
69+
}
70+
71+
define <8 x half> @fcanonicalize_v8f16_nnan(<8 x half> %x) {
72+
; AARCH64-LABEL: fcanonicalize_v8f16_nnan:
73+
; AARCH64: // %bb.0:
74+
; AARCH64-NEXT: fminnm v0.8h, v0.8h, v0.8h
75+
; AARCH64-NEXT: ret
76+
%z = call nnan <8 x half> @llvm.canonicalize.v8f16(<8 x half> %x)
77+
ret <8 x half> %z
78+
}
79+
80+
define float @fcanonicalize_float(float %x) {
81+
; AARCH64-LABEL: fcanonicalize_float:
82+
; AARCH64: // %bb.0:
83+
; AARCH64-NEXT: fminnm s0, s0, s0
84+
; AARCH64-NEXT: ret
85+
%z = call float @llvm.canonicalize.f32(float %x)
86+
ret float %z
87+
}
88+
89+
define float @fcanonicalize_float_nnan(float %x) {
90+
; AARCH64-LABEL: fcanonicalize_float_nnan:
91+
; AARCH64: // %bb.0:
92+
; AARCH64-NEXT: fminnm s0, s0, s0
93+
; AARCH64-NEXT: ret
94+
%z = call nnan float @llvm.canonicalize.f32(float %x)
95+
ret float %z
96+
}
97+
98+
define <2 x float> @fcanonicalize_v2f32(<2 x float> %x) {
99+
; AARCH64-LABEL: fcanonicalize_v2f32:
100+
; AARCH64: // %bb.0:
101+
; AARCH64-NEXT: fminnm v0.2s, v0.2s, v0.2s
102+
; AARCH64-NEXT: ret
103+
%z = call <2 x float> @llvm.canonicalize.v2f32(<2 x float> %x)
104+
ret <2 x float> %z
105+
}
106+
107+
define <2 x float> @fcanonicalize_v2f32_nnan(<2 x float> %x) {
108+
; AARCH64-LABEL: fcanonicalize_v2f32_nnan:
109+
; AARCH64: // %bb.0:
110+
; AARCH64-NEXT: fminnm v0.2s, v0.2s, v0.2s
111+
; AARCH64-NEXT: ret
112+
%z = call nnan <2 x float> @llvm.canonicalize.v2f32(<2 x float> %x)
113+
ret <2 x float> %z
114+
}
115+
116+
define <4 x float> @fcanonicalize_v4f32(<4 x float> %x) {
117+
; AARCH64-LABEL: fcanonicalize_v4f32:
118+
; AARCH64: // %bb.0:
119+
; AARCH64-NEXT: fminnm v0.4s, v0.4s, v0.4s
120+
; AARCH64-NEXT: ret
121+
%z = call <4 x float> @llvm.canonicalize.v4f32(<4 x float> %x)
122+
ret <4 x float> %z
123+
}
124+
125+
define <4 x float> @fcanonicalize_v4f32_nnan(<4 x float> %x) {
126+
; AARCH64-LABEL: fcanonicalize_v4f32_nnan:
127+
; AARCH64: // %bb.0:
128+
; AARCH64-NEXT: fminnm v0.4s, v0.4s, v0.4s
129+
; AARCH64-NEXT: ret
130+
%z = call nnan <4 x float> @llvm.canonicalize.v4f32(<4 x float> %x)
131+
ret <4 x float> %z
132+
}
133+
134+
define double @fcanonicalize_double(double %x) {
135+
; AARCH64-LABEL: fcanonicalize_double:
136+
; AARCH64: // %bb.0:
137+
; AARCH64-NEXT: fminnm d0, d0, d0
138+
; AARCH64-NEXT: ret
139+
%z = call double @llvm.canonicalize.f64(double %x)
140+
ret double %z
141+
}
142+
143+
define double @fcanonicalize_double_nnan(double %x) {
144+
; AARCH64-LABEL: fcanonicalize_double_nnan:
145+
; AARCH64: // %bb.0:
146+
; AARCH64-NEXT: fminnm d0, d0, d0
147+
; AARCH64-NEXT: ret
148+
%z = call nnan double @llvm.canonicalize.f64(double %x)
149+
ret double %z
150+
}
151+
152+
define <2 x double> @fcanonicalize_v2f64(<2 x double> %x) {
153+
; AARCH64-LABEL: fcanonicalize_v2f64:
154+
; AARCH64: // %bb.0:
155+
; AARCH64-NEXT: fminnm v0.2d, v0.2d, v0.2d
156+
; AARCH64-NEXT: ret
157+
%z = call <2 x double> @llvm.canonicalize.v2f64(<2 x double> %x)
158+
ret <2 x double> %z
159+
}
160+
161+
define <2 x double> @fcanonicalize_v2f64_nnan(<2 x double> %x) {
162+
; AARCH64-LABEL: fcanonicalize_v2f64_nnan:
163+
; AARCH64: // %bb.0:
164+
; AARCH64-NEXT: fminnm v0.2d, v0.2d, v0.2d
165+
; AARCH64-NEXT: ret
166+
%z = call nnan <2 x double> @llvm.canonicalize.v2f64(<2 x double> %x)
167+
ret <2 x double> %z
168+
}

0 commit comments

Comments
 (0)