Skip to content

Commit fa9f6b5

Browse files
authored
[AArch64][NEON] Add famax/famin codegen patterns (#103027)
- min(abs(a), abs(b)) -> famin(a, b), max(abs(a), abs(b)) -> famax(a, b) - Changes to LLVM - llvm/lib/Target/AArch64/AArch64InstrInfo.td - Add pattern for NEON types - +llvm/test/CodeGen/AArch64/aarch64-neon-faminmax.ll - Add tests with and without +faminmax flag.
1 parent ba40053 commit fa9f6b5

File tree

2 files changed

+221
-2
lines changed

2 files changed

+221
-2
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10147,10 +10147,20 @@ let Predicates = [HasFP8] in {
1014710147
defm FSCALE : SIMDThreeSameVectorFP<0b1, 0b1, 0b111, "fscale", null_frag>;
1014810148
} // End let Predicates = [HasFP8]
1014910149

10150-
let Predicates = [HasFAMINMAX] in {
10150+
let Predicates = [HasNEON, HasFAMINMAX] in {
1015110151
defm FAMAX : SIMDThreeSameVectorFP<0b0, 0b1, 0b011, "famax", null_frag>;
1015210152
defm FAMIN : SIMDThreeSameVectorFP<0b1, 0b1, 0b011, "famin", null_frag>;
10153-
} // End let Predicates = [HasFAMAXMIN]
10153+
10154+
foreach Ty = [v4f16, v8f16, v2f32, v4f32, v2f64] in {
10155+
// Replace min(abs(a), abs(b)) with famin(a, b)
10156+
def : Pat<(Ty (fminimum (fabs Ty:$Rn), (fabs Ty:$Rm))),
10157+
(!cast<Instruction>("FAMIN"#Ty) Ty:$Rn, Ty:$Rm)>;
10158+
10159+
// Replace max(abs(a), abs(b)) with famax(a, b)
10160+
def : Pat<(Ty (fmaximum (fabs Ty:$Rn), (fabs Ty:$Rm))),
10161+
(!cast<Instruction>("FAMAX"#Ty) Ty:$Rn, Ty:$Rm)>;
10162+
}
10163+
} // End let Predicates = [HasNEON, HasFAMINMAX]
1015410164

1015510165
let Predicates = [HasFP8FMA] in {
1015610166
defm FMLALBlane : SIMDThreeSameVectorMLAIndex<0b0, "fmlalb">;
Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mattr=+faminmax -verify-machineinstrs %s -o - | FileCheck %s
3+
; RUN: llc -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-NO-FAMINMAX
4+
5+
target triple = "aarch64-unknown-linux-gnu"
6+
7+
; Replace min(abs(a), abs(b)) with famin(a, b)
8+
; Replace max(abs(a), abs(b)) with famax(a, b)
9+
10+
define <4 x half> @test_max_v4f16(<4 x half> %a, <4 x half> %b) #0 {
11+
; CHECK-LABEL: test_max_v4f16:
12+
; CHECK: // %bb.0:
13+
; CHECK-NEXT: famax v0.4h, v0.4h, v1.4h
14+
; CHECK-NEXT: ret
15+
;
16+
; CHECK-NO-FAMINMAX-LABEL: test_max_v4f16:
17+
; CHECK-NO-FAMINMAX: // %bb.0:
18+
; CHECK-NO-FAMINMAX-NEXT: fabs v0.4h, v0.4h
19+
; CHECK-NO-FAMINMAX-NEXT: fabs v1.4h, v1.4h
20+
; CHECK-NO-FAMINMAX-NEXT: fmax v0.4h, v0.4h, v1.4h
21+
; CHECK-NO-FAMINMAX-NEXT: ret
22+
%aa = call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
23+
%ab = call <4 x half> @llvm.fabs.v4f16(<4 x half> %b)
24+
%r = call <4 x half> @llvm.maximum.v4f16(<4 x half> %aa, <4 x half> %ab)
25+
ret <4 x half> %r
26+
}
27+
28+
define <4 x half> @test_min_v4f16(<4 x half> %a, <4 x half> %b) #0 {
29+
; CHECK-LABEL: test_min_v4f16:
30+
; CHECK: // %bb.0:
31+
; CHECK-NEXT: famin v0.4h, v0.4h, v1.4h
32+
; CHECK-NEXT: ret
33+
;
34+
; CHECK-NO-FAMINMAX-LABEL: test_min_v4f16:
35+
; CHECK-NO-FAMINMAX: // %bb.0:
36+
; CHECK-NO-FAMINMAX-NEXT: fabs v0.4h, v0.4h
37+
; CHECK-NO-FAMINMAX-NEXT: fabs v1.4h, v1.4h
38+
; CHECK-NO-FAMINMAX-NEXT: fmin v0.4h, v0.4h, v1.4h
39+
; CHECK-NO-FAMINMAX-NEXT: ret
40+
%aa = call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
41+
%ab = call <4 x half> @llvm.fabs.v4f16(<4 x half> %b)
42+
%r = call <4 x half> @llvm.minimum.v4f16(<4 x half> %aa, <4 x half> %ab)
43+
ret <4 x half> %r
44+
}
45+
46+
define <8 x half> @test_max_v8f16(<8 x half> %a, <8 x half> %b) #0 {
47+
; CHECK-LABEL: test_max_v8f16:
48+
; CHECK: // %bb.0:
49+
; CHECK-NEXT: famax v0.8h, v0.8h, v1.8h
50+
; CHECK-NEXT: ret
51+
;
52+
; CHECK-NO-FAMINMAX-LABEL: test_max_v8f16:
53+
; CHECK-NO-FAMINMAX: // %bb.0:
54+
; CHECK-NO-FAMINMAX-NEXT: fabs v0.8h, v0.8h
55+
; CHECK-NO-FAMINMAX-NEXT: fabs v1.8h, v1.8h
56+
; CHECK-NO-FAMINMAX-NEXT: fmax v0.8h, v0.8h, v1.8h
57+
; CHECK-NO-FAMINMAX-NEXT: ret
58+
%aa = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
59+
%ab = call <8 x half> @llvm.fabs.v8f16(<8 x half> %b)
60+
%r = call <8 x half> @llvm.maximum.v8f16(<8 x half> %aa, <8 x half> %ab)
61+
ret <8 x half> %r
62+
}
63+
64+
define <8 x half> @test_min_v8f16(<8 x half> %a, <8 x half> %b) #0 {
65+
; CHECK-LABEL: test_min_v8f16:
66+
; CHECK: // %bb.0:
67+
; CHECK-NEXT: famin v0.8h, v0.8h, v1.8h
68+
; CHECK-NEXT: ret
69+
;
70+
; CHECK-NO-FAMINMAX-LABEL: test_min_v8f16:
71+
; CHECK-NO-FAMINMAX: // %bb.0:
72+
; CHECK-NO-FAMINMAX-NEXT: fabs v0.8h, v0.8h
73+
; CHECK-NO-FAMINMAX-NEXT: fabs v1.8h, v1.8h
74+
; CHECK-NO-FAMINMAX-NEXT: fmin v0.8h, v0.8h, v1.8h
75+
; CHECK-NO-FAMINMAX-NEXT: ret
76+
%aa = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
77+
%ab = call <8 x half> @llvm.fabs.v8f16(<8 x half> %b)
78+
%r = call <8 x half> @llvm.minimum.v8f16(<8 x half> %aa, <8 x half> %ab)
79+
ret <8 x half> %r
80+
}
81+
82+
define <2 x float> @test_max_v2f32(<2 x float> %a, <2 x float> %b) {
83+
; CHECK-LABEL: test_max_v2f32:
84+
; CHECK: // %bb.0:
85+
; CHECK-NEXT: famax v0.2s, v0.2s, v1.2s
86+
; CHECK-NEXT: ret
87+
;
88+
; CHECK-NO-FAMINMAX-LABEL: test_max_v2f32:
89+
; CHECK-NO-FAMINMAX: // %bb.0:
90+
; CHECK-NO-FAMINMAX-NEXT: fabs v0.2s, v0.2s
91+
; CHECK-NO-FAMINMAX-NEXT: fabs v1.2s, v1.2s
92+
; CHECK-NO-FAMINMAX-NEXT: fmax v0.2s, v0.2s, v1.2s
93+
; CHECK-NO-FAMINMAX-NEXT: ret
94+
%aa = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
95+
%ab = call <2 x float> @llvm.fabs.v2f32(<2 x float> %b)
96+
%r = call <2 x float> @llvm.maximum.v2f32(<2 x float> %aa, <2 x float> %ab)
97+
ret <2 x float> %r
98+
}
99+
100+
define <2 x float> @test_min_v2f32(<2 x float> %a, <2 x float> %b) {
101+
; CHECK-LABEL: test_min_v2f32:
102+
; CHECK: // %bb.0:
103+
; CHECK-NEXT: famin v0.2s, v0.2s, v1.2s
104+
; CHECK-NEXT: ret
105+
;
106+
; CHECK-NO-FAMINMAX-LABEL: test_min_v2f32:
107+
; CHECK-NO-FAMINMAX: // %bb.0:
108+
; CHECK-NO-FAMINMAX-NEXT: fabs v0.2s, v0.2s
109+
; CHECK-NO-FAMINMAX-NEXT: fabs v1.2s, v1.2s
110+
; CHECK-NO-FAMINMAX-NEXT: fmin v0.2s, v0.2s, v1.2s
111+
; CHECK-NO-FAMINMAX-NEXT: ret
112+
%aa = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
113+
%ab = call <2 x float> @llvm.fabs.v2f32(<2 x float> %b)
114+
%r = call <2 x float> @llvm.minimum.v2f32(<2 x float> %aa, <2 x float> %ab)
115+
ret <2 x float> %r
116+
}
117+
118+
define <4 x float> @test_max_v4f32(<4 x float> %a, <4 x float> %b) {
119+
; CHECK-LABEL: test_max_v4f32:
120+
; CHECK: // %bb.0:
121+
; CHECK-NEXT: famax v0.4s, v0.4s, v1.4s
122+
; CHECK-NEXT: ret
123+
;
124+
; CHECK-NO-FAMINMAX-LABEL: test_max_v4f32:
125+
; CHECK-NO-FAMINMAX: // %bb.0:
126+
; CHECK-NO-FAMINMAX-NEXT: fabs v0.4s, v0.4s
127+
; CHECK-NO-FAMINMAX-NEXT: fabs v1.4s, v1.4s
128+
; CHECK-NO-FAMINMAX-NEXT: fmax v0.4s, v0.4s, v1.4s
129+
; CHECK-NO-FAMINMAX-NEXT: ret
130+
%aa = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
131+
%ab = call <4 x float> @llvm.fabs.v4f32(<4 x float> %b)
132+
%r = call <4 x float> @llvm.maximum.v4f32(<4 x float> %aa, <4 x float> %ab)
133+
ret <4 x float> %r
134+
}
135+
136+
define <4 x float> @test_min_v4f32(<4 x float> %a, <4 x float> %b) {
137+
; CHECK-LABEL: test_min_v4f32:
138+
; CHECK: // %bb.0:
139+
; CHECK-NEXT: famin v0.4s, v0.4s, v1.4s
140+
; CHECK-NEXT: ret
141+
;
142+
; CHECK-NO-FAMINMAX-LABEL: test_min_v4f32:
143+
; CHECK-NO-FAMINMAX: // %bb.0:
144+
; CHECK-NO-FAMINMAX-NEXT: fabs v0.4s, v0.4s
145+
; CHECK-NO-FAMINMAX-NEXT: fabs v1.4s, v1.4s
146+
; CHECK-NO-FAMINMAX-NEXT: fmin v0.4s, v0.4s, v1.4s
147+
; CHECK-NO-FAMINMAX-NEXT: ret
148+
%aa = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
149+
%ab = call <4 x float> @llvm.fabs.v4f32(<4 x float> %b)
150+
%r = call <4 x float> @llvm.minimum.v4f32(<4 x float> %aa, <4 x float> %ab)
151+
ret <4 x float> %r
152+
}
153+
154+
define <2 x double> @test_max_v2f64(<2 x double> %a, <2 x double> %b) {
155+
; CHECK-LABEL: test_max_v2f64:
156+
; CHECK: // %bb.0:
157+
; CHECK-NEXT: famax v0.2d, v0.2d, v1.2d
158+
; CHECK-NEXT: ret
159+
;
160+
; CHECK-NO-FAMINMAX-LABEL: test_max_v2f64:
161+
; CHECK-NO-FAMINMAX: // %bb.0:
162+
; CHECK-NO-FAMINMAX-NEXT: fabs v0.2d, v0.2d
163+
; CHECK-NO-FAMINMAX-NEXT: fabs v1.2d, v1.2d
164+
; CHECK-NO-FAMINMAX-NEXT: fmax v0.2d, v0.2d, v1.2d
165+
; CHECK-NO-FAMINMAX-NEXT: ret
166+
%aa = call <2 x double> @llvm.fabs.v2f64(<2 x double> %a)
167+
%ab = call <2 x double> @llvm.fabs.v2f64(<2 x double> %b)
168+
%r = call <2 x double> @llvm.maximum.v2f64(<2 x double> %aa, <2 x double> %ab)
169+
ret <2 x double> %r
170+
}
171+
172+
define <2 x double> @test_min_v2f64(<2 x double> %a, <2 x double> %b) {
173+
; CHECK-LABEL: test_min_v2f64:
174+
; CHECK: // %bb.0:
175+
; CHECK-NEXT: famin v0.2d, v0.2d, v1.2d
176+
; CHECK-NEXT: ret
177+
;
178+
; CHECK-NO-FAMINMAX-LABEL: test_min_v2f64:
179+
; CHECK-NO-FAMINMAX: // %bb.0:
180+
; CHECK-NO-FAMINMAX-NEXT: fabs v0.2d, v0.2d
181+
; CHECK-NO-FAMINMAX-NEXT: fabs v1.2d, v1.2d
182+
; CHECK-NO-FAMINMAX-NEXT: fmin v0.2d, v0.2d, v1.2d
183+
; CHECK-NO-FAMINMAX-NEXT: ret
184+
%aa = call <2 x double> @llvm.fabs.v2f64(<2 x double> %a)
185+
%ab = call <2 x double> @llvm.fabs.v2f64(<2 x double> %b)
186+
%r = call <2 x double> @llvm.minimum.v2f64(<2 x double> %aa, <2 x double> %ab)
187+
ret <2 x double> %r
188+
}
189+
190+
191+
declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
192+
declare <4 x half> @llvm.fabs.v4f16(<4 x half>)
193+
declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
194+
declare <2 x float> @llvm.fabs.v2f32(<2 x float>)
195+
declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
196+
197+
declare <8 x half> @llvm.minimum.v8f16(<8 x half>, <8 x half>)
198+
declare <4 x half> @llvm.minimum.v4f16(<4 x half>, <4 x half>)
199+
declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>)
200+
declare <2 x float> @llvm.minimum.v2f32(<2 x float>, <2 x float>)
201+
declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>)
202+
203+
declare <8 x half> @llvm.maximum.v8f16(<8 x half>, <8 x half>)
204+
declare <4 x half> @llvm.maximum.v4f16(<4 x half>, <4 x half>)
205+
declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>)
206+
declare <2 x float> @llvm.maximum.v2f32(<2 x float>, <2 x float>)
207+
declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>)
208+
209+
attributes #0 = { nounwind "target-features"="+fullfp16" }

0 commit comments

Comments
 (0)