Skip to content

Commit ef87075

Browse files
author
Jessica Paquette
committed
[AArch64][GlobalISel] Legalize narrow scalar FP arithmetic
Widen narrow FP arithmetic ops (e.g. G_FADD). Without full FP16 support, narrow scalars are widened to s32; with full FP16 support, they are widened to s16 instead. Example: https://godbolt.org/z/TbT9Pqa7e Differential Revision: https://reviews.llvm.org/D108660
1 parent cd2134e commit ef87075

File tree

2 files changed

+158
-1
lines changed

2 files changed

+158
-1
lines changed

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
194194
.widenScalarToNextPow2(0);
195195

196196
getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG})
197-
.legalFor({s32, s64, v2s64, v4s32, v2s32})
197+
.legalFor({MinFPScalar, s32, s64, v2s64, v4s32, v2s32})
198+
.clampScalar(0, MinFPScalar, s64)
198199
.clampNumElements(0, v2s32, v4s32)
199200
.clampNumElements(0, v2s64, v2s64);
200201

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple aarch64-unknown-unknown -verify-machineinstrs -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=NO-FP16
3+
# RUN: llc -mtriple aarch64-unknown-unknown -verify-machineinstrs -mattr=+fullfp16 -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=FP16
4+
5+
...
6+
---
7+
name: fadd
8+
tracksRegLiveness: true
9+
body: |
10+
bb.0:
11+
liveins: $h0, $h1
12+
13+
; NO-FP16-LABEL: name: fadd
14+
; NO-FP16: liveins: $h0, $h1
15+
; NO-FP16: %x:_(s16) = COPY $h0
16+
; NO-FP16: %y:_(s16) = COPY $h1
17+
; NO-FP16: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %x(s16)
18+
; NO-FP16: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT %y(s16)
19+
; NO-FP16: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT]], [[FPEXT1]]
20+
; NO-FP16: %op:_(s16) = G_FPTRUNC [[FADD]](s32)
21+
; NO-FP16: $h0 = COPY %op(s16)
22+
; NO-FP16: RET_ReallyLR implicit $h0
23+
; FP16-LABEL: name: fadd
24+
; FP16: liveins: $h0, $h1
25+
; FP16: %x:_(s16) = COPY $h0
26+
; FP16: %y:_(s16) = COPY $h1
27+
; FP16: %op:_(s16) = G_FADD %x, %y
28+
; FP16: $h0 = COPY %op(s16)
29+
; FP16: RET_ReallyLR implicit $h0
30+
%x:_(s16) = COPY $h0
31+
%y:_(s16) = COPY $h1
32+
%op:_(s16) = G_FADD %x, %y
33+
$h0 = COPY %op(s16)
34+
RET_ReallyLR implicit $h0
35+
36+
...
37+
---
38+
name: fsub
39+
tracksRegLiveness: true
40+
body: |
41+
bb.0:
42+
liveins: $h0, $h1
43+
44+
; NO-FP16-LABEL: name: fsub
45+
; NO-FP16: liveins: $h0, $h1
46+
; NO-FP16: %x:_(s16) = COPY $h0
47+
; NO-FP16: %y:_(s16) = COPY $h1
48+
; NO-FP16: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %x(s16)
49+
; NO-FP16: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT %y(s16)
50+
; NO-FP16: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FPEXT]], [[FPEXT1]]
51+
; NO-FP16: %op:_(s16) = G_FPTRUNC [[FSUB]](s32)
52+
; NO-FP16: $h0 = COPY %op(s16)
53+
; NO-FP16: RET_ReallyLR implicit $h0
54+
; FP16-LABEL: name: fsub
55+
; FP16: liveins: $h0, $h1
56+
; FP16: %x:_(s16) = COPY $h0
57+
; FP16: %y:_(s16) = COPY $h1
58+
; FP16: %op:_(s16) = G_FSUB %x, %y
59+
; FP16: $h0 = COPY %op(s16)
60+
; FP16: RET_ReallyLR implicit $h0
61+
%x:_(s16) = COPY $h0
62+
%y:_(s16) = COPY $h1
63+
%op:_(s16) = G_FSUB %x, %y
64+
$h0 = COPY %op(s16)
65+
RET_ReallyLR implicit $h0
66+
67+
...
68+
---
69+
name: fmul
70+
tracksRegLiveness: true
71+
body: |
72+
bb.0:
73+
liveins: $h0, $h1
74+
75+
; NO-FP16-LABEL: name: fmul
76+
; NO-FP16: liveins: $h0, $h1
77+
; NO-FP16: %x:_(s16) = COPY $h0
78+
; NO-FP16: %y:_(s16) = COPY $h1
79+
; NO-FP16: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %x(s16)
80+
; NO-FP16: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT %y(s16)
81+
; NO-FP16: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[FPEXT1]]
82+
; NO-FP16: %op:_(s16) = G_FPTRUNC [[FMUL]](s32)
83+
; NO-FP16: $h0 = COPY %op(s16)
84+
; NO-FP16: RET_ReallyLR implicit $h0
85+
; FP16-LABEL: name: fmul
86+
; FP16: liveins: $h0, $h1
87+
; FP16: %x:_(s16) = COPY $h0
88+
; FP16: %y:_(s16) = COPY $h1
89+
; FP16: %op:_(s16) = G_FMUL %x, %y
90+
; FP16: $h0 = COPY %op(s16)
91+
; FP16: RET_ReallyLR implicit $h0
92+
%x:_(s16) = COPY $h0
93+
%y:_(s16) = COPY $h1
94+
%op:_(s16) = G_FMUL %x, %y
95+
$h0 = COPY %op(s16)
96+
RET_ReallyLR implicit $h0
97+
98+
...
99+
---
100+
name: fdiv
101+
tracksRegLiveness: true
102+
body: |
103+
bb.0:
104+
liveins: $h0, $h1
105+
106+
; NO-FP16-LABEL: name: fdiv
107+
; NO-FP16: liveins: $h0, $h1
108+
; NO-FP16: %x:_(s16) = COPY $h0
109+
; NO-FP16: %y:_(s16) = COPY $h1
110+
; NO-FP16: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %x(s16)
111+
; NO-FP16: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT %y(s16)
112+
; NO-FP16: [[FDIV:%[0-9]+]]:_(s32) = G_FDIV [[FPEXT]], [[FPEXT1]]
113+
; NO-FP16: %op:_(s16) = G_FPTRUNC [[FDIV]](s32)
114+
; NO-FP16: $h0 = COPY %op(s16)
115+
; NO-FP16: RET_ReallyLR implicit $h0
116+
; FP16-LABEL: name: fdiv
117+
; FP16: liveins: $h0, $h1
118+
; FP16: %x:_(s16) = COPY $h0
119+
; FP16: %y:_(s16) = COPY $h1
120+
; FP16: %op:_(s16) = G_FDIV %x, %y
121+
; FP16: $h0 = COPY %op(s16)
122+
; FP16: RET_ReallyLR implicit $h0
123+
%x:_(s16) = COPY $h0
124+
%y:_(s16) = COPY $h1
125+
%op:_(s16) = G_FDIV %x, %y
126+
$h0 = COPY %op(s16)
127+
RET_ReallyLR implicit $h0
128+
129+
...
130+
---
131+
name: fneg
132+
tracksRegLiveness: true
133+
body: |
134+
bb.0:
135+
liveins: $h0
136+
137+
; NO-FP16-LABEL: name: fneg
138+
; NO-FP16: liveins: $h0
139+
; NO-FP16: %x:_(s16) = COPY $h0
140+
; NO-FP16: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %x(s16)
141+
; NO-FP16: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT]]
142+
; NO-FP16: %op:_(s16) = G_FPTRUNC [[FNEG]](s32)
143+
; NO-FP16: $h0 = COPY %op(s16)
144+
; NO-FP16: RET_ReallyLR implicit $h0
145+
; FP16-LABEL: name: fneg
146+
; FP16: liveins: $h0
147+
; FP16: %x:_(s16) = COPY $h0
148+
; FP16: %op:_(s16) = G_FNEG %x
149+
; FP16: $h0 = COPY %op(s16)
150+
; FP16: RET_ReallyLR implicit $h0
151+
%x:_(s16) = COPY $h0
152+
%op:_(s16) = G_FNEG %x
153+
$h0 = COPY %op(s16)
154+
RET_ReallyLR implicit $h0
155+
156+
...

0 commit comments

Comments
 (0)