Skip to content

Commit 256abfe

Browse files
committed
[Xtensa] Add fp16 conversion support
Close #91
1 parent c48ffc9 commit 256abfe

File tree

2 files changed

+178
-1
lines changed

2 files changed

+178
-1
lines changed

llvm/lib/Target/Xtensa/XtensaISelLowering.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -379,11 +379,22 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &tm,
379379
// Needed so that we don't try to implement f128 constant loads using
380380
// a load-and-extend of a f80 constant (in cases where the constant
381381
// would fit in an f80).
382-
for (MVT VT : MVT::fp_valuetypes())
382+
for (MVT VT : MVT::fp_valuetypes()) {
383+
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
384+
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
385+
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
383386
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
387+
}
388+
389+
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
390+
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
391+
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
392+
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
384393

385394
// Floating-point truncation and stores need to be done separately.
386395
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
396+
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
397+
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
387398

388399
// Implement custom stack allocations
389400
setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);

llvm/test/CodeGen/Xtensa/fp16.ll

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2+
; RUN: llc -mtriple=xtensa -mcpu=esp32 < %s | FileCheck --check-prefix=CHECK-ESP32 %s
3+
; RUN: llc -mtriple=xtensa -mcpu=esp32s3 < %s | FileCheck --check-prefix=CHECK-ESP32S3 %s
4+
; RUN: llc -mtriple=xtensa -mcpu=esp32s2 < %s | FileCheck --check-prefix=CHECK-ESP32S2 %s
5+
6+
; Target description for the module; the triple matches the -mtriple=xtensa
; passed to llc in the RUN lines.
target datalayout = "e-m:e-p:32:32-v1:8:8-i64:64-i128:128-n32"
7+
target triple = "xtensa"
8+
9+
; i16 globals. @x and @y are loaded by @foo, which also stores its result back
; to @x; @z is not referenced by any function visible in this file.
; NOTE(review): 12902 is 0x3266 -- presumably an IEEE half bit pattern
; (roughly 0.2); confirm.
@x = global i16 12902
10+
@y = global i16 0
11+
@z = common global i16 0
12+
13+
; Round-trips half-precision data through float: loads the i16 values of @x and
; @y, widens each with llvm.convert.from.fp16.f32, adds them as float, narrows
; the sum with llvm.convert.to.fp16.f32 and stores the i16 result back to @x.
;
; In the ESP32 and ESP32S3 checks the add is a single add.s placed between
; callx8 calls; in the ESP32S2 checks no FP instruction appears and every step
; goes through callx8 (the callee names are not visible in the checks).
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them rather than editing by hand.
define void @foo() nounwind {
14+
; CHECK-ESP32-LABEL: foo:
15+
; CHECK-ESP32: entry a1, 32
16+
; CHECK-ESP32-NEXT: l32r a6, .LCPI0_0
17+
; CHECK-ESP32-NEXT: l16ui a10, a6, 0
18+
; CHECK-ESP32-NEXT: l32r a5, .LCPI0_1
19+
; CHECK-ESP32-NEXT: callx8 a5
20+
; CHECK-ESP32-NEXT: mov.n a7, a10
21+
; CHECK-ESP32-NEXT: l32r a8, .LCPI0_2
22+
; CHECK-ESP32-NEXT: l16ui a10, a8, 0
23+
; CHECK-ESP32-NEXT: callx8 a5
24+
; CHECK-ESP32-NEXT: wfr f8, a10
25+
; CHECK-ESP32-NEXT: wfr f9, a7
26+
; CHECK-ESP32-NEXT: add.s f8, f9, f8
27+
; CHECK-ESP32-NEXT: rfr a10, f8
28+
; CHECK-ESP32-NEXT: l32r a8, .LCPI0_3
29+
; CHECK-ESP32-NEXT: callx8 a8
30+
; CHECK-ESP32-NEXT: s16i a10, a6, 0
31+
; CHECK-ESP32-NEXT: retw.n
32+
;
33+
; CHECK-ESP32S3-LABEL: foo:
34+
; CHECK-ESP32S3: entry a1, 32
35+
; CHECK-ESP32S3-NEXT: l32r a6, .LCPI0_0
36+
; CHECK-ESP32S3-NEXT: l16ui a10, a6, 0
37+
; CHECK-ESP32S3-NEXT: l32r a5, .LCPI0_1
38+
; CHECK-ESP32S3-NEXT: callx8 a5
39+
; CHECK-ESP32S3-NEXT: mov.n a7, a10
40+
; CHECK-ESP32S3-NEXT: l32r a8, .LCPI0_2
41+
; CHECK-ESP32S3-NEXT: l16ui a10, a8, 0
42+
; CHECK-ESP32S3-NEXT: callx8 a5
43+
; CHECK-ESP32S3-NEXT: wfr f8, a10
44+
; CHECK-ESP32S3-NEXT: wfr f9, a7
45+
; CHECK-ESP32S3-NEXT: add.s f8, f9, f8
46+
; CHECK-ESP32S3-NEXT: rfr a10, f8
47+
; CHECK-ESP32S3-NEXT: l32r a8, .LCPI0_3
48+
; CHECK-ESP32S3-NEXT: callx8 a8
49+
; CHECK-ESP32S3-NEXT: s16i a10, a6, 0
50+
; CHECK-ESP32S3-NEXT: retw.n
51+
;
52+
; CHECK-ESP32S2-LABEL: foo:
53+
; CHECK-ESP32S2: entry a1, 32
54+
; CHECK-ESP32S2-NEXT: l32r a6, .LCPI0_0
55+
; CHECK-ESP32S2-NEXT: l16ui a10, a6, 0
56+
; CHECK-ESP32S2-NEXT: l32r a5, .LCPI0_1
57+
; CHECK-ESP32S2-NEXT: callx8 a5
58+
; CHECK-ESP32S2-NEXT: mov.n a7, a10
59+
; CHECK-ESP32S2-NEXT: l32r a8, .LCPI0_2
60+
; CHECK-ESP32S2-NEXT: l16ui a10, a8, 0
61+
; CHECK-ESP32S2-NEXT: callx8 a5
62+
; CHECK-ESP32S2-NEXT: mov.n a11, a10
63+
; CHECK-ESP32S2-NEXT: l32r a8, .LCPI0_3
64+
; CHECK-ESP32S2-NEXT: mov.n a10, a7
65+
; CHECK-ESP32S2-NEXT: callx8 a8
66+
; CHECK-ESP32S2-NEXT: l32r a7, .LCPI0_4
67+
; CHECK-ESP32S2-NEXT: callx8 a7
68+
; CHECK-ESP32S2-NEXT: l32r a8, .LCPI0_5
69+
; CHECK-ESP32S2-NEXT: and a10, a10, a8
70+
; CHECK-ESP32S2-NEXT: callx8 a5
71+
; CHECK-ESP32S2-NEXT: callx8 a7
72+
; CHECK-ESP32S2-NEXT: s16i a10, a6, 0
73+
; CHECK-ESP32S2-NEXT: retw.n
74+
entry:
75+
%0 = load i16, ptr @x, align 2
76+
%1 = load i16, ptr @y, align 2
77+
%2 = tail call float @llvm.convert.from.fp16.f32(i16 %0)
78+
%3 = tail call float @llvm.convert.from.fp16.f32(i16 %1)
79+
%4 = fadd float %2, %3
80+
%5 = tail call i16 @llvm.convert.to.fp16.f32(float %4)
81+
store i16 %5, ptr @x, align 2
82+
ret void
83+
}
84+
85+
; Converts the i16 argument to double with llvm.convert.from.fp16.f64 and
; returns it.
;
; All three check prefixes expect two callx8 calls, after which a10/a11 are
; copied into a2/a3 before retw.n. The ESP32S2 checks additionally mask the
; argument with a constant-pool value (`and a10, a2, a8`) before the first call.
; The CHECK lines are autogenerated; regenerate them rather than editing by hand.
define double @test_from_fp16(i16 %in) {
86+
; CHECK-ESP32-LABEL: test_from_fp16:
87+
; CHECK-ESP32: entry a1, 32
88+
; CHECK-ESP32-NEXT: l32r a8, .LCPI1_0
89+
; CHECK-ESP32-NEXT: mov.n a10, a2
90+
; CHECK-ESP32-NEXT: callx8 a8
91+
; CHECK-ESP32-NEXT: l32r a8, .LCPI1_1
92+
; CHECK-ESP32-NEXT: callx8 a8
93+
; CHECK-ESP32-NEXT: mov.n a2, a10
94+
; CHECK-ESP32-NEXT: mov.n a3, a11
95+
; CHECK-ESP32-NEXT: retw.n
96+
;
97+
; CHECK-ESP32S3-LABEL: test_from_fp16:
98+
; CHECK-ESP32S3: entry a1, 32
99+
; CHECK-ESP32S3-NEXT: l32r a8, .LCPI1_0
100+
; CHECK-ESP32S3-NEXT: mov.n a10, a2
101+
; CHECK-ESP32S3-NEXT: callx8 a8
102+
; CHECK-ESP32S3-NEXT: l32r a8, .LCPI1_1
103+
; CHECK-ESP32S3-NEXT: callx8 a8
104+
; CHECK-ESP32S3-NEXT: mov.n a2, a10
105+
; CHECK-ESP32S3-NEXT: mov.n a3, a11
106+
; CHECK-ESP32S3-NEXT: retw.n
107+
;
108+
; CHECK-ESP32S2-LABEL: test_from_fp16:
109+
; CHECK-ESP32S2: entry a1, 32
110+
; CHECK-ESP32S2-NEXT: l32r a8, .LCPI1_0
111+
; CHECK-ESP32S2-NEXT: and a10, a2, a8
112+
; CHECK-ESP32S2-NEXT: l32r a8, .LCPI1_1
113+
; CHECK-ESP32S2-NEXT: callx8 a8
114+
; CHECK-ESP32S2-NEXT: l32r a8, .LCPI1_2
115+
; CHECK-ESP32S2-NEXT: callx8 a8
116+
; CHECK-ESP32S2-NEXT: mov.n a2, a10
117+
; CHECK-ESP32S2-NEXT: mov.n a3, a11
118+
; CHECK-ESP32S2-NEXT: retw.n
119+
%val = call double @llvm.convert.from.fp16.f64(i16 %in)
120+
ret double %val
121+
}
122+
123+
; Converts the double argument to an i16 result with llvm.convert.to.fp16.f64
; and returns it.
;
; The ESP32 and ESP32S3 checks expect a single callx8 followed by an `and` with
; a constant-pool value that writes a2. The ESP32S2 checks expect three callx8
; calls, with the `and` applied to a10 after the first one, and the final a10
; copied into a2.
; The CHECK lines are autogenerated; regenerate them rather than editing by hand.
define i16 @test_to_fp16(double %in) {
124+
; CHECK-ESP32-LABEL: test_to_fp16:
125+
; CHECK-ESP32: entry a1, 32
126+
; CHECK-ESP32-NEXT: l32r a8, .LCPI2_0
127+
; CHECK-ESP32-NEXT: mov.n a10, a2
128+
; CHECK-ESP32-NEXT: mov.n a11, a3
129+
; CHECK-ESP32-NEXT: callx8 a8
130+
; CHECK-ESP32-NEXT: l32r a8, .LCPI2_1
131+
; CHECK-ESP32-NEXT: and a2, a10, a8
132+
; CHECK-ESP32-NEXT: retw.n
133+
;
134+
; CHECK-ESP32S3-LABEL: test_to_fp16:
135+
; CHECK-ESP32S3: entry a1, 32
136+
; CHECK-ESP32S3-NEXT: l32r a8, .LCPI2_0
137+
; CHECK-ESP32S3-NEXT: mov.n a10, a2
138+
; CHECK-ESP32S3-NEXT: mov.n a11, a3
139+
; CHECK-ESP32S3-NEXT: callx8 a8
140+
; CHECK-ESP32S3-NEXT: l32r a8, .LCPI2_1
141+
; CHECK-ESP32S3-NEXT: and a2, a10, a8
142+
; CHECK-ESP32S3-NEXT: retw.n
143+
;
144+
; CHECK-ESP32S2-LABEL: test_to_fp16:
145+
; CHECK-ESP32S2: entry a1, 32
146+
; CHECK-ESP32S2-NEXT: l32r a8, .LCPI2_0
147+
; CHECK-ESP32S2-NEXT: mov.n a10, a2
148+
; CHECK-ESP32S2-NEXT: mov.n a11, a3
149+
; CHECK-ESP32S2-NEXT: callx8 a8
150+
; CHECK-ESP32S2-NEXT: l32r a8, .LCPI2_1
151+
; CHECK-ESP32S2-NEXT: and a10, a10, a8
152+
; CHECK-ESP32S2-NEXT: l32r a8, .LCPI2_2
153+
; CHECK-ESP32S2-NEXT: callx8 a8
154+
; CHECK-ESP32S2-NEXT: l32r a8, .LCPI2_3
155+
; CHECK-ESP32S2-NEXT: callx8 a8
156+
; CHECK-ESP32S2-NEXT: mov.n a2, a10
157+
; CHECK-ESP32S2-NEXT: retw.n
158+
%val = call i16 @llvm.convert.to.fp16.f64(double %in)
159+
ret i16 %val
160+
}
161+
162+
; Declarations of the fp16 conversion intrinsics called by the functions above.
; The from.fp16 variants take an i16 and yield float/double.
declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
163+
declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone
164+
165+
; The to.fp16 variants take float/double and yield an i16.
declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
166+
declare i16 @llvm.convert.to.fp16.f64(double) nounwind readnone

0 commit comments

Comments
 (0)