Skip to content

Commit 7515e81

Browse files
author
Sjoerd Meijer
committed
[AArch64] Add some float -> int -> float conversion patterns
This adds some conversion match patterns for which we want to keep the int values in FP registers using the corresponding NEON instructions (not the FP instructions) to avoid more costly int <-> fp register transfers. Differential Revision: https://reviews.llvm.org/D98956
1 parent 02b51e5 commit 7515e81

File tree

2 files changed

+95
-0
lines changed

2 files changed

+95
-0
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4746,6 +4746,27 @@ def : Pat<(f64 (AArch64frsqrts (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
47464746
def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
47474747
(FRSQRTSv2f64 FPR128:$Rn, FPR128:$Rm)>;
47484748

4749+
// Some float -> int -> float conversion patterns for which we want to keep the
4750+
// int values in FP registers using the corresponding NEON instructions to
4751+
// avoid more costly int <-> fp register transfers.
4752+
let Predicates = [HasNEON] in {
// Signed f64 -> i64 -> f64 round trip, selected as the scalar NEON
// FCVTZS/SCVTF pair so the intermediate integer stays in an FP register.
def : Pat<(f64 (sint_to_fp (i64 (fp_to_sint f64:$Rn)))),
          (SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
// Signed f32 -> i32 -> f32 round trip.
def : Pat<(f32 (sint_to_fp (i32 (fp_to_sint f32:$Rn)))),
          (SCVTFv1i32 (i32 (FCVTZSv1i32 f32:$Rn)))>;
// Unsigned f64 -> i64 -> f64 round trip (FCVTZU/UCVTF pair).
def : Pat<(f64 (uint_to_fp (i64 (fp_to_uint f64:$Rn)))),
          (UCVTFv1i64 (i64 (FCVTZUv1i64 f64:$Rn)))>;
// Unsigned f32 -> i32 -> f32 round trip.
def : Pat<(f32 (uint_to_fp (i32 (fp_to_uint f32:$Rn)))),
          (UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;

// A nested `let Predicates` replaces the enclosing list rather than extending
// it, so HasNEON is repeated here to keep the half-precision patterns guarded
// by both features.
let Predicates = [HasNEON, HasFullFP16] in {
// NOTE(review): the matched intermediate type is i32, while the selected
// instructions are the half-precision scalar forms - confirm the integer
// range of those forms covers every f16 input whose i32 conversion is defined
// (in particular for the signed pattern).
def : Pat<(f16 (sint_to_fp (i32 (fp_to_sint f16:$Rn)))),
          (SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
def : Pat<(f16 (uint_to_fp (i32 (fp_to_uint f16:$Rn)))),
          (UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>;
}
}
4769+
47494770
// If an integer is about to be converted to a floating point value,
47504771
// just load it on the floating point unit.
47514772
// Here are the patterns for 8 and 16-bits to float.
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=aarch64 -o - %s -mattr=+neon,+fullfp16 | FileCheck %s
3+
4+
; Signed round trip double -> i64 -> double.
define double @t1(double %x) {
; CHECK-LABEL: t1:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzs d0, d0
; CHECK-NEXT: scvtf d0, d0
; CHECK-NEXT: ret
entry:
  %as_int = fptosi double %x to i64
  %as_fp = sitofp i64 %as_int to double
  ret double %as_fp
}
15+
16+
; Signed round trip float -> i32 -> float.
define float @t2(float %x) {
; CHECK-LABEL: t2:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzs s0, s0
; CHECK-NEXT: scvtf s0, s0
; CHECK-NEXT: ret
entry:
  %as_int = fptosi float %x to i32
  %as_fp = sitofp i32 %as_int to float
  ret float %as_fp
}
27+
28+
; Signed round trip half -> i32 -> half.
; NOTE(review): the IR intermediate is i32 while the expected code uses the
; h-register conversions - confirm both agree for inputs outside the 16-bit
; signed range.
define half @t3(half %x) {
; CHECK-LABEL: t3:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzs h0, h0
; CHECK-NEXT: scvtf h0, h0
; CHECK-NEXT: ret
entry:
  %as_int = fptosi half %x to i32
  %as_fp = sitofp i32 %as_int to half
  ret half %as_fp
}
39+
40+
; Unsigned round trip double -> i64 -> double.
define double @t4(double %x) {
; CHECK-LABEL: t4:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzu d0, d0
; CHECK-NEXT: ucvtf d0, d0
; CHECK-NEXT: ret
entry:
  %as_uint = fptoui double %x to i64
  %as_fp = uitofp i64 %as_uint to double
  ret double %as_fp
}
51+
52+
; Unsigned round trip float -> i32 -> float.
define float @t5(float %x) {
; CHECK-LABEL: t5:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzu s0, s0
; CHECK-NEXT: ucvtf s0, s0
; CHECK-NEXT: ret
entry:
  %as_uint = fptoui float %x to i32
  %as_fp = uitofp i32 %as_uint to float
  ret float %as_fp
}
63+
64+
; Unsigned round trip half -> i32 -> half.
define half @t6(half %x) {
; CHECK-LABEL: t6:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzu h0, h0
; CHECK-NEXT: ucvtf h0, h0
; CHECK-NEXT: ret
entry:
  %as_uint = fptoui half %x to i32
  %as_fp = uitofp i32 %as_uint to half
  ret half %as_fp
}

0 commit comments

Comments
 (0)