Skip to content

Commit 1ea8cae

Browse files
[AArch64] Add patterns for conversions using fixed-point scvtf (#92922)
1 parent 9377412 commit 1ea8cae

File tree

3 files changed

+130
-1
lines changed

3 files changed

+130
-1
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14365,7 +14365,7 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
1436514365
unsigned Opc =
1436614366
(Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
1436714367
return DAG.getNode(Opc, DL, VT, Op.getOperand(0),
14368-
DAG.getConstant(Cnt, DL, MVT::i32));
14368+
DAG.getConstant(Cnt, DL, MVT::i32), Op->getFlags());
1436914369
}
1437014370

1437114371
// Right shift register. Note, there is not a shift right register

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -733,6 +733,12 @@ def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>;
733733
def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>;
734734

735735
def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>;
736+
737+
// AArch64vashr restricted to nodes whose flags report hasExact().
def AArch64vashr_exact : PatFrag<(ops node:$lhs, node:$rhs),
738+
(AArch64vashr node:$lhs, node:$rhs), [{
739+
return N->getFlags().hasExact();
740+
}]>;
741+
736742
def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>;
737743
def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>;
738744
def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>;
@@ -7710,6 +7716,25 @@ defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf",
77107716
defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn", AArch64rshrn>;
77117717
defm SHL : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>;
77127718

7719+
// Select sint_to_fp of an exact vector arithmetic shift right (v2i32, v4i32,
// v2i64) as the corresponding SCVTF*_shift instruction, passing the shift
// amount through as the instruction's immediate operand.
// NOTE(review): presumably only the exact form is matched because a non-exact
// shift discards low bits that the shifted conversion would still take into
// account - confirm against the instruction semantics.
let Predicates = [HasNEON] in {
7720+
def : Pat<(v2f32 (sint_to_fp (v2i32 (AArch64vashr_exact v2i32:$Vn, i32:$shift)))),
7721+
(SCVTFv2i32_shift $Vn, vecshiftR32:$shift)>;
7722+
7723+
def : Pat<(v4f32 (sint_to_fp (v4i32 (AArch64vashr_exact v4i32:$Vn, i32:$shift)))),
7724+
(SCVTFv4i32_shift $Vn, vecshiftR32:$shift)>;
7725+
7726+
def : Pat<(v2f64 (sint_to_fp (v2i64 (AArch64vashr_exact v2i64:$Vn, i32:$shift)))),
7727+
(SCVTFv2i64_shift $Vn, vecshiftR64:$shift)>;
7728+
}
7729+
7730+
// Half-precision variants: select sint_to_fp of an exact vector arithmetic
// shift right (v4i16, v8i16) as SCVTFv4i16_shift / SCVTFv8i16_shift. These are
// additionally gated on HasFullFP16.
let Predicates = [HasNEON, HasFullFP16] in {
7731+
def : Pat<(v4f16 (sint_to_fp (v4i16 (AArch64vashr_exact v4i16:$Vn, i32:$shift)))),
7732+
(SCVTFv4i16_shift $Vn, vecshiftR16:$shift)>;
7733+
7734+
def : Pat<(v8f16 (sint_to_fp (v8i16 (AArch64vashr_exact v8i16:$Vn, i32:$shift)))),
7735+
(SCVTFv8i16_shift $Vn, vecshiftR16:$shift)>;
7736+
}
7737+
77137738
// X << 1 ==> X + X
77147739
class SHLToADDPat<ValueType ty, RegisterClass regtype>
77157740
: Pat<(ty (AArch64vshl (ty regtype:$Rn), (i32 1))),
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s | FileCheck %s
3+
4+
target triple = "aarch64"
5+
6+
; First some corner cases
7+
; Exact shift by zero: the expected output is a plain scvtf with no immediate.
define <4 x float> @f_v4_s0(<4 x i32> %u) {
8+
; CHECK-LABEL: f_v4_s0:
9+
; CHECK: // %bb.0:
10+
; CHECK-NEXT: scvtf v0.4s, v0.4s
11+
; CHECK-NEXT: ret
12+
%s = ashr exact <4 x i32> %u, <i32 0, i32 0, i32 0, i32 0>
13+
%v = sitofp <4 x i32> %s to <4 x float>
14+
ret <4 x float> %v
15+
}
16+
17+
; Exact shift by one: expected to fold into a single scvtf with immediate #1.
define <4 x float> @f_v4_s1(<4 x i32> %u) {
18+
; CHECK-LABEL: f_v4_s1:
19+
; CHECK: // %bb.0:
20+
; CHECK-NEXT: scvtf v0.4s, v0.4s, #1
21+
; CHECK-NEXT: ret
22+
%s = ashr exact <4 x i32> %u, <i32 1, i32 1, i32 1, i32 1>
23+
%v = sitofp <4 x i32> %s to <4 x float>
24+
ret <4 x float> %v
25+
}
26+
27+
; Negative test: the ashr has no 'exact' flag, so the expected output keeps a
; separate sshr followed by a plain scvtf.
define <4 x float> @f_v4_s24_inexact(<4 x i32> %u) {
28+
; CHECK-LABEL: f_v4_s24_inexact:
29+
; CHECK: // %bb.0:
30+
; CHECK-NEXT: sshr v0.4s, v0.4s, #24
31+
; CHECK-NEXT: scvtf v0.4s, v0.4s
32+
; CHECK-NEXT: ret
33+
%s = ashr <4 x i32> %u, <i32 24, i32 24, i32 24, i32 24>
34+
%v = sitofp <4 x i32> %s to <4 x float>
35+
ret <4 x float> %v
36+
}
37+
38+
; Non-exact shift by 31: the expected output is cmlt #0 followed by a plain
; scvtf, i.e. the shift is not folded into the conversion.
define <4 x float> @f_v4_s31(<4 x i32> %u) {
39+
; CHECK-LABEL: f_v4_s31:
40+
; CHECK: // %bb.0:
41+
; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
42+
; CHECK-NEXT: scvtf v0.4s, v0.4s
43+
; CHECK-NEXT: ret
44+
%s = ashr <4 x i32> %u, <i32 31, i32 31, i32 31, i32 31>
45+
%v = sitofp <4 x i32> %s to <4 x float>
46+
ret <4 x float> %v
47+
}
48+
49+
; Common cases for conversion from signed integer to floating point types
50+
; <2 x i32> exact shift by 24: expected to fold into scvtf on v0.2s with #24.
define <2 x float> @f_v2_s24(<2 x i32> %u) {
51+
; CHECK-LABEL: f_v2_s24:
52+
; CHECK: // %bb.0:
53+
; CHECK-NEXT: scvtf v0.2s, v0.2s, #24
54+
; CHECK-NEXT: ret
55+
%s = ashr exact <2 x i32> %u, <i32 24, i32 24>
56+
%v = sitofp <2 x i32> %s to <2 x float>
57+
ret <2 x float> %v
58+
}
59+
60+
; <4 x i32> exact shift by 24: expected to fold into scvtf on v0.4s with #24.
define <4 x float> @f_v4_s24(<4 x i32> %u) {
61+
; CHECK-LABEL: f_v4_s24:
62+
; CHECK: // %bb.0:
63+
; CHECK-NEXT: scvtf v0.4s, v0.4s, #24
64+
; CHECK-NEXT: ret
65+
%s = ashr exact <4 x i32> %u, <i32 24, i32 24, i32 24, i32 24>
66+
%v = sitofp <4 x i32> %s to <4 x float>
67+
ret <4 x float> %v
68+
}
69+
70+
; Check legalisation to <2 x f64> does not get in the way
71+
; <8 x i64> exact shift by 56: the expected output is four scvtf instructions
; on v0-v3 in .2d form, each carrying the #56 immediate.
define <8 x double> @d_v8_s64(<8 x i64> %u) {
72+
; CHECK-LABEL: d_v8_s64:
73+
; CHECK: // %bb.0:
74+
; CHECK-NEXT: scvtf v0.2d, v0.2d, #56
75+
; CHECK-NEXT: scvtf v1.2d, v1.2d, #56
76+
; CHECK-NEXT: scvtf v2.2d, v2.2d, #56
77+
; CHECK-NEXT: scvtf v3.2d, v3.2d, #56
78+
; CHECK-NEXT: ret
79+
%s = ashr exact <8 x i64> %u, <i64 56, i64 56, i64 56, i64 56, i64 56, i64 56, i64 56, i64 56>
80+
%v = sitofp <8 x i64> %s to <8 x double>
81+
ret <8 x double> %v
82+
}
83+
84+
; <4 x i16> exact shift by 8 converted to half; the function uses attribute
; group #0 and expects scvtf on v0.4h with #8.
define <4 x half> @h_v4_s8(<4 x i16> %u) #0 {
85+
; CHECK-LABEL: h_v4_s8:
86+
; CHECK: // %bb.0:
87+
; CHECK-NEXT: scvtf v0.4h, v0.4h, #8
88+
; CHECK-NEXT: ret
89+
%s = ashr exact <4 x i16> %u, <i16 8, i16 8, i16 8, i16 8>
90+
%v = sitofp <4 x i16> %s to <4 x half>
91+
ret <4 x half> %v
92+
}
93+
94+
; <8 x i16> exact shift by 8 converted to half; the function uses attribute
; group #0 and expects scvtf on v0.8h with #8.
define <8 x half> @h_v8_s8(<8 x i16> %u) #0 {
95+
; CHECK-LABEL: h_v8_s8:
96+
; CHECK: // %bb.0:
97+
; CHECK-NEXT: scvtf v0.8h, v0.8h, #8
98+
; CHECK-NEXT: ret
99+
%s = ashr exact <8 x i16> %u, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
100+
%v = sitofp <8 x i16> %s to <8 x half>
101+
ret <8 x half> %v
102+
}
103+
104+
attributes #0 = { "target-features"="+fullfp16"}

0 commit comments

Comments
 (0)