Skip to content

Commit 5812fcd

Browse files
[AArch64] Codegen for new SCVTF/UCVTF variants (FEAT_FPRCVT)
1 parent dac49e8 commit 5812fcd

File tree

3 files changed

+298
-4
lines changed

3 files changed

+298
-4
lines changed

llvm/lib/Target/AArch64/AArch64InstrFormats.td

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5487,7 +5487,7 @@ multiclass IntegerToFP<bits<2> rmode, bits<3> opcode, string asm, SDPatternOpera
54875487
}
54885488
}
54895489

5490-
multiclass IntegerToFPSIMDScalar<bits<2> rmode, bits<3> opcode, string asm, SDPatternOperator node = null_frag> {
5490+
multiclass IntegerToFPSIMDScalar<bits<2> rmode, bits<3> opcode, string asm, SDPatternOperator op, SDPatternOperator node = null_frag> {
54915491
// 32-bit to half-precision
54925492
def HSr: BaseIntegerToFPUnscaled<rmode, opcode, FPR32, FPR16, f16, asm, node> {
54935493
let Inst{31} = 0; // 32-bit FPR flag
@@ -5511,6 +5511,18 @@ multiclass IntegerToFPSIMDScalar<bits<2> rmode, bits<3> opcode, string asm, SDPa
55115511
let Inst{31} = 1; // 64-bit FPR flag
55125512
let Inst{23-22} = 0b00; // 32-bit FPR flag
55135513
}
5514+
5515+
def : Pat<(f16 (op (i32 FPR32:$Rn))),
5516+
(!cast<Instruction>(NAME # HSr) $Rn)>;
5517+
5518+
def : Pat<(f16 (op (i32 (extractelt (v4i32 V128:$Rn), (i64 0))))),
5519+
(!cast<Instruction>(NAME # HSr) (EXTRACT_SUBREG $Rn, ssub))>;
5520+
def : Pat<(f64 (op (i32 (extractelt (v4i32 V128:$Rn), (i64 0))))),
5521+
(!cast<Instruction>(NAME # DSr) (EXTRACT_SUBREG $Rn, ssub))>;
5522+
def : Pat<(f16 (op (i64 (extractelt (v2i64 V128:$Rn), (i64 0))))),
5523+
(!cast<Instruction>(NAME # HDr) (EXTRACT_SUBREG $Rn, dsub))>;
5524+
def : Pat<(f32 (op (i64 (extractelt (v2i64 V128:$Rn), (i64 0))))),
5525+
(!cast<Instruction>(NAME # SDr) (EXTRACT_SUBREG $Rn, dsub))>;
55145526
}
55155527

55165528
//---
@@ -13270,4 +13282,3 @@ multiclass SIMDThreeSameVectorFP8MatrixMul<string asm>{
1327013282
let Predicates = [HasNEON, HasF8F32MM];
1327113283
}
1327213284
}
13273-

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5068,8 +5068,8 @@ defm SCVTF : IntegerToFP<0b00, 0b010, "scvtf", any_sint_to_fp>;
50685068
defm UCVTF : IntegerToFP<0b00, 0b011, "ucvtf", any_uint_to_fp>;
50695069

50705070
let Predicates = [HasNEON, HasFPRCVT] in {
5071-
defm SCVTF : IntegerToFPSIMDScalar<0b11, 0b100, "scvtf">;
5072-
defm UCVTF : IntegerToFPSIMDScalar<0b11, 0b101, "ucvtf">;
5071+
defm SCVTF : IntegerToFPSIMDScalar<0b11, 0b100, "scvtf", any_sint_to_fp>;
5072+
defm UCVTF : IntegerToFPSIMDScalar<0b11, 0b101, "ucvtf", any_uint_to_fp>;
50735073
}
50745074

50755075
def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)),
Lines changed: 283 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,283 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mattr=+neon,+fullfp16,+fprcvt -verify-machineinstrs %s -o - | FileCheck %s
3+
; RUN: llc -mattr=+neon -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-NO-FPRCVT
4+
5+
target triple = "aarch64-unknown-linux-gnu"
6+
7+
8+
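; To exercise the new patterns, the scalar integer source must already live in a SIMD/FP register.
9+
; A common source of such a value is the result of an integer reduction intrinsic; the tests below model it by extracting lane 0 of a vector argument (the *_neg tests extract lane 1 instead).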
10+
11+
; SCVTF
12+
13+
; Signed i32 -> half, where the integer is lane 0 of a <4 x i32> argument.
; CHECK prefix expects a single FPR-to-FPR `scvtf h0, s0`.
; CHECK-NO-FPRCVT prefix expects `scvtf s0, s0` followed by `fcvt h0, s0`.
define half @scvtf_f16i32(<4 x i32> %x) {
14+
; CHECK-LABEL: scvtf_f16i32:
15+
; CHECK: // %bb.0:
16+
; CHECK-NEXT: scvtf h0, s0
17+
; CHECK-NEXT: ret
18+
;
19+
; CHECK-NO-FPRCVT-LABEL: scvtf_f16i32:
20+
; CHECK-NO-FPRCVT: // %bb.0:
21+
; CHECK-NO-FPRCVT-NEXT: scvtf s0, s0
22+
; CHECK-NO-FPRCVT-NEXT: fcvt h0, s0
23+
; CHECK-NO-FPRCVT-NEXT: ret
24+
%extract = extractelement <4 x i32> %x, i64 0
25+
%conv = sitofp i32 %extract to half
26+
ret half %conv
27+
}
28+
29+
; Negative case for signed i32 -> half: the integer is lane 1, not lane 0.
; Both prefixes expect the lane to be moved into w8 and converted from the GPR.
; CHECK converts straight to h0; CHECK-NO-FPRCVT converts to s0 and then `fcvt`s to h0.
define half @scvtf_f16i32_neg(<4 x i32> %x) {
30+
; CHECK-LABEL: scvtf_f16i32_neg:
31+
; CHECK: // %bb.0:
32+
; CHECK-NEXT: mov w8, v0.s[1]
33+
; CHECK-NEXT: scvtf h0, w8
34+
; CHECK-NEXT: ret
35+
;
36+
; CHECK-NO-FPRCVT-LABEL: scvtf_f16i32_neg:
37+
; CHECK-NO-FPRCVT: // %bb.0:
38+
; CHECK-NO-FPRCVT-NEXT: mov w8, v0.s[1]
39+
; CHECK-NO-FPRCVT-NEXT: scvtf s0, w8
40+
; CHECK-NO-FPRCVT-NEXT: fcvt h0, s0
41+
; CHECK-NO-FPRCVT-NEXT: ret
42+
%extract = extractelement <4 x i32> %x, i64 1
43+
%conv = sitofp i32 %extract to half
44+
ret half %conv
45+
}
46+
47+
; Signed i32 -> double, where the integer is lane 0 of a <4 x i32> argument.
; CHECK prefix expects a single FPR-to-FPR `scvtf d0, s0`.
; CHECK-NO-FPRCVT prefix expects an `fmov` to w8 and a GPR-source `scvtf d0, w8`.
define double @scvtf_f64i32(<4 x i32> %x) {
48+
; CHECK-LABEL: scvtf_f64i32:
49+
; CHECK: // %bb.0:
50+
; CHECK-NEXT: scvtf d0, s0
51+
; CHECK-NEXT: ret
52+
;
53+
; CHECK-NO-FPRCVT-LABEL: scvtf_f64i32:
54+
; CHECK-NO-FPRCVT: // %bb.0:
55+
; CHECK-NO-FPRCVT-NEXT: fmov w8, s0
56+
; CHECK-NO-FPRCVT-NEXT: scvtf d0, w8
57+
; CHECK-NO-FPRCVT-NEXT: ret
58+
%extract = extractelement <4 x i32> %x, i64 0
59+
%conv = sitofp i32 %extract to double
60+
ret double %conv
61+
}
62+
63+
; Negative case for signed i32 -> double: the integer is lane 1, not lane 0.
; Both prefixes expect the same sequence: `mov w8, v0.s[1]` then `scvtf d0, w8`.
define double @scvtf_f64i32_neg(<4 x i32> %x) {
64+
; CHECK-LABEL: scvtf_f64i32_neg:
65+
; CHECK: // %bb.0:
66+
; CHECK-NEXT: mov w8, v0.s[1]
67+
; CHECK-NEXT: scvtf d0, w8
68+
; CHECK-NEXT: ret
69+
;
70+
; CHECK-NO-FPRCVT-LABEL: scvtf_f64i32_neg:
71+
; CHECK-NO-FPRCVT: // %bb.0:
72+
; CHECK-NO-FPRCVT-NEXT: mov w8, v0.s[1]
73+
; CHECK-NO-FPRCVT-NEXT: scvtf d0, w8
74+
; CHECK-NO-FPRCVT-NEXT: ret
75+
%extract = extractelement <4 x i32> %x, i64 1
76+
%conv = sitofp i32 %extract to double
77+
ret double %conv
78+
}
79+
80+
; Signed i64 -> half, where the integer is lane 0 of a <2 x i64> argument.
; CHECK prefix expects a single FPR-to-FPR `scvtf h0, d0`.
; CHECK-NO-FPRCVT prefix expects `fmov x8, d0`, `scvtf s0, x8`, then `fcvt h0, s0`.
define half @scvtf_f16i64(<2 x i64> %x) {
81+
; CHECK-LABEL: scvtf_f16i64:
82+
; CHECK: // %bb.0:
83+
; CHECK-NEXT: scvtf h0, d0
84+
; CHECK-NEXT: ret
85+
;
86+
; CHECK-NO-FPRCVT-LABEL: scvtf_f16i64:
87+
; CHECK-NO-FPRCVT: // %bb.0:
88+
; CHECK-NO-FPRCVT-NEXT: fmov x8, d0
89+
; CHECK-NO-FPRCVT-NEXT: scvtf s0, x8
90+
; CHECK-NO-FPRCVT-NEXT: fcvt h0, s0
91+
; CHECK-NO-FPRCVT-NEXT: ret
92+
%extract = extractelement <2 x i64> %x, i64 0
93+
%conv = sitofp i64 %extract to half
94+
ret half %conv
95+
}
96+
97+
; Negative case for signed i64 -> half: the integer is lane 1, not lane 0.
; Both prefixes expect the lane to be moved into x8 and converted from the GPR.
; CHECK converts straight to h0; CHECK-NO-FPRCVT converts to s0 and then `fcvt`s to h0.
define half @scvtf_f16i64_neg(<2 x i64> %x) {
98+
; CHECK-LABEL: scvtf_f16i64_neg:
99+
; CHECK: // %bb.0:
100+
; CHECK-NEXT: mov x8, v0.d[1]
101+
; CHECK-NEXT: scvtf h0, x8
102+
; CHECK-NEXT: ret
103+
;
104+
; CHECK-NO-FPRCVT-LABEL: scvtf_f16i64_neg:
105+
; CHECK-NO-FPRCVT: // %bb.0:
106+
; CHECK-NO-FPRCVT-NEXT: mov x8, v0.d[1]
107+
; CHECK-NO-FPRCVT-NEXT: scvtf s0, x8
108+
; CHECK-NO-FPRCVT-NEXT: fcvt h0, s0
109+
; CHECK-NO-FPRCVT-NEXT: ret
110+
%extract = extractelement <2 x i64> %x, i64 1
111+
%conv = sitofp i64 %extract to half
112+
ret half %conv
113+
}
114+
115+
; Signed i64 -> float, where the integer is lane 0 of a <2 x i64> argument.
; CHECK prefix expects a single FPR-to-FPR `scvtf s0, d0`.
; CHECK-NO-FPRCVT prefix expects an `fmov` to x8 and a GPR-source `scvtf s0, x8`.
define float @scvtf_f32i64(<2 x i64> %x) {
116+
; CHECK-LABEL: scvtf_f32i64:
117+
; CHECK: // %bb.0:
118+
; CHECK-NEXT: scvtf s0, d0
119+
; CHECK-NEXT: ret
120+
;
121+
; CHECK-NO-FPRCVT-LABEL: scvtf_f32i64:
122+
; CHECK-NO-FPRCVT: // %bb.0:
123+
; CHECK-NO-FPRCVT-NEXT: fmov x8, d0
124+
; CHECK-NO-FPRCVT-NEXT: scvtf s0, x8
125+
; CHECK-NO-FPRCVT-NEXT: ret
126+
%extract = extractelement <2 x i64> %x, i64 0
127+
%conv = sitofp i64 %extract to float
128+
ret float %conv
129+
}
130+
131+
; Negative case for signed i64 -> float: the integer is lane 1, not lane 0.
; Both prefixes expect the same sequence: `mov x8, v0.d[1]` then `scvtf s0, x8`.
define float @scvtf_f32i64_neg(<2 x i64> %x) {
132+
; CHECK-LABEL: scvtf_f32i64_neg:
133+
; CHECK: // %bb.0:
134+
; CHECK-NEXT: mov x8, v0.d[1]
135+
; CHECK-NEXT: scvtf s0, x8
136+
; CHECK-NEXT: ret
137+
;
138+
; CHECK-NO-FPRCVT-LABEL: scvtf_f32i64_neg:
139+
; CHECK-NO-FPRCVT: // %bb.0:
140+
; CHECK-NO-FPRCVT-NEXT: mov x8, v0.d[1]
141+
; CHECK-NO-FPRCVT-NEXT: scvtf s0, x8
142+
; CHECK-NO-FPRCVT-NEXT: ret
143+
%extract = extractelement <2 x i64> %x, i64 1
144+
%conv = sitofp i64 %extract to float
145+
ret float %conv
146+
}
147+
148+
; UCVTF
149+
150+
; Unsigned i32 -> half, where the integer is lane 0 of a <4 x i32> argument.
; CHECK prefix expects a single FPR-to-FPR `ucvtf h0, s0`.
; CHECK-NO-FPRCVT prefix expects `ucvtf s0, s0` followed by `fcvt h0, s0`.
define half @ucvtf_f16i32(<4 x i32> %x) {
151+
; CHECK-LABEL: ucvtf_f16i32:
152+
; CHECK: // %bb.0:
153+
; CHECK-NEXT: ucvtf h0, s0
154+
; CHECK-NEXT: ret
155+
;
156+
; CHECK-NO-FPRCVT-LABEL: ucvtf_f16i32:
157+
; CHECK-NO-FPRCVT: // %bb.0:
158+
; CHECK-NO-FPRCVT-NEXT: ucvtf s0, s0
159+
; CHECK-NO-FPRCVT-NEXT: fcvt h0, s0
160+
; CHECK-NO-FPRCVT-NEXT: ret
161+
%extract = extractelement <4 x i32> %x, i64 0
162+
%conv = uitofp i32 %extract to half
163+
ret half %conv
164+
}
165+
166+
; Negative case for unsigned i32 -> half: the integer is lane 1, not lane 0.
; Both prefixes expect the lane to be moved into w8 and converted from the GPR.
; CHECK converts straight to h0; CHECK-NO-FPRCVT converts to s0 and then `fcvt`s to h0.
define half @ucvtf_f16i32_neg(<4 x i32> %x) {
167+
; CHECK-LABEL: ucvtf_f16i32_neg:
168+
; CHECK: // %bb.0:
169+
; CHECK-NEXT: mov w8, v0.s[1]
170+
; CHECK-NEXT: ucvtf h0, w8
171+
; CHECK-NEXT: ret
172+
;
173+
; CHECK-NO-FPRCVT-LABEL: ucvtf_f16i32_neg:
174+
; CHECK-NO-FPRCVT: // %bb.0:
175+
; CHECK-NO-FPRCVT-NEXT: mov w8, v0.s[1]
176+
; CHECK-NO-FPRCVT-NEXT: ucvtf s0, w8
177+
; CHECK-NO-FPRCVT-NEXT: fcvt h0, s0
178+
; CHECK-NO-FPRCVT-NEXT: ret
179+
%extract = extractelement <4 x i32> %x, i64 1
180+
%conv = uitofp i32 %extract to half
181+
ret half %conv
182+
}
183+
184+
; Unsigned i32 -> double, where the integer is lane 0 of a <4 x i32> argument.
; CHECK prefix expects a single FPR-to-FPR `ucvtf d0, s0`.
; CHECK-NO-FPRCVT prefix expects an `fmov` to w8 and a GPR-source `ucvtf d0, w8`.
define double @ucvtf_f64i32(<4 x i32> %x) {
185+
; CHECK-LABEL: ucvtf_f64i32:
186+
; CHECK: // %bb.0:
187+
; CHECK-NEXT: ucvtf d0, s0
188+
; CHECK-NEXT: ret
189+
;
190+
; CHECK-NO-FPRCVT-LABEL: ucvtf_f64i32:
191+
; CHECK-NO-FPRCVT: // %bb.0:
192+
; CHECK-NO-FPRCVT-NEXT: fmov w8, s0
193+
; CHECK-NO-FPRCVT-NEXT: ucvtf d0, w8
194+
; CHECK-NO-FPRCVT-NEXT: ret
195+
%extract = extractelement <4 x i32> %x, i64 0
196+
%conv = uitofp i32 %extract to double
197+
ret double %conv
198+
}
199+
200+
; Negative case for unsigned i32 -> double: the integer is lane 1, not lane 0.
; Both prefixes expect the same sequence: `mov w8, v0.s[1]` then `ucvtf d0, w8`.
define double @ucvtf_f64i32_neg(<4 x i32> %x) {
201+
; CHECK-LABEL: ucvtf_f64i32_neg:
202+
; CHECK: // %bb.0:
203+
; CHECK-NEXT: mov w8, v0.s[1]
204+
; CHECK-NEXT: ucvtf d0, w8
205+
; CHECK-NEXT: ret
206+
;
207+
; CHECK-NO-FPRCVT-LABEL: ucvtf_f64i32_neg:
208+
; CHECK-NO-FPRCVT: // %bb.0:
209+
; CHECK-NO-FPRCVT-NEXT: mov w8, v0.s[1]
210+
; CHECK-NO-FPRCVT-NEXT: ucvtf d0, w8
211+
; CHECK-NO-FPRCVT-NEXT: ret
212+
%extract = extractelement <4 x i32> %x, i64 1
213+
%conv = uitofp i32 %extract to double
214+
ret double %conv
215+
}
216+
217+
; Unsigned i64 -> half, where the integer is lane 0 of a <2 x i64> argument.
; CHECK prefix expects a single FPR-to-FPR `ucvtf h0, d0`.
; CHECK-NO-FPRCVT prefix expects `fmov x8, d0`, `ucvtf s0, x8`, then `fcvt h0, s0`.
define half @ucvtf_f16i64(<2 x i64> %x) {
218+
; CHECK-LABEL: ucvtf_f16i64:
219+
; CHECK: // %bb.0:
220+
; CHECK-NEXT: ucvtf h0, d0
221+
; CHECK-NEXT: ret
222+
;
223+
; CHECK-NO-FPRCVT-LABEL: ucvtf_f16i64:
224+
; CHECK-NO-FPRCVT: // %bb.0:
225+
; CHECK-NO-FPRCVT-NEXT: fmov x8, d0
226+
; CHECK-NO-FPRCVT-NEXT: ucvtf s0, x8
227+
; CHECK-NO-FPRCVT-NEXT: fcvt h0, s0
228+
; CHECK-NO-FPRCVT-NEXT: ret
229+
%extract = extractelement <2 x i64> %x, i64 0
230+
%conv = uitofp i64 %extract to half
231+
ret half %conv
232+
}
233+
234+
; Negative case for unsigned i64 -> half: the integer is lane 1, not lane 0.
; Both prefixes expect the lane to be moved into x8 and converted from the GPR.
; CHECK converts straight to h0; CHECK-NO-FPRCVT converts to s0 and then `fcvt`s to h0.
define half @ucvtf_f16i64_neg(<2 x i64> %x) {
235+
; CHECK-LABEL: ucvtf_f16i64_neg:
236+
; CHECK: // %bb.0:
237+
; CHECK-NEXT: mov x8, v0.d[1]
238+
; CHECK-NEXT: ucvtf h0, x8
239+
; CHECK-NEXT: ret
240+
;
241+
; CHECK-NO-FPRCVT-LABEL: ucvtf_f16i64_neg:
242+
; CHECK-NO-FPRCVT: // %bb.0:
243+
; CHECK-NO-FPRCVT-NEXT: mov x8, v0.d[1]
244+
; CHECK-NO-FPRCVT-NEXT: ucvtf s0, x8
245+
; CHECK-NO-FPRCVT-NEXT: fcvt h0, s0
246+
; CHECK-NO-FPRCVT-NEXT: ret
247+
%extract = extractelement <2 x i64> %x, i64 1
248+
%conv = uitofp i64 %extract to half
249+
ret half %conv
250+
}
251+
252+
; Unsigned i64 -> float, where the integer is lane 0 of a <2 x i64> argument.
; CHECK prefix expects a single FPR-to-FPR `ucvtf s0, d0`.
; CHECK-NO-FPRCVT prefix expects an `fmov` to x8 and a GPR-source `ucvtf s0, x8`.
define float @ucvtf_f32i64(<2 x i64> %x) {
253+
; CHECK-LABEL: ucvtf_f32i64:
254+
; CHECK: // %bb.0:
255+
; CHECK-NEXT: ucvtf s0, d0
256+
; CHECK-NEXT: ret
257+
;
258+
; CHECK-NO-FPRCVT-LABEL: ucvtf_f32i64:
259+
; CHECK-NO-FPRCVT: // %bb.0:
260+
; CHECK-NO-FPRCVT-NEXT: fmov x8, d0
261+
; CHECK-NO-FPRCVT-NEXT: ucvtf s0, x8
262+
; CHECK-NO-FPRCVT-NEXT: ret
263+
%extract = extractelement <2 x i64> %x, i64 0
264+
%conv = uitofp i64 %extract to float
265+
ret float %conv
266+
}
267+
268+
; Negative case for unsigned i64 -> float: the integer is lane 1, not lane 0.
; Both prefixes expect the same sequence: `mov x8, v0.d[1]` then `ucvtf s0, x8`.
define float @ucvtf_f32i64_neg(<2 x i64> %x) {
269+
; CHECK-LABEL: ucvtf_f32i64_neg:
270+
; CHECK: // %bb.0:
271+
; CHECK-NEXT: mov x8, v0.d[1]
272+
; CHECK-NEXT: ucvtf s0, x8
273+
; CHECK-NEXT: ret
274+
;
275+
; CHECK-NO-FPRCVT-LABEL: ucvtf_f32i64_neg:
276+
; CHECK-NO-FPRCVT: // %bb.0:
277+
; CHECK-NO-FPRCVT-NEXT: mov x8, v0.d[1]
278+
; CHECK-NO-FPRCVT-NEXT: ucvtf s0, x8
279+
; CHECK-NO-FPRCVT-NEXT: ret
280+
%extract = extractelement <2 x i64> %x, i64 1
281+
%conv = uitofp i64 %extract to float
282+
ret float %conv
283+
}

0 commit comments

Comments
 (0)