Skip to content

Commit 92a9a4a

Browse files
[AArch64] Generate zeroing forms of certain SVE2.2 instructions (2/11)
1 parent 67a7378 commit 92a9a4a

File tree

3 files changed

+351
-3
lines changed

3 files changed

+351
-3
lines changed

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4254,7 +4254,7 @@ defm TBLQ_ZZZ : sve2p1_tblq<"tblq", int_aarch64_sve_tblq>;
42544254
//===----------------------------------------------------------------------===//
42554255
let Predicates = [HasSVE2p2orSME2p2] in {
42564256
// SVE Floating-point convert precision, zeroing predicate
4257-
defm FCVT_ZPzZ : sve_fp_z2op_p_zd_b_0<"fcvt">;
4257+
defm FCVT_ZPzZ : sve_fp_z2op_p_zd_b_0<"fcvt", "int_aarch64_sve_fcvt">;
42584258

42594259
// SVE2p2 floating-point convert precision down (placing odd), zeroing predicate
42604260
defm FCVTNT_ZPzZ : sve_fp_fcvtntz<"fcvtnt">;
@@ -4268,7 +4268,7 @@ let Predicates = [HasSVE2p2orSME2p2] in {
42684268
// SVE2p2 floating-point convert single-to-bf (placing odd), zeroing predicate
42694269
def BFCVTNT_ZPzZ : sve_fp_fcvt2z<0b1010, "bfcvtnt", ZPR16, ZPR32>;
42704270
// SVE2p2 floating-point convert single-to-bf (placing corresponding), zeroing predicate
4271-
def BFCVT_ZPzZ_StoH : sve_fp_z2op_p_zd<0b1001010, "bfcvt", ZPR32, ZPR16>;
4271+
defm BFCVT_ZPzZ_StoH : sve_fp_z2op_p_zd_bfcvt<0b1001010, "bfcvt", int_aarch64_sve_fcvt_bf16f32_v2>;
42724272

42734273
// Floating-point convert to integer, zeroing predicate
42744274
defm FCVTZS_ZPzZ : sve_fp_z2op_p_zd_d<0b0, "fcvtzs">;

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -576,6 +576,11 @@ multiclass SVE_3_Op_Undef_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1
576576
(inst $Op1, $Op2, $Op3)>;
577577
}
578578

579+
// Selection pattern for a three-operand node `op` producing `vtd`, where the
// first operand (of type vt1) matches SVEDup0Undef. That first operand is
// dropped: only $Op1 (vt2) and $Op2 (vt3) are forwarded to `inst`.
// NOTE(review): SVEDup0Undef is defined outside this view; presumably it
// matches an all-zeros or undef vector -- confirm against its definition.
class SVE_3_Op_UndefZero_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
580+
ValueType vt2, ValueType vt3, Instruction inst>
581+
: Pat<(vtd (op (vt1 (SVEDup0Undef)), vt2:$Op1, vt3:$Op2)),
582+
(inst $Op1, $Op2)>;
583+
579584
class SVE_4_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
580585
ValueType vt2, ValueType vt3, ValueType vt4,
581586
Instruction inst>
@@ -3273,6 +3278,12 @@ multiclass sve_fp_z2op_p_zd_frint<bits<2> opc, string asm> {
32733278
def _D : sve_fp_z2op_p_zd<{ 0b0010, opc{1}, 1, opc{0} }, asm, ZPR64, ZPR64>;
32743279
}
32753280

3281+
// Defines the _StoH instruction (sve_fp_z2op_p_zd with ZPR32 and ZPR16
// register operands) for encoding `opc` and mnemonic `asm`, plus an
// SVE_3_Op_UndefZero_Pat that selects it for `op` with an nxv8bf16 result,
// an nxv4i1 predicate and an nxv4f32 source.
multiclass sve_fp_z2op_p_zd_bfcvt<bits<7> opc, string asm, SDPatternOperator op> {
3282+
def _StoH : sve_fp_z2op_p_zd<opc, asm, ZPR32, ZPR16>;
3283+
3284+
def : SVE_3_Op_UndefZero_Pat<nxv8bf16, op, nxv8bf16, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _StoH)>;
3285+
}
3286+
32763287
multiclass sve_fp_z2op_p_zd_d<bit U, string asm> {
32773288
def _HtoH : sve_fp_z2op_p_zd<{ 0b011101, U }, asm, ZPR16, ZPR16>;
32783289
def _HtoS : sve_fp_z2op_p_zd<{ 0b011110, U }, asm, ZPR16, ZPR32>;
@@ -3299,13 +3310,20 @@ multiclass sve_fp_z2op_p_zd_d_flogb<string asm> {
32993310
def _D : sve_fp_z2op_p_zd<0b0011011, asm, ZPR64, ZPR64>;
33003311
}
33013312

3302-
multiclass sve_fp_z2op_p_zd_b_0<string asm> {
3313+
// Defines six sve_fp_z2op_p_zd conversion instructions (_StoH, _HtoS, _DtoH,
// _HtoD, _DtoS, _StoD) for mnemonic `asm`, and one SVE_3_Op_UndefZero_Pat per
// instruction. `op` is passed as a string so that a type-pair suffix (such as
// _f16f32) can be pasted onto it before the result is !cast to the
// SDPatternOperator record used by each pattern.
multiclass sve_fp_z2op_p_zd_b_0<string asm, string op> {
33033314
def _StoH : sve_fp_z2op_p_zd<0b1001000, asm, ZPR32, ZPR16>;
33043315
def _HtoS : sve_fp_z2op_p_zd<0b1001001, asm, ZPR16, ZPR32>;
33053316
def _DtoH : sve_fp_z2op_p_zd<0b1101000, asm, ZPR64, ZPR16>;
33063317
def _HtoD : sve_fp_z2op_p_zd<0b1101001, asm, ZPR16, ZPR64>;
33073318
def _DtoS : sve_fp_z2op_p_zd<0b1101010, asm, ZPR64, ZPR32>;
33083319
def _StoD : sve_fp_z2op_p_zd<0b1101011, asm, ZPR32, ZPR64>;
3320+
3321+
def : SVE_3_Op_UndefZero_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f32), nxv8f16, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _StoH)>;
3322+
def : SVE_3_Op_UndefZero_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f64), nxv8f16, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoH)>;
3323+
def : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
3324+
def : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
3325+
def : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f16), nxv2f64, nxv2i1, nxv8f16, !cast<Instruction>(NAME # _HtoD)>;
3326+
def : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
33093327
}
33103328

33113329
//===----------------------------------------------------------------------===//
Lines changed: 330 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,330 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mattr=+sve,+bf16 < %s | FileCheck %s
3+
; RUN: llc -mattr=+sve2p2,+bf16 < %s | FileCheck %s -check-prefix CHECK-2p2
4+
5+
; RUN: llc -mattr=+sme,+bf16 -force-streaming < %s | FileCheck %s
6+
; RUN: llc -mattr=+sme2p2,+bf16 -force-streaming < %s | FileCheck %s -check-prefix CHECK-2p2
7+
8+
target triple = "aarch64-linux"
9+
10+
define <vscale x 8 x half> @test_svcvt_f16_f32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x float> %x) {
11+
; CHECK-LABEL: test_svcvt_f16_f32_x_1:
12+
; CHECK: // %bb.0: // %entry
13+
; CHECK-NEXT: fcvt z0.h, p0/m, z0.s
14+
; CHECK-NEXT: ret
15+
;
16+
; CHECK-2p2-LABEL: test_svcvt_f16_f32_x_1:
17+
; CHECK-2p2: // %bb.0: // %entry
18+
; CHECK-2p2-NEXT: fcvt z0.h, p0/z, z0.s
19+
; CHECK-2p2-NEXT: ret
20+
entry:
21+
%0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
22+
ret <vscale x 8 x half> %0
23+
}
24+
25+
define <vscale x 8 x half> @test_svcvt_f16_f32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x float> %x) {
26+
; CHECK-LABEL: test_svcvt_f16_f32_x_2:
27+
; CHECK: // %bb.0: // %entry
28+
; CHECK-NEXT: fcvt z0.h, p0/m, z1.s
29+
; CHECK-NEXT: ret
30+
;
31+
; CHECK-2p2-LABEL: test_svcvt_f16_f32_x_2:
32+
; CHECK-2p2: // %bb.0: // %entry
33+
; CHECK-2p2-NEXT: fcvt z0.h, p0/z, z1.s
34+
; CHECK-2p2-NEXT: ret
35+
entry:
36+
%0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
37+
ret <vscale x 8 x half> %0
38+
}
39+
40+
define <vscale x 8 x half> @test_svcvt_f16_f32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x float> %x) {
41+
; CHECK-LABEL: test_svcvt_f16_f32_z:
42+
; CHECK: // %bb.0: // %entry
43+
; CHECK-NEXT: mov z0.h, #0 // =0x0
44+
; CHECK-NEXT: fcvt z0.h, p0/m, z1.s
45+
; CHECK-NEXT: ret
46+
;
47+
; CHECK-2p2-LABEL: test_svcvt_f16_f32_z:
48+
; CHECK-2p2: // %bb.0: // %entry
49+
; CHECK-2p2-NEXT: fcvt z0.h, p0/z, z1.s
50+
; CHECK-2p2-NEXT: ret
51+
entry:
52+
%0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
53+
ret <vscale x 8 x half> %0
54+
}
55+
56+
define <vscale x 8 x bfloat> @test_svcvt_bf16_f32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x float> %x) {
57+
; CHECK-LABEL: test_svcvt_bf16_f32_x_1:
58+
; CHECK: // %bb.0: // %entry
59+
; CHECK-NEXT: bfcvt z0.h, p0/m, z0.s
60+
; CHECK-NEXT: ret
61+
;
62+
; CHECK-2p2-LABEL: test_svcvt_bf16_f32_x_1:
63+
; CHECK-2p2: // %bb.0: // %entry
64+
; CHECK-2p2-NEXT: bfcvt z0.h, p0/z, z0.s
65+
; CHECK-2p2-NEXT: ret
66+
entry:
67+
%0 = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
68+
ret <vscale x 8 x bfloat> %0
69+
}
70+
71+
define <vscale x 8 x bfloat> @test_svcvt_bf16_f32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x float> %x) {
72+
; CHECK-LABEL: test_svcvt_bf16_f32_x_2:
73+
; CHECK: // %bb.0: // %entry
74+
; CHECK-NEXT: bfcvt z0.h, p0/m, z1.s
75+
; CHECK-NEXT: ret
76+
;
77+
; CHECK-2p2-LABEL: test_svcvt_bf16_f32_x_2:
78+
; CHECK-2p2: // %bb.0: // %entry
79+
; CHECK-2p2-NEXT: bfcvt z0.h, p0/z, z1.s
80+
; CHECK-2p2-NEXT: ret
81+
entry:
82+
%0 = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
83+
ret <vscale x 8 x bfloat> %0
84+
}
85+
86+
define <vscale x 8 x bfloat> @test_svcvt_bf16_f32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x float> %x) {
87+
; CHECK-LABEL: test_svcvt_bf16_f32_z:
88+
; CHECK: // %bb.0: // %entry
89+
; CHECK-NEXT: mov z0.h, #0 // =0x0
90+
; CHECK-NEXT: bfcvt z0.h, p0/m, z1.s
91+
; CHECK-NEXT: ret
92+
;
93+
; CHECK-2p2-LABEL: test_svcvt_bf16_f32_z:
94+
; CHECK-2p2: // %bb.0: // %entry
95+
; CHECK-2p2-NEXT: bfcvt z0.h, p0/z, z1.s
96+
; CHECK-2p2-NEXT: ret
97+
entry:
98+
%0 = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
99+
ret <vscale x 8 x bfloat> %0
100+
}
101+
102+
define <vscale x 8 x half> @test_svcvt_f16_f64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x double> %x) {
103+
; CHECK-LABEL: test_svcvt_f16_f64_x_1:
104+
; CHECK: // %bb.0: // %entry
105+
; CHECK-NEXT: fcvt z0.h, p0/m, z0.d
106+
; CHECK-NEXT: ret
107+
;
108+
; CHECK-2p2-LABEL: test_svcvt_f16_f64_x_1:
109+
; CHECK-2p2: // %bb.0: // %entry
110+
; CHECK-2p2-NEXT: fcvt z0.h, p0/z, z0.d
111+
; CHECK-2p2-NEXT: ret
112+
entry:
113+
%0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
114+
ret <vscale x 8 x half> %0
115+
}
116+
117+
define <vscale x 8 x half> @test_svcvt_f16_f64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x double> %x) {
118+
; CHECK-LABEL: test_svcvt_f16_f64_x_2:
119+
; CHECK: // %bb.0: // %entry
120+
; CHECK-NEXT: fcvt z0.h, p0/m, z1.d
121+
; CHECK-NEXT: ret
122+
;
123+
; CHECK-2p2-LABEL: test_svcvt_f16_f64_x_2:
124+
; CHECK-2p2: // %bb.0: // %entry
125+
; CHECK-2p2-NEXT: fcvt z0.h, p0/z, z1.d
126+
; CHECK-2p2-NEXT: ret
127+
entry:
128+
%0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
129+
ret <vscale x 8 x half> %0
130+
}
131+
132+
define <vscale x 8 x half> @test_svcvt_f16_f64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x double> %x) {
133+
; CHECK-LABEL: test_svcvt_f16_f64_z:
134+
; CHECK: // %bb.0: // %entry
135+
; CHECK-NEXT: mov z0.h, #0 // =0x0
136+
; CHECK-NEXT: fcvt z0.h, p0/m, z1.d
137+
; CHECK-NEXT: ret
138+
;
139+
; CHECK-2p2-LABEL: test_svcvt_f16_f64_z:
140+
; CHECK-2p2: // %bb.0: // %entry
141+
; CHECK-2p2-NEXT: fcvt z0.h, p0/z, z1.d
142+
; CHECK-2p2-NEXT: ret
143+
entry:
144+
%0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
145+
ret <vscale x 8 x half> %0
146+
}
147+
148+
define <vscale x 4 x float> @test_svcvt_f32_f64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x double> %x) {
149+
; CHECK-LABEL: test_svcvt_f32_f64_x_1:
150+
; CHECK: // %bb.0: // %entry
151+
; CHECK-NEXT: fcvt z0.s, p0/m, z0.d
152+
; CHECK-NEXT: ret
153+
;
154+
; CHECK-2p2-LABEL: test_svcvt_f32_f64_x_1:
155+
; CHECK-2p2: // %bb.0: // %entry
156+
; CHECK-2p2-NEXT: fcvt z0.s, p0/z, z0.d
157+
; CHECK-2p2-NEXT: ret
158+
entry:
159+
%0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
160+
ret <vscale x 4 x float> %0
161+
}
162+
163+
define <vscale x 4 x float> @test_svcvt_f32_f64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x double> %x) {
164+
; CHECK-LABEL: test_svcvt_f32_f64_x_2:
165+
; CHECK: // %bb.0: // %entry
166+
; CHECK-NEXT: fcvt z0.s, p0/m, z1.d
167+
; CHECK-NEXT: ret
168+
;
169+
; CHECK-2p2-LABEL: test_svcvt_f32_f64_x_2:
170+
; CHECK-2p2: // %bb.0: // %entry
171+
; CHECK-2p2-NEXT: fcvt z0.s, p0/z, z1.d
172+
; CHECK-2p2-NEXT: ret
173+
entry:
174+
%0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
175+
ret <vscale x 4 x float> %0
176+
}
177+
178+
define <vscale x 4 x float> @test_svcvt_f32_f64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x double> %x) {
179+
; CHECK-LABEL: test_svcvt_f32_f64_z:
180+
; CHECK: // %bb.0: // %entry
181+
; CHECK-NEXT: mov z0.s, #0 // =0x0
182+
; CHECK-NEXT: fcvt z0.s, p0/m, z1.d
183+
; CHECK-NEXT: ret
184+
;
185+
; CHECK-2p2-LABEL: test_svcvt_f32_f64_z:
186+
; CHECK-2p2: // %bb.0: // %entry
187+
; CHECK-2p2-NEXT: fcvt z0.s, p0/z, z1.d
188+
; CHECK-2p2-NEXT: ret
189+
entry:
190+
%0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
191+
ret <vscale x 4 x float> %0
192+
}
193+
194+
define <vscale x 4 x float> @test_svcvt_f32_f16_x_1(<vscale x 4 x i1> %pg, <vscale x 8 x half> %x) {
195+
; CHECK-LABEL: test_svcvt_f32_f16_x_1:
196+
; CHECK: // %bb.0: // %entry
197+
; CHECK-NEXT: fcvt z0.s, p0/m, z0.h
198+
; CHECK-NEXT: ret
199+
;
200+
; CHECK-2p2-LABEL: test_svcvt_f32_f16_x_1:
201+
; CHECK-2p2: // %bb.0: // %entry
202+
; CHECK-2p2-NEXT: fcvt z0.s, p0/z, z0.h
203+
; CHECK-2p2-NEXT: ret
204+
entry:
205+
%0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
206+
ret <vscale x 4 x float> %0
207+
}
208+
209+
define <vscale x 4 x float> @test_svcvt_f32_f16_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 8 x half> %x) {
210+
; CHECK-LABEL: test_svcvt_f32_f16_x_2:
211+
; CHECK: // %bb.0: // %entry
212+
; CHECK-NEXT: fcvt z0.s, p0/m, z1.h
213+
; CHECK-NEXT: ret
214+
;
215+
; CHECK-2p2-LABEL: test_svcvt_f32_f16_x_2:
216+
; CHECK-2p2: // %bb.0: // %entry
217+
; CHECK-2p2-NEXT: fcvt z0.s, p0/z, z1.h
218+
; CHECK-2p2-NEXT: ret
219+
entry:
220+
%0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
221+
ret <vscale x 4 x float> %0
222+
}
223+
224+
define <vscale x 4 x float> @test_svcvt_f32_f16_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 8 x half> %x) {
225+
; CHECK-LABEL: test_svcvt_f32_f16_z:
226+
; CHECK: // %bb.0: // %entry
227+
; CHECK-NEXT: mov z0.s, #0 // =0x0
228+
; CHECK-NEXT: fcvt z0.s, p0/m, z1.h
229+
; CHECK-NEXT: ret
230+
;
231+
; CHECK-2p2-LABEL: test_svcvt_f32_f16_z:
232+
; CHECK-2p2: // %bb.0: // %entry
233+
; CHECK-2p2-NEXT: fcvt z0.s, p0/z, z1.h
234+
; CHECK-2p2-NEXT: ret
235+
entry:
236+
%0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
237+
ret <vscale x 4 x float> %0
238+
}
239+
240+
define <vscale x 2 x double> @test_svcvt_f64_f16_x_1(<vscale x 2 x i1> %pg, <vscale x 8 x half> %x) {
241+
; CHECK-LABEL: test_svcvt_f64_f16_x_1:
242+
; CHECK: // %bb.0: // %entry
243+
; CHECK-NEXT: fcvt z0.d, p0/m, z0.h
244+
; CHECK-NEXT: ret
245+
;
246+
; CHECK-2p2-LABEL: test_svcvt_f64_f16_x_1:
247+
; CHECK-2p2: // %bb.0: // %entry
248+
; CHECK-2p2-NEXT: fcvt z0.d, p0/z, z0.h
249+
; CHECK-2p2-NEXT: ret
250+
entry:
251+
%0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
252+
ret <vscale x 2 x double> %0
253+
}
254+
255+
define <vscale x 2 x double> @test_svcvt_f64_f16_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 8 x half> %x) {
256+
; CHECK-LABEL: test_svcvt_f64_f16_x_2:
257+
; CHECK: // %bb.0: // %entry
258+
; CHECK-NEXT: fcvt z0.d, p0/m, z1.h
259+
; CHECK-NEXT: ret
260+
;
261+
; CHECK-2p2-LABEL: test_svcvt_f64_f16_x_2:
262+
; CHECK-2p2: // %bb.0: // %entry
263+
; CHECK-2p2-NEXT: fcvt z0.d, p0/z, z1.h
264+
; CHECK-2p2-NEXT: ret
265+
entry:
266+
%0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
267+
ret <vscale x 2 x double> %0
268+
}
269+
270+
define <vscale x 2 x double> @test_svcvt_f64_f16_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 8 x half> %x) {
271+
; CHECK-LABEL: test_svcvt_f64_f16_z:
272+
; CHECK: // %bb.0: // %entry
273+
; CHECK-NEXT: mov z0.d, #0 // =0x0
274+
; CHECK-NEXT: fcvt z0.d, p0/m, z1.h
275+
; CHECK-NEXT: ret
276+
;
277+
; CHECK-2p2-LABEL: test_svcvt_f64_f16_z:
278+
; CHECK-2p2: // %bb.0: // %entry
279+
; CHECK-2p2-NEXT: fcvt z0.d, p0/z, z1.h
280+
; CHECK-2p2-NEXT: ret
281+
entry:
282+
%0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
283+
ret <vscale x 2 x double> %0
284+
}
285+
286+
define <vscale x 2 x double> @test_svcvt_f64_f32_x_1(<vscale x 2 x i1> %pg, <vscale x 4 x float> %x) {
287+
; CHECK-LABEL: test_svcvt_f64_f32_x_1:
288+
; CHECK: // %bb.0: // %entry
289+
; CHECK-NEXT: fcvt z0.d, p0/m, z0.s
290+
; CHECK-NEXT: ret
291+
;
292+
; CHECK-2p2-LABEL: test_svcvt_f64_f32_x_1:
293+
; CHECK-2p2: // %bb.0: // %entry
294+
; CHECK-2p2-NEXT: fcvt z0.d, p0/z, z0.s
295+
; CHECK-2p2-NEXT: ret
296+
entry:
297+
%0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
298+
ret <vscale x 2 x double> %0
299+
}
300+
301+
define <vscale x 2 x double> @test_svcvt_f64_f32_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 4 x float> %x) {
302+
; CHECK-LABEL: test_svcvt_f64_f32_x_2:
303+
; CHECK: // %bb.0: // %entry
304+
; CHECK-NEXT: fcvt z0.d, p0/m, z1.s
305+
; CHECK-NEXT: ret
306+
;
307+
; CHECK-2p2-LABEL: test_svcvt_f64_f32_x_2:
308+
; CHECK-2p2: // %bb.0: // %entry
309+
; CHECK-2p2-NEXT: fcvt z0.d, p0/z, z1.s
310+
; CHECK-2p2-NEXT: ret
311+
entry:
312+
%0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
313+
ret <vscale x 2 x double> %0
314+
}
315+
316+
define <vscale x 2 x double> @test_svcvt_f64_f32_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 4 x float> %x) {
317+
; CHECK-LABEL: test_svcvt_f64_f32_z:
318+
; CHECK: // %bb.0: // %entry
319+
; CHECK-NEXT: mov z0.d, #0 // =0x0
320+
; CHECK-NEXT: fcvt z0.d, p0/m, z1.s
321+
; CHECK-NEXT: ret
322+
;
323+
; CHECK-2p2-LABEL: test_svcvt_f64_f32_z:
324+
; CHECK-2p2: // %bb.0: // %entry
325+
; CHECK-2p2-NEXT: fcvt z0.d, p0/z, z1.s
326+
; CHECK-2p2-NEXT: ret
327+
entry:
328+
%0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
329+
ret <vscale x 2 x double> %0
330+
}

0 commit comments

Comments
 (0)