Skip to content

Commit ca7e545

Browse files
[AArch64] Generate zeroing forms of certain SVE2.2 instructions (3/11)
1 parent 92a9a4a commit ca7e545

File tree

3 files changed

+159
-3
lines changed

3 files changed

+159
-3
lines changed

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4260,10 +4260,10 @@ let Predicates = [HasSVE2p2orSME2p2] in {
42604260
defm FCVTNT_ZPzZ : sve_fp_fcvtntz<"fcvtnt">;
42614261
def FCVTXNT_ZPzZ_DtoS : sve_fp_fcvt2z<0b0010, "fcvtxnt", ZPR32, ZPR64>;
42624262
// Placing even
4263-
def FCVTX_ZPzZ_DtoS : sve_fp_z2op_p_zd<0b0001010, "fcvtx", ZPR64, ZPR32>;
4263+
defm FCVTX_ZPzZ : sve_fp_z2op_p_zd<"fcvtx", int_aarch64_sve_fcvtx_f32f64>;
42644264

42654265
// SVE2p2 floating-point convert precision up, zeroing predicate
4266-
defm FCVTLT_ZPzZ : sve_fp_fcvtltz<"fcvtlt">;
4266+
defm FCVTLT_ZPzZ : sve_fp_fcvtltz<"fcvtlt", "int_aarch64_sve_fcvtlt">;
42674267

42684268
// SVE2p2 floating-point convert single-to-bf (placing odd), zeroing predicate
42694269
def BFCVTNT_ZPzZ : sve_fp_fcvt2z<0b1010, "bfcvtnt", ZPR16, ZPR32>;

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2858,9 +2858,12 @@ multiclass sve_fp_fcvtntz<string asm> {
28582858
def _DtoS : sve_fp_fcvt2z<0b1110, asm, ZPR32, ZPR64>;
28592859
}
28602860

2861-
multiclass sve_fp_fcvtltz<string asm> {
2861+
// Zeroing-predicate FCVTLT: defines the _HtoS and _StoD instruction variants
// and one selection pattern per variant.
//   asm - assembly mnemonic passed to each sve_fp_fcvt2z instance.
//   op  - intrinsic name prefix as a string; the "_f32f16" / "_f64f32" suffix
//         is pasted on and the result is cast to an SDPatternOperator.
multiclass sve_fp_fcvtltz<string asm, string op> {
28622862
def _HtoS : sve_fp_fcvt2z<0b1001, asm, ZPR32, ZPR16>;
28632863
def _StoD : sve_fp_fcvt2z<0b1111, asm, ZPR64, ZPR32>;
2864+
2865+
// Pattern operand types are (passthru, predicate, source); the result type
// equals the passthru type.
// NOTE(review): SVE_3_Op_UndefZero_Pat is defined elsewhere; presumably it
// only matches an undef or all-zero passthru -- confirm against its definition.
def : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
2866+
def : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
28642867
}
28652868

28662869
//===----------------------------------------------------------------------===//
@@ -3267,6 +3270,12 @@ class sve_fp_z2op_p_zd<bits<7> opc,string asm, RegisterOperand i_zprtype,
32673270
let mayRaiseFPException = 1;
32683271
}
32693272

3273+
// Multiclass wrapper around the sve_fp_z2op_p_zd instruction class (same name;
// this is the multiclass). Defines the single _DtoS variant and a selection
// pattern for it.
//   asm - assembly mnemonic passed to the instruction class.
//   op  - pattern operator (the intrinsic) selected to the _DtoS instruction.
multiclass sve_fp_z2op_p_zd<string asm, SDPatternOperator op> {
3274+
def _DtoS : sve_fp_z2op_p_zd<0b0001010, asm, ZPR64, ZPR32>;
3275+
3276+
// Result and passthru are nxv4f32, predicate is nxv2i1, source is nxv2f64.
// NOTE(review): SVE_3_Op_UndefZero_Pat is defined elsewhere; presumably it
// only matches an undef or all-zero passthru -- confirm against its definition.
def : SVE_3_Op_UndefZero_Pat<nxv4f32, op, nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
3277+
}
3278+
32703279
multiclass sve_fp_z2op_p_zd_hsd<bits<5> opc, string asm> {
32713280
def _H : sve_fp_z2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16>;
32723281
def _S : sve_fp_z2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32>;
Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mattr=+sve2 < %s | FileCheck %s
3+
; RUN: llc -mattr=+sve2p2 < %s | FileCheck %s -check-prefix CHECK-2p2
4+
5+
; RUN: llc -mattr=+sme2 -force-streaming < %s | FileCheck %s
6+
; RUN: llc -mattr=+sme2p2 -force-streaming < %s | FileCheck %s -check-prefix CHECK-2p2
7+
8+
target triple = "aarch64-linux"
9+
10+
; fcvtlt f16->f32 with an undef passthru; source and result both use z0.
; The default prefix expects the merging form (p0/m); CHECK-2p2 expects the
; zeroing form (p0/z).
define <vscale x 4 x float> @test_svcvtlt_f32_f16_x_1(<vscale x 4 x i1> %pg, <vscale x 8 x half> %x) {
11+
; CHECK-LABEL: test_svcvtlt_f32_f16_x_1:
12+
; CHECK: // %bb.0: // %entry
13+
; CHECK-NEXT: fcvtlt z0.s, p0/m, z0.h
14+
; CHECK-NEXT: ret
15+
;
16+
; CHECK-2p2-LABEL: test_svcvtlt_f32_f16_x_1:
17+
; CHECK-2p2: // %bb.0: // %entry
18+
; CHECK-2p2-NEXT: fcvtlt z0.s, p0/z, z0.h
19+
; CHECK-2p2-NEXT: ret
20+
entry:
21+
%0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
22+
ret <vscale x 4 x float> %0
23+
}
24+
25+
; fcvtlt f16->f32 with an undef passthru. The unused `double %z0` argument
; precedes %x, so the expected source register is z1 while the result is z0.
; The default prefix expects p0/m; CHECK-2p2 expects p0/z.
define <vscale x 4 x float> @test_svcvtlt_f32_f16_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 8 x half> %x) {
26+
; CHECK-LABEL: test_svcvtlt_f32_f16_x_2:
27+
; CHECK: // %bb.0: // %entry
28+
; CHECK-NEXT: fcvtlt z0.s, p0/m, z1.h
29+
; CHECK-NEXT: ret
30+
;
31+
; CHECK-2p2-LABEL: test_svcvtlt_f32_f16_x_2:
32+
; CHECK-2p2: // %bb.0: // %entry
33+
; CHECK-2p2-NEXT: fcvtlt z0.s, p0/z, z1.h
34+
; CHECK-2p2-NEXT: ret
35+
entry:
36+
%0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
37+
ret <vscale x 4 x float> %0
38+
}
39+
40+
; fcvtlt f16->f32 with a zeroinitializer passthru. The default prefix expects
; an explicit `mov z0.s, #0` followed by the merging form; CHECK-2p2 expects a
; single zeroing-form (p0/z) instruction with no separate zeroing move.
define <vscale x 4 x float> @test_svcvtlt_f32_f16_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 8 x half> %x) {
41+
; CHECK-LABEL: test_svcvtlt_f32_f16_z:
42+
; CHECK: // %bb.0: // %entry
43+
; CHECK-NEXT: mov z0.s, #0 // =0x0
44+
; CHECK-NEXT: fcvtlt z0.s, p0/m, z1.h
45+
; CHECK-NEXT: ret
46+
;
47+
; CHECK-2p2-LABEL: test_svcvtlt_f32_f16_z:
48+
; CHECK-2p2: // %bb.0: // %entry
49+
; CHECK-2p2-NEXT: fcvtlt z0.s, p0/z, z1.h
50+
; CHECK-2p2-NEXT: ret
51+
entry:
52+
%0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
53+
ret <vscale x 4 x float> %0
54+
}
55+
56+
; fcvtlt f32->f64 with an undef passthru; source and result both use z0.
; The default prefix expects the merging form (p0/m); CHECK-2p2 expects the
; zeroing form (p0/z).
define <vscale x 2 x double> @test_svcvtlt_f64_f32_x_1(<vscale x 2 x i1> %pg, <vscale x 4 x float> %x) {
57+
; CHECK-LABEL: test_svcvtlt_f64_f32_x_1:
58+
; CHECK: // %bb.0: // %entry
59+
; CHECK-NEXT: fcvtlt z0.d, p0/m, z0.s
60+
; CHECK-NEXT: ret
61+
;
62+
; CHECK-2p2-LABEL: test_svcvtlt_f64_f32_x_1:
63+
; CHECK-2p2: // %bb.0: // %entry
64+
; CHECK-2p2-NEXT: fcvtlt z0.d, p0/z, z0.s
65+
; CHECK-2p2-NEXT: ret
66+
entry:
67+
%0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
68+
ret <vscale x 2 x double> %0
69+
}
70+
71+
; fcvtlt f32->f64 with an undef passthru. The unused `double %z0` argument
; precedes %x, so the expected source register is z1 while the result is z0.
; The default prefix expects p0/m; CHECK-2p2 expects p0/z.
define <vscale x 2 x double> @test_svcvtlt_f64_f32_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 4 x float> %x) {
72+
; CHECK-LABEL: test_svcvtlt_f64_f32_x_2:
73+
; CHECK: // %bb.0: // %entry
74+
; CHECK-NEXT: fcvtlt z0.d, p0/m, z1.s
75+
; CHECK-NEXT: ret
76+
;
77+
; CHECK-2p2-LABEL: test_svcvtlt_f64_f32_x_2:
78+
; CHECK-2p2: // %bb.0: // %entry
79+
; CHECK-2p2-NEXT: fcvtlt z0.d, p0/z, z1.s
80+
; CHECK-2p2-NEXT: ret
81+
entry:
82+
%0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
83+
ret <vscale x 2 x double> %0
84+
}
85+
86+
; fcvtlt f32->f64 with a zeroinitializer passthru. The default prefix expects
; an explicit `mov z0.d, #0` followed by the merging form; CHECK-2p2 expects a
; single zeroing-form (p0/z) instruction with no separate zeroing move.
define <vscale x 2 x double> @test_svcvtlt_f64_f32_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 4 x float> %x) {
87+
; CHECK-LABEL: test_svcvtlt_f64_f32_z:
88+
; CHECK: // %bb.0: // %entry
89+
; CHECK-NEXT: mov z0.d, #0 // =0x0
90+
; CHECK-NEXT: fcvtlt z0.d, p0/m, z1.s
91+
; CHECK-NEXT: ret
92+
;
93+
; CHECK-2p2-LABEL: test_svcvtlt_f64_f32_z:
94+
; CHECK-2p2: // %bb.0: // %entry
95+
; CHECK-2p2-NEXT: fcvtlt z0.d, p0/z, z1.s
96+
; CHECK-2p2-NEXT: ret
97+
entry:
98+
%0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
99+
ret <vscale x 2 x double> %0
100+
}
101+
102+
; fcvtx f64->f32 with an undef passthru; source and result both use z0.
; The default prefix expects the merging form (p0/m); CHECK-2p2 expects the
; zeroing form (p0/z).
define <vscale x 4 x float> @test_svcvtx_f32_f64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x double> %x) {
103+
; CHECK-LABEL: test_svcvtx_f32_f64_x_1:
104+
; CHECK: // %bb.0: // %entry
105+
; CHECK-NEXT: fcvtx z0.s, p0/m, z0.d
106+
; CHECK-NEXT: ret
107+
;
108+
; CHECK-2p2-LABEL: test_svcvtx_f32_f64_x_1:
109+
; CHECK-2p2: // %bb.0: // %entry
110+
; CHECK-2p2-NEXT: fcvtx z0.s, p0/z, z0.d
111+
; CHECK-2p2-NEXT: ret
112+
entry:
113+
%0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtx.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
114+
ret <vscale x 4 x float> %0
115+
}
116+
117+
; fcvtx f64->f32 with an undef passthru and distinct source/result registers.
; The unused `double %z0` argument precedes %x so that %x arrives in z1, as in
; the fcvtlt *_x_2 tests; without it this test duplicated
; test_svcvtx_f32_f64_x_1. The default prefix expects p0/m; CHECK-2p2 expects
; p0/z.
; NOTE(review): CHECK lines below were updated by hand to match the new
; signature; regenerate with utils/update_llc_test_checks.py to confirm.
define <vscale x 4 x float> @test_svcvtx_f32_f64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x double> %x) {
117+
; CHECK-LABEL: test_svcvtx_f32_f64_x_2:
118+
; CHECK: // %bb.0: // %entry
119+
; CHECK-NEXT: fcvtx z0.s, p0/m, z1.d
120+
; CHECK-NEXT: ret
121+
;
122+
; CHECK-2p2-LABEL: test_svcvtx_f32_f64_x_2:
123+
; CHECK-2p2: // %bb.0: // %entry
124+
; CHECK-2p2-NEXT: fcvtx z0.s, p0/z, z1.d
125+
; CHECK-2p2-NEXT: ret
126+
entry:
127+
%0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtx.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
128+
ret <vscale x 4 x float> %0
129+
}
131+
132+
; fcvtx f64->f32 with a zeroinitializer passthru; %x arrives in z0, the same
; register the result is returned in. The default prefix expects z1 to be
; zeroed, a merging fcvtx into z1, then a copy back to z0; CHECK-2p2 expects a
; single zeroing-form (p0/z) instruction operating in place on z0.
define <vscale x 4 x float> @test_svcvtx_f32_f64_z(<vscale x 2 x i1> %pg, <vscale x 2 x double> %x) {
133+
; CHECK-LABEL: test_svcvtx_f32_f64_z:
134+
; CHECK: // %bb.0: // %entry
135+
; CHECK-NEXT: mov z1.s, #0 // =0x0
136+
; CHECK-NEXT: fcvtx z1.s, p0/m, z0.d
137+
; CHECK-NEXT: mov z0.d, z1.d
138+
; CHECK-NEXT: ret
139+
;
140+
; CHECK-2p2-LABEL: test_svcvtx_f32_f64_z:
141+
; CHECK-2p2: // %bb.0: // %entry
142+
; CHECK-2p2-NEXT: fcvtx z0.s, p0/z, z0.d
143+
; CHECK-2p2-NEXT: ret
144+
entry:
145+
%0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtx.f32f64(<vscale x 4 x float> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
146+
ret <vscale x 4 x float> %0
147+
}

0 commit comments

Comments
 (0)