Skip to content

Commit e3d0f18

Browse files
committed
[Clang][LLVM] Implement multi-single vectors MOP4{A/S}
Implement all multi-single {BF/F/S/U/SU/US}MOP4{A/S} instructions in clang and llvm following the acle in https://github.com/ARM-software/acle/pull/381/files. This PR depends on llvm#128854
1 parent 801f1d5 commit e3d0f18

File tree

6 files changed

+858
-11
lines changed

6 files changed

+858
-11
lines changed

clang/include/clang/Basic/arm_sme.td

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,7 @@ defm SVMOPS : ZAFPOuterProd<"mops">;
295295
multiclass MOP4<string mode, string za, string t, string i, list<ImmCheck> checks> {
296296
def _1x1 : Inst<"svmop4" # mode # "[_1x1]" # za # "[_{d}_{d}]", "vidd", t, MergeNone, i # "_1x1", [IsInOutZA, IsStreaming], checks>;
297297
def _1x2 : Inst<"svmop4" # mode # "[_1x2]" # za # "[_{d}_{d}]", "vid2", t, MergeNone, i # "_1x2", [IsInOutZA, IsStreaming], checks>;
298+
def _2x1 : Inst<"svmop4" # mode # "[_2x1]" # za # "[_{d}_{d}]", "vi2d", t, MergeNone, i # "_2x1", [IsInOutZA, IsStreaming], checks>;
298299
}
299300

300301
let SMETargetGuard = "sme2,sme-mop4" in {
@@ -350,6 +351,10 @@ multiclass SUMOP4<string mode, string za, string t, string i, list<ImmCheck> che
350351
"vid2.u", t, MergeNone, "aarch64_sme_sumop4" # mode # i # "_wide_1x2",
351352
[IsStreaming, IsInOutZA],
352353
checks>;
354+
def _2x1 : SInst<"svmop4" # mode # "[_2x1]_" # za # "[_{d}_{3}]",
355+
"vi2u", t, MergeNone, "aarch64_sme_sumop4" # mode # i # "_wide_2x1",
356+
[IsStreaming, IsInOutZA],
357+
checks>;
353358
}
354359

355360
multiclass USMOP4<string mode, string za, string t, string i, list<ImmCheck> checks> {
@@ -361,6 +366,10 @@ multiclass USMOP4<string mode, string za, string t, string i, list<ImmCheck> che
361366
"vid2.x", t, MergeNone, "aarch64_sme_usmop4" # mode # i # "_wide_1x2",
362367
[IsStreaming, IsInOutZA],
363368
checks>;
369+
def _2x1 : SInst<"svmop4" # mode # "[_2x1]_" # za # "[_{d}_{3}]",
370+
"vi2x", t, MergeNone, "aarch64_sme_usmop4" # mode # i # "_wide_2x1",
371+
[IsStreaming, IsInOutZA],
372+
checks>;
364373
}
365374

366375
let SMETargetGuard = "sme2,sme-mop4" in {
Lines changed: 304 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,304 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2+
3+
// REQUIRES: aarch64-registered-target
4+
// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
5+
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
6+
// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
7+
8+
9+
#include <arm_sme.h>
10+
11+
#ifdef SME_OVERLOADED_FORMS
12+
#define SME_ACLE_FUNC(A1,A2_UNUSED,A3, A4_UNUSED) A1##A3
13+
#else
14+
#define SME_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
15+
#endif
16+
17+
// CHECK-LABEL: @test_svmop4a_2x1_za32_s8_s8(
18+
// CHECK-NEXT: entry:
19+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.wide.2x1.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
20+
// CHECK-NEXT: ret void
21+
//
22+
void test_svmop4a_2x1_za32_s8_s8(svint8x2_t zn, svint8_t zm) __arm_streaming __arm_inout("za") {
23+
SME_ACLE_FUNC(svmop4a,_2x1,_za32,_s8_s8)(1, zn, zm);
24+
}
25+
26+
// CHECK-LABEL: @test_svmop4s_2x1_za32_s8_s8(
27+
// CHECK-NEXT: entry:
28+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.wide.2x1.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
29+
// CHECK-NEXT: ret void
30+
//
31+
void test_svmop4s_2x1_za32_s8_s8(svint8x2_t zn, svint8_t zm) __arm_streaming __arm_inout("za") {
32+
SME_ACLE_FUNC(svmop4s,_2x1,_za32,_s8_s8)(1, zn, zm);
33+
}
34+
35+
// CHECK-LABEL: @test_svmop4a_2x1_za32_u8_u8(
36+
// CHECK-NEXT: entry:
37+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.wide.2x1.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
38+
// CHECK-NEXT: ret void
39+
//
40+
void test_svmop4a_2x1_za32_u8_u8(svuint8x2_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") {
41+
SME_ACLE_FUNC(svmop4a,_2x1,_za32,_u8_u8)(1, zn, zm);
42+
}
43+
44+
// CHECK-LABEL: @test_svmop4s_2x1_za32_u8_u8(
45+
// CHECK-NEXT: entry:
46+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.wide.2x1.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
47+
// CHECK-NEXT: ret void
48+
//
49+
void test_svmop4s_2x1_za32_u8_u8(svuint8x2_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") {
50+
SME_ACLE_FUNC(svmop4s,_2x1,_za32,_u8_u8)(1, zn, zm);
51+
}
52+
53+
// CHECK-LABEL: @test_svmop4a_2x1_za32_s8_u8(
54+
// CHECK-NEXT: entry:
55+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.wide.2x1.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
56+
// CHECK-NEXT: ret void
57+
//
58+
void test_svmop4a_2x1_za32_s8_u8(svint8x2_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") {
59+
SME_ACLE_FUNC(svmop4a,_2x1,_za32,_s8_u8)(1, zn, zm);
60+
}
61+
62+
// CHECK-LABEL: @test_svmop4s_2x1_za32_s8_u8(
63+
// CHECK-NEXT: entry:
64+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.wide.2x1.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
65+
// CHECK-NEXT: ret void
66+
//
67+
void test_svmop4s_2x1_za32_s8_u8(svint8x2_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") {
68+
SME_ACLE_FUNC(svmop4s,_2x1,_za32,_s8_u8)(1, zn, zm);
69+
}
70+
71+
// CHECK-LABEL: @test_svmop4a_2x1_za32_u8_s8(
72+
// CHECK-NEXT: entry:
73+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.wide.2x1.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
74+
// CHECK-NEXT: ret void
75+
//
76+
void test_svmop4a_2x1_za32_u8_s8(svuint8x2_t zn, svint8_t zm) __arm_streaming __arm_inout("za") {
77+
SME_ACLE_FUNC(svmop4a,_2x1,_za32,_u8_s8)(1, zn, zm);
78+
}
79+
80+
// CHECK-LABEL: @test_svmop4s_2x1_za32_u8_s8(
81+
// CHECK-NEXT: entry:
82+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.wide.2x1.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
83+
// CHECK-NEXT: ret void
84+
//
85+
void test_svmop4s_2x1_za32_u8_s8(svuint8x2_t zn, svint8_t zm) __arm_streaming __arm_inout("za") {
86+
SME_ACLE_FUNC(svmop4s,_2x1,_za32,_u8_s8)(1, zn, zm);
87+
}
88+
89+
// CHECK-LABEL: @test_svmop4a_2x1_za32_s16_s16(
90+
// CHECK-NEXT: entry:
91+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.wide.2x1.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
92+
// CHECK-NEXT: ret void
93+
//
94+
void test_svmop4a_2x1_za32_s16_s16(svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za") {
95+
SME_ACLE_FUNC(svmop4a,_2x1,_za32,_s16_s16)(1, zn, zm);
96+
}
97+
98+
// CHECK-LABEL: @test_svmop4s_2x1_za32_s16_s16(
99+
// CHECK-NEXT: entry:
100+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.wide.2x1.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
101+
// CHECK-NEXT: ret void
102+
//
103+
void test_svmop4s_2x1_za32_s16_s16(svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za") {
104+
SME_ACLE_FUNC(svmop4s,_2x1,_za32,_s16_s16)(1, zn, zm);
105+
}
106+
107+
// CHECK-LABEL: @test_svmop4a_2x1_za32_u16_u16(
108+
// CHECK-NEXT: entry:
109+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.wide.2x1.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
110+
// CHECK-NEXT: ret void
111+
//
112+
void test_svmop4a_2x1_za32_u16_u16(svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") {
113+
SME_ACLE_FUNC(svmop4a,_2x1,_za32,_u16_u16)(1, zn, zm);
114+
}
115+
116+
// CHECK-LABEL: @test_svmop4s_2x1_za32_u16_u16(
117+
// CHECK-NEXT: entry:
118+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.wide.2x1.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
119+
// CHECK-NEXT: ret void
120+
//
121+
void test_svmop4s_2x1_za32_u16_u16(svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") {
122+
SME_ACLE_FUNC(svmop4a,_2x1,_za32,_u16_u16)(1, zn, zm);
123+
}
124+
125+
// CHECK-LABEL: @test_svmop4a_2x1_za32_f16_f16(
126+
// CHECK-NEXT: entry:
127+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.2x1.nxv8f16(i32 1, <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]])
128+
// CHECK-NEXT: ret void
129+
//
130+
void test_svmop4a_2x1_za32_f16_f16(svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") {
131+
SME_ACLE_FUNC(svmop4a,_2x1,_za32,_f16_f16)(1, zn, zm);
132+
}
133+
134+
// CHECK-LABEL: @test_svmop4s_2x1_za32_f16_f16(
135+
// CHECK-NEXT: entry:
136+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.2x1.nxv8f16(i32 1, <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]])
137+
// CHECK-NEXT: ret void
138+
//
139+
void test_svmop4s_2x1_za32_f16_f16(svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") {
140+
SME_ACLE_FUNC(svmop4s,_2x1,_za32,_f16_f16)(1, zn, zm);
141+
}
142+
143+
// CHECK-LABEL: @test_svmop4a_2x1_za32_bf16_bf16(
144+
// CHECK-NEXT: entry:
145+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.2x1.nxv8bf16(i32 1, <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
146+
// CHECK-NEXT: ret void
147+
//
148+
void test_svmop4a_2x1_za32_bf16_bf16(svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") {
149+
SME_ACLE_FUNC(svmop4a,_2x1,_za32,_bf16_bf16)(1, zn, zm);
150+
}
151+
152+
// CHECK-LABEL: @test_svmop4s_2x1_za32_bf16_bf16(
153+
// CHECK-NEXT: entry:
154+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.2x1.nxv8bf16(i32 1, <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
155+
// CHECK-NEXT: ret void
156+
//
157+
void test_svmop4s_2x1_za32_bf16_bf16(svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") {
158+
SME_ACLE_FUNC(svmop4s,_2x1,_za32,_bf16_bf16)(1, zn, zm);
159+
}
160+
161+
// CHECK-LABEL: @test_svmop4a_2x1_za64_s16_s16(
162+
// CHECK-NEXT: entry:
163+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.za64.wide.2x1.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
164+
// CHECK-NEXT: ret void
165+
//
166+
void test_svmop4a_2x1_za64_s16_s16(svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za") {
167+
SME_ACLE_FUNC(svmop4a,_2x1,_za64,_s16_s16)(1, zn, zm);
168+
}
169+
170+
// CHECK-LABEL: @test_svmop4s_2x1_za64_s16_s16(
171+
// CHECK-NEXT: entry:
172+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.za64.wide.2x1.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
173+
// CHECK-NEXT: ret void
174+
//
175+
void test_svmop4s_2x1_za64_s16_s16(svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za") {
176+
SME_ACLE_FUNC(svmop4s,_2x1,_za64,_s16_s16)(1, zn, zm);
177+
}
178+
179+
// CHECK-LABEL: @test_svmop4a_2x1_za64_u16_u16(
180+
// CHECK-NEXT: entry:
181+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.za64.wide.2x1.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
182+
// CHECK-NEXT: ret void
183+
//
184+
void test_svmop4a_2x1_za64_u16_u16(svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") {
185+
SME_ACLE_FUNC(svmop4a,_2x1,_za64,_u16_u16)(1, zn, zm);
186+
}
187+
188+
// CHECK-LABEL: @test_svmop4s_2x1_za64_u16_u16(
189+
// CHECK-NEXT: entry:
190+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.za64.wide.2x1.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
191+
// CHECK-NEXT: ret void
192+
//
193+
void test_svmop4s_2x1_za64_u16_u16(svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") {
194+
SME_ACLE_FUNC(svmop4s,_2x1,_za64,_u16_u16)(1, zn, zm);
195+
}
196+
197+
// CHECK-LABEL: @test_svmop4a_2x1_za64_s16_u16(
198+
// CHECK-NEXT: entry:
199+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.za64.wide.2x1.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
200+
// CHECK-NEXT: ret void
201+
//
202+
void test_svmop4a_2x1_za64_s16_u16(svint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") {
203+
SME_ACLE_FUNC(svmop4a,_2x1,_za64,_s16_u16)(1, zn, zm);
204+
}
205+
206+
// CHECK-LABEL: @test_svmop4s_2x1_za64_s16_u16(
207+
// CHECK-NEXT: entry:
208+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.za64.wide.2x1.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
209+
// CHECK-NEXT: ret void
210+
//
211+
void test_svmop4s_2x1_za64_s16_u16(svint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") {
212+
SME_ACLE_FUNC(svmop4s,_2x1,_za64,_s16_u16)(1, zn, zm);
213+
}
214+
215+
// CHECK-LABEL: @test_svmop4a_2x1_za64_u16_s16(
216+
// CHECK-NEXT: entry:
217+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.za64.wide.2x1.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
218+
// CHECK-NEXT: ret void
219+
//
220+
void test_svmop4a_2x1_za64_u16_s16(svuint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za") {
221+
SME_ACLE_FUNC(svmop4a,_2x1,_za64,_u16_s16)(1, zn, zm);
222+
}
223+
224+
// CHECK-LABEL: @test_svmop4s_2x1_za64_u16_s16(
225+
// CHECK-NEXT: entry:
226+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.za64.wide.2x1.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
227+
// CHECK-NEXT: ret void
228+
//
229+
void test_svmop4s_2x1_za64_u16_s16(svuint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za") {
230+
SME_ACLE_FUNC(svmop4s,_2x1,_za64,_u16_s16)(1, zn, zm);
231+
}
232+
233+
234+
// CHECK-LABEL: @test_svmop4a_2x1_za16_f16_f16(
235+
// CHECK-NEXT: entry:
236+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.2x1.nxv8f16(i32 1, <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]])
237+
// CHECK-NEXT: ret void
238+
//
239+
void test_svmop4a_2x1_za16_f16_f16(svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") {
240+
SME_ACLE_FUNC(svmop4a,_2x1,_za16,_f16_f16)(1, zn, zm);
241+
}
242+
243+
// CHECK-LABEL: @test_svmop4s_2x1_za16_f16_f16(
244+
// CHECK-NEXT: entry:
245+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.2x1.nxv8f16(i32 1, <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]])
246+
// CHECK-NEXT: ret void
247+
//
248+
void test_svmop4s_2x1_za16_f16_f16(svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") {
249+
SME_ACLE_FUNC(svmop4s,_2x1,_za16,_f16_f16)(1, zn, zm);
250+
}
251+
252+
// CHECK-LABEL: @test_svmop4a_2x1_za32_f32_f32(
253+
// CHECK-NEXT: entry:
254+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.2x1.nxv4f32(i32 1, <vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]], <vscale x 4 x float> [[ZM:%.*]])
255+
// CHECK-NEXT: ret void
256+
//
257+
void test_svmop4a_2x1_za32_f32_f32(svfloat32x2_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") {
258+
SME_ACLE_FUNC(svmop4a,_2x1,_za32,_f32_f32)(1, zn, zm);
259+
}
260+
261+
// CHECK-LABEL: @test_svmop4s_2x1_za32_f32_f32(
262+
// CHECK-NEXT: entry:
263+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.2x1.nxv4f32(i32 1, <vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]], <vscale x 4 x float> [[ZM:%.*]])
264+
// CHECK-NEXT: ret void
265+
//
266+
void test_svmop4s_2x1_za32_f32_f32(svfloat32x2_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") {
267+
SME_ACLE_FUNC(svmop4s,_2x1,_za32,_f32_f32)(1, zn, zm);
268+
}
269+
270+
// CHECK-LABEL: @test_svmop4a_2x1_za64_f64_f64(
271+
// CHECK-NEXT: entry:
272+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.2x1.nxv2f64(i32 1, <vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]], <vscale x 2 x double> [[ZM:%.*]])
273+
// CHECK-NEXT: ret void
274+
//
275+
void test_svmop4a_2x1_za64_f64_f64(svfloat64x2_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") {
276+
SME_ACLE_FUNC(svmop4a,_2x1,_za64,_f64_f64)(1, zn, zm);
277+
}
278+
279+
// CHECK-LABEL: @test_svmop4s_2x1_za64_f64_f64(
280+
// CHECK-NEXT: entry:
281+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.2x1.nxv2f64(i32 1, <vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]], <vscale x 2 x double> [[ZM:%.*]])
282+
// CHECK-NEXT: ret void
283+
//
284+
void test_svmop4s_2x1_za64_f64_f64(svfloat64x2_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") {
285+
SME_ACLE_FUNC(svmop4s,_2x1,_za64,_f64_f64)(1, zn, zm);
286+
}
287+
288+
// CHECK-LABEL: @test_svmop4a_2x1_za16_bf16_bf16(
289+
// CHECK-NEXT: entry:
290+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.2x1.nxv8bf16(i32 1, <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
291+
// CHECK-NEXT: ret void
292+
//
293+
void test_svmop4a_2x1_za16_bf16_bf16(svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") {
294+
SME_ACLE_FUNC(svmop4a,_2x1,_za16,_bf16_bf16)(1, zn, zm);
295+
}
296+
297+
// CHECK-LABEL: @test_svmop4s_2x1_za16_bf16_bf16(
298+
// CHECK-NEXT: entry:
299+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.2x1.nxv8bf16(i32 1, <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
300+
// CHECK-NEXT: ret void
301+
//
302+
void test_svmop4s_2x1_za16_bf16_bf16(svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") {
303+
SME_ACLE_FUNC(svmop4s,_2x1,_za16,_bf16_bf16)(1, zn, zm);
304+
}

0 commit comments

Comments
 (0)