Skip to content

Commit d29ca21

Browse files
Add intrinsics to differentiate za64 and za32
1 parent 88e9c0e commit d29ca21

File tree

5 files changed

+146
-98
lines changed

5 files changed

+146
-98
lines changed

clang/include/clang/Basic/arm_sme.td

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -380,16 +380,16 @@ multiclass MOP4<string name, string n, string t, string i, string wide> {
380380
def NAME # "_1x1" : Inst<"svmop4" # name # "_1x1_" # n # "[_{d}_{d}]", "vidd", t, MergeNone, i # wide # "_1x1", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>;
381381
}
382382

383-
multiclass SUMOP4<string s, string za, string t> {
383+
multiclass SUMOP4<string s, string za, string t, string i> {
384384
def _1x1 : SInst<"svmop4" # s # "[_1x1_]" # za # "[_{2}_{3}]",
385-
"vidu", t, MergeNone, "aarch64_sme_sumop4" # s # "_wide_1x1",
385+
"vidu", t, MergeNone, "aarch64_sme_sumop4" # s # i # "_wide_1x1",
386386
[IsStreaming, IsInOutZA],
387387
[ImmCheck<0, ImmCheck0_3>]>;
388388
}
389389

390-
multiclass USMOP4<string s, string za, string t> {
390+
multiclass USMOP4<string s, string za, string t, string i> {
391391
def _1x1 : SInst<"svmop4" # s # "[_1x1_]" # za # "[_{2}_{3}]",
392-
"vidx", t, MergeNone, "aarch64_sme_usmop4" # s # "_wide_1x1",
392+
"vidx", t, MergeNone, "aarch64_sme_usmop4" # s # i # "_wide_1x1",
393393
[IsStreaming, IsInOutZA],
394394
[ImmCheck<0, ImmCheck0_3>]>;
395395
}
@@ -419,10 +419,10 @@ let SMETargetGuard = "sme2" in {
419419
defm SVBMOP4A_S : MOP4<"a", "za32", "b", "aarch64_sme_mop4a", "_wide">;
420420
defm SVBMOP4S_S : MOP4<"s", "za32", "b", "aarch64_sme_mop4s", "_wide">;
421421

422-
defm SVSUMOP4A_S : SUMOP4<"a", "za32", "cs">;
423-
defm SVSUMOP4S_S : SUMOP4<"s", "za32", "cs">;
424-
defm SVUSMOP4A_S : USMOP4<"a", "za32", "UcUs">;
425-
defm SVUSMOP4S_S : USMOP4<"s", "za32", "UcUs">;
422+
defm SVSUMOP4A_S : SUMOP4<"a", "za32", "cs", "">;
423+
defm SVSUMOP4S_S : SUMOP4<"s", "za32", "cs", "">;
424+
defm SVUSMOP4A_S : USMOP4<"a", "za32", "UcUs", "">;
425+
defm SVUSMOP4S_S : USMOP4<"s", "za32", "UcUs", "">;
426426

427427
// VERTICAL DOT-PRODUCT
428428
def SVVDOT_LANE_ZA32_VG1x2_S : Inst<"svvdot_lane_za32[_{d}]_vg1x2", "vm2di", "s", MergeNone, "aarch64_sme_svdot_lane_za32_vg1x2", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_3>]>;
@@ -474,14 +474,14 @@ let SMETargetGuard = "sme2" in {
474474
}
475475

476476
let SMETargetGuard = "sme2,sme-i16i64" in {
477-
defm SVSMOP4A_HtoD : MOP4<"a", "za64", "s", "aarch64_sme_smop4a", "_wide">;
478-
defm SVSMOP4S_HtoD : MOP4<"s", "za64", "s", "aarch64_sme_smop4s", "_wide">;
479-
defm SVUMOP4A_HtoD : MOP4<"a", "za64", "Us", "aarch64_sme_umop4a", "_wide">;
480-
defm SVUMOP4S_HtoD : MOP4<"s", "za64", "Us", "aarch64_sme_umop4s", "_wide">;
481-
defm SVSUMOP4A_D : SUMOP4<"a", "za64", "s">;
482-
defm SVSUMOP4S_D : SUMOP4<"s", "za64", "s">;
483-
defm SVUSMOP4A_D : USMOP4<"a", "za64", "Us">;
484-
defm SVUSMOP4S_D : USMOP4<"s", "za64", "Us">;
477+
defm SVSMOP4A_HtoD : MOP4<"a", "za64", "s", "aarch64_sme_smop4a_za64", "_wide">;
478+
defm SVSMOP4S_HtoD : MOP4<"s", "za64", "s", "aarch64_sme_smop4s_za64", "_wide">;
479+
defm SVUMOP4A_HtoD : MOP4<"a", "za64", "Us", "aarch64_sme_umop4a_za64", "_wide">;
480+
defm SVUMOP4S_HtoD : MOP4<"s", "za64", "Us", "aarch64_sme_umop4s_za64", "_wide">;
481+
defm SVSUMOP4A_D : SUMOP4<"a", "za64", "s", "_za64">;
482+
defm SVSUMOP4S_D : SUMOP4<"s", "za64", "s", "_za64">;
483+
defm SVUSMOP4A_D : USMOP4<"a", "za64", "Us", "_za64">;
484+
defm SVUSMOP4S_D : USMOP4<"s", "za64", "Us", "_za64">;
485485

486486
def SVVDOT_LANE_ZA64_VG1x4_S : Inst<"svvdot_lane_za64[_{d}]_vg1x4", "vm4di", "s", MergeNone, "aarch64_sme_svdot_lane_za64_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_1>]>;
487487
def SVVDOT_LANE_ZA64_VG1x4_U : Inst<"svvdot_lane_za64[_{d}]_vg1x4", "vm4di", "Us", MergeNone, "aarch64_sme_uvdot_lane_za64_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_1>]>;

clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x1.c

Lines changed: 74 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,62 @@ void test_svmop4s_1x1_za32_u8_u8(svuint8_t zn, svuint8_t zm) __arm_streaming __a
7272
SME_ACLE_FUNC(svmop4s_1x1_za32,_u8_u8,)(3, zn, zm);
7373
}
7474

75+
// CHECK-LABEL: @test_svmop4a_1x1_za32_s8_u8(
76+
// CHECK-NEXT: entry:
77+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
78+
// CHECK-NEXT: ret void
79+
//
80+
// CPP-CHECK-LABEL: @_Z27test_svmop4a_1x1_za32_s8_u8u10__SVInt8_tu11__SVUint8_t(
81+
// CPP-CHECK-NEXT: entry:
82+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
83+
// CPP-CHECK-NEXT: ret void
84+
//
85+
void test_svmop4a_1x1_za32_s8_u8(svint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") {
86+
SME_ACLE_FUNC(svmop4a_1x1_za32,_s8_u8,)(3, zn, zm);
87+
}
88+
89+
// CHECK-LABEL: @test_svmop4s_1x1_za32_s8_u8(
90+
// CHECK-NEXT: entry:
91+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
92+
// CHECK-NEXT: ret void
93+
//
94+
// CPP-CHECK-LABEL: @_Z27test_svmop4s_1x1_za32_s8_u8u10__SVInt8_tu11__SVUint8_t(
95+
// CPP-CHECK-NEXT: entry:
96+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
97+
// CPP-CHECK-NEXT: ret void
98+
//
99+
void test_svmop4s_1x1_za32_s8_u8(svint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") {
100+
SME_ACLE_FUNC(svmop4s_1x1_za32,_s8_u8,)(3, zn, zm);
101+
}
102+
103+
// CHECK-LABEL: @test_svmop4a_1x1_za32_u8_s8(
104+
// CHECK-NEXT: entry:
105+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
106+
// CHECK-NEXT: ret void
107+
//
108+
// CPP-CHECK-LABEL: @_Z27test_svmop4a_1x1_za32_u8_s8u11__SVUint8_tu10__SVInt8_t(
109+
// CPP-CHECK-NEXT: entry:
110+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
111+
// CPP-CHECK-NEXT: ret void
112+
//
113+
void test_svmop4a_1x1_za32_u8_s8(svuint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") {
114+
SME_ACLE_FUNC(svmop4a_1x1_za32,_u8_s8,)(3, zn, zm);
115+
}
116+
117+
// CHECK-LABEL: @test_svmop4s_1x1_za32_u8_s8(
118+
// CHECK-NEXT: entry:
119+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
120+
// CHECK-NEXT: ret void
121+
//
122+
// CPP-CHECK-LABEL: @_Z27test_svmop4s_1x1_za32_u8_s8u11__SVUint8_tu10__SVInt8_t(
123+
// CPP-CHECK-NEXT: entry:
124+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
125+
// CPP-CHECK-NEXT: ret void
126+
//
127+
void test_svmop4s_1x1_za32_u8_s8(svuint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") {
128+
SME_ACLE_FUNC(svmop4s_1x1_za32,_u8_s8,)(3, zn, zm);
129+
}
130+
75131
// CHECK-LABEL: @test_svmop4a_1x1_za32_s16_s16(
76132
// CHECK-NEXT: entry:
77133
// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
@@ -186,40 +242,40 @@ void test_svmop4s_1x1_za32_bf16_bf16(svbfloat16_t zn, svbfloat16_t zm) __arm_str
186242

187243
// CHECK-LABEL: @test_svmop4a_1x1_za64_s16_s16(
188244
// CHECK-NEXT: entry:
189-
// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
245+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.za64.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
190246
// CHECK-NEXT: ret void
191247
//
192248
// CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za64_s16_s16u11__SVInt16_tS_(
193249
// CPP-CHECK-NEXT: entry:
194-
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
250+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4a.za64.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
195251
// CPP-CHECK-NEXT: ret void
196252
//
197253
void test_svmop4a_1x1_za64_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") {
198-
SME_ACLE_FUNC(svmop4a_1x1_za32,_s16_s16,)(3, zn, zm);
254+
SME_ACLE_FUNC(svmop4a_1x1_za64,_s16_s16,)(3, zn, zm);
199255
}
200256

201257
// CHECK-LABEL: @test_svmop4s_1x1_za64_s16_s16(
202258
// CHECK-NEXT: entry:
203-
// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
259+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.za64.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
204260
// CHECK-NEXT: ret void
205261
//
206262
// CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za64_s16_s16u11__SVInt16_tS_(
207263
// CPP-CHECK-NEXT: entry:
208-
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
264+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smop4s.za64.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
209265
// CPP-CHECK-NEXT: ret void
210266
//
211267
void test_svmop4s_1x1_za64_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") {
212-
SME_ACLE_FUNC(svmop4s_1x1_za32,_s16_s16,)(3, zn, zm);
268+
SME_ACLE_FUNC(svmop4s_1x1_za64,_s16_s16,)(3, zn, zm);
213269
}
214270

215271
// CHECK-LABEL: @test_svmop4a_1x1_za64_u16_u16(
216272
// CHECK-NEXT: entry:
217-
// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
273+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.za64.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
218274
// CHECK-NEXT: ret void
219275
//
220276
// CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za64_u16_u16u12__SVUint16_tS_(
221277
// CPP-CHECK-NEXT: entry:
222-
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
278+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4a.za64.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
223279
// CPP-CHECK-NEXT: ret void
224280
//
225281
void test_svmop4a_1x1_za64_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") {
@@ -228,12 +284,12 @@ void test_svmop4a_1x1_za64_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming
228284

229285
// CHECK-LABEL: @test_svmop4s_1x1_za64_u16_u16(
230286
// CHECK-NEXT: entry:
231-
// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
287+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.za64.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
232288
// CHECK-NEXT: ret void
233289
//
234290
// CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za64_u16_u16u12__SVUint16_tS_(
235291
// CPP-CHECK-NEXT: entry:
236-
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
292+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umop4s.za64.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
237293
// CPP-CHECK-NEXT: ret void
238294
//
239295
void test_svmop4s_1x1_za64_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") {
@@ -242,12 +298,12 @@ void test_svmop4s_1x1_za64_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming
242298

243299
// CHECK-LABEL: @test_svmop4a_1x1_za64_s16_u16(
244300
// CHECK-NEXT: entry:
245-
// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
301+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.za64.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
246302
// CHECK-NEXT: ret void
247303
//
248304
// CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za64_s16_u16u11__SVInt16_tu12__SVUint16_t(
249305
// CPP-CHECK-NEXT: entry:
250-
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
306+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.za64.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
251307
// CPP-CHECK-NEXT: ret void
252308
//
253309
void test_svmop4a_1x1_za64_s16_u16(svint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") {
@@ -256,12 +312,12 @@ void test_svmop4a_1x1_za64_s16_u16(svint16_t zn, svuint16_t zm) __arm_streaming
256312

257313
// CHECK-LABEL: @test_svmop4s_1x1_za64_s16_u16(
258314
// CHECK-NEXT: entry:
259-
// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
315+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.za64.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
260316
// CHECK-NEXT: ret void
261317
//
262318
// CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za64_s16_u16u11__SVInt16_tu12__SVUint16_t(
263319
// CPP-CHECK-NEXT: entry:
264-
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
320+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.za64.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
265321
// CPP-CHECK-NEXT: ret void
266322
//
267323
void test_svmop4s_1x1_za64_s16_u16(svint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") {
@@ -270,12 +326,12 @@ void test_svmop4s_1x1_za64_s16_u16(svint16_t zn, svuint16_t zm) __arm_streaming
270326

271327
// CHECK-LABEL: @test_svmop4a_1x1_za64_u16_s16(
272328
// CHECK-NEXT: entry:
273-
// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
329+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.za64.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
274330
// CHECK-NEXT: ret void
275331
//
276332
// CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za64_u16_s16u12__SVUint16_tu11__SVInt16_t(
277333
// CPP-CHECK-NEXT: entry:
278-
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
334+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.za64.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
279335
// CPP-CHECK-NEXT: ret void
280336
//
281337
void test_svmop4a_1x1_za64_u16_s16(svuint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") {
@@ -284,74 +340,18 @@ void test_svmop4a_1x1_za64_u16_s16(svuint16_t zn, svint16_t zm) __arm_streaming
284340

285341
// CHECK-LABEL: @test_svmop4s_1x1_za64_u16_s16(
286342
// CHECK-NEXT: entry:
287-
// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
343+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.za64.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
288344
// CHECK-NEXT: ret void
289345
//
290346
// CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za64_u16_s16u12__SVUint16_tu11__SVInt16_t(
291347
// CPP-CHECK-NEXT: entry:
292-
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
348+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.za64.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
293349
// CPP-CHECK-NEXT: ret void
294350
//
295351
void test_svmop4s_1x1_za64_u16_s16(svuint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") {
296352
SME_ACLE_FUNC(svmop4s_1x1_za64,_u16_s16,)(3, zn, zm);
297353
}
298354

299-
// CHECK-LABEL: @test_svmop4a_1x1_za32_s8_u8(
300-
// CHECK-NEXT: entry:
301-
// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
302-
// CHECK-NEXT: ret void
303-
//
304-
// CPP-CHECK-LABEL: @_Z27test_svmop4a_1x1_za32_s8_u8u10__SVInt8_tu11__SVUint8_t(
305-
// CPP-CHECK-NEXT: entry:
306-
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4a.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
307-
// CPP-CHECK-NEXT: ret void
308-
//
309-
void test_svmop4a_1x1_za32_s8_u8(svint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") {
310-
SME_ACLE_FUNC(svmop4a_1x1_za32,_s8_u8,)(3, zn, zm);
311-
}
312-
313-
// CHECK-LABEL: @test_svmop4s_1x1_za32_s8_u8(
314-
// CHECK-NEXT: entry:
315-
// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
316-
// CHECK-NEXT: ret void
317-
//
318-
// CPP-CHECK-LABEL: @_Z27test_svmop4s_1x1_za32_s8_u8u10__SVInt8_tu11__SVUint8_t(
319-
// CPP-CHECK-NEXT: entry:
320-
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumop4s.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
321-
// CPP-CHECK-NEXT: ret void
322-
//
323-
void test_svmop4s_1x1_za32_s8_u8(svint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") {
324-
SME_ACLE_FUNC(svmop4s_1x1_za32,_s8_u8,)(3, zn, zm);
325-
}
326-
327-
// CHECK-LABEL: @test_svmop4a_1x1_za32_u8_s8(
328-
// CHECK-NEXT: entry:
329-
// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
330-
// CHECK-NEXT: ret void
331-
//
332-
// CPP-CHECK-LABEL: @_Z27test_svmop4a_1x1_za32_u8_s8u11__SVUint8_tu10__SVInt8_t(
333-
// CPP-CHECK-NEXT: entry:
334-
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4a.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
335-
// CPP-CHECK-NEXT: ret void
336-
//
337-
void test_svmop4a_1x1_za32_u8_s8(svuint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") {
338-
SME_ACLE_FUNC(svmop4a_1x1_za32,_u8_s8,)(3, zn, zm);
339-
}
340-
341-
// CHECK-LABEL: @test_svmop4s_1x1_za32_u8_s8(
342-
// CHECK-NEXT: entry:
343-
// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
344-
// CHECK-NEXT: ret void
345-
//
346-
// CPP-CHECK-LABEL: @_Z27test_svmop4s_1x1_za32_u8_s8u11__SVUint8_tu10__SVInt8_t(
347-
// CPP-CHECK-NEXT: entry:
348-
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmop4s.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
349-
// CPP-CHECK-NEXT: ret void
350-
//
351-
void test_svmop4s_1x1_za32_u8_s8(svuint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") {
352-
SME_ACLE_FUNC(svmop4s_1x1_za32,_u8_s8,)(3, zn, zm);
353-
}
354-
355355
// CHECK-LABEL: @test_svmop4a_1x1_za16_f16_f16(
356356
// CHECK-NEXT: entry:
357357
// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.1x1.nxv8f16(i32 3, <vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]])

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3090,6 +3090,14 @@ let TargetPrefix = "aarch64" in {
30903090
def int_aarch64_sme_usmop4s_wide_1x1 : SME_OuterProduct_QuaterTile_Single;
30913091
def int_aarch64_sme_usmop4a_1x1 : SME_OuterProduct_QuaterTile_Single;
30923092
def int_aarch64_sme_usmop4s_1x1 : SME_OuterProduct_QuaterTile_Single;
3093+
def int_aarch64_sme_smop4a_za64_wide_1x1 : SME_OuterProduct_QuaterTile_Single;
3094+
def int_aarch64_sme_smop4s_za64_wide_1x1 : SME_OuterProduct_QuaterTile_Single;
3095+
def int_aarch64_sme_umop4a_za64_wide_1x1 : SME_OuterProduct_QuaterTile_Single;
3096+
def int_aarch64_sme_umop4s_za64_wide_1x1 : SME_OuterProduct_QuaterTile_Single;
3097+
def int_aarch64_sme_sumop4a_za64_wide_1x1 : SME_OuterProduct_QuaterTile_Single;
3098+
def int_aarch64_sme_sumop4s_za64_wide_1x1 : SME_OuterProduct_QuaterTile_Single;
3099+
def int_aarch64_sme_usmop4a_za64_wide_1x1 : SME_OuterProduct_QuaterTile_Single;
3100+
def int_aarch64_sme_usmop4s_za64_wide_1x1 : SME_OuterProduct_QuaterTile_Single;
30933101

30943102
class SME_AddVectorToTile_Intrinsic
30953103
: DefaultAttrsIntrinsic<[],

0 commit comments

Comments
 (0)