Skip to content

Commit fe9f409

Browse files
[Clang][llvm] Implement fp8 FMOP4A intrinsics
1 parent 6e8e25f commit fe9f409

File tree

7 files changed

+402
-15
lines changed

7 files changed

+402
-15
lines changed

clang/include/clang/Basic/arm_sme.td

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,24 @@ let SMETargetGuard = "sme2,sme-mop4,sme-b16b16" in {
318318
defm SVBMOP4S_H : MOP4<"s", "za16", "b", "aarch64_sme_mop4s", [ImmCheck<0, ImmCheck0_1>]>;
319319
}
320320

321+
////////////////////////////////////////////////////////////////////////////////
322+
// SME2 - FP8 FMOP4A, FMOP4S
323+
324+
// Defines the four operand-shape variants (_1x1, _1x2, _2x1, _2x2) of the FP8
// svmop4a builtin for one ZA accumulator suffix.
//   za     - suffix spliced into both the builtin name and the LLVM intrinsic
//            name ("aarch64_sme_fp8_fmop4a" # za # "_<shape>").
//   t      - type-spec string forwarded to Inst.
//   checks - immediate range checks forwarded to Inst.
// The variants differ only in the shape suffix and in the prototype string
// ("vidd>", "vid2>", "vi2d>", "vi22>").
// NOTE(review): presumably 'd' is a single vector and '2' a two-vector tuple,
// matching the svmfloat8_t / svmfloat8x2_t parameters used by the tests -
// confirm against the prototype modifier documentation.
multiclass MOP4_FP8<string za, string t, list<ImmCheck> checks> {
325+
def _1x1 : Inst<"svmop4a" # "[_1x1]" # za # "[_{d}_{d}]", "vidd>", t, MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_1x1", [IsInOutZA, IsStreaming, IsOverloadNone], checks>;
326+
def _1x2 : Inst<"svmop4a" # "[_1x2]" # za # "[_{d}_{d}]", "vid2>", t, MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_1x2", [IsInOutZA, IsStreaming, IsOverloadNone], checks>;
327+
def _2x1 : Inst<"svmop4a" # "[_2x1]" # za # "[_{d}_{d}]", "vi2d>", t, MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_2x1", [IsInOutZA, IsStreaming, IsOverloadNone], checks>;
328+
def _2x2 : Inst<"svmop4a" # "[_2x2]" # za # "[_{d}_{d}]", "vi22>", t, MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_2x2", [IsInOutZA, IsStreaming, IsOverloadNone], checks>;
329+
}
330+
331+
// ZA32 forms: guarded by sme2, sme-mop4 and sme-f8f32; argument 0 is range
// checked with ImmCheck0_3.
let SMETargetGuard = "sme2,sme-mop4,sme-f8f32" in {
332+
defm SVMOP4A_FP8_ZA32 : MOP4_FP8<"_za32", "m", [ImmCheck<0, ImmCheck0_3>]>;
333+
}
334+
335+
// ZA16 forms: guarded by sme2, sme-mop4 and sme-f8f16; argument 0 is range
// checked with ImmCheck0_1.
let SMETargetGuard = "sme2,sme-mop4,sme-f8f16" in {
336+
defm SVMOP4A_FP8_ZA16 : MOP4_FP8<"_za16", "m", [ImmCheck<0, ImmCheck0_1>]>;
337+
}
338+
321339
////////////////////////////////////////////////////////////////////////////////
322340
// SME2 - SMOP4A, SMOP4S, UMOP4A, UMOP4S
323341

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
2+
// REQUIRES: aarch64-registered-target
3+
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme-f8f32 -target-feature +sme-f8f16 -target-feature +sme-mop4 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
4+
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme-f8f32 -target-feature +sme-f8f16 -target-feature +sme-mop4 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
5+
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme-f8f32 -target-feature +sme-f8f16 -target-feature +sme-mop4 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
6+
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme-f8f32 -target-feature +sme-f8f16 -target-feature +sme-mop4 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
7+
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme-f8f32 -target-feature +sme-f8f16 -target-feature +sme-mop4 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
8+
9+
10+
#include <arm_sme.h>
11+
12+
// SME_ACLE_FUNC builds the intrinsic name by token pasting: all three pieces
// by default, or only the first and third when SME_OVERLOADED_FORMS is
// defined.
// NOTE(review): the RUN lines of this test pass -DSVE_OVERLOADED_FORMS, not
// -DSME_OVERLOADED_FORMS, so the overloaded branch looks unreachable here -
// confirm which spelling is intended.
#ifdef SME_OVERLOADED_FORMS
13+
#define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
14+
#else
15+
#define SME_ACLE_FUNC(A1,A2,A3) A1##A2##A3
16+
#endif
17+
18+
// CHECK-LABEL: define dso_local void @test_svmop4a_1x1_za16_mf8_mf8_fpm(
19+
// CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] {
20+
// CHECK-NEXT: [[ENTRY:.*:]]
21+
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
22+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.1x1(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
23+
// CHECK-NEXT: ret void
24+
//
25+
// CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_1x1_za16_mf8_mf8_fpmu13__SVMfloat8_tS_m(
26+
// CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] {
27+
// CPP-CHECK-NEXT: [[ENTRY:.*:]]
28+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
29+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.1x1(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
30+
// CPP-CHECK-NEXT: ret void
31+
//
32+
// za16, 1x1: single-vector zn and zm with immediate 1. The CHECK lines above
// expect fpmr to be written via llvm.aarch64.set.fpmr before the
// llvm.aarch64.sme.fp8.fmop4a.za16.1x1 call.
void test_svmop4a_1x1_za16_mf8_mf8_fpm(svmfloat8_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") {
33+
SME_ACLE_FUNC(svmop4a_1x1_za16,_mf8_mf8,_fpm)(1, zn, zm, fpmr);
34+
}
35+
36+
// CHECK-LABEL: define dso_local void @test_svmop4a_1x2_za16_mf8_mf8_fpm(
37+
// CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
38+
// CHECK-NEXT: [[ENTRY:.*:]]
39+
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
40+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.1x2(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]])
41+
// CHECK-NEXT: ret void
42+
//
43+
// CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_1x2_za16_mf8_mf8_fpmu13__SVMfloat8_t13svmfloat8x2_tm(
44+
// CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
45+
// CPP-CHECK-NEXT: [[ENTRY:.*:]]
46+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
47+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.1x2(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]])
48+
// CPP-CHECK-NEXT: ret void
49+
//
50+
// za16, 1x2: single-vector zn and an svmfloat8x2_t zm with immediate 1. The
// CHECK lines above expect the zm tuple to reach
// llvm.aarch64.sme.fp8.fmop4a.za16.1x2 as two separate vector operands.
void test_svmop4a_1x2_za16_mf8_mf8_fpm(svmfloat8_t zn, svmfloat8x2_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") {
51+
SME_ACLE_FUNC(svmop4a_1x2_za16,_mf8_mf8,_fpm)(1, zn, zm, fpmr);
52+
}
53+
54+
// CHECK-LABEL: define dso_local void @test_svmop4a_2x1_za16_mf8_mf8_fpm(
55+
// CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
56+
// CHECK-NEXT: [[ENTRY:.*:]]
57+
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
58+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.2x1(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]])
59+
// CHECK-NEXT: ret void
60+
//
61+
// CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_2x1_za16_mf8_mf8_fpm13svmfloat8x2_tu13__SVMfloat8_tm(
62+
// CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
63+
// CPP-CHECK-NEXT: [[ENTRY:.*:]]
64+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
65+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.2x1(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]])
66+
// CPP-CHECK-NEXT: ret void
67+
//
68+
// za16, 2x1: an svmfloat8x2_t zn and single-vector zm with immediate 1. The
// CHECK lines above expect the zn tuple to reach
// llvm.aarch64.sme.fp8.fmop4a.za16.2x1 as two separate vector operands.
void test_svmop4a_2x1_za16_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") {
69+
SME_ACLE_FUNC(svmop4a_2x1_za16,_mf8_mf8,_fpm)(1, zn, zm, fpmr);
70+
}
71+
72+
// CHECK-LABEL: define dso_local void @test_svmop4a_2x2_za16_mf8_mf8_fpm(
73+
// CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
74+
// CHECK-NEXT: [[ENTRY:.*:]]
75+
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
76+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.2x2(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]])
77+
// CHECK-NEXT: ret void
78+
//
79+
// CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_2x2_za16_mf8_mf8_fpm13svmfloat8x2_tS_m(
80+
// CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
81+
// CPP-CHECK-NEXT: [[ENTRY:.*:]]
82+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
83+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.2x2(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]])
84+
// CPP-CHECK-NEXT: ret void
85+
//
86+
// za16, 2x2: svmfloat8x2_t zn and zm with immediate 1. The CHECK lines above
// expect both tuples to reach llvm.aarch64.sme.fp8.fmop4a.za16.2x2 as four
// separate vector operands.
void test_svmop4a_2x2_za16_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8x2_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") {
87+
SME_ACLE_FUNC(svmop4a_2x2_za16,_mf8_mf8,_fpm)(1, zn, zm, fpmr);
88+
}
89+
90+
// CHECK-LABEL: define dso_local void @test_svmop4a_1x1_za32_mf8_mf8_fpm(
91+
// CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
92+
// CHECK-NEXT: [[ENTRY:.*:]]
93+
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
94+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.1x1(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
95+
// CHECK-NEXT: ret void
96+
//
97+
// CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_1x1_za32_mf8_mf8_fpmu13__SVMfloat8_tS_m(
98+
// CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
99+
// CPP-CHECK-NEXT: [[ENTRY:.*:]]
100+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
101+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.1x1(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
102+
// CPP-CHECK-NEXT: ret void
103+
//
104+
// za32, 1x1: single-vector zn and zm with immediate 1. The CHECK lines above
// expect fpmr to be written via llvm.aarch64.set.fpmr before the
// llvm.aarch64.sme.fp8.fmop4a.za32.1x1 call.
void test_svmop4a_1x1_za32_mf8_mf8_fpm(svmfloat8_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") {
105+
SME_ACLE_FUNC(svmop4a_1x1_za32,_mf8_mf8,_fpm)(1, zn, zm, fpmr);
106+
}
107+
108+
// CHECK-LABEL: define dso_local void @test_svmop4a_1x2_za32_mf8_mf8_fpm(
109+
// CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
110+
// CHECK-NEXT: [[ENTRY:.*:]]
111+
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
112+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.1x2(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]])
113+
// CHECK-NEXT: ret void
114+
//
115+
// CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_1x2_za32_mf8_mf8_fpmu13__SVMfloat8_t13svmfloat8x2_tm(
116+
// CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
117+
// CPP-CHECK-NEXT: [[ENTRY:.*:]]
118+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
119+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.1x2(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]])
120+
// CPP-CHECK-NEXT: ret void
121+
//
122+
// za32, 1x2: single-vector zn and an svmfloat8x2_t zm with immediate 1. The
// CHECK lines above expect the zm tuple to reach
// llvm.aarch64.sme.fp8.fmop4a.za32.1x2 as two separate vector operands.
void test_svmop4a_1x2_za32_mf8_mf8_fpm(svmfloat8_t zn, svmfloat8x2_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") {
123+
SME_ACLE_FUNC(svmop4a_1x2_za32,_mf8_mf8,_fpm)(1, zn, zm, fpmr);
124+
}
125+
126+
// CHECK-LABEL: define dso_local void @test_svmop4a_2x1_za32_mf8_mf8_fpm(
127+
// CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
128+
// CHECK-NEXT: [[ENTRY:.*:]]
129+
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
130+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.2x1(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]])
131+
// CHECK-NEXT: ret void
132+
//
133+
// CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_2x1_za32_mf8_mf8_fpm13svmfloat8x2_tu13__SVMfloat8_tm(
134+
// CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
135+
// CPP-CHECK-NEXT: [[ENTRY:.*:]]
136+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
137+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.2x1(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]])
138+
// CPP-CHECK-NEXT: ret void
139+
//
140+
// za32, 2x1: an svmfloat8x2_t zn and single-vector zm with immediate 1. The
// CHECK lines above expect the zn tuple to reach
// llvm.aarch64.sme.fp8.fmop4a.za32.2x1 as two separate vector operands.
void test_svmop4a_2x1_za32_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") {
141+
SME_ACLE_FUNC(svmop4a_2x1_za32,_mf8_mf8,_fpm)(1, zn, zm, fpmr);
142+
}
143+
144+
// CHECK-LABEL: define dso_local void @test_svmop4a_2x2_za32_mf8_mf8_fpm(
145+
// CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
146+
// CHECK-NEXT: [[ENTRY:.*:]]
147+
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
148+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.2x2(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]])
149+
// CHECK-NEXT: ret void
150+
//
151+
// CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_2x2_za32_mf8_mf8_fpm13svmfloat8x2_tS_m(
152+
// CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
153+
// CPP-CHECK-NEXT: [[ENTRY:.*:]]
154+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
155+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.2x2(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]])
156+
// CPP-CHECK-NEXT: ret void
157+
//
158+
// za32, 2x2: svmfloat8x2_t zn and zm with immediate 1. The CHECK lines above
// expect both tuples to reach llvm.aarch64.sme.fp8.fmop4a.za32.2x2 as four
// separate vector operands.
void test_svmop4a_2x2_za32_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8x2_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") {
159+
SME_ACLE_FUNC(svmop4a_2x2_za32,_mf8_mf8,_fpm)(1, zn, zm, fpmr);
160+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// Checks that an out-of-range first (immediate) argument to the FP8 svmop4a intrinsics is diagnosed. Diagnostics are matched with -verify; this file has no FileCheck assertions.
2+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \
3+
// RUN: -target-feature +sme -target-feature +sme2p2 -target-feature +sme-mop4 -target-feature +sme-f8f32 -target-feature +sme-f8f16 -fsyntax-only -verify %s
4+
5+
// REQUIRES: aarch64-registered-target
6+
7+
#include <arm_sme.h>
8+
9+
// 1x1 forms: -1 as the first argument must be rejected by both the za16
// variant (valid range [0, 1]) and the za32 variant (valid range [0, 3]).
// NOTE(review): only a below-range value is exercised in this file; values
// above the upper bound are not covered.
void tests_mop4_imm_1x1(svmfloat8_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") {
10+
svmop4a_1x1_za16_mf8_mf8_fpm(-1, zn, zm, fpmr); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
11+
svmop4a_1x1_za32_mf8_mf8_fpm(-1, zn, zm, fpmr); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
12+
return;
13+
}
14+
15+
// 1x2 forms (single-vector zn, svmfloat8x2_t zm): -1 as the first argument
// must be rejected by both the za16 variant (valid range [0, 1]) and the za32
// variant (valid range [0, 3]).
void tests_mop4_imm_1x2(svmfloat8_t zn, svmfloat8x2_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") {
16+
svmop4a_1x2_za16_mf8_mf8_fpm(-1, zn, zm, fpmr); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
17+
svmop4a_1x2_za32_mf8_mf8_fpm(-1, zn, zm, fpmr); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
18+
return;
19+
}
20+
21+
// 2x1 forms (svmfloat8x2_t zn, single-vector zm): -1 as the first argument
// must be rejected by both the za16 variant (valid range [0, 1]) and the za32
// variant (valid range [0, 3]).
void tests_mop4_imm_2x1(svmfloat8x2_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") {
22+
svmop4a_2x1_za16_mf8_mf8_fpm(-1, zn, zm, fpmr); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
23+
svmop4a_2x1_za32_mf8_mf8_fpm(-1, zn, zm, fpmr); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
24+
return;
25+
}
26+
27+
// 2x2 forms (svmfloat8x2_t zn and zm): -1 as the first argument must be
// rejected by both the za16 variant (valid range [0, 1]) and the za32 variant
// (valid range [0, 3]).
void tests_mop4_imm_2x2(svmfloat8x2_t zn, svmfloat8x2_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") {
28+
svmop4a_2x2_za16_mf8_mf8_fpm(-1, zn, zm, fpmr); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
29+
svmop4a_2x2_za32_mf8_mf8_fpm(-1, zn, zm, fpmr); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
30+
return;
31+
}

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3086,6 +3086,39 @@ let TargetPrefix = "aarch64" in {
30863086
}
30873087
}
30883088

3089+
// Intrinsic signature with no result and three operands: an i32 that must be
// an immediate (ImmArg on operand 0) followed by two nxv16i8 vectors. Marked
// IntrInaccessibleMemOnly and IntrHasSideEffects.
class SME_FP8_OuterProduct_Intrinsic_Single_Single
3090+
: DefaultAttrsIntrinsic<[],
3091+
[llvm_i32_ty,
3092+
llvm_nxv16i8_ty,
3093+
llvm_nxv16i8_ty],
3094+
[ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>;
3095+
3096+
// Intrinsic signature with no result and four operands: an i32 that must be
// an immediate (ImmArg on operand 0) followed by three nxv16i8 vectors. Marked
// IntrInaccessibleMemOnly and IntrHasSideEffects.
class SME_FP8_OuterProduct_Intrinsic_Single_Multi
3097+
: DefaultAttrsIntrinsic<[],
3098+
[llvm_i32_ty,
3099+
llvm_nxv16i8_ty,
3100+
llvm_nxv16i8_ty,
3101+
llvm_nxv16i8_ty],
3102+
[ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>;
3103+
3104+
// Intrinsic signature with no result and five operands: an i32 that must be
// an immediate (ImmArg on operand 0) followed by four nxv16i8 vectors. Marked
// IntrInaccessibleMemOnly and IntrHasSideEffects.
class SME_FP8_OuterProduct_Intrinsic_Multi_Multi
3105+
: DefaultAttrsIntrinsic<[],
3106+
[llvm_i32_ty,
3107+
llvm_nxv16i8_ty,
3108+
llvm_nxv16i8_ty,
3109+
llvm_nxv16i8_ty,
3110+
llvm_nxv16i8_ty],
3111+
[ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>;
3112+
3113+
// FP8 FMOP4A intrinsics: one record per accumulator suffix (za16 / za32) and
// operand shape (1x1, 1x2, 2x1, 2x2).
// The 1x2 and 2x1 shapes both use SME_FP8_OuterProduct_Intrinsic_Single_Multi:
// each takes the i32 immediate plus three nxv16i8 operands, so the signature
// class does not record which side is the vector pair.
def int_aarch64_sme_fp8_fmop4a_za16_1x1 : SME_FP8_OuterProduct_Intrinsic_Single_Single;
3114+
def int_aarch64_sme_fp8_fmop4a_za32_1x1 : SME_FP8_OuterProduct_Intrinsic_Single_Single;
3115+
def int_aarch64_sme_fp8_fmop4a_za16_1x2 : SME_FP8_OuterProduct_Intrinsic_Single_Multi;
3116+
def int_aarch64_sme_fp8_fmop4a_za32_1x2 : SME_FP8_OuterProduct_Intrinsic_Single_Multi;
3117+
def int_aarch64_sme_fp8_fmop4a_za16_2x1 : SME_FP8_OuterProduct_Intrinsic_Single_Multi;
3118+
def int_aarch64_sme_fp8_fmop4a_za32_2x1 : SME_FP8_OuterProduct_Intrinsic_Single_Multi;
3119+
def int_aarch64_sme_fp8_fmop4a_za16_2x2 : SME_FP8_OuterProduct_Intrinsic_Multi_Multi;
3120+
def int_aarch64_sme_fp8_fmop4a_za32_2x2 : SME_FP8_OuterProduct_Intrinsic_Multi_Multi;
3121+
30893122
class SME_AddVectorToTile_Intrinsic
30903123
: DefaultAttrsIntrinsic<[],
30913124
[llvm_i32_ty,
@@ -4075,6 +4108,7 @@ let TargetPrefix = "aarch64" in {
40754108
llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty,
40764109
llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty],
40774110
[IntrInaccessibleMemOnly, IntrHasSideEffects]>;
4111+
40784112
//
40794113
// CVT from FP8 to half-precision/BFloat16 multi-vector
40804114
//

llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1090,8 +1090,8 @@ let Predicates = [HasSME_TMOP, HasSMEF8F16], Uses = [FPMR, FPCR] in {
10901090
def FTMOPA_M2ZZZI_BtoH : sme_tmopa_16b<0b01001, ZZ_b_mul_r, ZPR8, "ftmopa">;
10911091
}
10921092

1093-
let Predicates = [HasSME_MOP4, HasSMEF8F16], Uses = [FPMR, FPCR] in {
1094-
defm FMOP4A : sme2_fmop4a_fp8_fp16_2way<"fmop4a">;
1093+
let Predicates = [HasSME_MOP4, HasSMEF8F16] in {
1094+
defm FMOP4A : sme2_fmop4a_fp8_fp16_2way<"fmop4a", "int_aarch64_sme_fp8_fmop4a_za16">;
10951095
}
10961096

10971097
let Predicates = [HasSME_TMOP, HasSMEF16F16] in {
@@ -1108,10 +1108,8 @@ let Predicates = [HasSME2, HasSVEBFSCALE] in {
11081108
defm BFMUL : sme2_bfmul_multi<"bfmul">;
11091109
}
11101110

1111-
let Uses = [FPMR, FPCR] in {
11121111
let Predicates = [HasSME_MOP4, HasSMEF8F32] in {
1113-
defm FMOP4A : sme2_fmop4a_fp8_fp32_4way<"fmop4a">;
1114-
}
1112+
defm FMOP4A : sme2_fmop4a_fp8_fp32_4way<"fmop4a", "int_aarch64_sme_fp8_fmop4a_za32">;
11151113
}
11161114

11171115
let Predicates = [HasSME_MOP4, HasSMEB16B16] in {

0 commit comments

Comments
 (0)