-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AArch64][Clang] Update untyped sme intrinsics with fp8 variants #124543
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang Author: None (Lukacma) ChangesThis patch adds fp8 variants to the following untyped SME intrinsics based on ACLE:
Patch is 120.04 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/124543.diff 19 Files Affected:
diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index 891ed9874bb3d0..51cab572ce357d 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -110,11 +110,11 @@ multiclass ZARead<string n_suffix, string t, string i_prefix, list<ImmCheck> ch>
}
}
-defm SVREAD_ZA8 : ZARead<"za8", "cUc", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_0>]>;
+defm SVREAD_ZA8 : ZARead<"za8", "cUcm", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_0>]>;
defm SVREAD_ZA16 : ZARead<"za16", "sUshb", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_1>]>;
defm SVREAD_ZA32 : ZARead<"za32", "iUif", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_3>]>;
defm SVREAD_ZA64 : ZARead<"za64", "lUld", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_7>]>;
-defm SVREAD_ZA128 : ZARead<"za128", "csilUcUsUiUlhbfd", "aarch64_sme_readq", [ImmCheck<2, ImmCheck0_15>]>;
+defm SVREAD_ZA128 : ZARead<"za128", "csilUcUsUiUlmhbfd", "aarch64_sme_readq", [ImmCheck<2, ImmCheck0_15>]>;
////////////////////////////////////////////////////////////////////////////////
// Write horizontal/vertical ZA slices
@@ -131,11 +131,11 @@ multiclass ZAWrite<string n_suffix, string t, string i_prefix, list<ImmCheck> ch
}
}
-defm SVWRITE_ZA8 : ZAWrite<"za8", "cUc", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_0>]>;
+defm SVWRITE_ZA8 : ZAWrite<"za8", "cUcm", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_0>]>;
defm SVWRITE_ZA16 : ZAWrite<"za16", "sUshb", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_1>]>;
defm SVWRITE_ZA32 : ZAWrite<"za32", "iUif", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_3>]>;
defm SVWRITE_ZA64 : ZAWrite<"za64", "lUld", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_7>]>;
-defm SVWRITE_ZA128 : ZAWrite<"za128", "csilUcUsUiUlhbfd", "aarch64_sme_writeq", [ImmCheck<0, ImmCheck0_15>]>;
+defm SVWRITE_ZA128 : ZAWrite<"za128", "csilUcUsUiUlmhbfd", "aarch64_sme_writeq", [ImmCheck<0, ImmCheck0_15>]>;
////////////////////////////////////////////////////////////////////////////////
// SME - Zero
@@ -350,7 +350,7 @@ multiclass ZAWrite_VG<string n, string t, string i, list<ImmCheck> checks> {
}
let SMETargetGuard = "sme2" in {
- defm SVWRITE_ZA8 : ZAWrite_VG<"za8", "cUc", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_0>]>;
+ defm SVWRITE_ZA8 : ZAWrite_VG<"za8", "cUcm", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_0>]>;
defm SVWRITE_ZA16 : ZAWrite_VG<"za16", "sUshb", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_1>]>;
defm SVWRITE_ZA32 : ZAWrite_VG<"za32", "iUif", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_3>]>;
defm SVWRITE_ZA64 : ZAWrite_VG<"za64", "lUld", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_7>]>;
@@ -366,7 +366,7 @@ multiclass ZARead_VG<string n, string t, string i, list<ImmCheck> checks> {
}
let SMETargetGuard = "sme2" in {
- defm SVREAD_ZA8 : ZARead_VG<"za8", "cUc", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_0>]>;
+ defm SVREAD_ZA8 : ZARead_VG<"za8", "cUcm", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_0>]>;
defm SVREAD_ZA16 : ZARead_VG<"za16", "sUshb", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_1>]>;
defm SVREAD_ZA32 : ZARead_VG<"za32", "iUif", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_3>]>;
defm SVREAD_ZA64 : ZARead_VG<"za64", "lUld", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_7>]>;
@@ -722,7 +722,7 @@ def IN_STREAMING_MODE : Inst<"__arm_in_streaming_mode", "sv", "Pc", MergeNone,
// lookup table expand four contiguous registers
//
let SMETargetGuard = "sme2" in {
- def SVLUTI2_LANE_ZT_X4 : Inst<"svluti2_lane_zt_{d}_x4", "4.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x4", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
+ def SVLUTI2_LANE_ZT_X4 : Inst<"svluti2_lane_zt_{d}_x4", "4.di[i", "cUcsUsiUimbhf", MergeNone, "aarch64_sme_luti2_lane_zt_x4", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
def SVLUTI4_LANE_ZT_X4 : Inst<"svluti4_lane_zt_{d}_x4", "4.di[i", "sUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x4", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_1>]>;
}
@@ -730,16 +730,16 @@ let SMETargetGuard = "sme2" in {
// lookup table expand one register
//
let SMETargetGuard = "sme2" in {
- def SVLUTI2_LANE_ZT : Inst<"svluti2_lane_zt_{d}", "di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>;
- def SVLUTI4_LANE_ZT : Inst<"svluti4_lane_zt_{d}", "di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
+ def SVLUTI2_LANE_ZT : Inst<"svluti2_lane_zt_{d}", "di[i", "cUcsUsiUimbhf", MergeNone, "aarch64_sme_luti2_lane_zt", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>;
+ def SVLUTI4_LANE_ZT : Inst<"svluti4_lane_zt_{d}", "di[i", "cUcsUsiUimbhf", MergeNone, "aarch64_sme_luti4_lane_zt", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
}
//
// lookup table expand two contiguous registers
//
let SMETargetGuard = "sme2" in {
- def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
- def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
+ def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUimbhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
+ def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUimbhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
}
//
@@ -811,12 +811,12 @@ multiclass ZAReadz<string n_suffix, string vg_num, string t, string i_prefix, li
}
}
-defm SVREADZ_ZA8_X2 : ZAReadz<"za8", "2", "cUc", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_0>]>;
+defm SVREADZ_ZA8_X2 : ZAReadz<"za8", "2", "cUcm", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_0>]>;
defm SVREADZ_ZA16_X2 : ZAReadz<"za16", "2", "sUshb", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_1>]>;
defm SVREADZ_ZA32_X2 : ZAReadz<"za32", "2", "iUif", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_3>]>;
defm SVREADZ_ZA64_X2 : ZAReadz<"za64", "2", "lUld", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_7>]>;
-defm SVREADZ_ZA8_X4 : ZAReadz<"za8", "4", "cUc", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_0>]>;
+defm SVREADZ_ZA8_X4 : ZAReadz<"za8", "4", "cUcm", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_0>]>;
defm SVREADZ_ZA16_X4 : ZAReadz<"za16", "4", "sUshb", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_1>]>;
defm SVREADZ_ZA32_X4 : ZAReadz<"za32", "4", "iUif", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_3>]>;
defm SVREADZ_ZA64_X4 : ZAReadz<"za64", "4", "lUld", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_7>]>;
@@ -834,15 +834,15 @@ multiclass ZAReadzSingle<string n_suffix, string t, string i_prefix, list<ImmChe
}
}
-defm SVREADZ_ZA8 : ZAReadzSingle<"za8", "cUc", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_0>]>;
+defm SVREADZ_ZA8 : ZAReadzSingle<"za8", "cUcm", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_0>]>;
defm SVREADZ_ZA16 : ZAReadzSingle<"za16", "sUshb", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_1>]>;
defm SVREADZ_ZA32 : ZAReadzSingle<"za32", "iUif", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_3>]>;
defm SVREADZ_ZA64 : ZAReadzSingle<"za64", "lUld", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_7>]>;
-defm SVREADZ_ZA128 : ZAReadzSingle<"za128", "csilUcUiUsUlbhfd", "aarch64_sme_readz_q", [ImmCheck<0, ImmCheck0_15>]>;
+defm SVREADZ_ZA128 : ZAReadzSingle<"za128", "csilUcUiUsUlmbhfd", "aarch64_sme_readz_q", [ImmCheck<0, ImmCheck0_15>]>;
multiclass ZAReadzArray<string vg_num>{
let SMETargetGuard = "sme2p1" in {
- def NAME # _B : SInst<"svreadz_za8_{d}_vg1x" # vg_num, vg_num # "m", "cUc", MergeNone, "aarch64_sme_readz_x" # vg_num, [IsStreaming, IsInOutZA]>;
+ def NAME # _B : SInst<"svreadz_za8_{d}_vg1x" # vg_num, vg_num # "m", "cUcm", MergeNone, "aarch64_sme_readz_x" # vg_num, [IsStreaming, IsInOutZA]>;
def NAME # _H : SInst<"svreadz_za16_{d}_vg1x" # vg_num, vg_num # "m", "sUsbh", MergeNone, "aarch64_sme_readz_x" # vg_num, [IsStreaming, IsInOutZA]>;
def NAME # _S : SInst<"svreadz_za32_{d}_vg1x" # vg_num, vg_num # "m", "iUif", MergeNone, "aarch64_sme_readz_x" # vg_num, [IsStreaming, IsInOutZA]>;
def NAME # _D : SInst<"svreadz_za64_{d}_vg1x" # vg_num, vg_num # "m", "lUld", MergeNone, "aarch64_sme_readz_x" # vg_num, [IsStreaming, IsInOutZA]>;
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index e7001bac450e89..12cca992a21ab7 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2104,7 +2104,7 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = "sme" in {
def SVSCLAMP : SInst<"svclamp[_{d}]", "dddd", "csil", MergeNone, "aarch64_sve_sclamp", [VerifyRuntimeMode], []>;
def SVUCLAMP : SInst<"svclamp[_{d}]", "dddd", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp", [VerifyRuntimeMode], []>;
-defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUlbhfd", "aarch64_sve_revd">;
+defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUlmbhfd", "aarch64_sve_revd">;
}
let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
@@ -2223,8 +2223,8 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
def SVADD_SINGLE_X4 : SInst<"svadd[_single_{d}_x4]", "44d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x4", [IsStreaming], []>;
// 2-way and 4-way selects
- def SVSEL_X2 : SInst<"svsel[_{d}_x2]", "2}22", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_sel_x2", [IsStreaming], []>;
- def SVSEL_X4 : SInst<"svsel[_{d}_x4]", "4}44", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_sel_x4", [IsStreaming], []>;
+ def SVSEL_X2 : SInst<"svsel[_{d}_x2]", "2}22", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_sel_x2", [IsStreaming], []>;
+ def SVSEL_X4 : SInst<"svsel[_{d}_x4]", "4}44", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_sel_x4", [IsStreaming], []>;
// SRSHL / URSHL
def SVSRSHL_SINGLE_X2 : SInst<"svrshl[_single_{d}_x2]", "22d", "csil", MergeNone, "aarch64_sve_srshl_single_x2", [IsStreaming], []>;
@@ -2402,15 +2402,15 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
//
let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
- def SVZIP_X2 : SInst<"svzip[_{d}_x2]", "22", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_zip_x2", [IsStreaming], []>;
- def SVZIPQ_X2 : SInst<"svzipq[_{d}_x2]", "22", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_zipq_x2", [IsStreaming], []>;
- def SVZIP_X4 : SInst<"svzip[_{d}_x4]", "44", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_zip_x4", [IsStreaming], []>;
- def SVZIPQ_X4 : SInst<"svzipq[_{d}_x4]", "44", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_zipq_x4", [IsStreaming], []>;
-
- def SVUZP_X2 : SInst<"svuzp[_{d}_x2]", "22", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_uzp_x2", [IsStreaming], []>;
- def SVUZPQ_X2 : SInst<"svuzpq[_{d}_x2]", "22", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_uzpq_x2", [IsStreaming], []>;
- def SVUZP_X4 : SInst<"svuzp[_{d}_x4]", "44", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_uzp_x4", [IsStreaming], []>;
- def SVUZPQ_X4 : SInst<"svuzpq[_{d}_x4]", "44", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_uzpq_x4", [IsStreaming], []>;
+ def SVZIP_X2 : SInst<"svzip[_{d}_x2]", "22", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_zip_x2", [IsStreaming], []>;
+ def SVZIPQ_X2 : SInst<"svzipq[_{d}_x2]", "22", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_zipq_x2", [IsStreaming], []>;
+ def SVZIP_X4 : SInst<"svzip[_{d}_x4]", "44", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_zip_x4", [IsStreaming], []>;
+ def SVZIPQ_X4 : SInst<"svzipq[_{d}_x4]", "44", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_zipq_x4", [IsStreaming], []>;
+
+ def SVUZP_X2 : SInst<"svuzp[_{d}_x2]", "22", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_uzp_x2", [IsStreaming], []>;
+ def SVUZPQ_X2 : SInst<"svuzpq[_{d}_x2]", "22", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_uzpq_x2", [IsStreaming], []>;
+ def SVUZP_X4 : SInst<"svuzp[_{d}_x4]", "44", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_uzp_x4", [IsStreaming], []>;
+ def SVUZPQ_X4 : SInst<"svuzpq[_{d}_x4]", "44", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_uzpq_x4", [IsStreaming], []>;
}
//
diff --git a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_read.c b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_read.c
index 508fad09ea715b..0605f9eef036fd 100644
--- a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_read.c
+++ b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_read.c
@@ -318,6 +318,41 @@ svuint64_t test_svread_hor_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice
return SME_ACLE_FUNC(svread_hor_za64, _u64, _m)(zd, pg, 7, slice);
}
+// CHECK-C-LABEL: define dso_local <vscale x 16 x i8> @test_svread_hor_za8_mf8(
+// CHECK-C-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-C-NEXT: entry:
+// CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
+// CHECK-C-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local <vscale x 16 x i8> @_Z23test_svread_hor_za8_mf8u13__SVMfloat8_tu10__SVBool_tj(
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: entry:
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
+// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svmfloat8_t test_svread_hor_za8_mf8(svmfloat8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") {
+ return SME_ACLE_FUNC(svread_hor_za8, _mf8, _m)(zd, pg, 0, slice_base);
+}
+
+// CHECK-C-LABEL: define dso_local <vscale x 16 x i8> @test_svread_hor_za8_mf8_1(
+// CHECK-C-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-C-NEXT: entry:
+// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 15
+// CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[ADD]])
+// CHECK-C-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local <vscale x 16 x i8> @_Z25test_svread_hor_za8_mf8_1u13__SVMfloat8_tu10__SVBool_tj(
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: entry:
+// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 15
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[ADD]])
+// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svmfloat8_t test_svread_hor_za8_mf8_1(svmfloat8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") {
+ uint32_t slice = slice_base + 15;
+ return SME_ACLE_FUNC(svread_hor_za8, _mf8, _m)(zd, pg, 0, slice);
+}
+
// CHECK-C-LABEL: define dso_local <vscale x 8 x half> @test_svread_hor_za16_f16(
// CHECK-C-SAME: <vscale x 8 x half> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-C-NEXT: entry:
@@ -754,6 +789,38 @@ svuint64_t test_svread_hor_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slic
return SME_ACLE_FUNC(svread_hor_za128, _u64, _m)(zd, pg, 15, slice_base);
}
+// CHECK-C-LABEL: define dso_local <vscale x 16 x i8> @test_svread_hor_za128_mf8(
+// CHECK-C-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-C-NEXT: entry:
+// CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
+// CHECK-C-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local <vscale x 16 x i8> @_Z25test_svread_hor_za128_mf8u13__SVMfloat8_tu10__SVBool_tj(
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: entry:
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
+// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svmfloat8_t test_svread_hor_za128_mf8(svmfloat8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") {
+ return SME_ACLE_FUNC(svread_hor_za128, _mf8, _m)(zd, pg, 0, slice_base);
+}
+
+// CHECK-C-LABEL: define dso_local <vscale x 16 x i8> @test_svread_hor_za128_mf8_1(
+// CHECK-C-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-C-NEXT: entry:
+// CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 15, i32 [[SLICE_BASE]])
+// CHECK-C-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local <vscale x 16 x i8> @_Z27test_svread_hor_za128_mf8_1u13__SVMfloat8_tu10__SVBool_tj(
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: entry:
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 15, i32 [[SLICE_BASE]])
+// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svmfloat8_t test_svread_hor_za128_mf8_1(svmfloat8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") {
+ return SME_ACLE_FUNC(svread_hor_za128, _mf8, _m)(zd, pg, 15, slice_base);
+}
+
// CHECK-C-LABEL: define dso_local <vscale x 8 x half> @test_svread_hor_za128_f16(
// CHECK-C-SAME: <vscale x 8 x half> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-C-NEXT: entry:
@@ -1202,6 +1269,41 @@ svuint64_t test_svread_ver_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice
return SME_ACLE_FUNC(svread_ver_za64, _u64, _m)(zd, pg, 7, slice);
}
+// CHECK-C-LABEL: define dso_local <vscale x 16 x i8> @test_svread_ver_za8_mf8(
+// CHECK-C-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-C-NEXT: entry:
+// CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
+// CHECK-C-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local <vscale x 16 x i8> @_Z23test_svread_ver_za8_mf8u13__SVMfloat8_tu10__SVBool_tj(
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: entry:
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
+// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svmfloat8_t test_svread_ver_za8_mf8(svmfloat8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") {
+ return SME_ACLE_FUNC(svread_ver_za8, _mf8, _m)(zd, pg, 0, slice_base);
+}
+
+// CHECK-C-LABEL: define dso_local <vscale x 16 x i8> @test_svread_ver_za8_mf8_1(
+// CHECK-C-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i...
[truncated]
|
You can test this locally with the following command:git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' c4ef805b0bda16f734276086b0984583c2e21db6 6d44b6f0a8152f8ec365526c906676387087605e clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_read.c clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_write.c clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti2_lane_zt.c clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti4_lane_zt.c clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_read.c clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_selx2.c clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_selx4.c clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_uzpx2.c clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_uzpx4.c clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_zipx2.c clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_zipx4.c clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_write.c clang/test/CodeGen/AArch64/sme2p1-intrinsics/acle_sme2p1_movaz.c clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_revd.c The following files introduce new uses of undef:
Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields In tests, avoid using For example, this is considered a bad practice: define void @fn() {
...
br i1 undef, ...
} Please use the following instead: define void @fn(i1 %cond) {
...
br i1 %cond, ...
} Please refer to the Undefined Behavior Manual for more information. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM,
Before merging, check if the test for test_svwrite_za8_s8_vg1x4 was actually removed as it looks like.
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/30/builds/15368 Here is the relevant piece of the build log for the reference
|
…m#124543) This patch adds fp8 variants to the following untyped SME intrinsics based on [ACLE](https://github.com/ARM-software/acle/blob/main/main/acle.md): ``` SVREVD SVSEL_X2 SVSEL_X4 SVZIP_X2 SVZIPQ_X2 SVZIP_X4 SVZIPQ_X4 SVUZP_X2 SVUZPQ_X2 SVUZP_X4 SVUZPQ_X4 SVREAD_ZA8_H SVREAD_ZA8_V SVREAD_ZA128 SVWRITE_ZA8_H SVWRITE_ZA8_V SVWRITE_ZA128 SVREAD_ZA8_VG2_H SVREAD_ZA8_VG2_V SVREAD_ZA8_VG4_H SVREAD_ZA8_VG4_V SVREAD_ZA8_VG1x2 SVREAD_ZA8_VG1x4 SVWRITE_ZA8_VG2_H SVWRITE_ZA8_VG2_V SVWRITE_ZA8_VG4_H SVWRITE_ZA8_VG4_V SVWRITE_ZA8_VG1x2 SVWRITE_ZA8_VG1x4 SVLUTI2_LANE_ZT_X4 SVLUTI2_LANE_ZT SVLUTI4_LANE_ZT SVLUTI2_LANE_ZT_X2 SVLUTI4_LANE_ZT_X2 SVREADZ_ZA8_X2_H SVREADZ_ZA8_X2_V SVREADZ_ZA8_X4_H SVREADZ_ZA8_X4_V SVREADZ_ZA8_H SVREADZ_ZA8_V SVREADZ_VG2_B SVREADZ_VG4_B ```
This patch adds fp8 variants to the following untyped SME intrinsics based on ACLE: