Skip to content

Commit e9af57d

Browse files
[Clang][SME2] Add builtins for moving multi-vectors to/from ZA (#71191)
Adds the following SME2 builtins: - svread_hor/ver, - svwrite_hor/ver, - svread_za64, - svwrite_za64 See ARM-software/acle#217
1 parent 6905438 commit e9af57d

File tree

7 files changed

+3814
-4
lines changed

7 files changed

+3814
-4
lines changed

clang/include/clang/Basic/arm_sme.td

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,44 @@ multiclass ZAAddSub<string n_suffix> {
299299
defm SVADD : ZAAddSub<"add">;
300300
defm SVSUB : ZAAddSub<"sub">;
301301

302+
// SME2 - MOVA
303+
304+
//
305+
// Single, 2 and 4 vector-group read/write intrinsics.
306+
//
307+
308+
multiclass ZAWrite_VG<string n, string t, string i, list<ImmCheck> checks> {
309+
def NAME # _VG2_H : Inst<"svwrite_hor_" # n # "[_{d}]_vg2", "vim2", t, MergeNone, i # "_hor_vg2", [IsSharedZA, IsStreaming], checks>;
310+
def NAME # _VG2_V : Inst<"svwrite_ver_" # n # "[_{d}]_vg2", "vim2", t, MergeNone, i # "_ver_vg2", [IsSharedZA, IsStreaming], checks>;
311+
def NAME # _VG4_H : Inst<"svwrite_hor_" # n # "[_{d}]_vg4", "vim4", t, MergeNone, i # "_hor_vg4", [IsSharedZA, IsStreaming], checks>;
312+
def NAME # _VG4_V : Inst<"svwrite_ver_" # n # "[_{d}]_vg4", "vim4", t, MergeNone, i # "_ver_vg4", [IsSharedZA, IsStreaming], checks>;
313+
def NAME # _VG1x2 : Inst<"svwrite_" # n # "[_{d}]_vg1x2", "vm2", t, MergeNone, i # "_vg1x2", [IsSharedZA, IsStreaming], []>;
314+
def NAME # _VG1x4 : Inst<"svwrite_" # n # "[_{d}]_vg1x4", "vm4", t, MergeNone, i # "_vg1x4", [IsSharedZA, IsStreaming], []>;
315+
}
316+
317+
let TargetGuard = "sme2" in {
318+
defm SVWRITE_ZA8 : ZAWrite_VG<"za8", "cUc", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_0>]>;
319+
defm SVWRITE_ZA16 : ZAWrite_VG<"za16", "sUshb", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_1>]>;
320+
defm SVWRITE_ZA32 : ZAWrite_VG<"za32", "iUif", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_3>]>;
321+
defm SVWRITE_ZA64 : ZAWrite_VG<"za64", "lUld", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_7>]>;
322+
}
323+
324+
multiclass ZARead_VG<string n, string t, string i, list<ImmCheck> checks> {
325+
def NAME # _VG2_H : Inst<"svread_hor_" # n # "_{d}_vg2", "2im", t, MergeNone, i # "_hor_vg2", [IsSharedZA, IsPreservesZA, IsStreaming], checks>;
326+
def NAME # _VG2_V : Inst<"svread_ver_" # n # "_{d}_vg2", "2im", t, MergeNone, i # "_ver_vg2", [IsSharedZA, IsPreservesZA, IsStreaming], checks>;
327+
def NAME # _VG4_H : Inst<"svread_hor_" # n # "_{d}_vg4", "4im", t, MergeNone, i # "_hor_vg4", [IsSharedZA, IsPreservesZA, IsStreaming], checks>;
328+
def NAME # _VG4_V : Inst<"svread_ver_" # n # "_{d}_vg4", "4im", t, MergeNone, i # "_ver_vg4", [IsSharedZA, IsPreservesZA, IsStreaming], checks>;
329+
def NAME # _VG1x2 : Inst<"svread_" # n # "_{d}_vg1x2", "2m", t, MergeNone, i # "_vg1x2", [IsSharedZA, IsPreservesZA, IsStreaming], []>;
330+
def NAME # _VG1x4 : Inst<"svread_" # n # "_{d}_vg1x4", "4m", t, MergeNone, i # "_vg1x4", [IsSharedZA, IsPreservesZA, IsStreaming], []>;
331+
}
332+
333+
let TargetGuard = "sme2" in {
334+
defm SVREAD_ZA8 : ZARead_VG<"za8", "cUc", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_0>]>;
335+
defm SVREAD_ZA16 : ZARead_VG<"za16", "sUshb", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_1>]>;
336+
defm SVREAD_ZA32 : ZARead_VG<"za32", "iUif", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_3>]>;
337+
defm SVREAD_ZA64 : ZARead_VG<"za64", "lUld", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_7>]>;
338+
}
339+
302340
//
303341
// Outer product and accumulate/subtract
304342
//

clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c

Lines changed: 1880 additions & 0 deletions
Large diffs are not rendered by default.

clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write.c

Lines changed: 1457 additions & 0 deletions
Large diffs are not rendered by default.

clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,57 @@
55

66
#include <arm_sme_draft_spec_subject_to_change.h>
77

8+
void test_multivector_read(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za {
9+
10+
// Test Tile Range
11+
svread_hor_za8_u8_vg2(1, base); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
12+
svread_ver_za8_u8_vg2(1, base); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
13+
svread_hor_za8_u8_vg4(1, base); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
14+
svread_ver_za8_u8_vg4(1, base); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
15+
16+
svread_hor_za16_u16_vg2(2, base); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
17+
svread_ver_za16_u16_vg2(2, base); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
18+
svread_hor_za16_u16_vg4(2, base); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
19+
svread_ver_za16_u16_vg4(2, base); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
20+
21+
svread_hor_za32_u32_vg2(4, base); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
22+
svread_ver_za32_u32_vg2(4, base); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
23+
svread_hor_za32_u32_vg4(4, base); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
24+
svread_ver_za32_u32_vg4(4, base); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
25+
26+
svread_hor_za64_u64_vg2(8, base); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
27+
svread_ver_za64_u64_vg2(8, base); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
28+
svread_hor_za64_u64_vg4(8, base); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
29+
svread_ver_za64_u64_vg4(8, base); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
30+
}
31+
32+
void test_multivector_write(uint32_t base, svuint8x2_t v8x2, svuint8x4_t v8x4,
33+
svuint16x2_t v16x2, svuint16x4_t v16x4,
34+
svuint32x2_t v32x2, svuint32x4_t v32x4,
35+
svuint64x2_t v64x2, svuint64x4_t v64x4) __arm_streaming __arm_shared_za {
36+
37+
// Test Tile Range
38+
svwrite_hor_za8_u8_vg2(1, base, v8x2); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
39+
svwrite_ver_za8_u8_vg2(1, base, v8x2); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
40+
svwrite_hor_za8_u8_vg4(1, base, v8x4); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
41+
svwrite_ver_za8_u8_vg4(1, base, v8x4); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
42+
43+
svwrite_hor_za16_u16_vg2(2, base, v16x2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
44+
svwrite_ver_za16_u16_vg2(2, base, v16x2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
45+
svwrite_hor_za16_u16_vg4(2, base, v16x4); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
46+
svwrite_ver_za16_u16_vg4(2, base, v16x4); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
47+
48+
svwrite_hor_za32_u32_vg2(4, base, v32x2); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
49+
svwrite_ver_za32_u32_vg2(4, base, v32x2); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
50+
svwrite_hor_za32_u32_vg4(4, base, v32x4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
51+
svwrite_ver_za32_u32_vg4(4, base, v32x4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
52+
53+
svwrite_hor_za64_u64_vg2(8, base, v64x2); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
54+
svwrite_ver_za64_u64_vg2(8, base, v64x2); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
55+
svwrite_hor_za64_u64_vg4(8, base, v64x4); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
56+
svwrite_ver_za64_u64_vg4(8, base, v64x4); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
57+
}
58+
859
void test_outer_product(svbool_t pred, svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32) __arm_streaming __arm_shared_za {
960
// Test Tile Range
1061
svmopa_za32_u16_m(4, pred, pred, u16, u16); // expected-error {{argument value 4 is outside the valid range [0, 3]}}

llvm/lib/Target/AArch64/SMEInstrFormats.td

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3718,8 +3718,14 @@ multiclass sme2_mova_vec_to_array_vg2_multi<string mnemonic, SDPatternOperator i
37183718

37193719
def NAME # _PSEUDO : sme2_move_to_za_pseudo<NAME, sme_elm_idx0_7, ZZ_d_mul_r, SMEMatrixArray>;
37203720

3721-
def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv2i64, sme_elm_idx0_7, tileslice16>;
3722-
def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv2f64, sme_elm_idx0_7, tileslice16>;
3721+
def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv16i8, sme_elm_idx0_7, tileslice16>;
3722+
def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv8i16, sme_elm_idx0_7, tileslice16>;
3723+
def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv8f16, sme_elm_idx0_7, tileslice16>;
3724+
def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv8bf16, sme_elm_idx0_7, tileslice16>;
3725+
def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv4i32, sme_elm_idx0_7, tileslice16>;
3726+
def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv4f32, sme_elm_idx0_7, tileslice16>;
3727+
def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv2i64, sme_elm_idx0_7, tileslice16>;
3728+
def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv2f64, sme_elm_idx0_7, tileslice16>;
37233729

37243730
defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
37253731
MatrixOp8,
@@ -3811,8 +3817,14 @@ multiclass sme2_mova_vec_to_array_vg4_multi<string mnemonic, SDPatternOperator i
38113817

38123818
def NAME # _PSEUDO : sme2_move_to_za_pseudo<NAME, sme_elm_idx0_7, ZZZZ_d_mul_r, SMEMatrixArray>;
38133819

3814-
def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv2i64, sme_elm_idx0_7, tileslice16>;
3815-
def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv2f64, sme_elm_idx0_7, tileslice16>;
3820+
def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv16i8, sme_elm_idx0_7, tileslice16>;
3821+
def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv8i16, sme_elm_idx0_7, tileslice16>;
3822+
def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv8f16, sme_elm_idx0_7, tileslice16>;
3823+
def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv8bf16, sme_elm_idx0_7, tileslice16>;
3824+
def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv4i32, sme_elm_idx0_7, tileslice16>;
3825+
def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv4f32, sme_elm_idx0_7, tileslice16>;
3826+
def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv2i64, sme_elm_idx0_7, tileslice16>;
3827+
def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv2f64, sme_elm_idx0_7, tileslice16>;
38163828

38173829
defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
38183830
MatrixOp8,

0 commit comments

Comments
 (0)