Skip to content

Commit 7934fce

Browse files
[CLANG][LLVM][AArch64]Add SME2.1 intrinsics for MOVAZ tile to vector,… (#88499)
… single

According to the specification in ARM-software/acle#309, this adds the following intrinsics:

// And similarly for u8.
svint8_t svreadz_hor_za8_s8(uint64_t tile, uint32_t slice) __arm_streaming __arm_inout("za");

// And similarly for u16, bf16 and f16.
svint16_t svreadz_hor_za16_s16(uint64_t tile, uint32_t slice) __arm_streaming __arm_inout("za");

// And similarly for u32 and f32.
svint32_t svreadz_hor_za32_s32(uint64_t tile, uint32_t slice) __arm_streaming __arm_inout("za");

// And similarly for u64 and f64.
svint64_t svreadz_hor_za64_s64(uint64_t tile, uint32_t slice) __arm_streaming __arm_inout("za");

// And similarly for s16, s32, s64, u8, u16, u32, u64, bf16, f16, f32, f64.
svint8_t svreadz_hor_za128_s8(uint64_t tile, uint32_t slice) __arm_streaming __arm_inout("za");
1 parent 937d79b commit 7934fce

File tree

8 files changed

+991
-29
lines changed

8 files changed

+991
-29
lines changed

clang/include/clang/Basic/arm_sme.td

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -787,4 +787,22 @@ defm SVREADZ_ZA16_X4 : ZAReadz<"za16", "4", "sUshb", "aarch64_sme_readz", [ImmCh
787787
defm SVREADZ_ZA32_X4 : ZAReadz<"za32", "4", "iUif", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_3>]>;
788788
defm SVREADZ_ZA64_X4 : ZAReadz<"za64", "4", "lUld", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_7>]>;
789789

790+
791+
// Emits the single-vector `svreadz` builtins for one ZA element size: one
// horizontal (`_H`) and one vertical (`_V`) record per instantiation.
//   n_suffix - ZA suffix spliced into the builtin name (e.g. "za8").
//   t        - type-spec string listing the element types to instantiate.
//   i_prefix - LLVM intrinsic name prefix; "_horiz" / "_vert" is appended.
//   ch       - immediate-range checks attached to both records.
// Both records are guarded by the "sme2p1" target feature and carry the
// IsStreaming and IsInOutZA flags.
// NOTE(review): prototype "dim" presumably encodes <vector result>(immediate
// tile, 32-bit slice), matching the ACLE signatures in the commit message —
// confirm against the prototype-modifier table used by SInst.
multiclass ZAReadzSingle<string n_suffix, string t, string i_prefix, list<ImmCheck> ch> {
792+
let SMETargetGuard = "sme2p1" in {
793+
def NAME # _H : SInst<"svreadz_hor_" # n_suffix # "_{d}", "dim", t,
794+
MergeNone, i_prefix # "_horiz",
795+
[IsStreaming, IsInOutZA], ch>;
796+
797+
def NAME # _V : SInst<"svreadz_ver_" # n_suffix # "_{d}", "dim", t,
798+
MergeNone, i_prefix # "_vert",
799+
[IsStreaming, IsInOutZA], ch>;
800+
}
801+
}
802+
803+
// One ZAReadzSingle instantiation per ZA element size. ImmCheck<0, ...>
// range-checks argument 0; its upper bound grows with the element size:
// 0 (za8), 1 (za16), 3 (za32), 7 (za64), 15 (za128).
// The za128 form uses the separate "aarch64_sme_readz_q" intrinsic prefix and
// is instantiated for every element type in its type-spec string.
defm SVREADZ_ZA8 : ZAReadzSingle<"za8", "cUc", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_0>]>;
804+
defm SVREADZ_ZA16 : ZAReadzSingle<"za16", "sUshb", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_1>]>;
805+
defm SVREADZ_ZA32 : ZAReadzSingle<"za32", "iUif", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_3>]>;
806+
defm SVREADZ_ZA64 : ZAReadzSingle<"za64", "lUld", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_7>]>;
807+
defm SVREADZ_ZA128 : ZAReadzSingle<"za128", "csilUcUiUsUlbhfd", "aarch64_sme_readz_q", [ImmCheck<0, ImmCheck0_15>]>;
790808
} // let SVETargetGuard = InvalidMode

clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c

Lines changed: 410 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \
2+
// RUN: -target-feature +sme -target-feature +sme2p1 -target-feature +bf16 -fsyntax-only -verify %s
3+
4+
// REQUIRES: aarch64-registered-target
5+
6+
#include <arm_sme.h>
7+
8+
// Negative test for the tile immediate of the single-vector svreadz builtins.
// Every call passes -1 as the tile argument and the trailing directive on the
// same line pins the diagnostic, including the valid range for that element
// size: [0, 0] for za8, [0, 1] for za16, [0, 3] for za32, [0, 7] for za64 and
// [0, 15] for every za128 variant. The reported value 18446744073709551615 is
// -1 seen as an unsigned 64-bit tile number.
// NOTE(review): only the svreadz_hor_* forms are exercised here; the
// svreadz_ver_* forms and the unsigned/float za128 variants are not covered.
void tests_readz_tile_to_vector_single(uint32_t slice) __arm_streaming __arm_inout("za") {
9+
svreadz_hor_za8_s8(-1, slice); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 0]}}
10+
svreadz_hor_za16_s16(-1, slice); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
11+
svreadz_hor_za32_s32(-1, slice); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
12+
svreadz_hor_za64_s64(-1, slice); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
13+
svreadz_hor_za128_s8(-1, slice); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 15]}}
14+
svreadz_hor_za128_s16(-1, slice); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 15]}}
15+
svreadz_hor_za128_s32(-1, slice); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 15]}}
16+
svreadz_hor_za128_s64(-1, slice); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 15]}}
17+
svreadz_hor_za128_bf16(-1, slice); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 15]}}
18+
return;
19+
}
20+
21+

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2858,6 +2858,16 @@ let TargetPrefix = "aarch64" in {
28582858
def int_aarch64_sme_readz_horiz_x4 : SME_MOVAZ_TileToVector_X4_Intrinsic;
28592859
def int_aarch64_sme_readz_vert_x4 : SME_MOVAZ_TileToVector_X4_Intrinsic;
28602860

2861+
// Signature shared by the single-vector MOVAZ (tile to vector) intrinsics:
// returns one overloaded vector and takes two i32 operands, the first of which
// must be an immediate (ImmArg<ArgIndex<0>>).
// NOTE(review): IntrNoMem together with IntrHasSideEffects is presumably used
// because the read also zeroes the ZA slice, so calls must not be dropped or
// merged — confirm against the other readz intrinsic classes in this file.
class SME_MOVAZ_TileToVector_Intrinsic
2862+
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
2863+
[llvm_i32_ty, llvm_i32_ty],
2864+
[IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>]>;
2865+
2866+
def int_aarch64_sme_readz_horiz : SME_MOVAZ_TileToVector_Intrinsic;
2867+
def int_aarch64_sme_readz_vert : SME_MOVAZ_TileToVector_Intrinsic;
2868+
2869+
def int_aarch64_sme_readz_q_horiz : SME_MOVAZ_TileToVector_Intrinsic;
2870+
def int_aarch64_sme_readz_q_vert : SME_MOVAZ_TileToVector_Intrinsic;
28612871

28622872
def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
28632873

@@ -3681,12 +3691,12 @@ def int_aarch64_sve_extq : AdvSIMD_2VectorArgIndexed_Intrinsic;
36813691
// SVE2.1 - Move predicate to/from vector
36823692
//
36833693
def int_aarch64_sve_pmov_to_pred_lane : SVE2_1VectorArgIndexed_Pred_Intrinsic;
3684-
3694+
36853695
def int_aarch64_sve_pmov_to_pred_lane_zero : SVE2_1VectorArg_Pred_Intrinsic;
36863696

36873697
def int_aarch64_sve_pmov_to_vector_lane_merging : SVE2_Pred_1VectorArgIndexed_Intrinsic;
3688-
36893698
def int_aarch64_sve_pmov_to_vector_lane_zeroing : SVE2_Pred_1VectorArg_Intrinsic;
36903699

36913700
def int_aarch64_sme_mopa_nonwide : SME_OuterProduct_Intrinsic;
36923701
def int_aarch64_sme_mops_nonwide : SME_OuterProduct_Intrinsic;
3702+

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2986,7 +2986,7 @@ AArch64TargetLowering::EmitZAInstr(unsigned Opc, unsigned BaseReg,
29862986
bool HasTile = BaseReg != AArch64::ZA;
29872987
bool HasZPROut = HasTile && MI.getOperand(0).isReg();
29882988
if (HasZPROut) {
2989-
MIB.add(MI.getOperand(0)); // Output ZPR
2989+
MIB.add(MI.getOperand(StartIdx)); // Output ZPR
29902990
++StartIdx;
29912991
}
29922992
if (HasTile) {

llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -814,7 +814,8 @@ defm FSUB_VG4_M4Z_D : sme2_multivec_accum_add_sub_vg4<"fsub", 0b1001, MatrixOp64
814814
}
815815

816816
let Predicates = [HasSME2p1] in {
817-
defm MOVAZ_ZMI : sme2p1_movaz_tile_to_vec<"movaz">;
817+
defm MOVAZ_ZMI : sme2p1_movaz_tile_to_vec<"movaz", int_aarch64_sme_readz_horiz, int_aarch64_sme_readz_vert,
818+
int_aarch64_sme_readz_q_horiz, int_aarch64_sme_readz_q_vert>;
818819
defm MOVAZ_2ZMI : sme2p1_movaz_tile_to_vec_vg2<"movaz">;
819820
defm MOVAZ_4ZMI : sme2p1_movaz_tile_to_vec_vg4<"movaz">;
820821
defm MOVAZ_VG2_2ZM : sme2_mova_array_to_vec_vg2_multi<0b010, "movaz">;

llvm/lib/Target/AArch64/SMEInstrFormats.td

Lines changed: 80 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ class sem2p1_zero_matrix_pseudo<string name, Operand index_ty, SMEMatrixTypeEnum
111111
let usesCustomInserter = 1;
112112
}
113113

114-
class sme2_movez_to_tile_multi_pseudo<string name, Operand tile_imm, Operand imm_ty, RegisterOperand vector_ty, SMEMatrixTypeEnum za_flag>
114+
class sme2_movez_to_tile_pseudo<string name, Operand tile_imm, Operand imm_ty, RegisterOperand vector_ty, SMEMatrixTypeEnum za_flag>
115115
: SMEPseudo2Instr<name, 0>,
116116
Pseudo<(outs vector_ty:$Zn), (ins tile_imm:$tile, MatrixIndexGPR32Op12_15:$Rs, imm_ty:$imm), []> {
117117
let SMEMatrixType = za_flag;
@@ -205,6 +205,11 @@ class SME2_Tile_VG4_Multi_Pat<string name, SDPatternOperator intrinsic, Operand
205205
class SME2_Zero_Matrix_Pat<string name, SDPatternOperator intrinsic, Operand offset_ty, ComplexPattern tileslice>
206206
: Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, offset_ty:$offset))),
207207
(!cast<Instruction>(name) $base, $offset)>;
208+
209+
// Selection pattern for the single-vector MOVAZ intrinsics. Matches
// `intrinsic` producing `out_vt` from an immediate tile operand and an i32
// slice that `tileslice` splits into a base register
// (MatrixIndexGPR32Op12_15) plus an `index_ty` offset, and selects the
// instruction record named `name # _PSEUDO` with ($tile, $base, $offset).
class SME2_Tile_Movaz_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, Operand tile_imm, Operand index_ty, ComplexPattern tileslice>
210+
: Pat<(out_vt (intrinsic tile_imm:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)))),
211+
(!cast<Instruction>(name # _PSEUDO) $tile, $base, $offset)>;
212+
208213
//===----------------------------------------------------------------------===//
209214
// SME pattern match helpers.
210215
//===----------------------------------------------------------------------===//
@@ -4099,21 +4104,22 @@ multiclass sme2_mova_tile_to_vec_vg2_multi<string mnemonic>{
40994104
defm _V : sme2_mova_tile_to_vec_vg2_multi_inst<0b1, 0b000, mnemonic>;
41004105
}
41014106

4107+
41024108
// SME2p1 move tile to vector and zero tile, two registers
41034109
multiclass sme2p1_movaz_tile_to_vec_vg2<string mnemonic>{
41044110
defm _H : sme2_mova_tile_to_vec_vg2_multi_inst<0b0, 0b010, mnemonic>;
41054111
defm _V : sme2_mova_tile_to_vec_vg2_multi_inst<0b1, 0b010, mnemonic>;
41064112

41074113

4108-
def NAME # _H_B_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _H_B, sme_elm_idx0_0, uimm3s2range, ZZ_b_mul_r, SMEMatrixTileB>;
4109-
def NAME # _H_H_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _H_H, sme_elm_idx0_1, uimm2s2range, ZZ_h_mul_r, SMEMatrixTileH>;
4110-
def NAME # _H_S_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _H_S, sme_elm_idx0_3, uimm1s2range, ZZ_s_mul_r, SMEMatrixTileS>;
4111-
def NAME # _H_D_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _H_D, sme_elm_idx0_7, uimm0s2range, ZZ_d_mul_r, SMEMatrixTileD>;
4114+
def NAME # _H_B_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_B, sme_elm_idx0_0, uimm3s2range, ZZ_b_mul_r, SMEMatrixTileB>;
4115+
def NAME # _H_H_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_H, sme_elm_idx0_1, uimm2s2range, ZZ_h_mul_r, SMEMatrixTileH>;
4116+
def NAME # _H_S_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_S, sme_elm_idx0_3, uimm1s2range, ZZ_s_mul_r, SMEMatrixTileS>;
4117+
def NAME # _H_D_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_D, sme_elm_idx0_7, uimm0s2range, ZZ_d_mul_r, SMEMatrixTileD>;
41124118

4113-
def NAME # _V_B_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _V_B, sme_elm_idx0_0, uimm3s2range, ZZ_b_mul_r, SMEMatrixTileB>;
4114-
def NAME # _V_H_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _V_H, sme_elm_idx0_1, uimm2s2range, ZZ_h_mul_r, SMEMatrixTileH>;
4115-
def NAME # _V_S_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _V_S, sme_elm_idx0_3, uimm1s2range, ZZ_s_mul_r, SMEMatrixTileS>;
4116-
def NAME # _V_D_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _V_D, sme_elm_idx0_7, uimm0s2range, ZZ_d_mul_r, SMEMatrixTileD>;
4119+
def NAME # _V_B_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_B, sme_elm_idx0_0, uimm3s2range, ZZ_b_mul_r, SMEMatrixTileB>;
4120+
def NAME # _V_H_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_H, sme_elm_idx0_1, uimm2s2range, ZZ_h_mul_r, SMEMatrixTileH>;
4121+
def NAME # _V_S_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_S, sme_elm_idx0_3, uimm1s2range, ZZ_s_mul_r, SMEMatrixTileS>;
4122+
def NAME # _V_D_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_D, sme_elm_idx0_7, uimm0s2range, ZZ_d_mul_r, SMEMatrixTileD>;
41174123
}
41184124

41194125
class sme2_mova_tile_to_vec_vg4_multi_base<bits<2> sz, bit v, bits<6> op,
@@ -4246,15 +4252,15 @@ multiclass sme2p1_movaz_tile_to_vec_vg4<string mnemonic>{
42464252
defm _H : sme2_mova_tile_to_vec_vg4_multi_base<0b0, 0b110, mnemonic>;
42474253
defm _V : sme2_mova_tile_to_vec_vg4_multi_base<0b1, 0b110, mnemonic>;
42484254

4249-
def NAME # _H_B_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _H_B, sme_elm_idx0_0, uimm2s4range, ZZZZ_b_mul_r, SMEMatrixTileB>;
4250-
def NAME # _H_H_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _H_H, sme_elm_idx0_1, uimm1s4range, ZZZZ_h_mul_r, SMEMatrixTileH>;
4251-
def NAME # _H_S_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _H_S, sme_elm_idx0_3, uimm0s4range, ZZZZ_s_mul_r, SMEMatrixTileS>;
4252-
def NAME # _H_D_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _H_D, sme_elm_idx0_7, uimm0s4range, ZZZZ_d_mul_r, SMEMatrixTileD>;
4255+
def NAME # _H_B_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_B, sme_elm_idx0_0, uimm2s4range, ZZZZ_b_mul_r, SMEMatrixTileB>;
4256+
def NAME # _H_H_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_H, sme_elm_idx0_1, uimm1s4range, ZZZZ_h_mul_r, SMEMatrixTileH>;
4257+
def NAME # _H_S_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_S, sme_elm_idx0_3, uimm0s4range, ZZZZ_s_mul_r, SMEMatrixTileS>;
4258+
def NAME # _H_D_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_D, sme_elm_idx0_7, uimm0s4range, ZZZZ_d_mul_r, SMEMatrixTileD>;
42534259

4254-
def NAME # _V_B_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _V_B, sme_elm_idx0_0, uimm2s4range, ZZZZ_b_mul_r, SMEMatrixTileB>;
4255-
def NAME # _V_H_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _V_H, sme_elm_idx0_1, uimm1s4range, ZZZZ_h_mul_r, SMEMatrixTileH>;
4256-
def NAME # _V_S_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _V_S, sme_elm_idx0_3, uimm0s4range, ZZZZ_s_mul_r, SMEMatrixTileS>;
4257-
def NAME # _V_D_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _V_D, sme_elm_idx0_7, uimm0s4range, ZZZZ_d_mul_r, SMEMatrixTileD>;
4260+
def NAME # _V_B_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_B, sme_elm_idx0_0, uimm2s4range, ZZZZ_b_mul_r, SMEMatrixTileB>;
4261+
def NAME # _V_H_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_H, sme_elm_idx0_1, uimm1s4range, ZZZZ_h_mul_r, SMEMatrixTileH>;
4262+
def NAME # _V_S_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_S, sme_elm_idx0_3, uimm0s4range, ZZZZ_s_mul_r, SMEMatrixTileS>;
4263+
def NAME # _V_D_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_D, sme_elm_idx0_7, uimm0s4range, ZZZZ_d_mul_r, SMEMatrixTileD>;
42584264
}
42594265

42604266

@@ -4788,14 +4794,14 @@ class sme2p1_movaz_tile_to_vec_base<bits<2> sz, bit q, bit v, ZPRRegOp vector_ty
47884794
multiclass sme2p1_movaz_tile_to_vec_base<bit v, string mnemonic> {
47894795
def _B : sme2p1_movaz_tile_to_vec_base<0b00, 0b0, v, ZPR8,
47904796
!if(v, TileVectorOpV8, TileVectorOpH8),
4791-
sme_elm_idx0_15, mnemonic> {
4797+
sme_elm_idx0_15, mnemonic>, SMEPseudo2Instr<NAME # _B, 1> {
47924798
bits<4> imm;
47934799
let Inst{8-5} = imm;
47944800
}
47954801

47964802
def _H : sme2p1_movaz_tile_to_vec_base<0b01, 0b0, v, ZPR16,
47974803
!if(v, TileVectorOpV16, TileVectorOpH16),
4798-
sme_elm_idx0_7, mnemonic> {
4804+
sme_elm_idx0_7, mnemonic>, SMEPseudo2Instr<NAME # _H, 1> {
47994805
bits<1> ZAn;
48004806
bits<3> imm;
48014807
let Inst{8} = ZAn;
@@ -4804,7 +4810,7 @@ multiclass sme2p1_movaz_tile_to_vec_base<bit v, string mnemonic> {
48044810

48054811
def _S : sme2p1_movaz_tile_to_vec_base<0b10, 0b0, v, ZPR32,
48064812
!if(v, TileVectorOpV32, TileVectorOpH32),
4807-
sme_elm_idx0_3, mnemonic> {
4813+
sme_elm_idx0_3, mnemonic>, SMEPseudo2Instr<NAME # _S, 1> {
48084814
bits<2> ZAn;
48094815
bits<2> imm;
48104816
let Inst{8-7} = ZAn;
@@ -4813,7 +4819,7 @@ multiclass sme2p1_movaz_tile_to_vec_base<bit v, string mnemonic> {
48134819

48144820
def _D : sme2p1_movaz_tile_to_vec_base<0b11, 0b0, v, ZPR64,
48154821
!if(v, TileVectorOpV64, TileVectorOpH64),
4816-
sme_elm_idx0_1, mnemonic> {
4822+
sme_elm_idx0_1, mnemonic>, SMEPseudo2Instr<NAME # _D, 1> {
48174823
bits<3> ZAn;
48184824
bits<1> imm;
48194825
let Inst{8-6} = ZAn;
@@ -4822,15 +4828,66 @@ multiclass sme2p1_movaz_tile_to_vec_base<bit v, string mnemonic> {
48224828

48234829
def _Q : sme2p1_movaz_tile_to_vec_base<0b11, 0b1, v, ZPR128,
48244830
!if(v, TileVectorOpV128, TileVectorOpH128),
4825-
sme_elm_idx0_0, mnemonic> {
4831+
sme_elm_idx0_0, mnemonic>, SMEPseudo2Instr<NAME # _Q, 1> {
48264832
bits<4> ZAn;
48274833
let Inst{8-5} = ZAn;
48284834
}
48294835
}
48304836

4831-
multiclass sme2p1_movaz_tile_to_vec<string mnemonic>{
4837+
multiclass sme2p1_movaz_tile_to_vec<string mnemonic, SDPatternOperator intrinsic_horiz, SDPatternOperator intrinsic_vert,
4838+
SDPatternOperator intrinsic_horiz_q, SDPatternOperator intrinsic_vert_q>{
48324839
defm _H : sme2p1_movaz_tile_to_vec_base<0b0, mnemonic>;
48334840
defm _V : sme2p1_movaz_tile_to_vec_base<0b1, mnemonic>;
4841+
4842+
def NAME # _H_B_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_B, sme_elm_idx0_0, sme_elm_idx0_15, ZPR8, SMEMatrixTileB>;
4843+
def NAME # _H_H_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_H, sme_elm_idx0_1, sme_elm_idx0_7, ZPR16, SMEMatrixTileH>;
4844+
def NAME # _H_S_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_S, sme_elm_idx0_3, sme_elm_idx0_3, ZPR32, SMEMatrixTileS>;
4845+
def NAME # _H_D_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_D, sme_elm_idx0_7, sme_elm_idx0_1, ZPR64, SMEMatrixTileD>;
4846+
def NAME # _H_Q_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_Q, sme_elm_idx0_15, sme_elm_idx0_0, ZPR128, SMEMatrixTileQ>;
4847+
4848+
def NAME # _V_B_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_B, sme_elm_idx0_0, sme_elm_idx0_15, ZPR8, SMEMatrixTileB>;
4849+
def NAME # _V_H_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_H, sme_elm_idx0_1, sme_elm_idx0_7, ZPR16, SMEMatrixTileH>;
4850+
def NAME # _V_S_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_S, sme_elm_idx0_3, sme_elm_idx0_3, ZPR32, SMEMatrixTileS>;
4851+
def NAME # _V_D_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_D, sme_elm_idx0_7, sme_elm_idx0_1, ZPR64, SMEMatrixTileD>;
4852+
def NAME # _V_Q_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_Q, sme_elm_idx0_15, sme_elm_idx0_0, ZPR128, SMEMatrixTileQ>;
4853+
4854+
def : SME2_Tile_Movaz_Pat<NAME # _H_B, intrinsic_horiz, nxv16i8,sme_elm_idx0_0, sme_elm_idx0_15, tileslice8>;
4855+
def : SME2_Tile_Movaz_Pat<NAME # _H_H, intrinsic_horiz, nxv8i16, sme_elm_idx0_1, sme_elm_idx0_7, tileslice16>;
4856+
def : SME2_Tile_Movaz_Pat<NAME # _H_S, intrinsic_horiz, nxv4i32, sme_elm_idx0_3, sme_elm_idx0_3, tileslice32>;
4857+
def : SME2_Tile_Movaz_Pat<NAME # _H_D, intrinsic_horiz, nxv2i64, sme_elm_idx0_7, sme_elm_idx0_1, tileslice64>;
4858+
def : SME2_Tile_Movaz_Pat<NAME # _H_H, intrinsic_horiz, nxv8bf16, sme_elm_idx0_1, sme_elm_idx0_7, tileslice16>;
4859+
def : SME2_Tile_Movaz_Pat<NAME # _H_H, intrinsic_horiz, nxv8f16, sme_elm_idx0_1, sme_elm_idx0_7, tileslice16>;
4860+
def : SME2_Tile_Movaz_Pat<NAME # _H_S, intrinsic_horiz, nxv4f32, sme_elm_idx0_3, sme_elm_idx0_3, tileslice32>;
4861+
def : SME2_Tile_Movaz_Pat<NAME # _H_D, intrinsic_horiz, nxv2f64, sme_elm_idx0_7, sme_elm_idx0_1, tileslice64>;
4862+
4863+
def : SME2_Tile_Movaz_Pat<NAME # _V_B, intrinsic_vert, nxv16i8, sme_elm_idx0_0, sme_elm_idx0_15, tileslice8>;
4864+
def : SME2_Tile_Movaz_Pat<NAME # _V_H, intrinsic_vert, nxv8i16, sme_elm_idx0_1, sme_elm_idx0_7, tileslice16>;
4865+
def : SME2_Tile_Movaz_Pat<NAME # _V_S, intrinsic_vert, nxv4i32, sme_elm_idx0_3, sme_elm_idx0_3, tileslice32>;
4866+
def : SME2_Tile_Movaz_Pat<NAME # _V_D, intrinsic_vert, nxv2i64, sme_elm_idx0_7, sme_elm_idx0_1, tileslice64>;
4867+
def : SME2_Tile_Movaz_Pat<NAME # _V_H, intrinsic_vert, nxv8bf16, sme_elm_idx0_1, sme_elm_idx0_7, tileslice16>;
4868+
def : SME2_Tile_Movaz_Pat<NAME # _V_H, intrinsic_vert, nxv8f16, sme_elm_idx0_1, sme_elm_idx0_7, tileslice16>;
4869+
def : SME2_Tile_Movaz_Pat<NAME # _V_S, intrinsic_vert, nxv4f32, sme_elm_idx0_3, sme_elm_idx0_3, tileslice32>;
4870+
def : SME2_Tile_Movaz_Pat<NAME # _V_D, intrinsic_vert, nxv2f64, sme_elm_idx0_7, sme_elm_idx0_1, tileslice64>;
4871+
4872+
// _H_Q
4873+
def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv16i8, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
4874+
def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv8i16, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
4875+
def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv4i32, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
4876+
def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv2i64, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
4877+
def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv8bf16, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
4878+
def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv8f16, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
4879+
def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv4f32, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
4880+
def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv2f64, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
4881+
4882+
// _V_Q
4883+
def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv16i8, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
4884+
def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv8i16, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
4885+
def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv4i32, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
4886+
def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv2i64, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
4887+
def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv8bf16, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
4888+
def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv8f16, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
4889+
def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv4f32, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
4890+
def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv2f64, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
48344891
}
48354892

48364893
//===----------------------------------------------------------------------===//

0 commit comments

Comments
 (0)