Skip to content

Commit c705f70

Browse files
[CLANG][LLVM][AArch64]SME2.1 intrinsics for MOVAZ tile to 2/4 vectors
According to the specification in ARM-software/acle#309, this adds the following intrinsics:

// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
// _za64_s64, _za64_u64 and _za64_f64
svint8x2_t svreadz_hor_za8_s8_vg2(uint64_t tile, uint32_t slice) __arm_streaming __arm_inout("za");

// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
// _za64_s64, _za64_u64 and _za64_f64
svint8x4_t svreadz_hor_za8_s8_vg4(uint64_t tile, uint32_t slice) __arm_streaming __arm_inout("za");

// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
// _za64_s64, _za64_u64 and _za64_f64
svint8x2_t svreadz_ver_za8_s8_vg2(uint64_t tile, uint32_t slice) __arm_streaming __arm_inout("za");

// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
// _za64_s64, _za64_u64 and _za64_f64
svint8x4_t svreadz_ver_za8_s8_vg4(uint64_t tile, uint32_t slice) __arm_streaming __arm_inout("za");
1 parent 515e048 commit c705f70

File tree

9 files changed

+2089
-1
lines changed

9 files changed

+2089
-1
lines changed

clang/include/clang/Basic/arm_sme.td

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -764,4 +764,27 @@ let SMETargetGuard = "sme-f16f16" in {
764764
[ImmCheck<0, ImmCheck0_1>]>;
765765
}
766766

767+
768+
// Defines a pair of svreadz builtins, one horizontal (NAME_H) and one
// vertical (NAME_V), both guarded by the "sme2p1" target feature and marked
// IsStreaming + IsInOutZA.
//   n_suffix - ZA element-size suffix spliced into the builtin name (e.g. "za8").
//   vg_num   - vector-group count as a string; it is appended to the builtin
//              name ("_vg" # vg_num), used as the first character of the
//              prototype string (vg_num # "im") and appended to the intrinsic
//              name ("_horiz_x"/"_vert_x" # vg_num).
//   t        - type-specifier string forwarded to SInst.
//   i_prefix - prefix of the LLVM intrinsic name the builtin maps to.
//   ch       - list of immediate-range checks forwarded to SInst.
multiclass ZAReadz<string n_suffix, string vg_num, string t, string i_prefix, list<ImmCheck> ch> {
769+
let SMETargetGuard = "sme2p1" in {
770+
def NAME # _H : SInst<"svreadz_hor_" # n_suffix # "_{d}_vg" # vg_num, vg_num # "im", t,
771+
MergeNone, i_prefix # "_horiz_x" # vg_num,
772+
[IsStreaming, IsInOutZA], ch>;
773+
774+
def NAME # _V : SInst<"svreadz_ver_" # n_suffix # "_{d}_vg" # vg_num, vg_num # "im", t,
775+
MergeNone, i_prefix # "_vert_x" #vg_num,
776+
[IsStreaming, IsInOutZA], ch>;
777+
}
778+
}
779+
780+
// Instantiate ZAReadz for every ZA element size, first for 2-vector groups
// (X2) and then for 4-vector groups (X4). ImmCheck<0, ...> range-checks
// immediate argument 0; the permitted range widens with the element size
// (ImmCheck0_0 for za8 up to ImmCheck0_7 for za64).
defm SVREADZ_ZA8_X2 : ZAReadz<"za8", "2", "cUc", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_0>]>;
781+
defm SVREADZ_ZA16_X2 : ZAReadz<"za16", "2", "sUshb", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_1>]>;
782+
defm SVREADZ_ZA32_X2 : ZAReadz<"za32", "2", "iUif", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_3>]>;
783+
defm SVREADZ_ZA64_X2 : ZAReadz<"za64", "2", "lUld", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_7>]>;
784+
785+
defm SVREADZ_ZA8_X4 : ZAReadz<"za8", "4", "cUc", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_0>]>;
786+
defm SVREADZ_ZA16_X4 : ZAReadz<"za16", "4", "sUshb", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_1>]>;
787+
defm SVREADZ_ZA32_X4 : ZAReadz<"za32", "4", "iUif", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_3>]>;
788+
defm SVREADZ_ZA64_X4 : ZAReadz<"za64", "4", "lUld", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_7>]>;
789+
767790
} // let SVETargetGuard = InvalidMode

clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c

Lines changed: 1414 additions & 0 deletions
Large diffs are not rendered by default.

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2841,6 +2841,24 @@ let TargetPrefix = "aarch64" in {
28412841
def int_aarch64_sme_writeq_horiz : SME_VectorToTile_Intrinsic;
28422842
def int_aarch64_sme_writeq_vert : SME_VectorToTile_Intrinsic;
28432843

2844+
// Intrinsic signature returning two vectors of the same (overloaded) type.
// Takes two i32 operands; operand 0 must be an immediate (ImmArg). Marked
// IntrNoMem together with IntrHasSideEffects.
class SME_MOVAZ_TileToVector_X2_Intrinsic
2845+
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
2846+
[llvm_i32_ty, llvm_i32_ty],
2847+
[IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>]>;
2848+
2849+
// Intrinsic signature returning four vectors of the same (overloaded) type.
// Takes two i32 operands; operand 0 must be an immediate (ImmArg). Marked
// IntrNoMem together with IntrHasSideEffects.
class SME_MOVAZ_TileToVector_X4_Intrinsic
2850+
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
2851+
LLVMMatchType<0>,LLVMMatchType<0>],
2852+
[llvm_i32_ty, llvm_i32_ty],
2853+
[IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>]>;
2854+
2855+
// readz intrinsics: horizontal and vertical forms, each producing either
// two (x2) or four (x4) result vectors.
def int_aarch64_sme_readz_horiz_x2 : SME_MOVAZ_TileToVector_X2_Intrinsic;
2856+
def int_aarch64_sme_readz_vert_x2 : SME_MOVAZ_TileToVector_X2_Intrinsic;
2857+
2858+
def int_aarch64_sme_readz_horiz_x4 : SME_MOVAZ_TileToVector_X4_Intrinsic;
2859+
def int_aarch64_sme_readz_vert_x4 : SME_MOVAZ_TileToVector_X4_Intrinsic;
2860+
2861+
28442862
def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
28452863

28462864
class SME_OuterProduct_Intrinsic

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 98 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -395,7 +395,8 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
395395
template <unsigned MaxIdx, unsigned Scale>
396396
void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
397397
unsigned Op);
398-
398+
/// Select node \p N into machine opcode \p Op, which yields \p NumVecs
/// result vectors. MaxIdx and Scale are forwarded to the tile-slice
/// matching helper.
template <unsigned MaxIdx, unsigned Scale>
399+
void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs, unsigned Op);
399400
bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
400401
/// SVE Reg+Imm addressing mode.
401402
template <int64_t Min, int64_t Max>
@@ -2003,6 +2004,34 @@ void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
20032004
CurDAG->RemoveDeadNode(N);
20042005
}
20052006

2007+
template <unsigned MaxIdx, unsigned Scale>
2008+
void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2009+
unsigned Op) {
2010+
2011+
SDValue SliceBase = N->getOperand(3);
2012+
SDValue Base, Offset;
2013+
if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2014+
return;
2015+
// The correct Za tile number is computed in Machine Instruction
2016+
// See EmitTileMovaz
2017+
// DAG cannot select Za tile as an output register with ZReg
2018+
SDLoc DL(N);
2019+
SDValue Ops[] = {/*TileNum*/ N->getOperand(2), Base, Offset,
2020+
/*Chain*/ N->getOperand(0)};
2021+
SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2022+
2023+
EVT VT = N->getValueType(0);
2024+
for (unsigned I = 0; I < NumVecs; ++I)
2025+
ReplaceUses(SDValue(N, I),
2026+
CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2027+
SDValue(Mov, 0)));
2028+
2029+
// Copy chain
2030+
unsigned ChainIdx = NumVecs;
2031+
ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2032+
CurDAG->RemoveDeadNode(N);
2033+
}
2034+
20062035
void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
20072036
unsigned NumOutVecs,
20082037
bool IsTupleInput,
@@ -5245,6 +5274,74 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
52455274
AArch64::MOVA_VG4_4ZMXI);
52465275
return;
52475276
}
5277+
case Intrinsic::aarch64_sme_readz_horiz_x2: {
5278+
if (VT == MVT::nxv16i8) {
5279+
SelectMultiVectorMoveZ<14, 2>(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO);
5280+
return;
5281+
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5282+
VT == MVT::nxv8bf16) {
5283+
SelectMultiVectorMoveZ<6, 2>(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO);
5284+
return;
5285+
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5286+
SelectMultiVectorMoveZ<2, 2>(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO);
5287+
return;
5288+
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5289+
SelectMultiVectorMoveZ<0, 2>(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO);
5290+
return;
5291+
}
5292+
break;
5293+
}
5294+
case Intrinsic::aarch64_sme_readz_vert_x2: {
5295+
if (VT == MVT::nxv16i8) {
5296+
SelectMultiVectorMoveZ<14, 2>(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO);
5297+
return;
5298+
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5299+
VT == MVT::nxv8bf16) {
5300+
SelectMultiVectorMoveZ<6, 2>(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO);
5301+
return;
5302+
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5303+
SelectMultiVectorMoveZ<2, 2>(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO);
5304+
return;
5305+
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5306+
SelectMultiVectorMoveZ<0, 2>(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO);
5307+
return;
5308+
}
5309+
break;
5310+
}
5311+
case Intrinsic::aarch64_sme_readz_horiz_x4: {
5312+
if (VT == MVT::nxv16i8) {
5313+
SelectMultiVectorMoveZ<12, 4>(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO);
5314+
return;
5315+
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5316+
VT == MVT::nxv8bf16) {
5317+
SelectMultiVectorMoveZ<4, 4>(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO);
5318+
return;
5319+
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5320+
SelectMultiVectorMoveZ<0, 4>(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO);
5321+
return;
5322+
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5323+
SelectMultiVectorMoveZ<0, 4>(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO);
5324+
return;
5325+
}
5326+
break;
5327+
}
5328+
case Intrinsic::aarch64_sme_readz_vert_x4: {
5329+
if (VT == MVT::nxv16i8) {
5330+
SelectMultiVectorMoveZ<12, 4>(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO);
5331+
return;
5332+
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5333+
VT == MVT::nxv8bf16) {
5334+
SelectMultiVectorMoveZ<4, 4>(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO);
5335+
return;
5336+
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5337+
SelectMultiVectorMoveZ<0, 4>(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO);
5338+
return;
5339+
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5340+
SelectMultiVectorMoveZ<0, 4>(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO);
5341+
return;
5342+
}
5343+
break;
5344+
}
52485345
case Intrinsic::swift_async_context_addr: {
52495346
SDLoc DL(Node);
52505347
SDValue Chain = Node->getOperand(0);

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2941,6 +2941,23 @@ AArch64TargetLowering::EmitTileLoad(unsigned Opc, unsigned BaseReg,
29412941
return BB;
29422942
}
29432943

2944+
/// Replace the pseudo instruction \p MI with the real instruction \p Opc,
/// inserted in \p BB immediately before \p MI.
///
/// The tile register is \p BaseReg plus the immediate held in operand 1 of
/// \p MI; it is added to the new instruction twice, first as a definition and
/// then as a use. Operand 0 of \p MI (the Z register result) and operands 2
/// and 3 (slice index register and slice index offset) are copied across
/// unchanged. \p MI is erased and \p BB is returned.
MachineBasicBlock *
AArch64TargetLowering::EmitTileMovaz(unsigned Opc, unsigned BaseReg,
                                     MachineInstr &MI,
                                     MachineBasicBlock *BB) const {
  const TargetInstrInfo *InstrInfo = Subtarget->getInstrInfo();

  BuildMI(*BB, MI, MI.getDebugLoc(), InstrInfo->get(Opc))
      .add(MI.getOperand(0)) // ZReg
      .addReg(BaseReg + MI.getOperand(1).getImm(),
              RegState::Define)                    // tile as output
      .addReg(BaseReg + MI.getOperand(1).getImm()) // tile as input
      .add(MI.getOperand(2))                       // slice index register
      .add(MI.getOperand(3));                      // slice index offset

  // The pseudo has been fully replaced.
  MI.eraseFromParent();
  return BB;
}
2960+
29442961
MachineBasicBlock *
29452962
AArch64TargetLowering::EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const {
29462963
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
@@ -3178,6 +3195,38 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
31783195
return EmitZero(MI, BB);
31793196
case AArch64::ZERO_T_PSEUDO:
31803197
return EmitZTInstr(MI, BB, AArch64::ZERO_T, /*Op0IsDef=*/true);
3198+
case AArch64::MOVAZ_2ZMI_H_B_PSEUDO:
3199+
return EmitTileMovaz(AArch64::MOVAZ_2ZMI_H_B, AArch64::ZAB0, MI, BB);
3200+
case AArch64::MOVAZ_2ZMI_H_H_PSEUDO:
3201+
return EmitTileMovaz(AArch64::MOVAZ_2ZMI_H_H, AArch64::ZAH0, MI, BB);
3202+
case AArch64::MOVAZ_2ZMI_H_S_PSEUDO:
3203+
return EmitTileMovaz(AArch64::MOVAZ_2ZMI_H_S, AArch64::ZAS0, MI, BB);
3204+
case AArch64::MOVAZ_2ZMI_H_D_PSEUDO:
3205+
return EmitTileMovaz(AArch64::MOVAZ_2ZMI_H_D, AArch64::ZAD0, MI, BB);
3206+
case AArch64::MOVAZ_2ZMI_V_B_PSEUDO:
3207+
return EmitTileMovaz(AArch64::MOVAZ_2ZMI_V_B, AArch64::ZAB0, MI, BB);
3208+
case AArch64::MOVAZ_2ZMI_V_H_PSEUDO:
3209+
return EmitTileMovaz(AArch64::MOVAZ_2ZMI_V_H, AArch64::ZAH0, MI, BB);
3210+
case AArch64::MOVAZ_2ZMI_V_S_PSEUDO:
3211+
return EmitTileMovaz(AArch64::MOVAZ_2ZMI_V_S, AArch64::ZAS0, MI, BB);
3212+
case AArch64::MOVAZ_2ZMI_V_D_PSEUDO:
3213+
return EmitTileMovaz(AArch64::MOVAZ_2ZMI_V_D, AArch64::ZAD0, MI, BB);
3214+
case AArch64::MOVAZ_4ZMI_H_B_PSEUDO:
3215+
return EmitTileMovaz(AArch64::MOVAZ_4ZMI_H_B, AArch64::ZAB0, MI, BB);
3216+
case AArch64::MOVAZ_4ZMI_H_H_PSEUDO:
3217+
return EmitTileMovaz(AArch64::MOVAZ_4ZMI_H_H, AArch64::ZAH0, MI, BB);
3218+
case AArch64::MOVAZ_4ZMI_H_S_PSEUDO:
3219+
return EmitTileMovaz(AArch64::MOVAZ_4ZMI_H_S, AArch64::ZAS0, MI, BB);
3220+
case AArch64::MOVAZ_4ZMI_H_D_PSEUDO:
3221+
return EmitTileMovaz(AArch64::MOVAZ_4ZMI_H_D, AArch64::ZAD0, MI, BB);
3222+
case AArch64::MOVAZ_4ZMI_V_B_PSEUDO:
3223+
return EmitTileMovaz(AArch64::MOVAZ_4ZMI_V_B, AArch64::ZAB0, MI, BB);
3224+
case AArch64::MOVAZ_4ZMI_V_H_PSEUDO:
3225+
return EmitTileMovaz(AArch64::MOVAZ_4ZMI_V_H, AArch64::ZAH0, MI, BB);
3226+
case AArch64::MOVAZ_4ZMI_V_S_PSEUDO:
3227+
return EmitTileMovaz(AArch64::MOVAZ_4ZMI_V_S, AArch64::ZAS0, MI, BB);
3228+
case AArch64::MOVAZ_4ZMI_V_D_PSEUDO:
3229+
return EmitTileMovaz(AArch64::MOVAZ_4ZMI_V_D, AArch64::ZAD0, MI, BB);
31813230
}
31823231
}
31833232

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -653,6 +653,9 @@ class AArch64TargetLowering : public TargetLowering {
653653
MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg,
654654
MachineInstr &MI,
655655
MachineBasicBlock *BB) const;
656+
/// Expand the pseudo instruction \p MI into opcode \p Opc within \p BB.
/// \p BaseReg is the register that the tile-number immediate of \p MI is
/// added to in order to obtain the tile register operand.
MachineBasicBlock *EmitTileMovaz(unsigned Opc, unsigned BaseReg,
657+
MachineInstr &MI,
658+
MachineBasicBlock *BB) const;
656659
MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const;
657660
MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg,
658661
MachineInstr &MI, MachineBasicBlock *BB,

llvm/lib/Target/AArch64/SMEInstrFormats.td

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,12 @@ class sem2p1_zero_matrix_pseudo<string name, Operand index_ty, SMEMatrixTypeEnum
111111
let usesCustomInserter = 1;
112112
}
113113

114+
// Pseudo instruction with a vector (tuple) output $Zn and three inputs: an
// immediate tile number ($tile), a slice index register ($Rs) and a slice
// index immediate ($imm). It carries no selection pattern and sets
// usesCustomInserter, so it is expanded by the target's custom inserter.
// NOTE(review): the class name says "to_tile", but the operands describe a
// tile-to-vector read - confirm whether the name should be revisited.
class sme2_movez_to_tile_multi_pseudo<string name, Operand tile_imm, Operand imm_ty, RegisterOperand vector_ty, SMEMatrixTypeEnum za_flag>
115+
: SMEPseudo2Instr<name, 0>,
116+
Pseudo<(outs vector_ty:$Zn), (ins tile_imm:$tile, MatrixIndexGPR32Op12_15:$Rs, imm_ty:$imm), []> {
117+
let SMEMatrixType = za_flag;
118+
let usesCustomInserter = 1;
119+
}
114120
//===----------------------------------------------------------------------===//
115121
// SME pattern match helpers.
116122
//===----------------------------------------------------------------------===//
@@ -4097,6 +4103,17 @@ multiclass sme2_mova_tile_to_vec_vg2_multi<string mnemonic>{
40974103
// MOVAZ tile-to-vector, two-vector group: the real horizontal (_H) and
// vertical (_V) instructions plus one pseudo per direction and element size.
multiclass sme2p1_movaz_tile_to_vec_vg2<string mnemonic>{
40984104
defm _H : sme2_mova_tile_to_vec_vg2_multi_inst<0b0, 0b010, mnemonic>;
40994105
defm _V : sme2_mova_tile_to_vec_vg2_multi_inst<0b1, 0b010, mnemonic>;
4106+
4107+
4108+
// Pseudos for B/H/S/D element sizes. Each pairs a tile-index operand
// (sme_elm_idx0_0 .. sme_elm_idx0_7) with a slice-offset range operand
// (uimm3s2range .. uimm0s2range) and a two-register Z tuple class.
def NAME # _H_B_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME, sme_elm_idx0_0, uimm3s2range, ZZ_b_mul_r, SMEMatrixTileB>;
4109+
def NAME # _H_H_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME, sme_elm_idx0_1, uimm2s2range, ZZ_h_mul_r, SMEMatrixTileH>;
4110+
def NAME # _H_S_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME, sme_elm_idx0_3, uimm1s2range, ZZ_s_mul_r, SMEMatrixTileS>;
4111+
def NAME # _H_D_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME, sme_elm_idx0_7, uimm0s2range, ZZ_d_mul_r, SMEMatrixTileD>;
4112+
4113+
def NAME # _V_B_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME, sme_elm_idx0_0, uimm3s2range, ZZ_b_mul_r, SMEMatrixTileB>;
4114+
def NAME # _V_H_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME, sme_elm_idx0_1, uimm2s2range, ZZ_h_mul_r, SMEMatrixTileH>;
4115+
def NAME # _V_S_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME, sme_elm_idx0_3, uimm1s2range, ZZ_s_mul_r, SMEMatrixTileS>;
4116+
def NAME # _V_D_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME, sme_elm_idx0_7, uimm0s2range, ZZ_d_mul_r, SMEMatrixTileD>;
41004117
}
41014118

41024119
class sme2_mova_tile_to_vec_vg4_multi_base<bits<2> sz, bit v, bits<6> op,
@@ -4228,6 +4245,16 @@ multiclass sme2_mova_tile_to_vec_vg4_multi<string mnemonic>{
42284245
// MOVAZ tile-to-vector, four-vector group: the real horizontal (_H) and
// vertical (_V) instructions plus one pseudo per direction and element size.
multiclass sme2p1_movaz_tile_to_vec_vg4<string mnemonic>{
42294246
defm _H : sme2_mova_tile_to_vec_vg4_multi_base<0b0, 0b110, mnemonic>;
42304247
defm _V : sme2_mova_tile_to_vec_vg4_multi_base<0b1, 0b110, mnemonic>;
4248+
4249+
// Pseudos for B/H/S/D element sizes, using four-register Z tuple classes.
// Both the S and D variants use uimm0s4range for the slice offset.
def NAME # _H_B_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME, sme_elm_idx0_0, uimm2s4range, ZZZZ_b_mul_r, SMEMatrixTileB>;
4250+
def NAME # _H_H_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME, sme_elm_idx0_1, uimm1s4range, ZZZZ_h_mul_r, SMEMatrixTileH>;
4251+
def NAME # _H_S_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME, sme_elm_idx0_3, uimm0s4range, ZZZZ_s_mul_r, SMEMatrixTileS>;
4252+
def NAME # _H_D_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME, sme_elm_idx0_7, uimm0s4range, ZZZZ_d_mul_r, SMEMatrixTileD>;
4253+
4254+
def NAME # _V_B_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME, sme_elm_idx0_0, uimm2s4range, ZZZZ_b_mul_r, SMEMatrixTileB>;
4255+
def NAME # _V_H_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME, sme_elm_idx0_1, uimm1s4range, ZZZZ_h_mul_r, SMEMatrixTileH>;
4256+
def NAME # _V_S_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME, sme_elm_idx0_3, uimm0s4range, ZZZZ_s_mul_r, SMEMatrixTileS>;
4257+
def NAME # _V_D_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME, sme_elm_idx0_7, uimm0s4range, ZZZZ_d_mul_r, SMEMatrixTileD>;
42314258
}
42324259

42334260

0 commit comments

Comments
 (0)