Skip to content

Commit 221dcd9

Browse files
[CLANG][LLVM][AArch64]SME2.1 intrinsics for MOVAZ tile to 2/4 vectors
According to the specification in ARM-software/acle#309, this adds the following intrinsics:

// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
// _za64_s64, _za64_u64 and _za64_f64
svint8x2_t svreadz_hor_za8_s8_vg2(uint64_t tile, uint32_t slice) __arm_streaming __arm_inout("za");

// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
// _za64_s64, _za64_u64 and _za64_f64
svint8x4_t svreadz_hor_za8_s8_vg4(uint64_t tile, uint32_t slice) __arm_streaming __arm_inout("za");

// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
// _za64_s64, _za64_u64 and _za64_f64
svint8x2_t svreadz_ver_za8_s8_vg2(uint64_t tile, uint32_t slice) __arm_streaming __arm_inout("za");

// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
// _za64_s64, _za64_u64 and _za64_f64
svint8x4_t svreadz_ver_za8_s8_vg4(uint64_t tile, uint32_t slice) __arm_streaming __arm_inout("za");
1 parent fadd1ec commit 221dcd9

File tree

9 files changed

+2089
-1
lines changed

9 files changed

+2089
-1
lines changed

clang/include/clang/Basic/arm_sme.td

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -761,3 +761,26 @@ let TargetGuard = "sme-f16f16" in {
761761
[IsStreaming, IsInOutZA],
762762
[ImmCheck<0, ImmCheck0_1>]>;
763763
}
764+
765+
// Defines the horizontal (_H) and vertical (_V) svreadz builtins for one ZA
// element size and one vector-group size, both guarded by the "sme2p1" target.
//   n_suffix - ZA element-size suffix used in the builtin name (e.g. "za8").
//   vg_num   - vector-group size as a string; it is appended to the builtin
//              name ("_vg" # vg_num), prefixed to the prototype string
//              (vg_num # "im") and appended to the LLVM intrinsic name.
//   t        - type-specifier string listing the element types to instantiate.
//   i_prefix - LLVM intrinsic name prefix; "_horiz_x"/"_vert_x" and vg_num are
//              appended to it.
//   ch       - immediate checks applied to the builtin's arguments.
multiclass ZAReadz<string n_suffix, string vg_num, string t, string i_prefix, list<ImmCheck> ch> {
766+
let TargetGuard = "sme2p1" in {
767+
// Horizontal variant: svreadz_hor_<n_suffix>_{d}_vg<vg_num>.
def NAME # _H : SInst<"svreadz_hor_" # n_suffix # "_{d}_vg" # vg_num, vg_num # "im", t,
768+
MergeNone, i_prefix # "_horiz_x" # vg_num,
769+
[IsStreaming, IsInOutZA], ch>;
770+
771+
// Vertical variant: svreadz_ver_<n_suffix>_{d}_vg<vg_num>.
def NAME # _V : SInst<"svreadz_ver_" # n_suffix # "_{d}_vg" # vg_num, vg_num # "im", t,
772+
MergeNone, i_prefix # "_vert_x" #vg_num,
773+
[IsStreaming, IsInOutZA], ch>;
774+
}
775+
}
776+
777+
// Instantiations of ZAReadz for every ZA element size, first for groups of two
// vectors and then for groups of four. In each case argument 0 is range-checked
// with an ImmCheck that widens with the element size: ImmCheck0_0 for za8,
// ImmCheck0_1 for za16, ImmCheck0_3 for za32 and ImmCheck0_7 for za64.
defm SVREADZ_ZA8_X2 : ZAReadz<"za8", "2", "cUc", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_0>]>;
778+
defm SVREADZ_ZA16_X2 : ZAReadz<"za16", "2", "sUshb", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_1>]>;
779+
defm SVREADZ_ZA32_X2 : ZAReadz<"za32", "2", "iUif", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_3>]>;
780+
defm SVREADZ_ZA64_X2 : ZAReadz<"za64", "2", "lUld", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_7>]>;
781+
782+
// Four-vector forms; same type lists and immediate checks as the x2 forms.
defm SVREADZ_ZA8_X4 : ZAReadz<"za8", "4", "cUc", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_0>]>;
783+
defm SVREADZ_ZA16_X4 : ZAReadz<"za16", "4", "sUshb", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_1>]>;
784+
defm SVREADZ_ZA32_X4 : ZAReadz<"za32", "4", "iUif", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_3>]>;
785+
defm SVREADZ_ZA64_X4 : ZAReadz<"za64", "4", "lUld", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_7>]>;
786+

clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c

Lines changed: 1414 additions & 0 deletions
Large diffs are not rendered by default.

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2841,6 +2841,24 @@ let TargetPrefix = "aarch64" in {
28412841
def int_aarch64_sme_writeq_horiz : SME_VectorToTile_Intrinsic;
28422842
def int_aarch64_sme_writeq_vert : SME_VectorToTile_Intrinsic;
28432843

2844+
// Signature shared by the two-vector readz intrinsics: returns two vectors of
// the same overloaded type and takes two i32 operands, the first of which must
// be an immediate (ImmArg<ArgIndex<0>>). Marked IntrNoMem together with
// IntrHasSideEffects.
// NOTE(review): the side-effect marking presumably models the zeroing of the
// ZA slices that were read - confirm against the MOVAZ specification.
class SME_MOVAZ_TileToVector_X2_Intrinsic
2845+
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
2846+
[llvm_i32_ty, llvm_i32_ty],
2847+
[IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>]>;
2848+
2849+
// Signature shared by the four-vector readz intrinsics: returns four vectors of
// the same overloaded type and takes two i32 operands, the first of which must
// be an immediate (ImmArg<ArgIndex<0>>). Carries the same attribute list as the
// two-vector class.
class SME_MOVAZ_TileToVector_X4_Intrinsic
2850+
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
2851+
LLVMMatchType<0>,LLVMMatchType<0>],
2852+
[llvm_i32_ty, llvm_i32_ty],
2853+
[IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>]>;
2854+
2855+
// Horizontal and vertical readz intrinsics returning two vectors.
def int_aarch64_sme_readz_horiz_x2 : SME_MOVAZ_TileToVector_X2_Intrinsic;
2856+
def int_aarch64_sme_readz_vert_x2 : SME_MOVAZ_TileToVector_X2_Intrinsic;
2857+
2858+
// Horizontal and vertical readz intrinsics returning four vectors.
def int_aarch64_sme_readz_horiz_x4 : SME_MOVAZ_TileToVector_X4_Intrinsic;
2859+
def int_aarch64_sme_readz_vert_x4 : SME_MOVAZ_TileToVector_X4_Intrinsic;
2860+
2861+
28442862
def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
28452863

28462864
class SME_OuterProduct_Intrinsic

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 98 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -395,7 +395,8 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
395395
template <unsigned MaxIdx, unsigned Scale>
396396
void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
397397
unsigned Op);
398-
398+
/// Select a multi-vector readz intrinsic node \p N into the machine opcode
/// \p Op, which produces \p NumVecs vectors. \p MaxIdx and \p Scale are
/// forwarded to SelectSMETileSlice when matching the slice operand.
template <unsigned MaxIdx, unsigned Scale>
399+
void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs, unsigned Op);
399400
bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
400401
/// SVE Reg+Imm addressing mode.
401402
template <int64_t Min, int64_t Max>
@@ -2003,6 +2004,34 @@ void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
20032004
CurDAG->RemoveDeadNode(N);
20042005
}
20052006

2007+
/// Replace the multi-vector readz intrinsic node \p N with the machine node
/// \p Op.
///
/// Operand 3 of \p N is the slice, which is split into a base and an offset by
/// SelectSMETileSlice using \p MaxIdx and \p Scale. Operand 2 is the tile
/// number and operand 0 the chain. If the slice cannot be matched the function
/// returns without touching \p N.
///
/// \param N       the intrinsic node being selected.
/// \param NumVecs number of vector results of \p N; the chain result follows
///                them at index \p NumVecs.
/// \param Op      opcode of the machine node to create.
template <unsigned MaxIdx, unsigned Scale>
2008+
void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2009+
unsigned Op) {
2010+
2011+
SDValue SliceBase = N->getOperand(3);
2012+
SDValue Base, Offset;
2013+
if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2014+
return;
2015+
// The tile number is passed through as an ordinary operand here; the actual
2016+
// ZA tile register is computed on the machine instruction, see EmitTileMovaz.
2017+
// The DAG cannot select a ZA tile as an output register together with ZReg.
2018+
SDLoc DL(N);
2019+
SDValue Ops[] = {/*TileNum*/ N->getOperand(2), Base, Offset,
2020+
/*Chain*/ N->getOperand(0)};
2021+
// Result 0 is the untyped vector tuple, result 1 the chain.
SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2022+
2023+
EVT VT = N->getValueType(0);
2024+
// Each vector result of N becomes the matching zsub sub-register of the tuple.
for (unsigned I = 0; I < NumVecs; ++I)
2025+
ReplaceUses(SDValue(N, I),
2026+
CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2027+
SDValue(Mov, 0)));
2028+
2029+
// Redirect users of N's chain result to the new node's chain.
2030+
unsigned ChainIdx = NumVecs;
2031+
ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2032+
CurDAG->RemoveDeadNode(N);
2033+
}
}
2034+
20062035
void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
20072036
unsigned NumOutVecs,
20082037
bool IsTupleInput,
@@ -5243,6 +5272,74 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
52435272
AArch64::MOVA_VG4_4ZMXI);
52445273
return;
52455274
}
5275+
case Intrinsic::aarch64_sme_readz_horiz_x2: {
5276+
if (VT == MVT::nxv16i8) {
5277+
SelectMultiVectorMoveZ<14, 2>(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO);
5278+
return;
5279+
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5280+
VT == MVT::nxv8bf16) {
5281+
SelectMultiVectorMoveZ<6, 2>(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO);
5282+
return;
5283+
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5284+
SelectMultiVectorMoveZ<2, 2>(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO);
5285+
return;
5286+
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5287+
SelectMultiVectorMoveZ<0, 2>(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO);
5288+
return;
5289+
}
5290+
break;
5291+
}
5292+
case Intrinsic::aarch64_sme_readz_vert_x2: {
5293+
if (VT == MVT::nxv16i8) {
5294+
SelectMultiVectorMoveZ<14, 2>(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO);
5295+
return;
5296+
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5297+
VT == MVT::nxv8bf16) {
5298+
SelectMultiVectorMoveZ<6, 2>(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO);
5299+
return;
5300+
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5301+
SelectMultiVectorMoveZ<2, 2>(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO);
5302+
return;
5303+
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5304+
SelectMultiVectorMoveZ<0, 2>(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO);
5305+
return;
5306+
}
5307+
break;
5308+
}
5309+
case Intrinsic::aarch64_sme_readz_horiz_x4: {
5310+
if (VT == MVT::nxv16i8) {
5311+
SelectMultiVectorMoveZ<12, 4>(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO);
5312+
return;
5313+
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5314+
VT == MVT::nxv8bf16) {
5315+
SelectMultiVectorMoveZ<4, 4>(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO);
5316+
return;
5317+
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5318+
SelectMultiVectorMoveZ<0, 4>(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO);
5319+
return;
5320+
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5321+
SelectMultiVectorMoveZ<0, 4>(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO);
5322+
return;
5323+
}
5324+
break;
5325+
}
5326+
case Intrinsic::aarch64_sme_readz_vert_x4: {
5327+
if (VT == MVT::nxv16i8) {
5328+
SelectMultiVectorMoveZ<12, 4>(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO);
5329+
return;
5330+
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5331+
VT == MVT::nxv8bf16) {
5332+
SelectMultiVectorMoveZ<4, 4>(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO);
5333+
return;
5334+
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5335+
SelectMultiVectorMoveZ<0, 4>(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO);
5336+
return;
5337+
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5338+
SelectMultiVectorMoveZ<0, 4>(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO);
5339+
return;
5340+
}
5341+
break;
5342+
}
52465343
case Intrinsic::swift_async_context_addr: {
52475344
SDLoc DL(Node);
52485345
SDValue Chain = Node->getOperand(0);

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2914,6 +2914,23 @@ AArch64TargetLowering::EmitTileLoad(unsigned Opc, unsigned BaseReg,
29142914
return BB;
29152915
}
29162916

2917+
/// Expand a MOVAZ pseudo \p MI into the instruction \p Opc.
///
/// The pseudo's operand 1 is an immediate tile number. It is added to
/// \p BaseReg to form the tile register, which is attached to the new
/// instruction both as a definition and as a use. Operand 0 (the vector
/// register) and operands 2 and 3 (slice index register and offset) are
/// copied over unchanged. The pseudo is erased and \p BB is returned.
MachineBasicBlock *
2918+
AArch64TargetLowering::EmitTileMovaz(unsigned Opc, unsigned BaseReg,
2919+
MachineInstr &MI,
2920+
MachineBasicBlock *BB) const {
2921+
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2922+
MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
2923+
2924+
MIB.add(MI.getOperand(0)); // ZReg (vector destination)
2925+
MIB.addReg(BaseReg + MI.getOperand(1).getImm(),
2926+
RegState::Define); // tile register as an output
2927+
MIB.addReg(BaseReg + MI.getOperand(1).getImm()); // same tile register as an input
2928+
MIB.add(MI.getOperand(2)); // slice index register
2929+
MIB.add(MI.getOperand(3)); // slice index offset
2930+
MI.eraseFromParent(); // The pseudo is gone now.
2931+
return BB;
2932+
}
}
2933+
29172934
MachineBasicBlock *
29182935
AArch64TargetLowering::EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const {
29192936
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
@@ -3074,6 +3091,38 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
30743091
return EmitZero(MI, BB);
30753092
case AArch64::ZERO_T_PSEUDO:
30763093
return EmitZTInstr(MI, BB, AArch64::ZERO_T, /*Op0IsDef=*/true);
3094+
case AArch64::MOVAZ_2ZMI_H_B_PSEUDO:
3095+
return EmitTileMovaz(AArch64::MOVAZ_2ZMI_H_B, AArch64::ZAB0, MI, BB);
3096+
case AArch64::MOVAZ_2ZMI_H_H_PSEUDO:
3097+
return EmitTileMovaz(AArch64::MOVAZ_2ZMI_H_H, AArch64::ZAH0, MI, BB);
3098+
case AArch64::MOVAZ_2ZMI_H_S_PSEUDO:
3099+
return EmitTileMovaz(AArch64::MOVAZ_2ZMI_H_S, AArch64::ZAS0, MI, BB);
3100+
case AArch64::MOVAZ_2ZMI_H_D_PSEUDO:
3101+
return EmitTileMovaz(AArch64::MOVAZ_2ZMI_H_D, AArch64::ZAD0, MI, BB);
3102+
case AArch64::MOVAZ_2ZMI_V_B_PSEUDO:
3103+
return EmitTileMovaz(AArch64::MOVAZ_2ZMI_V_B, AArch64::ZAB0, MI, BB);
3104+
case AArch64::MOVAZ_2ZMI_V_H_PSEUDO:
3105+
return EmitTileMovaz(AArch64::MOVAZ_2ZMI_V_H, AArch64::ZAH0, MI, BB);
3106+
case AArch64::MOVAZ_2ZMI_V_S_PSEUDO:
3107+
return EmitTileMovaz(AArch64::MOVAZ_2ZMI_V_S, AArch64::ZAS0, MI, BB);
3108+
case AArch64::MOVAZ_2ZMI_V_D_PSEUDO:
3109+
return EmitTileMovaz(AArch64::MOVAZ_2ZMI_V_D, AArch64::ZAD0, MI, BB);
3110+
case AArch64::MOVAZ_4ZMI_H_B_PSEUDO:
3111+
return EmitTileMovaz(AArch64::MOVAZ_4ZMI_H_B, AArch64::ZAB0, MI, BB);
3112+
case AArch64::MOVAZ_4ZMI_H_H_PSEUDO:
3113+
return EmitTileMovaz(AArch64::MOVAZ_4ZMI_H_H, AArch64::ZAH0, MI, BB);
3114+
case AArch64::MOVAZ_4ZMI_H_S_PSEUDO:
3115+
return EmitTileMovaz(AArch64::MOVAZ_4ZMI_H_S, AArch64::ZAS0, MI, BB);
3116+
case AArch64::MOVAZ_4ZMI_H_D_PSEUDO:
3117+
return EmitTileMovaz(AArch64::MOVAZ_4ZMI_H_D, AArch64::ZAD0, MI, BB);
3118+
case AArch64::MOVAZ_4ZMI_V_B_PSEUDO:
3119+
return EmitTileMovaz(AArch64::MOVAZ_4ZMI_V_B, AArch64::ZAB0, MI, BB);
3120+
case AArch64::MOVAZ_4ZMI_V_H_PSEUDO:
3121+
return EmitTileMovaz(AArch64::MOVAZ_4ZMI_V_H, AArch64::ZAH0, MI, BB);
3122+
case AArch64::MOVAZ_4ZMI_V_S_PSEUDO:
3123+
return EmitTileMovaz(AArch64::MOVAZ_4ZMI_V_S, AArch64::ZAS0, MI, BB);
3124+
case AArch64::MOVAZ_4ZMI_V_D_PSEUDO:
3125+
return EmitTileMovaz(AArch64::MOVAZ_4ZMI_V_D, AArch64::ZAD0, MI, BB);
30773126
}
30783127
}
30793128

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -648,6 +648,9 @@ class AArch64TargetLowering : public TargetLowering {
648648
MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg,
649649
MachineInstr &MI,
650650
MachineBasicBlock *BB) const;
651+
/// Expand the MOVAZ pseudo \p MI into \p Opc, using \p BaseReg plus the
/// pseudo's tile immediate as the tile register operand.
MachineBasicBlock *EmitTileMovaz(unsigned Opc, unsigned BaseReg,
652+
MachineInstr &MI,
653+
MachineBasicBlock *BB) const;
651654
MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const;
652655
MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg,
653656
MachineInstr &MI, MachineBasicBlock *BB,

llvm/lib/Target/AArch64/SMEInstrFormats.td

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,12 @@ class sem2p1_zero_matrix_pseudo<string name, Operand index_ty, SMEMatrixTypeEnum
111111
let usesCustomInserter = 1;
112112
}
113113

114+
// Pseudo for a multi-vector MOVAZ. Its only output is the vector operand $Zn;
// the tile ($tile), the slice index register ($Rs) and the slice offset ($imm)
// are inputs. NOTE(review): the "to_tile" in the class name does not match
// that operand layout.
// usesCustomInserter is set, so the pseudo is expanded by a custom inserter.
//   name      - forwarded to SMEPseudo2Instr.
//   tile_imm  - operand type of the tile immediate.
//   imm_ty    - operand type of the slice offset.
//   vector_ty - register operand class of the vector output.
//   za_flag   - value assigned to SMEMatrixType.
class sme2_movez_to_tile_multi_pseudo<string name, Operand tile_imm, Operand imm_ty, RegisterOperand vector_ty, SMEMatrixTypeEnum za_flag>
115+
: SMEPseudo2Instr<name, 0>,
116+
Pseudo<(outs vector_ty:$Zn), (ins tile_imm:$tile, MatrixIndexGPR32Op12_15:$Rs, imm_ty:$imm), []> {
117+
let SMEMatrixType = za_flag;
118+
let usesCustomInserter = 1;
119+
}
114120
//===----------------------------------------------------------------------===//
115121
// SME pattern match helpers.
116122
//===----------------------------------------------------------------------===//
@@ -4097,6 +4103,17 @@ multiclass sme2_mova_tile_to_vec_vg2_multi<string mnemonic>{
40974103
// Two-vector MOVAZ definitions: the horizontal (_H) and vertical (_V)
// instruction groups, plus one pseudo per direction and element size (B/H/S/D).
multiclass sme2p1_movaz_tile_to_vec_vg2<string mnemonic>{
40984104
defm _H : sme2_mova_tile_to_vec_vg2_multi_inst<0b0, 0b010, mnemonic>;
40994105
defm _V : sme2_mova_tile_to_vec_vg2_multi_inst<0b1, 0b010, mnemonic>;
4106+
4107+
4108+
// Horizontal pseudos. The tile operand type goes from sme_elm_idx0_0 (B) to
// sme_elm_idx0_7 (D), while the offset operand type goes from uimm3s2range (B)
// to uimm0s2range (D).
def NAME # _H_B_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME, sme_elm_idx0_0, uimm3s2range, ZZ_b_mul_r, SMEMatrixTileB>;
4109+
def NAME # _H_H_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME, sme_elm_idx0_1, uimm2s2range, ZZ_h_mul_r, SMEMatrixTileH>;
4110+
def NAME # _H_S_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME, sme_elm_idx0_3, uimm1s2range, ZZ_s_mul_r, SMEMatrixTileS>;
4111+
def NAME # _H_D_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME, sme_elm_idx0_7, uimm0s2range, ZZ_d_mul_r, SMEMatrixTileD>;
4112+
4113+
// Vertical pseudos; operand types are identical to the horizontal ones.
def NAME # _V_B_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME, sme_elm_idx0_0, uimm3s2range, ZZ_b_mul_r, SMEMatrixTileB>;
4114+
def NAME # _V_H_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME, sme_elm_idx0_1, uimm2s2range, ZZ_h_mul_r, SMEMatrixTileH>;
4115+
def NAME # _V_S_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME, sme_elm_idx0_3, uimm1s2range, ZZ_s_mul_r, SMEMatrixTileS>;
4116+
def NAME # _V_D_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME, sme_elm_idx0_7, uimm0s2range, ZZ_d_mul_r, SMEMatrixTileD>;
41004117
}
41014118

41024119
class sme2_mova_tile_to_vec_vg4_multi_base<bits<2> sz, bit v, bits<6> op,
@@ -4228,6 +4245,16 @@ multiclass sme2_mova_tile_to_vec_vg4_multi<string mnemonic>{
42284245
// Four-vector MOVAZ definitions: the horizontal (_H) and vertical (_V)
// instruction groups, plus one pseudo per direction and element size (B/H/S/D).
multiclass sme2p1_movaz_tile_to_vec_vg4<string mnemonic>{
42294246
defm _H : sme2_mova_tile_to_vec_vg4_multi_base<0b0, 0b110, mnemonic>;
42304247
defm _V : sme2_mova_tile_to_vec_vg4_multi_base<0b1, 0b110, mnemonic>;
4248+
4249+
// Horizontal pseudos. The tile operand type goes from sme_elm_idx0_0 (B) to
// sme_elm_idx0_7 (D); the offset operand type is uimm2s4range for B,
// uimm1s4range for H, and uimm0s4range for both S and D.
def NAME # _H_B_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME, sme_elm_idx0_0, uimm2s4range, ZZZZ_b_mul_r, SMEMatrixTileB>;
4250+
def NAME # _H_H_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME, sme_elm_idx0_1, uimm1s4range, ZZZZ_h_mul_r, SMEMatrixTileH>;
4251+
def NAME # _H_S_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME, sme_elm_idx0_3, uimm0s4range, ZZZZ_s_mul_r, SMEMatrixTileS>;
4252+
def NAME # _H_D_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME, sme_elm_idx0_7, uimm0s4range, ZZZZ_d_mul_r, SMEMatrixTileD>;
4253+
4254+
// Vertical pseudos; operand types are identical to the horizontal ones.
def NAME # _V_B_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME, sme_elm_idx0_0, uimm2s4range, ZZZZ_b_mul_r, SMEMatrixTileB>;
4255+
def NAME # _V_H_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME, sme_elm_idx0_1, uimm1s4range, ZZZZ_h_mul_r, SMEMatrixTileH>;
4256+
def NAME # _V_S_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME, sme_elm_idx0_3, uimm0s4range, ZZZZ_s_mul_r, SMEMatrixTileS>;
4257+
def NAME # _V_D_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME, sme_elm_idx0_7, uimm0s4range, ZZZZ_d_mul_r, SMEMatrixTileD>;
42314258
}
42324259

42334260

0 commit comments

Comments
 (0)