Skip to content

Commit ae3471a

Browse files
CarolineConcatto authored and AlexisPerry committed
[CLANG][LLVM][AArch64]SME2.1 intrinsics for MOVAZ tile to 2/4 vectors (llvm#88710)
According to the specification in ARM-software/acle#309, this adds the intrinsics:

// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
// _za64_s64, _za64_u64 and _za64_f64
svint8x2_t svreadz_hor_za8_s8_vg2(uint64_t tile, uint32_t slice)
    __arm_streaming __arm_inout("za");

// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
// _za64_s64, _za64_u64 and _za64_f64
svint8x4_t svreadz_hor_za8_s8_vg4(uint64_t tile, uint32_t slice)
    __arm_streaming __arm_inout("za");

// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
// _za64_s64, _za64_u64 and _za64_f64
svint8x2_t svreadz_ver_za8_s8_vg2(uint64_t tile, uint32_t slice)
    __arm_streaming __arm_inout("za");

// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
// _za64_s64, _za64_u64 and _za64_f64
svint8x4_t svreadz_ver_za8_s8_vg4(uint64_t tile, uint32_t slice)
    __arm_streaming __arm_inout("za");
1 parent 4af0c48 commit ae3471a

File tree

8 files changed

+2067
-23
lines changed

8 files changed

+2067
-23
lines changed

clang/include/clang/Basic/arm_sme.td

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -764,4 +764,27 @@ let SMETargetGuard = "sme-f16f16" in {
764764
[ImmCheck<0, ImmCheck0_1>]>;
765765
}
766766

767+
768+
// Defines the horizontal (NAME_H) and vertical (NAME_V) svreadz builtins for
// one ZA element size / vector-group combination.
//   n_suffix : ZA size suffix spliced into the builtin name (e.g. "za8").
//   vg_num   : vector-group count as a string; used in the "_vg<N>" name
//              suffix, as the leading part of the prototype string, and in
//              the "_x<N>" suffix of the LLVM intrinsic name.
//   t        : type-spec string forwarded unchanged to SInst.
//   i_prefix : LLVM intrinsic name prefix; "_horiz_x<N>" / "_vert_x<N>" is
//              appended to it.
//   ch       : immediate-range checks forwarded unchanged to SInst.
// Both records are guarded by the "sme2p1" target feature and are flagged
// IsStreaming and IsInOutZA.
multiclass ZAReadz<string n_suffix, string vg_num, string t, string i_prefix, list<ImmCheck> ch> {
769+
let SMETargetGuard = "sme2p1" in {
770+
// Horizontal slice read: svreadz_hor_<n_suffix>_{d}_vg<vg_num>.
def NAME # _H : SInst<"svreadz_hor_" # n_suffix # "_{d}_vg" # vg_num, vg_num # "im", t,
771+
MergeNone, i_prefix # "_horiz_x" # vg_num,
772+
[IsStreaming, IsInOutZA], ch>;
773+
774+
// Vertical slice read: svreadz_ver_<n_suffix>_{d}_vg<vg_num>.
def NAME # _V : SInst<"svreadz_ver_" # n_suffix # "_{d}_vg" # vg_num, vg_num # "im", t,
775+
MergeNone, i_prefix # "_vert_x" #vg_num,
776+
[IsStreaming, IsInOutZA], ch>;
777+
}
778+
}
779+
780+
// Instantiate ZAReadz for every ZA element size (za8/za16/za32/za64), first
// for two-vector ("2") and then for four-vector ("4") groups. All of them map
// onto the "aarch64_sme_readz" intrinsic family. Each attaches an ImmCheck to
// argument 0; going by the check names, the permitted range widens with the
// element size (0_0, 0_1, 0_3, 0_7) -- presumably the tile number; confirm
// against the ACLE specification.
defm SVREADZ_ZA8_X2 : ZAReadz<"za8", "2", "cUc", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_0>]>;
781+
defm SVREADZ_ZA16_X2 : ZAReadz<"za16", "2", "sUshb", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_1>]>;
782+
defm SVREADZ_ZA32_X2 : ZAReadz<"za32", "2", "iUif", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_3>]>;
783+
defm SVREADZ_ZA64_X2 : ZAReadz<"za64", "2", "lUld", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_7>]>;
784+
785+
defm SVREADZ_ZA8_X4 : ZAReadz<"za8", "4", "cUc", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_0>]>;
786+
defm SVREADZ_ZA16_X4 : ZAReadz<"za16", "4", "sUshb", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_1>]>;
787+
defm SVREADZ_ZA32_X4 : ZAReadz<"za32", "4", "iUif", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_3>]>;
788+
defm SVREADZ_ZA64_X4 : ZAReadz<"za64", "4", "lUld", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_7>]>;
789+
767790
} // let SVETargetGuard = InvalidMode

clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c

Lines changed: 1414 additions & 0 deletions
Large diffs are not rendered by default.

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2841,6 +2841,24 @@ let TargetPrefix = "aarch64" in {
28412841
def int_aarch64_sme_writeq_horiz : SME_VectorToTile_Intrinsic;
28422842
def int_aarch64_sme_writeq_vert : SME_VectorToTile_Intrinsic;
28432843

2844+
// Signature shared by the two-vector readz intrinsics: returns two vectors of
// the same (overloaded) type and takes two i32 arguments, the first of which
// must be an immediate. Marked IntrNoMem together with IntrHasSideEffects.
// NOTE(review): the arguments are presumably (tile, slice), matching the ACLE
// prototype -- confirm.
class SME_MOVAZ_TileToVector_X2_Intrinsic
2845+
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
2846+
[llvm_i32_ty, llvm_i32_ty],
2847+
[IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>]>;
2848+
2849+
// Signature shared by the four-vector readz intrinsics: returns four vectors
// of the same (overloaded) type and takes two i32 arguments, the first of
// which must be an immediate. Marked IntrNoMem together with
// IntrHasSideEffects.
// NOTE(review): the arguments are presumably (tile, slice), matching the ACLE
// prototype -- confirm.
class SME_MOVAZ_TileToVector_X4_Intrinsic
2850+
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
2851+
LLVMMatchType<0>,LLVMMatchType<0>],
2852+
[llvm_i32_ty, llvm_i32_ty],
2853+
[IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>]>;
2854+
2855+
// Horizontal and vertical readz intrinsics, in two-vector (x2) and
// four-vector (x4) forms.
def int_aarch64_sme_readz_horiz_x2 : SME_MOVAZ_TileToVector_X2_Intrinsic;
2856+
def int_aarch64_sme_readz_vert_x2 : SME_MOVAZ_TileToVector_X2_Intrinsic;
2857+
2858+
def int_aarch64_sme_readz_horiz_x4 : SME_MOVAZ_TileToVector_X4_Intrinsic;
2859+
def int_aarch64_sme_readz_vert_x4 : SME_MOVAZ_TileToVector_X4_Intrinsic;
2860+
2861+
28442862
def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
28452863

28462864
class SME_OuterProduct_Intrinsic

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 98 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -395,7 +395,8 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
395395
template <unsigned MaxIdx, unsigned Scale>
396396
void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
397397
unsigned Op);
398-
398+
void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs, unsigned Op,
399+
unsigned MaxIdx, unsigned Scale);
399400
bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
400401
/// SVE Reg+Imm addressing mode.
401402
template <int64_t Min, int64_t Max>
@@ -2003,6 +2004,34 @@ void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
20032004
CurDAG->RemoveDeadNode(N);
20042005
}
20052006

2007+
/// Replace the multi-vector readz intrinsic node \p N with the machine node
/// \p Op.
///
/// \param N       Node to select. Operand 0 is used as the chain, operand 2
///                as the tile number and operand 3 as the slice base.
/// \param NumVecs Number of vector results of \p N; the chain result follows
///                them at index \p NumVecs.
/// \param Op      Opcode of the machine node to create.
/// \param MaxIdx  Forwarded to SelectSMETileSlice.
/// \param Scale   Forwarded to SelectSMETileSlice.
///
/// If SelectSMETileSlice rejects the slice operand the function returns
/// without touching \p N.
void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2008+
unsigned Op, unsigned MaxIdx,
2009+
unsigned Scale) {
2010+
2011+
// Split the slice operand into a base and an offset.
SDValue SliceBase = N->getOperand(3);
2012+
SDValue Base, Offset;
2013+
// NOTE(review): on failure N is left as it was -- confirm that is intended.
if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2014+
return;
2015+
// The DAG cannot select a ZA tile as an output register alongside the
2016+
// Z register result, so the tile number is passed through as an operand
2017+
// here; the actual ZA tile register is computed later (see EmitZAInstr).
2018+
SDLoc DL(N);
2019+
SDValue Ops[] = {/*TileNum*/ N->getOperand(2), Base, Offset,
2020+
/*Chain*/ N->getOperand(0)};
2021+
// Result 0 is the untyped value the vectors are extracted from; result 1 is
// the chain.
SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2022+
2023+
// Redirect each vector result of N to the matching sub-register
// (zsub0 + I) of the machine node's result.
EVT VT = N->getValueType(0);
2024+
for (unsigned I = 0; I < NumVecs; ++I)
2025+
ReplaceUses(SDValue(N, I),
2026+
CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2027+
SDValue(Mov, 0)));
2028+
2029+
// Redirect the chain result, which follows the NumVecs vector results.
2030+
unsigned ChainIdx = NumVecs;
2031+
ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2032+
CurDAG->RemoveDeadNode(N);
2033+
}
2034+
20062035
void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
20072036
unsigned NumOutVecs,
20082037
bool IsTupleInput,
@@ -5245,6 +5274,74 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
52455274
AArch64::MOVA_VG4_4ZMXI);
52465275
return;
52475276
}
5277+
case Intrinsic::aarch64_sme_readz_horiz_x2: {
5278+
if (VT == MVT::nxv16i8) {
5279+
SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2);
5280+
return;
5281+
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5282+
VT == MVT::nxv8bf16) {
5283+
SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2);
5284+
return;
5285+
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5286+
SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2);
5287+
return;
5288+
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5289+
SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2);
5290+
return;
5291+
}
5292+
break;
5293+
}
5294+
case Intrinsic::aarch64_sme_readz_vert_x2: {
5295+
if (VT == MVT::nxv16i8) {
5296+
SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2);
5297+
return;
5298+
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5299+
VT == MVT::nxv8bf16) {
5300+
SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2);
5301+
return;
5302+
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5303+
SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2);
5304+
return;
5305+
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5306+
SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2);
5307+
return;
5308+
}
5309+
break;
5310+
}
5311+
case Intrinsic::aarch64_sme_readz_horiz_x4: {
5312+
if (VT == MVT::nxv16i8) {
5313+
SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4);
5314+
return;
5315+
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5316+
VT == MVT::nxv8bf16) {
5317+
SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4);
5318+
return;
5319+
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5320+
SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4);
5321+
return;
5322+
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5323+
SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4);
5324+
return;
5325+
}
5326+
break;
5327+
}
5328+
case Intrinsic::aarch64_sme_readz_vert_x4: {
5329+
if (VT == MVT::nxv16i8) {
5330+
SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4);
5331+
return;
5332+
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5333+
VT == MVT::nxv8bf16) {
5334+
SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4);
5335+
return;
5336+
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5337+
SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4);
5338+
return;
5339+
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5340+
SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4);
5341+
return;
5342+
}
5343+
break;
5344+
}
52485345
case Intrinsic::swift_async_context_addr: {
52495346
SDLoc DL(Node);
52505347
SDValue Chain = Node->getOperand(0);

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2976,18 +2976,25 @@ MachineBasicBlock *AArch64TargetLowering::EmitZTInstr(MachineInstr &MI,
29762976
MachineBasicBlock *
29772977
AArch64TargetLowering::EmitZAInstr(unsigned Opc, unsigned BaseReg,
29782978
MachineInstr &MI,
2979-
MachineBasicBlock *BB, bool HasTile) const {
2979+
MachineBasicBlock *BB) const {
29802980
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
29812981
MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
29822982
unsigned StartIdx = 0;
29832983

2984+
bool HasTile = BaseReg != AArch64::ZA;
2985+
bool HasZPROut = HasTile && MI.getOperand(0).isReg();
2986+
if (HasZPROut) {
2987+
MIB.add(MI.getOperand(0)); // Output ZPR
2988+
++StartIdx;
2989+
}
29842990
if (HasTile) {
2985-
MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define);
2986-
MIB.addReg(BaseReg + MI.getOperand(0).getImm());
2987-
StartIdx = 1;
2988-
} else
2991+
MIB.addReg(BaseReg + MI.getOperand(StartIdx).getImm(),
2992+
RegState::Define); // Output ZA Tile
2993+
MIB.addReg(BaseReg + MI.getOperand(StartIdx).getImm()); // Input Za Tile
2994+
StartIdx++;
2995+
} else {
29892996
MIB.addReg(BaseReg, RegState::Define).addReg(BaseReg);
2990-
2997+
}
29912998
for (unsigned I = StartIdx; I < MI.getNumOperands(); ++I)
29922999
MIB.add(MI.getOperand(I));
29933000

@@ -3096,17 +3103,17 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
30963103
TII->get(MI.getOpcode()).TSFlags & AArch64::SMEMatrixTypeMask;
30973104
switch (SMEMatrixType) {
30983105
case (AArch64::SMEMatrixArray):
3099-
return EmitZAInstr(SMEOrigInstr, AArch64::ZA, MI, BB, /*HasTile*/ false);
3106+
return EmitZAInstr(SMEOrigInstr, AArch64::ZA, MI, BB);
31003107
case (AArch64::SMEMatrixTileB):
3101-
return EmitZAInstr(SMEOrigInstr, AArch64::ZAB0, MI, BB, /*HasTile*/ true);
3108+
return EmitZAInstr(SMEOrigInstr, AArch64::ZAB0, MI, BB);
31023109
case (AArch64::SMEMatrixTileH):
3103-
return EmitZAInstr(SMEOrigInstr, AArch64::ZAH0, MI, BB, /*HasTile*/ true);
3110+
return EmitZAInstr(SMEOrigInstr, AArch64::ZAH0, MI, BB);
31043111
case (AArch64::SMEMatrixTileS):
3105-
return EmitZAInstr(SMEOrigInstr, AArch64::ZAS0, MI, BB, /*HasTile*/ true);
3112+
return EmitZAInstr(SMEOrigInstr, AArch64::ZAS0, MI, BB);
31063113
case (AArch64::SMEMatrixTileD):
3107-
return EmitZAInstr(SMEOrigInstr, AArch64::ZAD0, MI, BB, /*HasTile*/ true);
3114+
return EmitZAInstr(SMEOrigInstr, AArch64::ZAD0, MI, BB);
31083115
case (AArch64::SMEMatrixTileQ):
3109-
return EmitZAInstr(SMEOrigInstr, AArch64::ZAQ0, MI, BB, /*HasTile*/ true);
3116+
return EmitZAInstr(SMEOrigInstr, AArch64::ZAQ0, MI, BB);
31103117
}
31113118
}
31123119

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -655,8 +655,7 @@ class AArch64TargetLowering : public TargetLowering {
655655
MachineBasicBlock *BB) const;
656656
MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const;
657657
MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg,
658-
MachineInstr &MI, MachineBasicBlock *BB,
659-
bool HasTile) const;
658+
MachineInstr &MI, MachineBasicBlock *BB) const;
660659
MachineBasicBlock *EmitZTInstr(MachineInstr &MI, MachineBasicBlock *BB,
661660
unsigned Opcode, bool Op0IsDef) const;
662661
MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;

llvm/lib/Target/AArch64/SMEInstrFormats.td

Lines changed: 35 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,12 @@ class sem2p1_zero_matrix_pseudo<string name, Operand index_ty, SMEMatrixTypeEnum
111111
let usesCustomInserter = 1;
112112
}
113113

114+
// Pseudo instruction for the multi-vector MOVAZ reads. It produces a vector
// register operand ($Zn) and takes the tile number as an immediate ($tile),
// a slice index register ($Rs) and an immediate offset ($imm).
//   name    : passed to SMEPseudo2Instr (with flag 0) to tie this pseudo to
//             its real instruction.
//   za_flag : stored as SMEMatrixType.
// usesCustomInserter is set, so the pseudo is expanded by the target's
// custom inserter.
// NOTE(review): despite "to_tile" in the class name, the operands describe a
// tile-to-vector move (vector output, tile input).
class sme2_movez_to_tile_multi_pseudo<string name, Operand tile_imm, Operand imm_ty, RegisterOperand vector_ty, SMEMatrixTypeEnum za_flag>
115+
: SMEPseudo2Instr<name, 0>,
116+
Pseudo<(outs vector_ty:$Zn), (ins tile_imm:$tile, MatrixIndexGPR32Op12_15:$Rs, imm_ty:$imm), []> {
117+
let SMEMatrixType = za_flag;
118+
let usesCustomInserter = 1;
119+
}
114120
//===----------------------------------------------------------------------===//
115121
// SME pattern match helpers.
116122
//===----------------------------------------------------------------------===//
@@ -4000,15 +4006,15 @@ multiclass sme2_mova_tile_to_vec_vg2_multi_inst<bit v, bits<3> opc, string mnemo
40004006
def _B : sme2_mova_tile_to_vec_vg2_multi_base<0b00, v, opc, ZZ_b_mul_r,
40014007
!if(v, TileVectorOpV8,
40024008
TileVectorOpH8),
4003-
uimm3s2range, mnemonic> {
4009+
uimm3s2range, mnemonic>, SMEPseudo2Instr<NAME # _B, 1> {
40044010
bits<3> imm;
40054011
let Inst{7-5} = imm;
40064012
}
40074013

40084014
def _H : sme2_mova_tile_to_vec_vg2_multi_base<0b01, v, opc, ZZ_h_mul_r,
40094015
!if(v, TileVectorOpV16,
40104016
TileVectorOpH16),
4011-
uimm2s2range, mnemonic> {
4017+
uimm2s2range, mnemonic>, SMEPseudo2Instr<NAME # _H, 1> {
40124018
bits<1> ZAn;
40134019
bits<2> imm;
40144020
let Inst{7} = ZAn;
@@ -4018,7 +4024,7 @@ multiclass sme2_mova_tile_to_vec_vg2_multi_inst<bit v, bits<3> opc, string mnemo
40184024
def _S : sme2_mova_tile_to_vec_vg2_multi_base<0b10, v, opc, ZZ_s_mul_r,
40194025
!if(v, TileVectorOpV32,
40204026
TileVectorOpH32),
4021-
uimm1s2range, mnemonic> {
4027+
uimm1s2range, mnemonic>, SMEPseudo2Instr<NAME # _S, 1> {
40224028
bits<2> ZAn;
40234029
bits<1> imm;
40244030
let Inst{7-6} = ZAn;
@@ -4028,7 +4034,7 @@ multiclass sme2_mova_tile_to_vec_vg2_multi_inst<bit v, bits<3> opc, string mnemo
40284034
def _D : sme2_mova_tile_to_vec_vg2_multi_base<0b11, v, opc, ZZ_d_mul_r,
40294035
!if(v, TileVectorOpV64,
40304036
TileVectorOpH64),
4031-
uimm0s2range, mnemonic> {
4037+
uimm0s2range, mnemonic>, SMEPseudo2Instr<NAME # _D, 1> {
40324038
bits<3> ZAn;
40334039
let Inst{7-5} = ZAn;
40344040
}
@@ -4097,6 +4103,17 @@ multiclass sme2_mova_tile_to_vec_vg2_multi<string mnemonic>{
40974103
// Two-vector MOVAZ (tile to vectors): the real horizontal (_H) and vertical
// (_V) instructions plus one _PSEUDO per direction and element size
// (B/H/S/D). Each pseudo is given the name of its real instruction
// (NAME # _<dir>_<size>), a tile-immediate operand type, an offset range
// operand type, a two-register vector operand and the matching
// SMEMatrixTile* flag.
multiclass sme2p1_movaz_tile_to_vec_vg2<string mnemonic>{
40984104
defm _H : sme2_mova_tile_to_vec_vg2_multi_inst<0b0, 0b010, mnemonic>;
40994105
defm _V : sme2_mova_tile_to_vec_vg2_multi_inst<0b1, 0b010, mnemonic>;
4106+
4107+
4108+
// Horizontal pseudos.
def NAME # _H_B_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _H_B, sme_elm_idx0_0, uimm3s2range, ZZ_b_mul_r, SMEMatrixTileB>;
4109+
def NAME # _H_H_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _H_H, sme_elm_idx0_1, uimm2s2range, ZZ_h_mul_r, SMEMatrixTileH>;
4110+
def NAME # _H_S_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _H_S, sme_elm_idx0_3, uimm1s2range, ZZ_s_mul_r, SMEMatrixTileS>;
4111+
def NAME # _H_D_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _H_D, sme_elm_idx0_7, uimm0s2range, ZZ_d_mul_r, SMEMatrixTileD>;
4112+
4113+
// Vertical pseudos.
def NAME # _V_B_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _V_B, sme_elm_idx0_0, uimm3s2range, ZZ_b_mul_r, SMEMatrixTileB>;
4114+
def NAME # _V_H_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _V_H, sme_elm_idx0_1, uimm2s2range, ZZ_h_mul_r, SMEMatrixTileH>;
4115+
def NAME # _V_S_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _V_S, sme_elm_idx0_3, uimm1s2range, ZZ_s_mul_r, SMEMatrixTileS>;
4116+
def NAME # _V_D_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _V_D, sme_elm_idx0_7, uimm0s2range, ZZ_d_mul_r, SMEMatrixTileD>;
41004117
}
41014118

41024119
class sme2_mova_tile_to_vec_vg4_multi_base<bits<2> sz, bit v, bits<6> op,
@@ -4130,7 +4147,7 @@ multiclass sme2_mova_tile_to_vec_vg4_multi_base<bit v, bits<3> opc, string mnemo
41304147
ZZZZ_b_mul_r,
41314148
!if(v, TileVectorOpV8,
41324149
TileVectorOpH8),
4133-
uimm2s4range, mnemonic> {
4150+
uimm2s4range, mnemonic>, SMEPseudo2Instr<NAME # _B, 1> {
41344151
bits<2> imm;
41354152
let Inst{6-5} = imm;
41364153
}
@@ -4139,7 +4156,7 @@ multiclass sme2_mova_tile_to_vec_vg4_multi_base<bit v, bits<3> opc, string mnemo
41394156
ZZZZ_h_mul_r,
41404157
!if(v, TileVectorOpV16,
41414158
TileVectorOpH16),
4142-
uimm1s4range, mnemonic> {
4159+
uimm1s4range, mnemonic>, SMEPseudo2Instr<NAME # _H, 1> {
41434160
bits<1> ZAn;
41444161
bits<1> imm;
41454162
let Inst{6} = ZAn;
@@ -4150,7 +4167,7 @@ multiclass sme2_mova_tile_to_vec_vg4_multi_base<bit v, bits<3> opc, string mnemo
41504167
ZZZZ_s_mul_r,
41514168
!if(v, TileVectorOpV32,
41524169
TileVectorOpH32),
4153-
uimm0s4range, mnemonic> {
4170+
uimm0s4range, mnemonic>, SMEPseudo2Instr<NAME # _S, 1> {
41544171
bits<2> ZAn;
41554172
let Inst{6-5} = ZAn;
41564173
}
@@ -4159,7 +4176,7 @@ multiclass sme2_mova_tile_to_vec_vg4_multi_base<bit v, bits<3> opc, string mnemo
41594176
ZZZZ_d_mul_r,
41604177
!if(v, TileVectorOpV64,
41614178
TileVectorOpH64),
4162-
uimm0s4range, mnemonic> {
4179+
uimm0s4range, mnemonic>, SMEPseudo2Instr<NAME # _D, 1> {
41634180
bits<3> ZAn;
41644181
let Inst{7-5} = ZAn;
41654182
}
@@ -4228,6 +4245,16 @@ multiclass sme2_mova_tile_to_vec_vg4_multi<string mnemonic>{
42284245
// Four-vector MOVAZ (tile to vectors): the real horizontal (_H) and vertical
// (_V) instructions plus one _PSEUDO per direction and element size
// (B/H/S/D). Each pseudo is given the name of its real instruction
// (NAME # _<dir>_<size>), a tile-immediate operand type, an offset range
// operand type, a four-register vector operand and the matching
// SMEMatrixTile* flag.
multiclass sme2p1_movaz_tile_to_vec_vg4<string mnemonic>{
42294246
defm _H : sme2_mova_tile_to_vec_vg4_multi_base<0b0, 0b110, mnemonic>;
42304247
defm _V : sme2_mova_tile_to_vec_vg4_multi_base<0b1, 0b110, mnemonic>;
4248+
4249+
// Horizontal pseudos.
def NAME # _H_B_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _H_B, sme_elm_idx0_0, uimm2s4range, ZZZZ_b_mul_r, SMEMatrixTileB>;
4250+
def NAME # _H_H_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _H_H, sme_elm_idx0_1, uimm1s4range, ZZZZ_h_mul_r, SMEMatrixTileH>;
4251+
def NAME # _H_S_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _H_S, sme_elm_idx0_3, uimm0s4range, ZZZZ_s_mul_r, SMEMatrixTileS>;
4252+
def NAME # _H_D_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _H_D, sme_elm_idx0_7, uimm0s4range, ZZZZ_d_mul_r, SMEMatrixTileD>;
4253+
4254+
// Vertical pseudos.
def NAME # _V_B_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _V_B, sme_elm_idx0_0, uimm2s4range, ZZZZ_b_mul_r, SMEMatrixTileB>;
4255+
def NAME # _V_H_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _V_H, sme_elm_idx0_1, uimm1s4range, ZZZZ_h_mul_r, SMEMatrixTileH>;
4256+
def NAME # _V_S_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _V_S, sme_elm_idx0_3, uimm0s4range, ZZZZ_s_mul_r, SMEMatrixTileS>;
4257+
def NAME # _V_D_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _V_D, sme_elm_idx0_7, uimm0s4range, ZZZZ_d_mul_r, SMEMatrixTileD>;
42314258
}
42324259

42334260

0 commit comments

Comments
 (0)