Skip to content

[CLANG][LLVM][AArch64]SME2.1 intrinsics for MOVAZ tile to 2/4 vectors #88710

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jun 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions clang/include/clang/Basic/arm_sme.td
Original file line number Diff line number Diff line change
Expand Up @@ -764,4 +764,27 @@ let SMETargetGuard = "sme-f16f16" in {
[ImmCheck<0, ImmCheck0_1>]>;
}


// Emit the horizontal ("_H") and vertical ("_V") builtins that read-and-zero
// (MOVAZ) ZA tile slices into a group of 2 or 4 SVE vectors.
//   n_suffix - tile-size suffix used in the builtin name (e.g. "za8").
//   vg_num   - vector-group size, "2" or "4"; also selects the _x2/_x4
//              flavour of the underlying LLVM intrinsic.
//   t        - type specifiers the builtin is instantiated for.
//   i_prefix - LLVM intrinsic name prefix ("_horiz_x"/"_vert_x" # vg_num is
//              appended to it).
//   ch       - immediate-range check for the tile-number operand.
multiclass ZAReadz<string n_suffix, string vg_num, string t, string i_prefix, list<ImmCheck> ch> {
  let SMETargetGuard = "sme2p1" in {
    def NAME # _H : SInst<"svreadz_hor_" # n_suffix # "_{d}_vg" # vg_num, vg_num # "im", t,
                          MergeNone, i_prefix # "_horiz_x" # vg_num,
                          [IsStreaming, IsInOutZA], ch>;

    def NAME # _V : SInst<"svreadz_ver_" # n_suffix # "_{d}_vg" # vg_num, vg_num # "im", t,
                          MergeNone, i_prefix # "_vert_x" # vg_num,
                          [IsStreaming, IsInOutZA], ch>;
  }
}

// MOVAZ tile to 2 vectors. The tile-number immediate range narrows as the
// element size grows: ImmCheck0_0 for za8 (single tile) up to ImmCheck0_7
// for za64 (eight tiles).
defm SVREADZ_ZA8_X2 : ZAReadz<"za8", "2", "cUc", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_0>]>;
defm SVREADZ_ZA16_X2 : ZAReadz<"za16", "2", "sUshb", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_1>]>;
defm SVREADZ_ZA32_X2 : ZAReadz<"za32", "2", "iUif", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_3>]>;
defm SVREADZ_ZA64_X2 : ZAReadz<"za64", "2", "lUld", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_7>]>;

// MOVAZ tile to 4 vectors; same type coverage and tile-immediate ranges as
// the 2-vector forms above.
defm SVREADZ_ZA8_X4 : ZAReadz<"za8", "4", "cUc", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_0>]>;
defm SVREADZ_ZA16_X4 : ZAReadz<"za16", "4", "sUshb", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_1>]>;
defm SVREADZ_ZA32_X4 : ZAReadz<"za32", "4", "iUif", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_3>]>;
defm SVREADZ_ZA64_X4 : ZAReadz<"za64", "4", "lUld", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_7>]>;

} // let SVETargetGuard = InvalidMode
1,414 changes: 1,414 additions & 0 deletions clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c

Large diffs are not rendered by default.

18 changes: 18 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAArch64.td
Original file line number Diff line number Diff line change
Expand Up @@ -2841,6 +2841,24 @@ let TargetPrefix = "aarch64" in {
def int_aarch64_sme_writeq_horiz : SME_VectorToTile_Intrinsic;
def int_aarch64_sme_writeq_vert : SME_VectorToTile_Intrinsic;

// MOVAZ (tile to vector): returns 2 matched scalable vectors read from ZA.
// Arguments are (tile number, slice index); the tile number must be an
// immediate (ImmArg on index 0). IntrHasSideEffects is kept alongside
// IntrNoMem because the instruction also zeroes the ZA slices it reads,
// an update the IR memory attributes cannot express.
class SME_MOVAZ_TileToVector_X2_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
[llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>]>;

// Same contract as the X2 form, but returns 4 matched vectors.
class SME_MOVAZ_TileToVector_X4_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
LLVMMatchType<0>,LLVMMatchType<0>],
[llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>]>;

// Read-and-zero a ZA tile into 2 vectors, horizontal/vertical slices.
def int_aarch64_sme_readz_horiz_x2 : SME_MOVAZ_TileToVector_X2_Intrinsic;
def int_aarch64_sme_readz_vert_x2 : SME_MOVAZ_TileToVector_X2_Intrinsic;

// Read-and-zero a ZA tile into 4 vectors, horizontal/vertical slices.
def int_aarch64_sme_readz_horiz_x4 : SME_MOVAZ_TileToVector_X4_Intrinsic;
def int_aarch64_sme_readz_vert_x4 : SME_MOVAZ_TileToVector_X4_Intrinsic;


def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;

class SME_OuterProduct_Intrinsic
Expand Down
99 changes: 98 additions & 1 deletion llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,8 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
template <unsigned MaxIdx, unsigned Scale>
void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
unsigned Op);

void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs, unsigned Op,
unsigned MaxIdx, unsigned Scale);
bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
/// SVE Reg+Imm addressing mode.
template <int64_t Min, int64_t Max>
Expand Down Expand Up @@ -2003,6 +2004,34 @@ void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
CurDAG->RemoveDeadNode(N);
}

// Select an aarch64_sme_readz_* (MOVAZ tile-to-vector) intrinsic node into
// the pseudo instruction Op.
//   NumVecs      - number of result vectors the intrinsic produces (2 or 4).
//   MaxIdx/Scale - bounds passed to SelectSMETileSlice when folding the
//                  slice index into (base, offset) form.
// The ZA tile number stays on the pseudo as an immediate operand: the DAG
// cannot model a ZA tile as an output register next to the ZPR results, so
// the concrete tile register is substituted later, in EmitZAInstr.
void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
                                                 unsigned Op, unsigned MaxIdx,
                                                 unsigned Scale) {
  // Operand 3 is the slice index; bail out of custom selection if it cannot
  // be folded into the instruction's base+offset immediate form.
  SDValue SliceBase = N->getOperand(3);
  SDValue Base, Offset;
  if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
    return;

  SDLoc DL(N);
  SDValue Ops[] = {/*TileNum*/ N->getOperand(2), Base, Offset,
                   /*Chain*/ N->getOperand(0)};
  SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);

  // The pseudo yields a single Untyped register tuple; extract each result
  // vector as a consecutive zsub subregister of it.
  EVT VT = N->getValueType(0);
  for (unsigned I = 0; I < NumVecs; ++I)
    ReplaceUses(SDValue(N, I),
                CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
                                               SDValue(Mov, 0)));

  // Rewire the chain (result index NumVecs) and retire the original node.
  ReplaceUses(SDValue(N, NumVecs), SDValue(Mov, 1));
  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
unsigned NumOutVecs,
bool IsTupleInput,
Expand Down Expand Up @@ -5245,6 +5274,74 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
AArch64::MOVA_VG4_4ZMXI);
return;
}
case Intrinsic::aarch64_sme_readz_horiz_x2: {
if (VT == MVT::nxv16i8) {
SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2);
return;
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
VT == MVT::nxv8bf16) {
SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2);
return;
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2);
return;
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2);
return;
}
break;
}
case Intrinsic::aarch64_sme_readz_vert_x2: {
if (VT == MVT::nxv16i8) {
SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2);
return;
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
VT == MVT::nxv8bf16) {
SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2);
return;
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2);
return;
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2);
return;
}
break;
}
case Intrinsic::aarch64_sme_readz_horiz_x4: {
if (VT == MVT::nxv16i8) {
SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4);
return;
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
VT == MVT::nxv8bf16) {
SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4);
return;
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4);
return;
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4);
return;
}
break;
}
case Intrinsic::aarch64_sme_readz_vert_x4: {
if (VT == MVT::nxv16i8) {
SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4);
return;
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
VT == MVT::nxv8bf16) {
SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4);
return;
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4);
return;
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4);
return;
}
break;
}
case Intrinsic::swift_async_context_addr: {
SDLoc DL(Node);
SDValue Chain = Node->getOperand(0);
Expand Down
31 changes: 19 additions & 12 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2976,18 +2976,25 @@ MachineBasicBlock *AArch64TargetLowering::EmitZTInstr(MachineInstr &MI,
MachineBasicBlock *
AArch64TargetLowering::EmitZAInstr(unsigned Opc, unsigned BaseReg,
MachineInstr &MI,
MachineBasicBlock *BB, bool HasTile) const {
MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
unsigned StartIdx = 0;

bool HasTile = BaseReg != AArch64::ZA;
bool HasZPROut = HasTile && MI.getOperand(0).isReg();
if (HasZPROut) {
MIB.add(MI.getOperand(0)); // Output ZPR
++StartIdx;
}
if (HasTile) {
MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define);
MIB.addReg(BaseReg + MI.getOperand(0).getImm());
StartIdx = 1;
} else
MIB.addReg(BaseReg + MI.getOperand(StartIdx).getImm(),
RegState::Define); // Output ZA Tile
MIB.addReg(BaseReg + MI.getOperand(StartIdx).getImm()); // Input Za Tile
StartIdx++;
} else {
MIB.addReg(BaseReg, RegState::Define).addReg(BaseReg);

}
for (unsigned I = StartIdx; I < MI.getNumOperands(); ++I)
MIB.add(MI.getOperand(I));

Expand Down Expand Up @@ -3096,17 +3103,17 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
TII->get(MI.getOpcode()).TSFlags & AArch64::SMEMatrixTypeMask;
switch (SMEMatrixType) {
case (AArch64::SMEMatrixArray):
return EmitZAInstr(SMEOrigInstr, AArch64::ZA, MI, BB, /*HasTile*/ false);
return EmitZAInstr(SMEOrigInstr, AArch64::ZA, MI, BB);
case (AArch64::SMEMatrixTileB):
return EmitZAInstr(SMEOrigInstr, AArch64::ZAB0, MI, BB, /*HasTile*/ true);
return EmitZAInstr(SMEOrigInstr, AArch64::ZAB0, MI, BB);
case (AArch64::SMEMatrixTileH):
return EmitZAInstr(SMEOrigInstr, AArch64::ZAH0, MI, BB, /*HasTile*/ true);
return EmitZAInstr(SMEOrigInstr, AArch64::ZAH0, MI, BB);
case (AArch64::SMEMatrixTileS):
return EmitZAInstr(SMEOrigInstr, AArch64::ZAS0, MI, BB, /*HasTile*/ true);
return EmitZAInstr(SMEOrigInstr, AArch64::ZAS0, MI, BB);
case (AArch64::SMEMatrixTileD):
return EmitZAInstr(SMEOrigInstr, AArch64::ZAD0, MI, BB, /*HasTile*/ true);
return EmitZAInstr(SMEOrigInstr, AArch64::ZAD0, MI, BB);
case (AArch64::SMEMatrixTileQ):
return EmitZAInstr(SMEOrigInstr, AArch64::ZAQ0, MI, BB, /*HasTile*/ true);
return EmitZAInstr(SMEOrigInstr, AArch64::ZAQ0, MI, BB);
}
}

Expand Down
3 changes: 1 addition & 2 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -655,8 +655,7 @@ class AArch64TargetLowering : public TargetLowering {
MachineBasicBlock *BB) const;
MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const;
MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg,
MachineInstr &MI, MachineBasicBlock *BB,
bool HasTile) const;
MachineInstr &MI, MachineBasicBlock *BB) const;
MachineBasicBlock *EmitZTInstr(MachineInstr &MI, MachineBasicBlock *BB,
unsigned Opcode, bool Op0IsDef) const;
MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;
Expand Down
43 changes: 35 additions & 8 deletions llvm/lib/Target/AArch64/SMEInstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,12 @@ class sem2p1_zero_matrix_pseudo<string name, Operand index_ty, SMEMatrixTypeEnum
let usesCustomInserter = 1;
}

// Pseudo for MOVAZ with a multi-vector destination: the ZA tile number is
// carried as an explicit immediate ($tile) because the DAG cannot name a ZA
// tile as an output register; usesCustomInserter routes the pseudo through
// EmitZAInstr, which rewrites the immediate into the concrete tile register.
// NOTE(review): despite the "movez_to_tile" name this moves *from* a tile to
// a vector group ($Zn is the output) -- a rename would aid readability.
class sme2_movez_to_tile_multi_pseudo<string name, Operand tile_imm, Operand imm_ty, RegisterOperand vector_ty, SMEMatrixTypeEnum za_flag>
: SMEPseudo2Instr<name, 0>,
Pseudo<(outs vector_ty:$Zn), (ins tile_imm:$tile, MatrixIndexGPR32Op12_15:$Rs, imm_ty:$imm), []> {
let SMEMatrixType = za_flag;
let usesCustomInserter = 1;
}
//===----------------------------------------------------------------------===//
// SME pattern match helpers.
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -4000,15 +4006,15 @@ multiclass sme2_mova_tile_to_vec_vg2_multi_inst<bit v, bits<3> opc, string mnemo
def _B : sme2_mova_tile_to_vec_vg2_multi_base<0b00, v, opc, ZZ_b_mul_r,
!if(v, TileVectorOpV8,
TileVectorOpH8),
uimm3s2range, mnemonic> {
uimm3s2range, mnemonic>, SMEPseudo2Instr<NAME # _B, 1> {
bits<3> imm;
let Inst{7-5} = imm;
}

def _H : sme2_mova_tile_to_vec_vg2_multi_base<0b01, v, opc, ZZ_h_mul_r,
!if(v, TileVectorOpV16,
TileVectorOpH16),
uimm2s2range, mnemonic> {
uimm2s2range, mnemonic>, SMEPseudo2Instr<NAME # _H, 1> {
bits<1> ZAn;
bits<2> imm;
let Inst{7} = ZAn;
Expand All @@ -4018,7 +4024,7 @@ multiclass sme2_mova_tile_to_vec_vg2_multi_inst<bit v, bits<3> opc, string mnemo
def _S : sme2_mova_tile_to_vec_vg2_multi_base<0b10, v, opc, ZZ_s_mul_r,
!if(v, TileVectorOpV32,
TileVectorOpH32),
uimm1s2range, mnemonic> {
uimm1s2range, mnemonic>, SMEPseudo2Instr<NAME # _S, 1> {
bits<2> ZAn;
bits<1> imm;
let Inst{7-6} = ZAn;
Expand All @@ -4028,7 +4034,7 @@ multiclass sme2_mova_tile_to_vec_vg2_multi_inst<bit v, bits<3> opc, string mnemo
def _D : sme2_mova_tile_to_vec_vg2_multi_base<0b11, v, opc, ZZ_d_mul_r,
!if(v, TileVectorOpV64,
TileVectorOpH64),
uimm0s2range, mnemonic> {
uimm0s2range, mnemonic>, SMEPseudo2Instr<NAME # _D, 1> {
bits<3> ZAn;
let Inst{7-5} = ZAn;
}
Expand Down Expand Up @@ -4097,6 +4103,17 @@ multiclass sme2_mova_tile_to_vec_vg2_multi<string mnemonic>{
// MOVAZ tile to 2 vectors: real instructions for horizontal (_H) and
// vertical (_V) slice reads, plus one selection pseudo per element size.
// The tile-number immediate operand narrows with element size (za8 has a
// single tile, za64 has eight), and the slice-offset operand narrows to
// match the number of 2-slice groups per tile.
multiclass sme2p1_movaz_tile_to_vec_vg2<string mnemonic>{
defm _H : sme2_mova_tile_to_vec_vg2_multi_inst<0b0, 0b010, mnemonic>;
defm _V : sme2_mova_tile_to_vec_vg2_multi_inst<0b1, 0b010, mnemonic>;


def NAME # _H_B_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _H_B, sme_elm_idx0_0, uimm3s2range, ZZ_b_mul_r, SMEMatrixTileB>;
def NAME # _H_H_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _H_H, sme_elm_idx0_1, uimm2s2range, ZZ_h_mul_r, SMEMatrixTileH>;
def NAME # _H_S_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _H_S, sme_elm_idx0_3, uimm1s2range, ZZ_s_mul_r, SMEMatrixTileS>;
def NAME # _H_D_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _H_D, sme_elm_idx0_7, uimm0s2range, ZZ_d_mul_r, SMEMatrixTileD>;

def NAME # _V_B_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _V_B, sme_elm_idx0_0, uimm3s2range, ZZ_b_mul_r, SMEMatrixTileB>;
def NAME # _V_H_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _V_H, sme_elm_idx0_1, uimm2s2range, ZZ_h_mul_r, SMEMatrixTileH>;
def NAME # _V_S_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _V_S, sme_elm_idx0_3, uimm1s2range, ZZ_s_mul_r, SMEMatrixTileS>;
def NAME # _V_D_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _V_D, sme_elm_idx0_7, uimm0s2range, ZZ_d_mul_r, SMEMatrixTileD>;
}

class sme2_mova_tile_to_vec_vg4_multi_base<bits<2> sz, bit v, bits<6> op,
Expand Down Expand Up @@ -4130,7 +4147,7 @@ multiclass sme2_mova_tile_to_vec_vg4_multi_base<bit v, bits<3> opc, string mnemo
ZZZZ_b_mul_r,
!if(v, TileVectorOpV8,
TileVectorOpH8),
uimm2s4range, mnemonic> {
uimm2s4range, mnemonic>, SMEPseudo2Instr<NAME # _B, 1> {
bits<2> imm;
let Inst{6-5} = imm;
}
Expand All @@ -4139,7 +4156,7 @@ multiclass sme2_mova_tile_to_vec_vg4_multi_base<bit v, bits<3> opc, string mnemo
ZZZZ_h_mul_r,
!if(v, TileVectorOpV16,
TileVectorOpH16),
uimm1s4range, mnemonic> {
uimm1s4range, mnemonic>, SMEPseudo2Instr<NAME # _H, 1> {
bits<1> ZAn;
bits<1> imm;
let Inst{6} = ZAn;
Expand All @@ -4150,7 +4167,7 @@ multiclass sme2_mova_tile_to_vec_vg4_multi_base<bit v, bits<3> opc, string mnemo
ZZZZ_s_mul_r,
!if(v, TileVectorOpV32,
TileVectorOpH32),
uimm0s4range, mnemonic> {
uimm0s4range, mnemonic>, SMEPseudo2Instr<NAME # _S, 1> {
bits<2> ZAn;
let Inst{6-5} = ZAn;
}
Expand All @@ -4159,7 +4176,7 @@ multiclass sme2_mova_tile_to_vec_vg4_multi_base<bit v, bits<3> opc, string mnemo
ZZZZ_d_mul_r,
!if(v, TileVectorOpV64,
TileVectorOpH64),
uimm0s4range, mnemonic> {
uimm0s4range, mnemonic>, SMEPseudo2Instr<NAME # _D, 1> {
bits<3> ZAn;
let Inst{7-5} = ZAn;
}
Expand Down Expand Up @@ -4228,6 +4245,16 @@ multiclass sme2_mova_tile_to_vec_vg4_multi<string mnemonic>{
// MOVAZ tile to 4 vectors: mirrors the vg2 multiclass above but with ZZZZ
// (4-register) destination tuples and correspondingly smaller slice-offset
// immediate ranges (uimm2s4range down to uimm0s4range).
multiclass sme2p1_movaz_tile_to_vec_vg4<string mnemonic>{
defm _H : sme2_mova_tile_to_vec_vg4_multi_base<0b0, 0b110, mnemonic>;
defm _V : sme2_mova_tile_to_vec_vg4_multi_base<0b1, 0b110, mnemonic>;

def NAME # _H_B_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _H_B, sme_elm_idx0_0, uimm2s4range, ZZZZ_b_mul_r, SMEMatrixTileB>;
def NAME # _H_H_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _H_H, sme_elm_idx0_1, uimm1s4range, ZZZZ_h_mul_r, SMEMatrixTileH>;
def NAME # _H_S_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _H_S, sme_elm_idx0_3, uimm0s4range, ZZZZ_s_mul_r, SMEMatrixTileS>;
def NAME # _H_D_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _H_D, sme_elm_idx0_7, uimm0s4range, ZZZZ_d_mul_r, SMEMatrixTileD>;

def NAME # _V_B_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _V_B, sme_elm_idx0_0, uimm2s4range, ZZZZ_b_mul_r, SMEMatrixTileB>;
def NAME # _V_H_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _V_H, sme_elm_idx0_1, uimm1s4range, ZZZZ_h_mul_r, SMEMatrixTileH>;
def NAME # _V_S_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _V_S, sme_elm_idx0_3, uimm0s4range, ZZZZ_s_mul_r, SMEMatrixTileS>;
def NAME # _V_D_PSEUDO : sme2_movez_to_tile_multi_pseudo<NAME # _V_D, sme_elm_idx0_7, uimm0s4range, ZZZZ_d_mul_r, SMEMatrixTileD>;
}


Expand Down
Loading
Loading