Skip to content

[llvm][AArch64][Assembly]: Add SME_F8F16 and SME_F8F32 Ass/Disass. #70640

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions llvm/include/llvm/TargetParser/AArch64TargetParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,8 @@ enum ArchExtKind : unsigned {
AEK_SSVE_FP8DOT4 = 66, // FEAT_SSVE_FP8DOT4
AEK_LUT = 67, // FEAT_LUT
AEK_SME_LUTv2 = 68, // FEAT_SME_LUTv2
AEK_SMEF8F16 = 69, // FEAT_SME_F8F16
AEK_SMEF8F32 = 70, // FEAT_SME_F8F32
AEK_NUM_EXTENSIONS
};
using ExtensionBitset = Bitset<AEK_NUM_EXTENSIONS>;
Expand Down Expand Up @@ -289,6 +291,8 @@ inline constexpr ExtensionInfo Extensions[] = {
{"ssve-fp8dot4", AArch64::AEK_SSVE_FP8DOT4, "+ssve-fp8dot4", "-ssve-fp8dot4", FEAT_INIT, "+sme2", 0},
{"lut", AArch64::AEK_LUT, "+lut", "-lut", FEAT_INIT, "", 0},
{"sme-lutv2", AArch64::AEK_SME_LUTv2, "+sme-lutv2", "-sme-lutv2", FEAT_INIT, "", 0},
{"sme-f8f16", AArch64::AEK_SMEF8F16, "+sme-f8f16", "-sme-f8f16", FEAT_INIT, "+sme2,+fp8", 0},
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can remove this from here. We can add in the future if needed.
You can remove for sme-f8f16 and sme-f8f32

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is it different for this one only? As long as the others exist, why not keep this one too?

{"sme-f8f32", AArch64::AEK_SMEF8F32, "+sme-f8f32", "-sme-f8f32", FEAT_INIT, "+sme2,+fp8", 0},
// Special cases
{"none", AArch64::AEK_NONE, {}, {}, FEAT_INIT, "", ExtensionInfo::MaxFMVPriority},
};
Expand Down
13 changes: 10 additions & 3 deletions llvm/lib/Target/AArch64/AArch64.td
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,12 @@ def FeatureLUT: SubtargetFeature<"lut", "HasLUT", "true",
def FeatureSME_LUTv2 : SubtargetFeature<"sme-lutv2", "HasSME_LUTv2", "true",
"Enable Scalable Matrix Extension (SME) LUTv2 instructions (FEAT_SME_LUTv2)">;

// FEAT_SME_F8F16 subtarget feature, spelled "sme-f8f16". Sets the
// HasSMEF8F16 subtarget attribute to "true" and lists FeatureSME2 and
// FeatureFP8 as implied features.
def FeatureSMEF8F16 : SubtargetFeature<"sme-f8f16", "HasSMEF8F16", "true",
  "Enable Scalable Matrix Extension (SME) F8F16 instructions (FEAT_SME_F8F16)", [FeatureSME2, FeatureFP8]>;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove this from here too. We can add later if that is needed

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't "HasSMEF8F16" depend on this feature definition?
And HasSMEF8F16 is required by some instances, like FMOPA_MPPZZ_BtoH for example.


// FEAT_SME_F8F32 subtarget feature, spelled "sme-f8f32". Sets the
// HasSMEF8F32 subtarget attribute to "true" and lists FeatureSME2 and
// FeatureFP8 as implied features.
def FeatureSMEF8F32
    : SubtargetFeature<"sme-f8f32", "HasSMEF8F32", "true",
                       "Enable Scalable Matrix Extension (SME) F8F32 instructions (FEAT_SME_F8F32)",
                       [FeatureSME2, FeatureFP8]>;

def FeatureAppleA7SysReg : SubtargetFeature<"apple-a7-sysreg", "HasAppleA7SysReg", "true",
"Apple A7 (the CPU formerly known as Cyclone)">;

Expand Down Expand Up @@ -770,8 +776,8 @@ let F = [HasSVE2p1, HasSVE2p1_or_HasSME2, HasSVE2p1_or_HasSME2p1] in
def SVE2p1Unsupported : AArch64Unsupported;

def SVE2Unsupported : AArch64Unsupported {
let F = !listconcat([HasSVE2, HasSVE2orSME, HasSSVE_FP8FMA,
HasSVE2AES, HasSVE2SHA3, HasSVE2SM4, HasSVE2BitPerm],
let F = !listconcat([HasSVE2, HasSVE2orSME, HasSSVE_FP8FMA, HasSMEF8F16,
HasSMEF8F32, HasSVE2AES, HasSVE2SHA3, HasSVE2SM4, HasSVE2BitPerm],
SVE2p1Unsupported.F);
}

Expand All @@ -784,7 +790,8 @@ let F = [HasSME2p1, HasSVE2p1_or_HasSME2p1] in
def SME2p1Unsupported : AArch64Unsupported;

def SME2Unsupported : AArch64Unsupported {
let F = !listconcat([HasSME2, HasSVE2p1_or_HasSME2, HasSSVE_FP8FMA],
let F = !listconcat([HasSME2, HasSVE2p1_or_HasSME2, HasSSVE_FP8FMA,
HasSMEF8F16, HasSMEF8F32],
SME2p1Unsupported.F);
}

Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,10 @@ def HasLUT : Predicate<"Subtarget->hasLUT()">,
AssemblerPredicateWithAll<(all_of FeatureLUT), "lut">;
// Predicates for the SME LUTv2, SME F8F16 and SME F8F32 features. Each one
// pairs a Subtarget query string with an assembler predicate that requires
// the matching Feature record and carries the extension's name.
def HasSME_LUTv2
    : Predicate<"Subtarget->hasSME_LUTv2()">,
      AssemblerPredicateWithAll<(all_of FeatureSME_LUTv2), "sme-lutv2">;

def HasSMEF8F16
    : Predicate<"Subtarget->hasSMEF8F16()">,
      AssemblerPredicateWithAll<(all_of FeatureSMEF8F16), "sme-f8f16">;

def HasSMEF8F32
    : Predicate<"Subtarget->hasSMEF8F32()">,
      AssemblerPredicateWithAll<(all_of FeatureSMEF8F32), "sme-f8f32">;

// A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
// they should be enabled if either has been specified.
Expand Down
53 changes: 53 additions & 0 deletions llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -894,3 +894,56 @@ def LUTI4_4ZZT2Z : sme2_luti4_vector_vg4<0b00, 0b00,"luti4">;
let Predicates = [HasSME2p1, HasSME_LUTv2] in {
def LUTI4_S_4ZZT2Z : sme2_luti4_vector_vg4_strided<0b00, 0b00, "luti4">;
} //[HasSME2p1, HasSME_LUTv2]

// Instruction definitions gated on the HasSMEF8F16 predicate (assembler
// feature name "sme-f8f16"). Every record in this group has a BtoH name
// suffix. Where a definition passes null_frag, no intrinsic is supplied for
// it here -- presumably no selection pattern is attached yet; confirm against
// the multiclass being instantiated.
let Predicates = [HasSMEF8F16] in {
// "fvdot" / "fdot" definitions.
defm FVDOT_VG2_M2ZZI_BtoH : sme2p1_multi_vec_array_vg2_index_16b<"fvdot", 0b11, 0b110, ZZ_b_mul_r, ZPR4b8>;
defm FDOT_VG2_M2ZZI_BtoH : sme2p1_multi_vec_array_vg2_index_16b<"fdot", 0b11, 0b010, ZZ_b_mul_r, ZPR4b8>;
defm FDOT_VG4_M4ZZI_BtoH : sme2p1_multi_vec_array_vg4_index_16b<"fdot", 0b100, ZZZZ_b_mul_r, ZPR4b8>;
defm FDOT_VG2_M2ZZ_BtoH : sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0010001, MatrixOp16, ZZ_b, ZPR4b8>;
defm FDOT_VG4_M4ZZ_BtoH : sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0110001, MatrixOp16, ZZZZ_b, ZPR4b8>;
// TODO: Replace nxv16i8 by nxv16f8
defm FDOT_VG2_M2Z2Z_BtoH : sme2_dot_mla_add_sub_array_vg2_multi<"fdot", 0b0100100, MatrixOp16, ZZ_b_mul_r, nxv16i8, null_frag>;
defm FDOT_VG4_M4Z4Z_BtoH : sme2_dot_mla_add_sub_array_vg4_multi<"fdot", 0b0100100, MatrixOp16, ZZZZ_b_mul_r, nxv16i8, null_frag>;

// "fmlal" definitions.
def FMLAL_MZZI_BtoH : sme2_mla_ll_array_index_16b<"fmlal", 0b11, 0b00>;
defm FMLAL_VG2_M2ZZI_BtoH : sme2_multi_vec_array_vg2_index_16b<"fmlal", 0b10, 0b111>;
defm FMLAL_VG4_M4ZZI_BtoH : sme2_multi_vec_array_vg4_index_16b<"fmlal", 0b10, 0b110>;
def FMLAL_VG2_MZZ_BtoH : sme2_mla_long_array_single_16b<"fmlal">;
// TODO: Replace nxv16i8 by nxv16f8
defm FMLAL_VG2_M2ZZ_BtoH : sme2_fp_mla_long_array_vg2_single<"fmlal", 0b001, MatrixOp16, ZZ_b, ZPR4b8, nxv16i8, null_frag>;
defm FMLAL_VG4_M4ZZ_BtoH : sme2_fp_mla_long_array_vg4_single<"fmlal", 0b001, MatrixOp16, ZZZZ_b, ZPR4b8, nxv16i8, null_frag>;
defm FMLAL_VG2_M2Z2Z_BtoH : sme2_fp_mla_long_array_vg2_multi<"fmlal", 0b100, MatrixOp16, ZZ_b_mul_r, nxv16i8, null_frag>;
defm FMLAL_VG4_M4Z4Z_BtoH : sme2_fp_mla_long_array_vg4_multi<"fmlal", 0b100, MatrixOp16, ZZZZ_b_mul_r, nxv16i8, null_frag>;

// "fmopa" definition.
defm FMOPA_MPPZZ_BtoH : sme2p1_fmop_tile_fp16<"fmopa", 0b1, 0b0, 0b01, ZPR8>;

} //[HasSMEF8F16]

// Instruction definitions gated on the HasSMEF8F32 predicate (assembler
// feature name "sme-f8f32"). Every record in this group has a BtoS name
// suffix. Where a definition passes null_frag, no intrinsic is supplied for
// it here -- presumably no selection pattern is attached yet; confirm against
// the multiclass being instantiated.
let Predicates = [HasSMEF8F32] in {
// "fdot" definitions.
// TODO: Replace nxv16i8 by nxv16f8
defm FDOT_VG2_M2ZZI_BtoS : sme2_multi_vec_array_vg2_index_32b<"fdot", 0b01, 0b0111, ZZ_b_mul_r, ZPR4b8, nxv16i8, null_frag>;
defm FDOT_VG4_M4ZZI_BtoS : sme2_multi_vec_array_vg4_index_32b<"fdot", 0b0001, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, null_frag>;
defm FDOT_VG2_M2ZZ_BtoS : sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0010011, MatrixOp32, ZZ_b, ZPR4b8>;
defm FDOT_VG4_M4ZZ_BtoS : sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0110011, MatrixOp32, ZZZZ_b, ZPR4b8>;
// TODO: Replace nxv16i8 by nxv16f8
defm FDOT_VG2_M2Z2Z_BtoS : sme2_dot_mla_add_sub_array_vg2_multi<"fdot", 0b0100110, MatrixOp32, ZZ_b_mul_r, nxv16i8, null_frag>;
defm FDOT_VG4_M4Z4Z_BtoS : sme2_dot_mla_add_sub_array_vg4_multi<"fdot", 0b0100110, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, null_frag>;

// "fvdotb" / "fvdott" definitions; the two differ only in the bit argument
// passed to sme2_fp8_multi_vec_array_vg4_index.
def FVDOTB_VG4_M2ZZI_BtoS : sme2_fp8_multi_vec_array_vg4_index<"fvdotb", 0b0>;
def FVDOTT_VG4_M2ZZI_BtoS : sme2_fp8_multi_vec_array_vg4_index<"fvdott", 0b1>;

// "fmlall" definitions.
defm FMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"fmlall", 0b01, 0b000, null_frag>;
defm FMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"fmlall", 0b10, 0b100, null_frag>;
defm FMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"fmlall", 0b00, 0b1000, null_frag>;
// TODO: Replace nxv16i8 by nxv16f8
defm FMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"fmlall", 0b01000, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, null_frag>;
defm FMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg24_single<"fmlall", 0b000001, MatrixOp32, ZZ_b, ZPR4b8>;
defm FMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg24_single<"fmlall", 0b010001, MatrixOp32, ZZZZ_b, ZPR4b8>;
defm FMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"fmlall", 0b01000, MatrixOp32, ZZ_b_mul_r, nxv16i8, null_frag>;
defm FMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"fmlall", 0b01000, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, null_frag>;


// "fmopa" definition.
defm FMOPA_MPPZZ_BtoS : sme_outer_product_fp32<0b0, 0b01, ZPR8, "fmopa", null_frag>;

} //[HasSMEF8F32]

2 changes: 1 addition & 1 deletion llvm/lib/Target/AArch64/AArch64SchedA64FX.td
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def A64FXModel : SchedMachineModel {
list<Predicate> UnsupportedFeatures =
[HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, HasSVE2BitPerm, HasPAuth,
HasSVE2orSME, HasMTE, HasMatMulInt8, HasBF16, HasSME2, HasSME2p1, HasSVE2p1,
HasSVE2p1_or_HasSME2p1, HasSMEF16F16, HasSSVE_FP8FMA];
HasSVE2p1_or_HasSME2p1, HasSMEF16F16, HasSSVE_FP8FMA, HasSMEF8F16, HasSMEF8F32];

let FullInstRWOverlapCheck = 0;
}
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3660,6 +3660,8 @@ static const struct Extension {
{"ssve-fp8dot4", {AArch64::FeatureSSVE_FP8DOT4}},
{"lut", {AArch64::FeatureLUT}},
{"sme-lutv2", {AArch64::FeatureSME_LUTv2}},
{"sme-f8f16", {AArch64::FeatureSMEF8F16}},
{"sme-f8f32", {AArch64::FeatureSMEF8F32}},
};

static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
Expand Down Expand Up @@ -4578,7 +4580,6 @@ ParseStatus AArch64AsmParser::tryParseZTOperand(OperandVector &Operands) {
Operands.push_back(
AArch64Operand::CreateToken("]", getLoc(), getContext()));
}

return ParseStatus::Success;
}

Expand Down
107 changes: 103 additions & 4 deletions llvm/lib/Target/AArch64/SMEInstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -1922,6 +1922,17 @@ multiclass sme2_mla_long_array_single<string mnemonic, bits<2> op0, bits<2> op,
def : SME2_ZA_TwoOp_Multi_Single_Pat<NAME # _HtoS, intrinsic, uimm3s2range, ZPR4b16, zpr_ty, tileslicerange3s2>;
}

// Instruction class derived from sme2_mla_long_array, instantiated with
// MatrixOp16, uimm3s2range, ZPR8 and ZPR4b8. The only template parameter is
// the mnemonic. On top of the base class it declares the Zm, Zn and imm
// fields and places them (plus a fixed 1 in bit 20) into the encoding.
// NOTE(review): the meaning of the two leading 0b00 arguments is defined by
// sme2_mla_long_array, which is not fully visible here -- confirm there.
class sme2_mla_long_array_single_16b<string mnemonic>
: sme2_mla_long_array<0b00, 0b00, MatrixOp16, uimm3s2range, ZPR8, ZPR4b8, mnemonic> {
// 4-bit Zm, 5-bit Zn and 3-bit immediate fields.
bits<4> Zm;
bits<5> Zn;
bits<3> imm;
// Bit 20 is a fixed 1 for this class.
let Inst{20} = 0b1;
let Inst{19-16} = Zm;
let Inst{9-5} = Zn;
let Inst{2-0} = imm;
}

class sme2_mla_long_array_vg24_single<bits<2> op0, bit vg4, bits<2> op, bit o2,
MatrixOperand matrix_ty, RegisterOperand multi_vector_ty,
ZPRRegOp zpr_ty, string mnemonic, string vg_acronym>
Expand All @@ -1937,7 +1948,6 @@ class sme2_mla_long_array_vg24_single<bits<2> op0, bit vg4, bits<2> op, bit o2,
let Inst{1-0} = imm;
}


multiclass sme2_fp_mla_long_array_vg2_single<string mnemonic, bits<3> op, MatrixOperand matrix_ty,
RegisterOperand multi_vector_ty, ZPRRegOp vector_ty,
ValueType zpr_ty, SDPatternOperator intrinsic> {
Expand Down Expand Up @@ -1971,7 +1981,8 @@ multiclass sme2_fp_mla_long_array_vg4_single<string mnemonic, bits<3> op, Matrix
RegisterOperand multi_vector_ty, ZPRRegOp vector_ty,
ValueType zpr_ty, SDPatternOperator intrinsic> {
def NAME : sme2_mla_long_array_vg24_single<0b00, 0b1, op{2-1}, op{0}, matrix_ty, multi_vector_ty,
vector_ty, mnemonic, "vgx4">, SMEPseudo2Instr<NAME, 1>;
vector_ty, mnemonic, "vgx4">,
SMEPseudo2Instr<NAME, 1>;

def _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, uimm2s2range, multi_vector_ty, vector_ty,
SMEMatrixArray>;
Expand Down Expand Up @@ -2390,7 +2401,6 @@ multiclass sme2_zip_vector_vg2<string mnemonic, bit op> {

//===----------------------------------------------------------------------===//
// SME2 Dot Products and MLA

class sme2_multi_vec_array_vg2_index<bits<2> sz, bits<6> op, MatrixOperand matrix_ty,
RegisterOperand multi_vector_ty,
ZPRRegOp vector_ty, Operand index_ty,
Expand Down Expand Up @@ -2428,7 +2438,6 @@ multiclass sme2_multi_vec_array_vg2_index_32b<string mnemonic, bits<2> sz, bits<
bits<2> i;
let Inst{11-10} = i;
}

def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, vector_ty, VectorIndexS32b_timm, SMEMatrixArray>;

def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7, vector_ty, vt, VectorIndexS32b_timm, tileslice16>;
Expand All @@ -2439,6 +2448,7 @@ multiclass sme2_multi_vec_array_vg2_index_32b<string mnemonic, bits<2> sz, bits<
}

// SME2.1 multi-vec ternary indexed two registers 16-bit
// SME2 multi-vec indexed FP8 two-way dot product to FP16 two registers
multiclass sme2p1_multi_vec_array_vg2_index_16b<string mnemonic, bits<2> sz, bits<3> op,
RegisterOperand multi_vector_ty, ZPRRegOp zpr_ty> {
def NAME : sme2_multi_vec_array_vg2_index<sz, {op{2},?,?,op{1-0},?}, MatrixOp16,
Expand All @@ -2448,11 +2458,24 @@ multiclass sme2p1_multi_vec_array_vg2_index_16b<string mnemonic, bits<2> sz, bit
let Inst{11-10} = i{2-1};
let Inst{3} = i{0};
}

def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
(!cast<Instruction>(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
multi_vector_ty:$Zn, zpr_ty:$Zm, VectorIndexH:$i), 0>;
}

// SME2 multi-vec indexed FP8 two-way vertical dot product to single precision
// two registers
//
// Built on sme2_multi_vec_array_vg2_index with MatrixOp32, ZZ_b_mul_r, ZPR4b8
// and VectorIndexS. The bit parameter `T` is placed into the op field handed
// to the base class; two bits of that field are left unset (`?`).
// NOTE(review): the unset bits are presumably the ones this class fills from
// the index `i` below -- confirm against sme2_multi_vec_array_vg2_index.
class sme2_fp8_multi_vec_array_vg4_index<string mnemonic, bit T>
: sme2_multi_vec_array_vg2_index<0b11, {0b01,?,0b0, T,?}, MatrixOp32,
ZZ_b_mul_r, ZPR4b8, VectorIndexS, mnemonic> {

// The 2-bit index is split across two non-adjacent encoding bits.
bits<2> i;
let Inst{10} = i{1};
let Inst{3} = i{0};
// Override the assembly string so that "vgx4" is spelled out in the ZA
// operand.
let AsmString = !strconcat(mnemonic, "{\t$ZAda[$Rv, $imm3, vgx4], $Zn, $Zm$i}");
}

// SME2 multi-vec ternary indexed two registers 64-bit

class sme2_multi_vec_array_vg2_index_64b<bits<2> op,
Expand Down Expand Up @@ -2608,7 +2631,83 @@ multiclass sme2_multi_vec_array_vg4_index_64b<string mnemonic, bits<3> op,
(!cast<Instruction>(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexD32b_timm:$i1), 0>;
}

// FMLAL (multiple and indexed vector, FP8 to FP16)
//
// Shared encoding class for the vgx2 and vgx4 forms.
//   sz              - written to Inst{23-22}.
//   vg4             - written to Inst{15}; also selects "vgx4" (1) or "vgx2"
//                     (0) in the assembly string.
//   op              - 3 bits, split across Inst{12} and Inst{5-4}.
//   multi_vector_ty - register operand type used for $Zn.
// The Zn field itself is not encoded here; the instantiating multiclasses
// sme2_multi_vec_array_vg2_index_16b / sme2_multi_vec_array_vg4_index_16b
// declare Zn and place it in the encoding.
class sme2_multi_vec_array_vg24_index_16b<bits<2> sz, bit vg4, bits<3> op,
RegisterOperand multi_vector_ty, string mnemonic>
: I<(outs MatrixOp16:$ZAda),
(ins MatrixOp16:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2,
multi_vector_ty:$Zn, ZPR4b8:$Zm, VectorIndexB:$i),
mnemonic, "\t$ZAda[$Rv, $imm2, " # !if(vg4, "vgx4", "vgx2") # "], $Zn, $Zm$i",
"", []>, Sched<[]> {
bits<4> Zm;
bits<2> Rv;
bits<4> i;
bits<2> imm2;
let Inst{31-24} = 0b11000001;
let Inst{23-22} = sz;
let Inst{21-20} = 0b01;
let Inst{19-16} = Zm;
let Inst{15} = vg4;
let Inst{14-13} = Rv;
let Inst{12} = op{2};
// The 4-bit index is split: upper two bits here, lower two bits in Inst{3-2}.
let Inst{11-10} = i{3-2};
let Inst{5-4} = op{1-0};
let Inst{3-2} = i{1-0};
let Inst{1-0} = imm2;

// The ZA operand is both read and written: tie the output to the $_ZAda input.
let Constraints = "$ZAda = $_ZAda";
}

// Two-register (vg4 = 0) instantiation of sme2_multi_vec_array_vg24_index_16b
// using ZZ_b_mul_r for $Zn. Emits the instruction record plus an assembly
// alias whose ZA operand is written without the vector-group suffix.
multiclass sme2_multi_vec_array_vg2_index_16b<string mnemonic, bits<2> sz,
                                              bits<3> op> {
  def NAME
      : sme2_multi_vec_array_vg24_index_16b<sz, 0b0, op, ZZ_b_mul_r,
                                            mnemonic> {
    // Zn occupies four encoding bits in this form.
    bits<4> Zn;
    let Inst{9-6} = Zn;
  }

  // Alias accepted by the assembler only (emit priority 0).
  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i",
                  (!cast<Instruction>(NAME) MatrixOp16:$ZAda,
                      MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2,
                      ZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB:$i),
                  0>;
}

// Four-register (vg4 = 1) instantiation of sme2_multi_vec_array_vg24_index_16b
// using ZZZZ_b_mul_r for $Zn. Emits the instruction record plus an assembly
// alias whose ZA operand is written without the vector-group suffix.
multiclass sme2_multi_vec_array_vg4_index_16b<string mnemonic, bits<2> sz,
                                              bits<3> op> {
  def NAME
      : sme2_multi_vec_array_vg24_index_16b<sz, 0b1, op, ZZZZ_b_mul_r,
                                            mnemonic> {
    // Zn occupies three encoding bits in this form; bit 6 is a fixed zero.
    bits<3> Zn;
    let Inst{9-7} = Zn;
    let Inst{6} = 0b0;
  }

  // Alias accepted by the assembler only (emit priority 0).
  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i",
                  (!cast<Instruction>(NAME) MatrixOp16:$ZAda,
                      MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2,
                      ZZZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB:$i),
                  0>;
}

//===----------------------------------------------------------------------===//
// SME2 multi-vec indexed long long MLA one source 16-bit
//
// Encoding class with a MatrixOp16 destination, a single ZPR8 source ($Zn)
// and an indexed ZPR4b8 source ($Zm with a VectorIndexB32b_timm index).
//   sz - written to Inst{23-22}.
//   op - 2 bits, split across Inst{12} and Inst{4}.
// NOTE(review): the header above says "long long", but the instantiation
// visible in this change uses the "fmlal" mnemonic -- confirm the wording.
class sme2_mla_ll_array_index_16b<string mnemonic, bits<2> sz,bits<2> op>
: I<(outs MatrixOp16:$ZAda),
(ins MatrixOp16:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm3s2range:$imm3, ZPR8:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i),
mnemonic, "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
"", []>, Sched<[]> {
bits<4> Zm;
bits<2> Rv;
bits<4> i;
bits<5> Zn;
bits<3> imm3;
let Inst{31-24} = 0b11000001;
let Inst{23-22} = sz;
let Inst{21-20} = 0b00;
let Inst{19-16} = Zm;
// The 4-bit index is scattered over Inst{15}, Inst{11-10} and Inst{3}.
let Inst{15} = i{3};
let Inst{14-13} = Rv;
let Inst{12} = op{1};
let Inst{11-10} = i{2-1};
let Inst{9-5} = Zn;
let Inst{4} = op{0};
let Inst{3} = i{0};
let Inst{2-0} = imm3;

// The ZA operand is both read and written: tie the output to the $_ZAda input.
let Constraints = "$ZAda = $_ZAda";
}

// SME2 multi-vec indexed long long MLA one source 32-bit
class sme2_mla_ll_array_index_32b<string mnemonic, bits<2> sz, bits<3> op>
: I<(outs MatrixOp32:$ZAda),
Expand Down
Loading