Skip to content

Commit f6ca0ed

Browse files
[llvm][AArch64][Assembly]: Add SME_F8F16 and SME_F8F32 Ass/Disass. (#70640)
This patch adds the SME_F8F16 and SME_F8F32 feature flags, and the assembly/disassembly for the following SME2 instructions:
* SME:
  - FMLAL, FMLALL
  - FVDOT, FVDOTT
  - FVDOTB
  - FMOPA

This follows the documentation at https://developer.arm.com/documentation/ddi0602/2023-09

Co-authored-by: Caroline Concatto <[email protected]>
1 parent 4ae4167 commit f6ca0ed

File tree

16 files changed

+1404
-14
lines changed

16 files changed

+1404
-14
lines changed

llvm/include/llvm/TargetParser/AArch64TargetParser.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,8 @@ enum ArchExtKind : unsigned {
170170
AEK_SSVE_FP8DOT4 = 66, // FEAT_SSVE_FP8DOT4
171171
AEK_LUT = 67, // FEAT_LUT
172172
AEK_SME_LUTv2 = 68, // FEAT_SME_LUTv2
173+
AEK_SMEF8F16 = 69, // FEAT_SME_F8F16
174+
AEK_SMEF8F32 = 70, // FEAT_SME_F8F32
173175
AEK_NUM_EXTENSIONS
174176
};
175177
using ExtensionBitset = Bitset<AEK_NUM_EXTENSIONS>;
@@ -289,6 +291,8 @@ inline constexpr ExtensionInfo Extensions[] = {
289291
{"ssve-fp8dot4", AArch64::AEK_SSVE_FP8DOT4, "+ssve-fp8dot4", "-ssve-fp8dot4", FEAT_INIT, "+sme2", 0},
290292
{"lut", AArch64::AEK_LUT, "+lut", "-lut", FEAT_INIT, "", 0},
291293
{"sme-lutv2", AArch64::AEK_SME_LUTv2, "+sme-lutv2", "-sme-lutv2", FEAT_INIT, "", 0},
294+
{"sme-f8f16", AArch64::AEK_SMEF8F16, "+sme-f8f16", "-sme-f8f16", FEAT_INIT, "+sme2,+fp8", 0},
295+
{"sme-f8f32", AArch64::AEK_SMEF8F32, "+sme-f8f32", "-sme-f8f32", FEAT_INIT, "+sme2,+fp8", 0},
292296
// Special cases
293297
{"none", AArch64::AEK_NONE, {}, {}, FEAT_INIT, "", ExtensionInfo::MaxFMVPriority},
294298
};

llvm/lib/Target/AArch64/AArch64.td

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,12 @@ def FeatureLUT: SubtargetFeature<"lut", "HasLUT", "true",
540540
def FeatureSME_LUTv2 : SubtargetFeature<"sme-lutv2", "HasSME_LUTv2", "true",
541541
"Enable Scalable Matrix Extension (SME) LUTv2 instructions (FEAT_SME_LUTv2)">;
542542

543+
// FEAT_SME_F8F16 subtarget feature. The trailing list names the features it
// implies: enabling "sme-f8f16" also enables SME2 and FP8.
def FeatureSMEF8F16 : SubtargetFeature<"sme-f8f16", "HasSMEF8F16", "true",
544+
"Enable Scalable Matrix Extension (SME) F8F16 instructions (FEAT_SME_F8F16)", [FeatureSME2, FeatureFP8]>;
545+
546+
def FeatureSMEF8F32 : SubtargetFeature<"sme-f8f32", "HasSMEF8F32", "true",
547+
"Enable Scalable Matrix Extension (SME) F8F32 instructions (FEAT_SME_F8F32)", [FeatureSME2, FeatureFP8]>;
548+
543549
def FeatureAppleA7SysReg : SubtargetFeature<"apple-a7-sysreg", "HasAppleA7SysReg", "true",
544550
"Apple A7 (the CPU formerly known as Cyclone)">;
545551

@@ -770,8 +776,8 @@ let F = [HasSVE2p1, HasSVE2p1_or_HasSME2, HasSVE2p1_or_HasSME2p1] in
770776
def SVE2p1Unsupported : AArch64Unsupported;
771777

772778
def SVE2Unsupported : AArch64Unsupported {
773-
let F = !listconcat([HasSVE2, HasSVE2orSME, HasSSVE_FP8FMA,
774-
HasSVE2AES, HasSVE2SHA3, HasSVE2SM4, HasSVE2BitPerm],
779+
let F = !listconcat([HasSVE2, HasSVE2orSME, HasSSVE_FP8FMA, HasSMEF8F16,
780+
HasSMEF8F32, HasSVE2AES, HasSVE2SHA3, HasSVE2SM4, HasSVE2BitPerm],
775781
SVE2p1Unsupported.F);
776782
}
777783

@@ -784,7 +790,8 @@ let F = [HasSME2p1, HasSVE2p1_or_HasSME2p1] in
784790
def SME2p1Unsupported : AArch64Unsupported;
785791

786792
def SME2Unsupported : AArch64Unsupported {
787-
let F = !listconcat([HasSME2, HasSVE2p1_or_HasSME2, HasSSVE_FP8FMA],
793+
let F = !listconcat([HasSME2, HasSVE2p1_or_HasSME2, HasSSVE_FP8FMA,
794+
HasSMEF8F16, HasSMEF8F32],
788795
SME2p1Unsupported.F);
789796
}
790797

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,10 @@ def HasLUT : Predicate<"Subtarget->hasLUT()">,
191191
AssemblerPredicateWithAll<(all_of FeatureLUT), "lut">;
192192
def HasSME_LUTv2 : Predicate<"Subtarget->hasSME_LUTv2()">,
193193
AssemblerPredicateWithAll<(all_of FeatureSME_LUTv2), "sme-lutv2">;
194+
def HasSMEF8F16 : Predicate<"Subtarget->hasSMEF8F16()">,
195+
AssemblerPredicateWithAll<(all_of FeatureSMEF8F16), "sme-f8f16">;
196+
def HasSMEF8F32 : Predicate<"Subtarget->hasSMEF8F32()">,
197+
AssemblerPredicateWithAll<(all_of FeatureSMEF8F32), "sme-f8f32">;
194198

195199
// A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
196200
// they should be enabled if either has been specified.

llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -894,3 +894,56 @@ def LUTI4_4ZZT2Z : sme2_luti4_vector_vg4<0b00, 0b00,"luti4">;
894894
let Predicates = [HasSME2p1, HasSME_LUTv2] in {
895895
def LUTI4_S_4ZZT2Z : sme2_luti4_vector_vg4_strided<0b00, 0b00, "luti4">;
896896
} //[HasSME2p1, HasSME_LUTv2]
897+
898+
// Instructions gated on the "sme-f8f16" feature. All of them take byte-element
// Z registers and a 16-bit ZA operand (MatrixOp16 / the *_16b formats).
// Every definition that accepts a selection pattern is given null_frag, so
// this block only provides assembler/disassembler support.
let Predicates = [HasSMEF8F16] in {
899+
// Dot products: indexed forms first, then single-vector, then multi-vector.
defm FVDOT_VG2_M2ZZI_BtoH : sme2p1_multi_vec_array_vg2_index_16b<"fvdot", 0b11, 0b110, ZZ_b_mul_r, ZPR4b8>;
900+
defm FDOT_VG2_M2ZZI_BtoH : sme2p1_multi_vec_array_vg2_index_16b<"fdot", 0b11, 0b010, ZZ_b_mul_r, ZPR4b8>;
901+
defm FDOT_VG4_M4ZZI_BtoH : sme2p1_multi_vec_array_vg4_index_16b<"fdot", 0b100, ZZZZ_b_mul_r, ZPR4b8>;
902+
defm FDOT_VG2_M2ZZ_BtoH : sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0010001, MatrixOp16, ZZ_b, ZPR4b8>;
903+
defm FDOT_VG4_M4ZZ_BtoH : sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0110001, MatrixOp16, ZZZZ_b, ZPR4b8>;
904+
// TODO: Replace nxv16i8 by nxv16f8
905+
defm FDOT_VG2_M2Z2Z_BtoH : sme2_dot_mla_add_sub_array_vg2_multi<"fdot", 0b0100100, MatrixOp16, ZZ_b_mul_r, nxv16i8, null_frag>;
906+
defm FDOT_VG4_M4Z4Z_BtoH : sme2_dot_mla_add_sub_array_vg4_multi<"fdot", 0b0100100, MatrixOp16, ZZZZ_b_mul_r, nxv16i8, null_frag>;
907+
908+
// FMLAL: indexed forms first, then single-vector, then multi-vector.
def FMLAL_MZZI_BtoH : sme2_mla_ll_array_index_16b<"fmlal", 0b11, 0b00>;
909+
defm FMLAL_VG2_M2ZZI_BtoH : sme2_multi_vec_array_vg2_index_16b<"fmlal", 0b10, 0b111>;
910+
defm FMLAL_VG4_M4ZZI_BtoH : sme2_multi_vec_array_vg4_index_16b<"fmlal", 0b10, 0b110>;
911+
def FMLAL_VG2_MZZ_BtoH : sme2_mla_long_array_single_16b<"fmlal">;
912+
// TODO: Replace nxv16i8 by nxv16f8
913+
defm FMLAL_VG2_M2ZZ_BtoH : sme2_fp_mla_long_array_vg2_single<"fmlal", 0b001, MatrixOp16, ZZ_b, ZPR4b8, nxv16i8, null_frag>;
914+
defm FMLAL_VG4_M4ZZ_BtoH : sme2_fp_mla_long_array_vg4_single<"fmlal", 0b001, MatrixOp16, ZZZZ_b, ZPR4b8, nxv16i8, null_frag>;
915+
defm FMLAL_VG2_M2Z2Z_BtoH : sme2_fp_mla_long_array_vg2_multi<"fmlal", 0b100, MatrixOp16, ZZ_b_mul_r, nxv16i8, null_frag>;
916+
defm FMLAL_VG4_M4Z4Z_BtoH : sme2_fp_mla_long_array_vg4_multi<"fmlal", 0b100, MatrixOp16, ZZZZ_b_mul_r, nxv16i8, null_frag>;
917+
918+
// FMOPA (outer product) with byte-element (ZPR8) sources.
defm FMOPA_MPPZZ_BtoH : sme2p1_fmop_tile_fp16<"fmopa", 0b1, 0b0, 0b01, ZPR8>;
919+
920+
} //[HasSMEF8F16]
921+
922+
// Instructions gated on the "sme-f8f32" feature. All of them take byte-element
// Z registers and a 32-bit ZA operand (MatrixOp32 / the *_32b formats).
// Every definition that accepts a selection pattern is given null_frag, so
// this block only provides assembler/disassembler support.
let Predicates = [HasSMEF8F32] in {
923+
// TODO: Replace nxv16i8 by nxv16f8
924+
// FDOT: indexed forms first, then single-vector, then multi-vector.
defm FDOT_VG2_M2ZZI_BtoS : sme2_multi_vec_array_vg2_index_32b<"fdot", 0b01, 0b0111, ZZ_b_mul_r, ZPR4b8, nxv16i8, null_frag>;
925+
defm FDOT_VG4_M4ZZI_BtoS : sme2_multi_vec_array_vg4_index_32b<"fdot", 0b0001, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, null_frag>;
926+
defm FDOT_VG2_M2ZZ_BtoS : sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0010011, MatrixOp32, ZZ_b, ZPR4b8>;
927+
defm FDOT_VG4_M4ZZ_BtoS : sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0110011, MatrixOp32, ZZZZ_b, ZPR4b8>;
928+
// TODO: Replace nxv16i8 by nxv16f8
929+
defm FDOT_VG2_M2Z2Z_BtoS : sme2_dot_mla_add_sub_array_vg2_multi<"fdot", 0b0100110, MatrixOp32, ZZ_b_mul_r, nxv16i8, null_frag>;
930+
defm FDOT_VG4_M4Z4Z_BtoS : sme2_dot_mla_add_sub_array_vg4_multi<"fdot", 0b0100110, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, null_frag>;
931+
932+
// FVDOTB / FVDOTT share one format class and differ only in its bit argument.
def FVDOTB_VG4_M2ZZI_BtoS : sme2_fp8_multi_vec_array_vg4_index<"fvdotb", 0b0>;
933+
def FVDOTT_VG4_M2ZZI_BtoS : sme2_fp8_multi_vec_array_vg4_index<"fvdott", 0b1>;
934+
935+
// FMLALL: indexed forms first, then single-vector, then multi-vector.
defm FMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"fmlall", 0b01, 0b000, null_frag>;
936+
defm FMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"fmlall", 0b10, 0b100, null_frag>;
937+
defm FMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"fmlall", 0b00, 0b1000, null_frag>;
938+
// TODO: Replace nxv16i8 by nxv16f8
939+
defm FMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"fmlall", 0b01000, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, null_frag>;
940+
defm FMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg24_single<"fmlall", 0b000001, MatrixOp32, ZZ_b, ZPR4b8>;
941+
defm FMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg24_single<"fmlall", 0b010001, MatrixOp32, ZZZZ_b, ZPR4b8>;
942+
defm FMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"fmlall", 0b01000, MatrixOp32, ZZ_b_mul_r, nxv16i8, null_frag>;
943+
defm FMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"fmlall", 0b01000, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, null_frag>;
944+
945+
946+
// FMOPA (outer product) with byte-element (ZPR8) sources.
defm FMOPA_MPPZZ_BtoS : sme_outer_product_fp32<0b0, 0b01, ZPR8, "fmopa", null_frag>;
947+
948+
} //[HasSMEF8F32]
949+

llvm/lib/Target/AArch64/AArch64SchedA64FX.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def A64FXModel : SchedMachineModel {
2323
list<Predicate> UnsupportedFeatures =
2424
[HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, HasSVE2BitPerm, HasPAuth,
2525
HasSVE2orSME, HasMTE, HasMatMulInt8, HasBF16, HasSME2, HasSME2p1, HasSVE2p1,
26-
HasSVE2p1_or_HasSME2p1, HasSMEF16F16, HasSSVE_FP8FMA];
26+
HasSVE2p1_or_HasSME2p1, HasSMEF16F16, HasSSVE_FP8FMA, HasSMEF8F16, HasSMEF8F32];
2727

2828
let FullInstRWOverlapCheck = 0;
2929
}

llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3660,6 +3660,8 @@ static const struct Extension {
36603660
{"ssve-fp8dot4", {AArch64::FeatureSSVE_FP8DOT4}},
36613661
{"lut", {AArch64::FeatureLUT}},
36623662
{"sme-lutv2", {AArch64::FeatureSME_LUTv2}},
3663+
{"sme-f8f16", {AArch64::FeatureSMEF8F16}},
3664+
{"sme-f8f32", {AArch64::FeatureSMEF8F32}},
36633665
};
36643666

36653667
static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
@@ -4578,7 +4580,6 @@ ParseStatus AArch64AsmParser::tryParseZTOperand(OperandVector &Operands) {
45784580
Operands.push_back(
45794581
AArch64Operand::CreateToken("]", getLoc(), getContext()));
45804582
}
4581-
45824583
return ParseStatus::Success;
45834584
}
45844585

llvm/lib/Target/AArch64/SMEInstrFormats.td

Lines changed: 103 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1922,6 +1922,17 @@ multiclass sme2_mla_long_array_single<string mnemonic, bits<2> op0, bits<2> op,
19221922
def : SME2_ZA_TwoOp_Multi_Single_Pat<NAME # _HtoS, intrinsic, uimm3s2range, ZPR4b16, zpr_ty, tileslicerange3s2>;
19231923
}
19241924

1925+
// Single-vector MLA format that accumulates into a 16-bit ZA operand
// (MatrixOp16) from byte-element sources (ZPR8 for Zn, ZPR4b8 for Zm).
// It specialises sme2_mla_long_array with both leading bit-field arguments
// fixed to 0b00 and a uimm3s2range slice offset; only the mnemonic varies.
class sme2_mla_long_array_single_16b<string mnemonic>
1926+
: sme2_mla_long_array<0b00, 0b00, MatrixOp16, uimm3s2range, ZPR8, ZPR4b8, mnemonic> {
1927+
bits<4> Zm; // 4-bit field, paired with the ZPR4b8 operand class
1928+
bits<5> Zn; // full 5-bit register number
1929+
bits<3> imm; // slice offset operand (uimm3s2range)
1930+
let Inst{20} = 0b1;
1931+
let Inst{19-16} = Zm;
1932+
let Inst{9-5} = Zn;
1933+
let Inst{2-0} = imm;
1934+
}
1935+
19251936
class sme2_mla_long_array_vg24_single<bits<2> op0, bit vg4, bits<2> op, bit o2,
19261937
MatrixOperand matrix_ty, RegisterOperand multi_vector_ty,
19271938
ZPRRegOp zpr_ty, string mnemonic, string vg_acronym>
@@ -1937,7 +1948,6 @@ class sme2_mla_long_array_vg24_single<bits<2> op0, bit vg4, bits<2> op, bit o2,
19371948
let Inst{1-0} = imm;
19381949
}
19391950

1940-
19411951
multiclass sme2_fp_mla_long_array_vg2_single<string mnemonic, bits<3> op, MatrixOperand matrix_ty,
19421952
RegisterOperand multi_vector_ty, ZPRRegOp vector_ty,
19431953
ValueType zpr_ty, SDPatternOperator intrinsic> {
@@ -1971,7 +1981,8 @@ multiclass sme2_fp_mla_long_array_vg4_single<string mnemonic, bits<3> op, Matrix
19711981
RegisterOperand multi_vector_ty, ZPRRegOp vector_ty,
19721982
ValueType zpr_ty, SDPatternOperator intrinsic> {
19731983
def NAME : sme2_mla_long_array_vg24_single<0b00, 0b1, op{2-1}, op{0}, matrix_ty, multi_vector_ty,
1974-
vector_ty, mnemonic, "vgx4">, SMEPseudo2Instr<NAME, 1>;
1984+
vector_ty, mnemonic, "vgx4">,
1985+
SMEPseudo2Instr<NAME, 1>;
19751986

19761987
def _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, uimm2s2range, multi_vector_ty, vector_ty,
19771988
SMEMatrixArray>;
@@ -2390,7 +2401,6 @@ multiclass sme2_zip_vector_vg2<string mnemonic, bit op> {
23902401

23912402
//===----------------------------------------------------------------------===//
23922403
// SME2 Dot Products and MLA
2393-
23942404
class sme2_multi_vec_array_vg2_index<bits<2> sz, bits<6> op, MatrixOperand matrix_ty,
23952405
RegisterOperand multi_vector_ty,
23962406
ZPRRegOp vector_ty, Operand index_ty,
@@ -2428,7 +2438,6 @@ multiclass sme2_multi_vec_array_vg2_index_32b<string mnemonic, bits<2> sz, bits<
24282438
bits<2> i;
24292439
let Inst{11-10} = i;
24302440
}
2431-
24322441
def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, vector_ty, VectorIndexS32b_timm, SMEMatrixArray>;
24332442

24342443
def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7, vector_ty, vt, VectorIndexS32b_timm, tileslice16>;
@@ -2439,6 +2448,7 @@ multiclass sme2_multi_vec_array_vg2_index_32b<string mnemonic, bits<2> sz, bits<
24392448
}
24402449

24412450
// SME2.1 multi-vec ternary indexed two registers 16-bit
2451+
// SME2 multi-vec indexed FP8 two-way dot product to FP16 two registers
24422452
multiclass sme2p1_multi_vec_array_vg2_index_16b<string mnemonic, bits<2> sz, bits<3> op,
24432453
RegisterOperand multi_vector_ty, ZPRRegOp zpr_ty> {
24442454
def NAME : sme2_multi_vec_array_vg2_index<sz, {op{2},?,?,op{1-0},?}, MatrixOp16,
@@ -2448,11 +2458,24 @@ multiclass sme2p1_multi_vec_array_vg2_index_16b<string mnemonic, bits<2> sz, bit
24482458
let Inst{11-10} = i{2-1};
24492459
let Inst{3} = i{0};
24502460
}
2461+
24512462
def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
24522463
(!cast<Instruction>(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
24532464
multi_vector_ty:$Zn, zpr_ty:$Zm, VectorIndexH:$i), 0>;
24542465
}
24552466

2467+
// SME2 multi-vec indexed FP8 two-way vertical dot product to single precision
2468+
// two registers
2469+
// Indexed format built on sme2_multi_vec_array_vg2_index with a 32-bit ZA
// operand, a two-register byte-element source list (ZZ_b_mul_r) and a
// VectorIndexS lane index.
//   mnemonic - assembly mnemonic.
//   T        - single bit spliced into the 6-bit op template; the two `?`
//              positions of that template are left unset here.
class sme2_fp8_multi_vec_array_vg4_index<string mnemonic, bit T>
2470+
: sme2_multi_vec_array_vg2_index<0b11, {0b01,?,0b0, T,?}, MatrixOp32,
2471+
ZZ_b_mul_r, ZPR4b8, VectorIndexS, mnemonic> {
2472+
2473+
bits<2> i;
2474+
// The 2-bit lane index is not contiguous in the encoding.
let Inst{10} = i{1};
2475+
let Inst{3} = i{0};
2476+
// Override the assembly string so the operand is written with "vgx4" even
// though the base class is the vg2 index format.
let AsmString = !strconcat(mnemonic, "{\t$ZAda[$Rv, $imm3, vgx4], $Zn, $Zm$i}");
2477+
}
2478+
24562479
// SME2 multi-vec ternary indexed two registers 64-bit
24572480

24582481
class sme2_multi_vec_array_vg2_index_64b<bits<2> op,
@@ -2608,7 +2631,83 @@ multiclass sme2_multi_vec_array_vg4_index_64b<string mnemonic, bits<3> op,
26082631
(!cast<Instruction>(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
26092632
multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexD32b_timm:$i1), 0>;
26102633
}
2634+
2635+
// FMLAL (multiple and indexed vector, FP8 to FP16)
2636+
// Common encoding for the multi-vector indexed forms that accumulate into a
// 16-bit ZA operand (MatrixOp16) from byte-element sources.
//   sz              - placed in Inst{23-22}.
//   vg4             - placed in Inst{15}; also selects "vgx4" vs "vgx2" in
//                     the assembly string.
//   op              - 3-bit opcode, split across Inst{12} and Inst{5-4}.
//   multi_vector_ty - register-list operand class used for $Zn.
//   mnemonic        - assembly mnemonic.
// Inst{9-6} is not assigned here; the instantiating multiclasses fill it with
// their own Zn field.
class sme2_multi_vec_array_vg24_index_16b<bits<2> sz, bit vg4, bits<3> op,
2637+
RegisterOperand multi_vector_ty, string mnemonic>
2638+
: I<(outs MatrixOp16:$ZAda),
2639+
(ins MatrixOp16:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2,
2640+
multi_vector_ty:$Zn, ZPR4b8:$Zm, VectorIndexB:$i),
2641+
mnemonic, "\t$ZAda[$Rv, $imm2, " # !if(vg4, "vgx4", "vgx2") # "], $Zn, $Zm$i",
2642+
"", []>, Sched<[]> {
2643+
bits<4> Zm;
2644+
bits<2> Rv;
2645+
bits<4> i;
2646+
bits<2> imm2;
2647+
let Inst{31-24} = 0b11000001;
2648+
let Inst{23-22} = sz;
2649+
let Inst{21-20} = 0b01;
2650+
let Inst{19-16} = Zm;
2651+
let Inst{15} = vg4;
2652+
let Inst{14-13} = Rv;
2653+
let Inst{12} = op{2};
2654+
// The 4-bit lane index is split: high half here, low half in Inst{3-2}.
let Inst{11-10} = i{3-2};
2655+
let Inst{5-4} = op{1-0};
2656+
let Inst{3-2} = i{1-0};
2657+
let Inst{1-0} = imm2;
2658+
2659+
// $ZAda is both read and written, so tie the input to the output.
let Constraints = "$ZAda = $_ZAda";
2660+
}
2661+
2662+
// Two-register (vgx2) instantiation of sme2_multi_vec_array_vg24_index_16b:
// passes vg4 = 0 and ZZ_b_mul_r as the source list, and encodes Zn as a
// 4-bit field in Inst{9-6}.
multiclass sme2_multi_vec_array_vg2_index_16b<string mnemonic, bits<2> sz, bits<3>op> {
2663+
def NAME : sme2_multi_vec_array_vg24_index_16b<sz, 0b0, op, ZZ_b_mul_r, mnemonic> {
2664+
bits<4> Zn;
2665+
let Inst{9-6} = Zn;
2666+
}
2667+
// Alias without the "vgx2" token in the ZA operand. The trailing 0 is the
// InstAlias emit flag: the alias is accepted on input but not used when
// printing.
def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i",
2668+
(!cast<Instruction>(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
2669+
uimm2s2range:$imm2, ZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB:$i), 0>;
2670+
}
2671+
2672+
// Four-register (vgx4) instantiation of sme2_multi_vec_array_vg24_index_16b:
// passes vg4 = 1 and ZZZZ_b_mul_r as the source list, and encodes Zn as a
// 3-bit field in Inst{9-7} with Inst{6} fixed to 0.
multiclass sme2_multi_vec_array_vg4_index_16b<string mnemonic, bits<2>sz, bits<3>op> {
2673+
def NAME: sme2_multi_vec_array_vg24_index_16b<sz, 0b1, op, ZZZZ_b_mul_r, mnemonic> {
2674+
bits<3> Zn;
2675+
let Inst{9-7} = Zn;
2676+
let Inst{6} = 0b0;
2677+
}
2678+
// Alias without the "vgx4" token in the ZA operand. The trailing 0 is the
// InstAlias emit flag: the alias is accepted on input but not used when
// printing.
def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i",
2679+
(!cast<Instruction>(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
2680+
uimm2s2range:$imm2, ZZZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB:$i), 0>;
2681+
}
2682+
26112683
//===----------------------------------------------------------------------===//
2684+
// SME2 multi-vec indexed long MLA one source 16-bit
2685+
// Indexed single-source MLA encoding that accumulates into a 16-bit ZA
// operand (MatrixOp16) from byte-element sources (ZPR8 for Zn, ZPR4b8 for Zm).
//   mnemonic - assembly mnemonic.
//   sz       - placed in Inst{23-22}.
//   op       - 2-bit opcode, split across Inst{12} and Inst{4}.
class sme2_mla_ll_array_index_16b<string mnemonic, bits<2> sz,bits<2> op>
2686+
: I<(outs MatrixOp16:$ZAda),
2687+
(ins MatrixOp16:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm3s2range:$imm3, ZPR8:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i),
2688+
mnemonic, "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
2689+
"", []>, Sched<[]> {
2690+
bits<4> Zm;
2691+
bits<2> Rv;
2692+
bits<4> i;
2693+
bits<5> Zn;
2694+
bits<3> imm3;
2695+
let Inst{31-24} = 0b11000001;
2696+
let Inst{23-22} = sz;
2697+
let Inst{21-20} = 0b00;
2698+
let Inst{19-16} = Zm;
2699+
// The 4-bit lane index is scattered over Inst{15}, Inst{11-10} and Inst{3}.
let Inst{15} = i{3};
2700+
let Inst{14-13} = Rv;
2701+
let Inst{12} = op{1};
2702+
let Inst{11-10} = i{2-1};
2703+
let Inst{9-5} = Zn;
2704+
let Inst{4} = op{0};
2705+
let Inst{3} = i{0};
2706+
let Inst{2-0} = imm3;
2707+
2708+
// $ZAda is both read and written, so tie the input to the output.
let Constraints = "$ZAda = $_ZAda";
2709+
}
2710+
26122711
// SME2 multi-vec indexed long long MLA one source 32-bit
26132712
class sme2_mla_ll_array_index_32b<string mnemonic, bits<2> sz, bits<3> op>
26142713
: I<(outs MatrixOp32:$ZAda),

0 commit comments

Comments
 (0)