@@ -1922,6 +1922,17 @@ multiclass sme2_mla_long_array_single<string mnemonic, bits<2> op0, bits<2> op,
1922
1922
def : SME2_ZA_TwoOp_Multi_Single_Pat<NAME # _HtoS, intrinsic, uimm3s2range, ZPR4b16, zpr_ty, tileslicerange3s2>;
1923
1923
}
1924
1924
1925
// Instruction-format class built on sme2_mla_long_array, instantiated with
// MatrixOp16, uimm3s2range, ZPR8 and ZPR4b8. It declares the Zm, Zn and imm
// operand fields and places them in the encoding. Bits not assigned here are
// presumably supplied by the base class (not visible in this chunk).
//   mnemonic - assembly mnemonic forwarded unchanged to the base class.
+ class sme2_mla_long_array_single_16b<string mnemonic>
1926
+ : sme2_mla_long_array<0b00, 0b00, MatrixOp16, uimm3s2range, ZPR8, ZPR4b8, mnemonic> {
// Operand fields: Zm is 4 bits wide, Zn is 5 bits wide, imm is 3 bits wide.
1927
+ bits<4> Zm;
1928
+ bits<5> Zn;
1929
+ bits<3> imm;
// Bit 20 is a fixed 1 for this format.
1930
+ let Inst{20} = 0b1;
// Operand placement: Zm -> bits 19-16, Zn -> bits 9-5, imm -> bits 2-0.
1931
+ let Inst{19-16} = Zm;
1932
+ let Inst{9-5} = Zn;
1933
+ let Inst{2-0} = imm;
1934
+ }
1935
+
1925
1936
class sme2_mla_long_array_vg24_single<bits<2> op0, bit vg4, bits<2> op, bit o2,
1926
1937
MatrixOperand matrix_ty, RegisterOperand multi_vector_ty,
1927
1938
ZPRRegOp zpr_ty, string mnemonic, string vg_acronym>
@@ -1937,7 +1948,6 @@ class sme2_mla_long_array_vg24_single<bits<2> op0, bit vg4, bits<2> op, bit o2,
1937
1948
let Inst{1-0} = imm;
1938
1949
}
1939
1950
1940
-
1941
1951
multiclass sme2_fp_mla_long_array_vg2_single<string mnemonic, bits<3> op, MatrixOperand matrix_ty,
1942
1952
RegisterOperand multi_vector_ty, ZPRRegOp vector_ty,
1943
1953
ValueType zpr_ty, SDPatternOperator intrinsic> {
@@ -1971,7 +1981,8 @@ multiclass sme2_fp_mla_long_array_vg4_single<string mnemonic, bits<3> op, Matrix
1971
1981
RegisterOperand multi_vector_ty, ZPRRegOp vector_ty,
1972
1982
ValueType zpr_ty, SDPatternOperator intrinsic> {
1973
1983
def NAME : sme2_mla_long_array_vg24_single<0b00, 0b1, op{2-1}, op{0}, matrix_ty, multi_vector_ty,
1974
- vector_ty, mnemonic, "vgx4">, SMEPseudo2Instr<NAME, 1>;
1984
+ vector_ty, mnemonic, "vgx4">,
1985
+ SMEPseudo2Instr<NAME, 1>;
1975
1986
1976
1987
def _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, uimm2s2range, multi_vector_ty, vector_ty,
1977
1988
SMEMatrixArray>;
@@ -2390,7 +2401,6 @@ multiclass sme2_zip_vector_vg2<string mnemonic, bit op> {
2390
2401
2391
2402
//===----------------------------------------------------------------------===//
2392
2403
// SME2 Dot Products and MLA
2393
-
2394
2404
class sme2_multi_vec_array_vg2_index<bits<2> sz, bits<6> op, MatrixOperand matrix_ty,
2395
2405
RegisterOperand multi_vector_ty,
2396
2406
ZPRRegOp vector_ty, Operand index_ty,
@@ -2428,7 +2438,6 @@ multiclass sme2_multi_vec_array_vg2_index_32b<string mnemonic, bits<2> sz, bits<
2428
2438
bits<2> i;
2429
2439
let Inst{11-10} = i;
2430
2440
}
2431
-
2432
2441
def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, vector_ty, VectorIndexS32b_timm, SMEMatrixArray>;
2433
2442
2434
2443
def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7, vector_ty, vt, VectorIndexS32b_timm, tileslice16>;
@@ -2439,6 +2448,7 @@ multiclass sme2_multi_vec_array_vg2_index_32b<string mnemonic, bits<2> sz, bits<
2439
2448
}
2440
2449
2441
2450
// SME2.1 multi-vec ternary indexed two registers 16-bit
2451
+ // SME2 multi-vec indexed FP8 two-way dot product to FP16 two registers
2442
2452
multiclass sme2p1_multi_vec_array_vg2_index_16b<string mnemonic, bits<2> sz, bits<3> op,
2443
2453
RegisterOperand multi_vector_ty, ZPRRegOp zpr_ty> {
2444
2454
def NAME : sme2_multi_vec_array_vg2_index<sz, {op{2},?,?,op{1-0},?}, MatrixOp16,
@@ -2448,11 +2458,24 @@ multiclass sme2p1_multi_vec_array_vg2_index_16b<string mnemonic, bits<2> sz, bit
2448
2458
let Inst{11-10} = i{2-1};
2449
2459
let Inst{3} = i{0};
2450
2460
}
2461
+
2451
2462
def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
2452
2463
(!cast<Instruction>(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
2453
2464
multi_vector_ty:$Zn, zpr_ty:$Zm, VectorIndexH:$i), 0>;
2454
2465
}
2455
2466
2467
+ // SME2 multi-vec indexed FP8 two-way vertical dot product to single precision
2468
+ // two registers
// Derives from sme2_multi_vec_array_vg2_index with sz = 0b11, MatrixOp32,
// ZZ_b_mul_r, ZPR4b8 and VectorIndexS.
//   mnemonic - assembly mnemonic, also used to build AsmString below.
//   T        - single bit inserted into the 6-bit op pattern passed to the
//              base class; the positions written as `?` are left unset in
//              that pattern.
// NOTE(review): the class name and AsmString say vg4/vgx4 while the base class
// and the ZZ_b_mul_r source operand are the two-register form - presumably
// intentional (two source registers, vgx4 ZA group); confirm against the ISA.
2469
+ class sme2_fp8_multi_vec_array_vg4_index<string mnemonic, bit T>
2470
+ : sme2_multi_vec_array_vg2_index<0b11, {0b01,?,0b0, T,?}, MatrixOp32,
2471
+ ZZ_b_mul_r, ZPR4b8, VectorIndexS, mnemonic> {
2472
+
// 2-bit element index, split across the encoding: i{1} -> bit 10, i{0} -> bit 3.
2473
+ bits<2> i;
2474
+ let Inst{10} = i{1};
2475
+ let Inst{3} = i{0};
// Sets AsmString explicitly so the ZA operand is written with a vgx4 suffix.
2476
+ let AsmString = !strconcat(mnemonic, "{\t$ZAda[$Rv, $imm3, vgx4], $Zn, $Zm$i}");
2477
+ }
2478
+
2456
2479
// SME2 multi-vec ternary indexed two registers 64-bit
2457
2480
2458
2481
class sme2_multi_vec_array_vg2_index_64b<bits<2> op,
@@ -2608,7 +2631,83 @@ multiclass sme2_multi_vec_array_vg4_index_64b<string mnemonic, bits<3> op,
2608
2631
(!cast<Instruction>(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
2609
2632
multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexD32b_timm:$i1), 0>;
2610
2633
}
2634
+
2635
+ // FMLAL (multiple and indexed vector, FP8 to FP16)
// Shared instruction format for the two-register (vgx2) and four-register
// (vgx4) variants.
//   sz              - 2-bit value placed in Inst{23-22}.
//   vg4             - placed in Inst{15}; also selects the "vgx4" or "vgx2"
//                     suffix in the assembly string.
//   op              - 3-bit opcode, split as op{2} -> Inst{12} and
//                     op{1-0} -> Inst{5-4}.
//   multi_vector_ty - register operand type used for the $Zn source.
//   mnemonic        - assembly mnemonic.
// Zn is not encoded by this class; the vg2/vg4 multiclasses that instantiate it
// declare Zn and assign its bits.
2636
+ class sme2_multi_vec_array_vg24_index_16b<bits<2> sz, bit vg4, bits<3> op,
2637
+ RegisterOperand multi_vector_ty, string mnemonic>
2638
+ : I<(outs MatrixOp16:$ZAda),
2639
+ (ins MatrixOp16:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2,
2640
+ multi_vector_ty:$Zn, ZPR4b8:$Zm, VectorIndexB:$i),
2641
+ mnemonic, "\t$ZAda[$Rv, $imm2, " # !if(vg4, "vgx4", "vgx2") # "], $Zn, $Zm$i",
2642
+ "", []>, Sched<[]> {
// Operand fields: Zm (4 bits), Rv (2 bits), index i (4 bits), imm2 (2 bits).
2643
+ bits<4> Zm;
2644
+ bits<2> Rv;
2645
+ bits<4> i;
2646
+ bits<2> imm2;
// Fixed top byte, followed by the sz parameter and a fixed 0b01 in bits 21-20.
2647
+ let Inst{31-24} = 0b11000001;
2648
+ let Inst{23-22} = sz;
2649
+ let Inst{21-20} = 0b01;
2650
+ let Inst{19-16} = Zm;
2651
+ let Inst{15} = vg4;
2652
+ let Inst{14-13} = Rv;
2653
+ let Inst{12} = op{2};
// The 4-bit index is split: i{3-2} -> bits 11-10, i{1-0} -> bits 3-2.
2654
+ let Inst{11-10} = i{3-2};
2655
+ let Inst{5-4} = op{1-0};
2656
+ let Inst{3-2} = i{1-0};
2657
+ let Inst{1-0} = imm2;
2658
+
// Ties the $ZAda result to the $_ZAda input operand.
2659
+ let Constraints = "$ZAda = $_ZAda";
2660
+ }
2661
+
2662
// Two-register (vg4 = 0b0) instantiation of sme2_multi_vec_array_vg24_index_16b
// using ZZ_b_mul_r for $Zn, plus an assembly alias.
//   mnemonic - assembly mnemonic for the instruction and its alias.
//   sz, op   - forwarded unchanged to the format class.
+ multiclass sme2_multi_vec_array_vg2_index_16b<string mnemonic, bits<2> sz, bits<3>op> {
2663
+ def NAME : sme2_multi_vec_array_vg24_index_16b<sz, 0b0, op, ZZ_b_mul_r, mnemonic> {
// Zn is a 4-bit field here, encoded in bits 9-6.
2664
+ bits<4> Zn;
2665
+ let Inst{9-6} = Zn;
2666
+ }
// Alias whose ZA operand is written "[$Rv, $imm2]" with no vgx suffix. The
// trailing 0 is the InstAlias emit argument, so presumably the alias is
// accepted when parsing but not used when printing - confirm against Target.td.
2667
+ def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i",
2668
+ (!cast<Instruction>(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
2669
+ uimm2s2range:$imm2, ZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB:$i), 0>;
2670
+ }
2671
+
2672
// Four-register (vg4 = 0b1) instantiation of sme2_multi_vec_array_vg24_index_16b
// using ZZZZ_b_mul_r for $Zn, plus an assembly alias.
//   mnemonic - assembly mnemonic for the instruction and its alias.
//   sz, op   - forwarded unchanged to the format class.
+ multiclass sme2_multi_vec_array_vg4_index_16b<string mnemonic, bits<2>sz, bits<3>op> {
2673
+ def NAME: sme2_multi_vec_array_vg24_index_16b<sz, 0b1, op, ZZZZ_b_mul_r, mnemonic> {
// Zn is a 3-bit field here, encoded in bits 9-7; bit 6 is fixed to 0.
2674
+ bits<3> Zn;
2675
+ let Inst{9-7} = Zn;
2676
+ let Inst{6} = 0b0;
2677
+ }
// Alias whose ZA operand is written "[$Rv, $imm2]" with no vgx suffix. The
// trailing 0 is the InstAlias emit argument, so presumably the alias is
// accepted when parsing but not used when printing - confirm against Target.td.
2678
+ def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i",
2679
+ (!cast<Instruction>(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
2680
+ uimm2s2range:$imm2, ZZZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB:$i), 0>;
2681
+ }
2682
+
2611
2683
//===----------------------------------------------------------------------===//
2684
+ // SME2 multi-vec indexed long long MLA one source 16-bit
// Instruction format with a MatrixOp16 accumulator, a single ZPR8 source ($Zn)
// and an indexed ZPR4b8 source ($Zm with index $i).
//   mnemonic - assembly mnemonic.
//   sz       - 2-bit value placed in Inst{23-22}.
//   op       - 2-bit opcode, split as op{1} -> Inst{12} and op{0} -> Inst{4}.
2685
+ class sme2_mla_ll_array_index_16b<string mnemonic, bits<2> sz,bits<2> op>
2686
+ : I<(outs MatrixOp16:$ZAda),
2687
+ (ins MatrixOp16:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm3s2range:$imm3, ZPR8:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i),
2688
+ mnemonic, "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
2689
+ "", []>, Sched<[]> {
// Operand fields: Zm (4 bits), Rv (2 bits), index i (4 bits), Zn (5 bits),
// imm3 (3 bits).
2690
+ bits<4> Zm;
2691
+ bits<2> Rv;
2692
+ bits<4> i;
2693
+ bits<5> Zn;
2694
+ bits<3> imm3;
// Fixed top byte, followed by the sz parameter and a fixed 0b00 in bits 21-20.
2695
+ let Inst{31-24} = 0b11000001;
2696
+ let Inst{23-22} = sz;
2697
+ let Inst{21-20} = 0b00;
2698
+ let Inst{19-16} = Zm;
// The 4-bit index is scattered: i{3} -> bit 15, i{2-1} -> bits 11-10,
// i{0} -> bit 3.
2699
+ let Inst{15} = i{3};
2700
+ let Inst{14-13} = Rv;
2701
+ let Inst{12} = op{1};
2702
+ let Inst{11-10} = i{2-1};
2703
+ let Inst{9-5} = Zn;
2704
+ let Inst{4} = op{0};
2705
+ let Inst{3} = i{0};
2706
+ let Inst{2-0} = imm3;
2707
+
// Ties the $ZAda result to the $_ZAda input operand.
2708
+ let Constraints = "$ZAda = $_ZAda";
2709
+ }
2710
+
2612
2711
// SME2 multi-vec indexed long long MLA one source 32-bit
2613
2712
class sme2_mla_ll_array_index_32b<string mnemonic, bits<2> sz, bits<3> op>
2614
2713
: I<(outs MatrixOp32:$ZAda),
0 commit comments