@@ -111,7 +111,7 @@ class sem2p1_zero_matrix_pseudo<string name, Operand index_ty, SMEMatrixTypeEnum
111
111
let usesCustomInserter = 1;
112
112
}
113
113
114
- class sme2_movez_to_tile_multi_pseudo <string name, Operand tile_imm, Operand imm_ty, RegisterOperand vector_ty, SMEMatrixTypeEnum za_flag>
114
+ class sme2_movez_to_tile_pseudo <string name, Operand tile_imm, Operand imm_ty, RegisterOperand vector_ty, SMEMatrixTypeEnum za_flag>
115
115
: SMEPseudo2Instr<name, 0>,
116
116
Pseudo<(outs vector_ty:$Zn), (ins tile_imm:$tile, MatrixIndexGPR32Op12_15:$Rs, imm_ty:$imm), []> {
117
117
let SMEMatrixType = za_flag;
@@ -205,6 +205,11 @@ class SME2_Tile_VG4_Multi_Pat<string name, SDPatternOperator intrinsic, Operand
205
205
// Selection pattern helper: matches `intrinsic` applied to a single i32
// operand produced by the `tileslice` complex pattern (a base register from
// MatrixIndexGPR32Op8_11 plus an `offset_ty` offset) and selects the
// instruction record named by the string `name`, passing ($base, $offset).
class SME2_Zero_Matrix_Pat<string name, SDPatternOperator intrinsic, Operand offset_ty, ComplexPattern tileslice>
206
206
: Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, offset_ty:$offset))),
207
207
(!cast<Instruction>(name) $base, $offset)>;
208
+
209
// Selection pattern helper added by this change: matches `intrinsic` called
// with a tile immediate (`tile_imm`) and an i32 tile-slice operand (base
// register from MatrixIndexGPR32Op12_15 plus an `index_ty` offset) yielding a
// value of type `out_vt`. It selects the record named `name` with the suffix
// `_PSEUDO` appended (not `name` itself), passing ($tile, $base, $offset).
+ class SME2_Tile_Movaz_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, Operand tile_imm, Operand index_ty, ComplexPattern tileslice>
210
+ : Pat<(out_vt (intrinsic tile_imm:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)))),
211
+ (!cast<Instruction>(name # _PSEUDO) $tile, $base, $offset)>;
212
+
208
213
//===----------------------------------------------------------------------===//
209
214
// SME pattern match helpers.
210
215
//===----------------------------------------------------------------------===//
@@ -4099,21 +4104,22 @@ multiclass sme2_mova_tile_to_vec_vg2_multi<string mnemonic>{
4099
4104
defm _V : sme2_mova_tile_to_vec_vg2_multi_inst<0b1, 0b000, mnemonic>;
4100
4105
}
4101
4106
4107
+
4102
4108
// SME2p1 move tile to vector and zero tile, two registers
4103
4109
// Two-register variant. Instantiates the _H and _V instruction groups and one
// *_PSEUDO record per direction (_H/_V) and element size (B/H/S/D).
// In this diff the only difference between the removed (-) and added (+)
// pseudo definitions is the base-class rename
// sme2_movez_to_tile_multi_pseudo -> sme2_movez_to_tile_pseudo; the template
// arguments are identical.
multiclass sme2p1_movaz_tile_to_vec_vg2<string mnemonic>{
4104
4110
// _H and _V differ only in the first template argument (0b0 vs 0b1).
defm _H : sme2_mova_tile_to_vec_vg2_multi_inst<0b0, 0b010, mnemonic>;
4105
4111
defm _V : sme2_mova_tile_to_vec_vg2_multi_inst<0b1, 0b010, mnemonic>;
4106
4112
4107
4113
4108
// _H pseudos. Arguments: instruction name, tile immediate operand, slice
// offset range operand, ZZ_*_mul_r result register operand, ZA tile type.
- def NAME # _H_B_PSEUDO : sme2_movez_to_tile_multi_pseudo <NAME # _H_B, sme_elm_idx0_0, uimm3s2range, ZZ_b_mul_r, SMEMatrixTileB>;
4109
- def NAME # _H_H_PSEUDO : sme2_movez_to_tile_multi_pseudo <NAME # _H_H, sme_elm_idx0_1, uimm2s2range, ZZ_h_mul_r, SMEMatrixTileH>;
4110
- def NAME # _H_S_PSEUDO : sme2_movez_to_tile_multi_pseudo <NAME # _H_S, sme_elm_idx0_3, uimm1s2range, ZZ_s_mul_r, SMEMatrixTileS>;
4111
- def NAME # _H_D_PSEUDO : sme2_movez_to_tile_multi_pseudo <NAME # _H_D, sme_elm_idx0_7, uimm0s2range, ZZ_d_mul_r, SMEMatrixTileD>;
4114
+ def NAME # _H_B_PSEUDO : sme2_movez_to_tile_pseudo <NAME # _H_B, sme_elm_idx0_0, uimm3s2range, ZZ_b_mul_r, SMEMatrixTileB>;
4115
+ def NAME # _H_H_PSEUDO : sme2_movez_to_tile_pseudo <NAME # _H_H, sme_elm_idx0_1, uimm2s2range, ZZ_h_mul_r, SMEMatrixTileH>;
4116
+ def NAME # _H_S_PSEUDO : sme2_movez_to_tile_pseudo <NAME # _H_S, sme_elm_idx0_3, uimm1s2range, ZZ_s_mul_r, SMEMatrixTileS>;
4117
+ def NAME # _H_D_PSEUDO : sme2_movez_to_tile_pseudo <NAME # _H_D, sme_elm_idx0_7, uimm0s2range, ZZ_d_mul_r, SMEMatrixTileD>;
4112
4118
4113
// _V pseudos: same operands as the _H group, bound to the _V instruction names.
- def NAME # _V_B_PSEUDO : sme2_movez_to_tile_multi_pseudo <NAME # _V_B, sme_elm_idx0_0, uimm3s2range, ZZ_b_mul_r, SMEMatrixTileB>;
4114
- def NAME # _V_H_PSEUDO : sme2_movez_to_tile_multi_pseudo <NAME # _V_H, sme_elm_idx0_1, uimm2s2range, ZZ_h_mul_r, SMEMatrixTileH>;
4115
- def NAME # _V_S_PSEUDO : sme2_movez_to_tile_multi_pseudo <NAME # _V_S, sme_elm_idx0_3, uimm1s2range, ZZ_s_mul_r, SMEMatrixTileS>;
4116
- def NAME # _V_D_PSEUDO : sme2_movez_to_tile_multi_pseudo <NAME # _V_D, sme_elm_idx0_7, uimm0s2range, ZZ_d_mul_r, SMEMatrixTileD>;
4119
+ def NAME # _V_B_PSEUDO : sme2_movez_to_tile_pseudo <NAME # _V_B, sme_elm_idx0_0, uimm3s2range, ZZ_b_mul_r, SMEMatrixTileB>;
4120
+ def NAME # _V_H_PSEUDO : sme2_movez_to_tile_pseudo <NAME # _V_H, sme_elm_idx0_1, uimm2s2range, ZZ_h_mul_r, SMEMatrixTileH>;
4121
+ def NAME # _V_S_PSEUDO : sme2_movez_to_tile_pseudo <NAME # _V_S, sme_elm_idx0_3, uimm1s2range, ZZ_s_mul_r, SMEMatrixTileS>;
4122
+ def NAME # _V_D_PSEUDO : sme2_movez_to_tile_pseudo <NAME # _V_D, sme_elm_idx0_7, uimm0s2range, ZZ_d_mul_r, SMEMatrixTileD>;
4117
4123
}
4118
4124
4119
4125
class sme2_mova_tile_to_vec_vg4_multi_base<bits<2> sz, bit v, bits<6> op,
@@ -4246,15 +4252,15 @@ multiclass sme2p1_movaz_tile_to_vec_vg4<string mnemonic>{
4246
4252
defm _H : sme2_mova_tile_to_vec_vg4_multi_base<0b0, 0b110, mnemonic>;
4247
4253
defm _V : sme2_mova_tile_to_vec_vg4_multi_base<0b1, 0b110, mnemonic>;
4248
4254
4249
- def NAME # _H_B_PSEUDO : sme2_movez_to_tile_multi_pseudo <NAME # _H_B, sme_elm_idx0_0, uimm2s4range, ZZZZ_b_mul_r, SMEMatrixTileB>;
4250
- def NAME # _H_H_PSEUDO : sme2_movez_to_tile_multi_pseudo <NAME # _H_H, sme_elm_idx0_1, uimm1s4range, ZZZZ_h_mul_r, SMEMatrixTileH>;
4251
- def NAME # _H_S_PSEUDO : sme2_movez_to_tile_multi_pseudo <NAME # _H_S, sme_elm_idx0_3, uimm0s4range, ZZZZ_s_mul_r, SMEMatrixTileS>;
4252
- def NAME # _H_D_PSEUDO : sme2_movez_to_tile_multi_pseudo <NAME # _H_D, sme_elm_idx0_7, uimm0s4range, ZZZZ_d_mul_r, SMEMatrixTileD>;
4255
+ def NAME # _H_B_PSEUDO : sme2_movez_to_tile_pseudo <NAME # _H_B, sme_elm_idx0_0, uimm2s4range, ZZZZ_b_mul_r, SMEMatrixTileB>;
4256
+ def NAME # _H_H_PSEUDO : sme2_movez_to_tile_pseudo <NAME # _H_H, sme_elm_idx0_1, uimm1s4range, ZZZZ_h_mul_r, SMEMatrixTileH>;
4257
+ def NAME # _H_S_PSEUDO : sme2_movez_to_tile_pseudo <NAME # _H_S, sme_elm_idx0_3, uimm0s4range, ZZZZ_s_mul_r, SMEMatrixTileS>;
4258
+ def NAME # _H_D_PSEUDO : sme2_movez_to_tile_pseudo <NAME # _H_D, sme_elm_idx0_7, uimm0s4range, ZZZZ_d_mul_r, SMEMatrixTileD>;
4253
4259
4254
- def NAME # _V_B_PSEUDO : sme2_movez_to_tile_multi_pseudo <NAME # _V_B, sme_elm_idx0_0, uimm2s4range, ZZZZ_b_mul_r, SMEMatrixTileB>;
4255
- def NAME # _V_H_PSEUDO : sme2_movez_to_tile_multi_pseudo <NAME # _V_H, sme_elm_idx0_1, uimm1s4range, ZZZZ_h_mul_r, SMEMatrixTileH>;
4256
- def NAME # _V_S_PSEUDO : sme2_movez_to_tile_multi_pseudo <NAME # _V_S, sme_elm_idx0_3, uimm0s4range, ZZZZ_s_mul_r, SMEMatrixTileS>;
4257
- def NAME # _V_D_PSEUDO : sme2_movez_to_tile_multi_pseudo <NAME # _V_D, sme_elm_idx0_7, uimm0s4range, ZZZZ_d_mul_r, SMEMatrixTileD>;
4260
+ def NAME # _V_B_PSEUDO : sme2_movez_to_tile_pseudo <NAME # _V_B, sme_elm_idx0_0, uimm2s4range, ZZZZ_b_mul_r, SMEMatrixTileB>;
4261
+ def NAME # _V_H_PSEUDO : sme2_movez_to_tile_pseudo <NAME # _V_H, sme_elm_idx0_1, uimm1s4range, ZZZZ_h_mul_r, SMEMatrixTileH>;
4262
+ def NAME # _V_S_PSEUDO : sme2_movez_to_tile_pseudo <NAME # _V_S, sme_elm_idx0_3, uimm0s4range, ZZZZ_s_mul_r, SMEMatrixTileS>;
4263
+ def NAME # _V_D_PSEUDO : sme2_movez_to_tile_pseudo <NAME # _V_D, sme_elm_idx0_7, uimm0s4range, ZZZZ_d_mul_r, SMEMatrixTileD>;
4258
4264
}
4259
4265
4260
4266
@@ -4788,14 +4794,14 @@ class sme2p1_movaz_tile_to_vec_base<bits<2> sz, bit q, bit v, ZPRRegOp vector_ty
4788
4794
multiclass sme2p1_movaz_tile_to_vec_base<bit v, string mnemonic> {
4789
4795
def _B : sme2p1_movaz_tile_to_vec_base<0b00, 0b0, v, ZPR8,
4790
4796
!if(v, TileVectorOpV8, TileVectorOpH8),
4791
- sme_elm_idx0_15, mnemonic> {
4797
+ sme_elm_idx0_15, mnemonic>, SMEPseudo2Instr<NAME # _B, 1> {
4792
4798
bits<4> imm;
4793
4799
let Inst{8-5} = imm;
4794
4800
}
4795
4801
4796
4802
def _H : sme2p1_movaz_tile_to_vec_base<0b01, 0b0, v, ZPR16,
4797
4803
!if(v, TileVectorOpV16, TileVectorOpH16),
4798
- sme_elm_idx0_7, mnemonic> {
4804
+ sme_elm_idx0_7, mnemonic>, SMEPseudo2Instr<NAME # _H, 1> {
4799
4805
bits<1> ZAn;
4800
4806
bits<3> imm;
4801
4807
let Inst{8} = ZAn;
@@ -4804,7 +4810,7 @@ multiclass sme2p1_movaz_tile_to_vec_base<bit v, string mnemonic> {
4804
4810
4805
4811
def _S : sme2p1_movaz_tile_to_vec_base<0b10, 0b0, v, ZPR32,
4806
4812
!if(v, TileVectorOpV32, TileVectorOpH32),
4807
- sme_elm_idx0_3, mnemonic> {
4813
+ sme_elm_idx0_3, mnemonic>, SMEPseudo2Instr<NAME # _S, 1> {
4808
4814
bits<2> ZAn;
4809
4815
bits<2> imm;
4810
4816
let Inst{8-7} = ZAn;
@@ -4813,7 +4819,7 @@ multiclass sme2p1_movaz_tile_to_vec_base<bit v, string mnemonic> {
4813
4819
4814
4820
def _D : sme2p1_movaz_tile_to_vec_base<0b11, 0b0, v, ZPR64,
4815
4821
!if(v, TileVectorOpV64, TileVectorOpH64),
4816
- sme_elm_idx0_1, mnemonic> {
4822
+ sme_elm_idx0_1, mnemonic>, SMEPseudo2Instr<NAME # _D, 1> {
4817
4823
bits<3> ZAn;
4818
4824
bits<1> imm;
4819
4825
let Inst{8-6} = ZAn;
@@ -4822,15 +4828,66 @@ multiclass sme2p1_movaz_tile_to_vec_base<bit v, string mnemonic> {
4822
4828
4823
4829
def _Q : sme2p1_movaz_tile_to_vec_base<0b11, 0b1, v, ZPR128,
4824
4830
!if(v, TileVectorOpV128, TileVectorOpH128),
4825
- sme_elm_idx0_0, mnemonic> {
4831
+ sme_elm_idx0_0, mnemonic>, SMEPseudo2Instr<NAME # _Q, 1> {
4826
4832
bits<4> ZAn;
4827
4833
let Inst{8-5} = ZAn;
4828
4834
}
4829
4835
}
4830
4836
4831
// Single-vector tile-to-vector multiclass. This change extends the header
// from one parameter (mnemonic) to five by adding four SDPatternOperator
// parameters: intrinsic_horiz / intrinsic_vert are used by the B/H/S/D
// patterns, and intrinsic_horiz_q / intrinsic_vert_q by the Q patterns.
// The body then adds one *_PSEUDO record per direction and element size, and
// one SME2_Tile_Movaz_Pat per (direction, element size, result vector type).
// NOTE(review): existing instantiations of this multiclass must supply the
// four new arguments; those call sites are not visible in this excerpt.
- multiclass sme2p1_movaz_tile_to_vec<string mnemonic>{
4837
+ multiclass sme2p1_movaz_tile_to_vec<string mnemonic, SDPatternOperator intrinsic_horiz, SDPatternOperator intrinsic_vert,
4838
+ SDPatternOperator intrinsic_horiz_q, SDPatternOperator intrinsic_vert_q>{
4832
4839
// _H passes v = 0b0 and _V passes v = 0b1 to sme2p1_movaz_tile_to_vec_base.
defm _H : sme2p1_movaz_tile_to_vec_base<0b0, mnemonic>;
4833
4840
defm _V : sme2p1_movaz_tile_to_vec_base<0b1, mnemonic>;
4841
+
4842
// _H pseudos. Arguments: instruction name, tile immediate operand, slice
// offset operand, result register class, ZA tile type. Going from B to Q the
// tile operand goes from sme_elm_idx0_0 to sme_elm_idx0_15 while the offset
// operand goes from sme_elm_idx0_15 to sme_elm_idx0_0.
+ def NAME # _H_B_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_B, sme_elm_idx0_0, sme_elm_idx0_15, ZPR8, SMEMatrixTileB>;
4843
+ def NAME # _H_H_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_H, sme_elm_idx0_1, sme_elm_idx0_7, ZPR16, SMEMatrixTileH>;
4844
+ def NAME # _H_S_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_S, sme_elm_idx0_3, sme_elm_idx0_3, ZPR32, SMEMatrixTileS>;
4845
+ def NAME # _H_D_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_D, sme_elm_idx0_7, sme_elm_idx0_1, ZPR64, SMEMatrixTileD>;
4846
+ def NAME # _H_Q_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_Q, sme_elm_idx0_15, sme_elm_idx0_0, ZPR128, SMEMatrixTileQ>;
4847
+
4848
// _V pseudos: same operands as the _H group, bound to the _V instruction names.
+ def NAME # _V_B_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_B, sme_elm_idx0_0, sme_elm_idx0_15, ZPR8, SMEMatrixTileB>;
4849
+ def NAME # _V_H_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_H, sme_elm_idx0_1, sme_elm_idx0_7, ZPR16, SMEMatrixTileH>;
4850
+ def NAME # _V_S_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_S, sme_elm_idx0_3, sme_elm_idx0_3, ZPR32, SMEMatrixTileS>;
4851
+ def NAME # _V_D_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_D, sme_elm_idx0_7, sme_elm_idx0_1, ZPR64, SMEMatrixTileD>;
4852
+ def NAME # _V_Q_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_Q, sme_elm_idx0_15, sme_elm_idx0_0, ZPR128, SMEMatrixTileQ>;
4853
+
4854
// _H patterns for intrinsic_horiz: the result vector type picks the pseudo
// (nxv16i8 -> _H_B; nxv8i16/nxv8bf16/nxv8f16 -> _H_H; nxv4i32/nxv4f32 -> _H_S;
// nxv2i64/nxv2f64 -> _H_D). SME2_Tile_Movaz_Pat appends _PSEUDO to the name.
+ def : SME2_Tile_Movaz_Pat<NAME # _H_B, intrinsic_horiz, nxv16i8,sme_elm_idx0_0, sme_elm_idx0_15, tileslice8>;
4855
+ def : SME2_Tile_Movaz_Pat<NAME # _H_H, intrinsic_horiz, nxv8i16, sme_elm_idx0_1, sme_elm_idx0_7, tileslice16>;
4856
+ def : SME2_Tile_Movaz_Pat<NAME # _H_S, intrinsic_horiz, nxv4i32, sme_elm_idx0_3, sme_elm_idx0_3, tileslice32>;
4857
+ def : SME2_Tile_Movaz_Pat<NAME # _H_D, intrinsic_horiz, nxv2i64, sme_elm_idx0_7, sme_elm_idx0_1, tileslice64>;
4858
+ def : SME2_Tile_Movaz_Pat<NAME # _H_H, intrinsic_horiz, nxv8bf16, sme_elm_idx0_1, sme_elm_idx0_7, tileslice16>;
4859
+ def : SME2_Tile_Movaz_Pat<NAME # _H_H, intrinsic_horiz, nxv8f16, sme_elm_idx0_1, sme_elm_idx0_7, tileslice16>;
4860
+ def : SME2_Tile_Movaz_Pat<NAME # _H_S, intrinsic_horiz, nxv4f32, sme_elm_idx0_3, sme_elm_idx0_3, tileslice32>;
4861
+ def : SME2_Tile_Movaz_Pat<NAME # _H_D, intrinsic_horiz, nxv2f64, sme_elm_idx0_7, sme_elm_idx0_1, tileslice64>;
4862
+
4863
// _V patterns for intrinsic_vert: same type-to-pseudo mapping as the _H group.
+ def : SME2_Tile_Movaz_Pat<NAME # _V_B, intrinsic_vert, nxv16i8, sme_elm_idx0_0, sme_elm_idx0_15, tileslice8>;
4864
+ def : SME2_Tile_Movaz_Pat<NAME # _V_H, intrinsic_vert, nxv8i16, sme_elm_idx0_1, sme_elm_idx0_7, tileslice16>;
4865
+ def : SME2_Tile_Movaz_Pat<NAME # _V_S, intrinsic_vert, nxv4i32, sme_elm_idx0_3, sme_elm_idx0_3, tileslice32>;
4866
+ def : SME2_Tile_Movaz_Pat<NAME # _V_D, intrinsic_vert, nxv2i64, sme_elm_idx0_7, sme_elm_idx0_1, tileslice64>;
4867
+ def : SME2_Tile_Movaz_Pat<NAME # _V_H, intrinsic_vert, nxv8bf16, sme_elm_idx0_1, sme_elm_idx0_7, tileslice16>;
4868
+ def : SME2_Tile_Movaz_Pat<NAME # _V_H, intrinsic_vert, nxv8f16, sme_elm_idx0_1, sme_elm_idx0_7, tileslice16>;
4869
+ def : SME2_Tile_Movaz_Pat<NAME # _V_S, intrinsic_vert, nxv4f32, sme_elm_idx0_3, sme_elm_idx0_3, tileslice32>;
4870
+ def : SME2_Tile_Movaz_Pat<NAME # _V_D, intrinsic_vert, nxv2f64, sme_elm_idx0_7, sme_elm_idx0_1, tileslice64>;
4871
+
4872
+ // _H_Q
// All eight result vector types map to the single _H_Q pseudo via
// intrinsic_horiz_q and tileslice128.
4873
+ def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv16i8, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
4874
+ def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv8i16, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
4875
+ def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv4i32, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
4876
+ def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv2i64, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
4877
+ def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv8bf16, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
4878
+ def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv8f16, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
4879
+ def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv4f32, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
4880
+ def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv2f64, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
4881
+
4882
+ // _V_Q
// Same as the _H_Q group, using intrinsic_vert_q and the _V_Q pseudo.
4883
+ def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv16i8, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
4884
+ def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv8i16, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
4885
+ def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv4i32, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
4886
+ def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv2i64, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
4887
+ def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv8bf16, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
4888
+ def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv8f16, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
4889
+ def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv4f32, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
4890
+ def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv2f64, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
4834
4891
}
4835
4892
4836
4893
//===----------------------------------------------------------------------===//
0 commit comments