Skip to content

Commit 560f36d

Browse files
[CLANG][LLVM][AArch64] Add SME2.1 intrinsics for MOVAZ array to vector
According to the specification in ARM-software/acle#309, this adds the intrinsics for "Move and zero multiple ZA single-vector groups to vector registers":

// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
// _za64_s64, _za64_u64 and _za64_f64
svint8x2_t svreadz_za8_s8_vg1x2(uint32_t slice) __arm_streaming __arm_inout("za");

// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
// _za64_s64, _za64_u64 and _za64_f64
svint8x4_t svreadz_za8_s8_vg1x4(uint32_t slice) __arm_streaming __arm_inout("za");
1 parent 66453f2 commit 560f36d

File tree

9 files changed

+1082
-6
lines changed

9 files changed

+1082
-6
lines changed

clang/include/clang/Basic/arm_sme.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -674,3 +674,15 @@ let TargetGuard = "sme2" in {
674674
def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
675675
def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
676676
}
677+
678+
// Defines the svreadz_za{8,16,32,64}_{d}_vg1x<vg_num> builtins.
// For a given vg_num this emits four SInst records, NAME_B, NAME_H, NAME_S
// and NAME_D, one per ZA element width. Each record:
//   - uses the prototype string vg_num # "m",
//   - maps to the LLVM intrinsic "aarch64_sme_readz_x" # vg_num,
//   - carries the IsStreaming and IsInOutZA flags,
//   - is guarded by the "sme2p1" target feature.
// The third SInst argument ("cUc", "sUsbh", "iUif", "lUld") is the type-spec
// string; presumably it enumerates the element types substituted for {d}.
multiclass ZAReadz<string vg_num>{
679+
let TargetGuard = "sme2p1" in {
680+
def NAME # _B : SInst<"svreadz_za8_{d}_vg1x" # vg_num, vg_num # "m", "cUc", MergeNone, "aarch64_sme_readz_x" # vg_num, [IsStreaming, IsInOutZA]>;
681+
def NAME # _H : SInst<"svreadz_za16_{d}_vg1x" # vg_num, vg_num # "m", "sUsbh", MergeNone, "aarch64_sme_readz_x" # vg_num, [IsStreaming, IsInOutZA]>;
682+
def NAME # _S : SInst<"svreadz_za32_{d}_vg1x" # vg_num, vg_num # "m", "iUif", MergeNone, "aarch64_sme_readz_x" # vg_num, [IsStreaming, IsInOutZA]>;
683+
def NAME # _D : SInst<"svreadz_za64_{d}_vg1x" # vg_num, vg_num # "m", "lUld", MergeNone, "aarch64_sme_readz_x" # vg_num, [IsStreaming, IsInOutZA]>;
684+
}
685+
}
686+
687+
defm SVREADZ_VG2 : ZAReadz<"2">;
688+
defm SVREADZ_VG4 : ZAReadz<"4">;

clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c

Lines changed: 711 additions & 0 deletions
Large diffs are not rendered by default.

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2839,6 +2839,17 @@ let TargetPrefix = "aarch64" in {
28392839
def int_aarch64_sme_writeq_horiz : SME_VectorToTile_Intrinsic;
28402840
def int_aarch64_sme_writeq_vert : SME_VectorToTile_Intrinsic;
28412841

2842+
2843+
// Two-vector "read and zero" intrinsic: takes a single i32 operand and
// returns two vectors of the same (overloaded) vector type.
// Declared IntrNoMem + IntrHasSideEffects.
def int_aarch64_sme_readz_x2
2844+
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
2845+
[llvm_i32_ty],
2846+
[IntrNoMem, IntrHasSideEffects]>;
2847+
2848+
// Four-vector form of the intrinsic above: same single i32 operand, four
// results that all share the overloaded vector type of result 0.
def int_aarch64_sme_readz_x4
2849+
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
2850+
[llvm_i32_ty],
2851+
[IntrNoMem, IntrHasSideEffects]>;
2852+
28422853
def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
28432854

28442855
class SME_OuterProduct_Intrinsic
@@ -3646,4 +3657,4 @@ def int_aarch64_sve_pmov_to_pred_lane_zero : SVE2_1VectorArg_Pred_Intrinsic;
36463657

36473658
def int_aarch64_sve_pmov_to_vector_lane_merging : SVE2_Pred_1VectorArgIndexed_Intrinsic;
36483659

3649-
def int_aarch64_sve_pmov_to_vector_lane_zeroing : SVE2_Pred_1VectorArg_Intrinsic;
3660+
def int_aarch64_sve_pmov_to_vector_lane_zeroing : SVE2_Pred_1VectorArg_Intrinsic;

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -392,7 +392,9 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
392392
template <unsigned MaxIdx, unsigned Scale>
393393
void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
394394
unsigned Op);
395-
395+
/// Selects a multi-vector "move and zero" node \p N into machine opcode
/// \p Op, producing \p NumVecs vector results. \p MaxIdx and \p Scale are
/// forwarded to SelectSMETileSlice when matching the slice operand.
template <unsigned MaxIdx, unsigned Scale>
396+
void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs, unsigned BaseReg,
397+
unsigned Op);
396398
bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
397399
/// SVE Reg+Imm addressing mode.
398400
template <int64_t Min, int64_t Max>
@@ -1985,6 +1987,35 @@ void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
19851987
CurDAG->RemoveDeadNode(N);
19861988
}
19871989

1990+
/// Replaces node \p N with a machine node of opcode \p Op that yields one
/// untyped tuple result plus a chain, then rewires the users of \p N.
///
/// \tparam MaxIdx forwarded to SelectSMETileSlice.
/// \tparam Scale  forwarded to SelectSMETileSlice.
/// \param N       node being selected; operand 0 is used as the chain and
///                operand 2 as the slice expression.
/// \param NumVecs number of vector results of \p N; the chain result of
///                \p N is expected at index \p NumVecs.
/// \param BaseReg not referenced in this function body.
/// \param Op      machine opcode of the node to create.
///
/// If SelectSMETileSlice fails, the function returns without touching \p N.
template <unsigned MaxIdx, unsigned Scale>
1991+
void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
1992+
unsigned BaseReg,
1993+
unsigned Op) {
1994+
1995+
// Operand 2 of N carries the slice expression.
SDValue SliceBase = N->getOperand(2);
1996+
1997+
SDValue Base, Offset;
1998+
// Split the slice into a base and an offset; give up if it does not match.
if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
1999+
return;
2000+
// The correct ZA tile number is computed on the machine instruction.
2001+
// See EmitTileMovaz.
2002+
// The DAG cannot select the ZA tile as an output register alongside the ZReg.
2003+
SDLoc DL(N);
2004+
SDValue Ops[] = {Base, Offset, /*Chain*/ N->getOperand(0)};
2005+
// Result 0 is the untyped register tuple, result 1 is the chain.
SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2006+
2007+
EVT VT = N->getValueType(0);
2008+
// Redirect each vector result of N to sub-register zsub0 + I of the tuple.
for (unsigned I = 0; I < NumVecs; ++I)
2009+
ReplaceUses(SDValue(N, I),
2010+
CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2011+
SDValue(Mov, 0)));
2012+
2013+
// Copy chain: N's chain result follows its NumVecs vector results.
2014+
unsigned ChainIdx = NumVecs;
2015+
ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2016+
CurDAG->RemoveDeadNode(N);
2017+
}
2018+
19882019
void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
19892020
unsigned NumOutVecs,
19902021
bool IsTupleInput,
@@ -5175,6 +5206,48 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
51755206
AArch64::MOVA_VG4_4ZMXI);
51765207
return;
51775208
}
5209+
case Intrinsic::aarch64_sme_readz_x2: {
5210+
if (VT == MVT::nxv16i8) {
5211+
SelectMultiVectorMoveZ<7, 1>(Node, 2, AArch64::ZA,
5212+
AArch64::MOVAZ_VG2_2ZMXI_B_PSEUDO);
5213+
return;
5214+
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5215+
VT == MVT::nxv8bf16) {
5216+
SelectMultiVectorMoveZ<7, 1>(Node, 2, AArch64::ZA,
5217+
AArch64::MOVAZ_VG2_2ZMXI_H_PSEUDO);
5218+
return;
5219+
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5220+
SelectMultiVectorMoveZ<7, 1>(Node, 2, AArch64::ZA,
5221+
AArch64::MOVAZ_VG2_2ZMXI_S_PSEUDO);
5222+
return;
5223+
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5224+
SelectMultiVectorMoveZ<7, 1>(Node, 2, AArch64::ZA,
5225+
AArch64::MOVAZ_VG2_2ZMXI_D_PSEUDO);
5226+
return;
5227+
}
5228+
break;
5229+
}
5230+
case Intrinsic::aarch64_sme_readz_x4: {
5231+
if (VT == MVT::nxv16i8) {
5232+
SelectMultiVectorMoveZ<7, 1>(Node, 4, AArch64::ZA,
5233+
AArch64::MOVAZ_VG4_4ZMXI_B_PSEUDO);
5234+
return;
5235+
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5236+
VT == MVT::nxv8bf16) {
5237+
SelectMultiVectorMoveZ<7, 1>(Node, 4, AArch64::ZA,
5238+
AArch64::MOVAZ_VG4_4ZMXI_H_PSEUDO);
5239+
return;
5240+
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5241+
SelectMultiVectorMoveZ<7, 1>(Node, 4, AArch64::ZA,
5242+
AArch64::MOVAZ_VG4_4ZMXI_S_PSEUDO);
5243+
return;
5244+
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5245+
SelectMultiVectorMoveZ<7, 1>(Node, 4, AArch64::ZA,
5246+
AArch64::MOVAZ_VG4_4ZMXI_D_PSEUDO);
5247+
return;
5248+
}
5249+
break;
5250+
}
51785251
case Intrinsic::swift_async_context_addr: {
51795252
SDLoc DL(Node);
51805253
SDValue Chain = Node->getOperand(0);

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2832,6 +2832,22 @@ AArch64TargetLowering::EmitTileLoad(unsigned Opc, unsigned BaseReg,
28322832
return BB;
28332833
}
28342834

2835+
/// Expands the pseudo instruction \p MI into an instruction with opcode
/// \p Opc, inserted in \p BB immediately before \p MI.
///
/// The new instruction's operands are, in order: operand 0 of \p MI,
/// \p BaseReg as a definition, \p BaseReg as a use, then operands 1 and 2 of
/// \p MI. \p MI is erased afterwards.
///
/// \returns \p BB (no new basic block is created).
MachineBasicBlock *
2836+
AArch64TargetLowering::EmitTileMovaz(unsigned Opc, unsigned BaseReg,
2837+
MachineInstr &MI,
2838+
MachineBasicBlock *BB) const {
2839+
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2840+
MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
2841+
2842+
MIB.add(MI.getOperand(0)); // ZReg
2843+
MIB.addReg(BaseReg, RegState::Define); // add as output
2844+
MIB.addReg(BaseReg); // add as input
2845+
MIB.add(MI.getOperand(1)); // slice index register
2846+
MIB.add(MI.getOperand(2)); // slice index offset
2847+
MI.eraseFromParent(); // The pseudo is gone now.
2848+
return BB;
2849+
}
2850+
28352851
MachineBasicBlock *
28362852
AArch64TargetLowering::EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const {
28372853
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
@@ -2992,6 +3008,16 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
29923008
return EmitZero(MI, BB);
29933009
case AArch64::ZERO_T_PSEUDO:
29943010
return EmitZTInstr(MI, BB, AArch64::ZERO_T, /*Op0IsDef=*/true);
3011+
case AArch64::MOVAZ_VG2_2ZMXI_B_PSEUDO:
3012+
case AArch64::MOVAZ_VG2_2ZMXI_H_PSEUDO:
3013+
case AArch64::MOVAZ_VG2_2ZMXI_S_PSEUDO:
3014+
case AArch64::MOVAZ_VG2_2ZMXI_D_PSEUDO:
3015+
return EmitTileMovaz(AArch64::MOVAZ_VG2_2ZMXI, AArch64::ZA, MI, BB);
3016+
case AArch64::MOVAZ_VG4_4ZMXI_B_PSEUDO:
3017+
case AArch64::MOVAZ_VG4_4ZMXI_H_PSEUDO:
3018+
case AArch64::MOVAZ_VG4_4ZMXI_S_PSEUDO:
3019+
case AArch64::MOVAZ_VG4_4ZMXI_D_PSEUDO:
3020+
return EmitTileMovaz(AArch64::MOVAZ_VG4_4ZMXI, AArch64::ZA, MI, BB);
29953021
}
29963022
}
29973023

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -635,6 +635,9 @@ class AArch64TargetLowering : public TargetLowering {
635635
MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg,
636636
MachineInstr &MI,
637637
MachineBasicBlock *BB) const;
638+
/// Custom-inserter helper: replaces pseudo \p MI with an \p Opc instruction
/// that both defines and uses \p BaseReg, and returns \p BB.
MachineBasicBlock *EmitTileMovaz(unsigned Opc, unsigned BaseReg,
639+
MachineInstr &MI,
640+
MachineBasicBlock *BB) const;
638641
MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const;
639642
MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg,
640643
MachineInstr &MI, MachineBasicBlock *BB,

llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -780,8 +780,8 @@ let Predicates = [HasSME2p1] in {
780780
defm MOVAZ_ZMI : sme2p1_movaz_tile_to_vec<"movaz">;
781781
defm MOVAZ_2ZMI : sme2p1_movaz_tile_to_vec_vg2<"movaz">;
782782
defm MOVAZ_4ZMI : sme2p1_movaz_tile_to_vec_vg4<"movaz">;
783-
defm MOVAZ_VG2_2ZM : sme2_movaz_array_to_vec_vg2_multi<"movaz">;
784-
defm MOVAZ_VG4_4ZM : sme2_mova_array_to_vec_vg4_multi<0b1100, "movaz">;
783+
defm MOVAZ_VG2_2ZMXI : sme2_movaz_array_to_vec_vg2_multi<"movaz">;
784+
defm MOVAZ_VG4_4ZMXI : sme2_movaz_array_to_vec_vg4_multi<"movaz">;
785785

786786
defm ZERO_MXI : sme2p1_zero_matrix<"zero">;
787787

llvm/lib/Target/AArch64/SMEInstrFormats.td

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,13 @@ class sme2_move_to_tile_pseudo<string name, Operand tile_imm, Operand imm_ty, Re
104104
let usesCustomInserter = 1;
105105
}
106106

107+
// Pattern-less pseudo instruction with one multi-vector output ($Zd) and two
// inputs: a slice index register ($Rv, MatrixIndexGPR32Op8_11) and an
// immediate offset ($imm3, of type index_ty).
// SMEMatrixType is set from za_flag, and usesCustomInserter = 1 routes the
// pseudo through the target's custom inserter hook.
// NOTE(review): despite "array_to_tile" in the name, the operands describe a
// move into vector registers -- confirm whether the name is intentional.
class sme2_movaz_array_to_tile_pseudo<string name, Operand index_ty, RegisterOperand multi_vector_ty,
108+
SMEMatrixTypeEnum za_flag>
109+
: SMEPseudo2Instr<name, 0>,
110+
Pseudo<(outs multi_vector_ty:$Zd), (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3), []> {
111+
let SMEMatrixType = za_flag;
112+
let usesCustomInserter = 1;
113+
}
107114
//===----------------------------------------------------------------------===//
108115
// SME pattern match helpers.
109116
//===----------------------------------------------------------------------===//
@@ -4263,8 +4270,11 @@ multiclass sme2_mova_array_to_vec_vg2_multi<bits<3> opc, string mnemonic> {
42634270
}
42644271

42654272
// MOVAZ array-to-vector, two registers.
// The added lines instantiate sme2_mova_array_to_vec_vg2_multi with
// opc = 0b010 under NAME, and define four pseudos (NAME_B_PSEUDO,
// NAME_H_PSEUDO, NAME_S_PSEUDO, NAME_D_PSEUDO), one per element size. Each
// pseudo uses an sme_elm_idx0_7 offset, a ZZ_*_mul_r register pair and
// SMEMatrixArray.
multiclass sme2_movaz_array_to_vec_vg2_multi<string mnemonic> {
4266-
def NAME : sme2_mova_array_to_vec_vg2_multi<0b010, mnemonic>;
4267-
def NAME # _PSEUDO : sme2_movaz_to_tile_multi_pseudo
4273+
defm NAME : sme2_mova_array_to_vec_vg2_multi<0b010, mnemonic>;
4274+
def NAME # _B_PSEUDO : sme2_movaz_array_to_tile_pseudo<NAME, sme_elm_idx0_7, ZZ_b_mul_r, SMEMatrixArray>;
4275+
def NAME # _H_PSEUDO : sme2_movaz_array_to_tile_pseudo<NAME, sme_elm_idx0_7, ZZ_h_mul_r, SMEMatrixArray>;
4276+
def NAME # _S_PSEUDO : sme2_movaz_array_to_tile_pseudo<NAME, sme_elm_idx0_7, ZZ_s_mul_r, SMEMatrixArray>;
4277+
def NAME # _D_PSEUDO : sme2_movaz_array_to_tile_pseudo<NAME, sme_elm_idx0_7, ZZ_d_mul_r, SMEMatrixArray>;
42684278
}
42694279

42704280
// move array to vector, four registers
@@ -4342,6 +4352,14 @@ multiclass sme2_mova_array_to_vec_vg4_multi<bits<4> opc, string mnemonic> {
43424352
}
43434353
}
43444354

4355+
// MOVAZ array-to-vector, four registers.
// Instantiates sme2_mova_array_to_vec_vg4_multi with opc = 0b1100 under NAME,
// and defines four pseudos (NAME_B_PSEUDO, NAME_H_PSEUDO, NAME_S_PSEUDO,
// NAME_D_PSEUDO), one per element size. Each pseudo uses an sme_elm_idx0_7
// offset, a ZZZZ_*_mul_r register quad and SMEMatrixArray.
multiclass sme2_movaz_array_to_vec_vg4_multi<string mnemonic> {
4356+
defm NAME : sme2_mova_array_to_vec_vg4_multi<0b1100, mnemonic>;
4357+
def NAME # _B_PSEUDO : sme2_movaz_array_to_tile_pseudo<NAME, sme_elm_idx0_7, ZZZZ_b_mul_r, SMEMatrixArray>;
4358+
def NAME # _H_PSEUDO : sme2_movaz_array_to_tile_pseudo<NAME, sme_elm_idx0_7, ZZZZ_h_mul_r, SMEMatrixArray>;
4359+
def NAME # _S_PSEUDO : sme2_movaz_array_to_tile_pseudo<NAME, sme_elm_idx0_7, ZZZZ_s_mul_r, SMEMatrixArray>;
4360+
def NAME # _D_PSEUDO : sme2_movaz_array_to_tile_pseudo<NAME, sme_elm_idx0_7, ZZZZ_d_mul_r, SMEMatrixArray>;
4361+
}
4362+
43454363
//===----------------------------------------------------------------------===//
43464364
// SME2 multi-vec saturating shift right narrow
43474365
class sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u>

0 commit comments

Comments
 (0)