Skip to content

[CLANG][LLVM][AArch64]Add SME2.1 intrinsics for MOVAZ array to vector #88901

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jul 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions clang/include/clang/Basic/arm_sme.td
Original file line number Diff line number Diff line change
Expand Up @@ -805,4 +805,16 @@ defm SVREADZ_ZA16 : ZAReadzSingle<"za16", "sUshb", "aarch64_sme_readz", [ImmChe
defm SVREADZ_ZA32 : ZAReadzSingle<"za32", "iUif", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_3>]>;
defm SVREADZ_ZA64 : ZAReadzSingle<"za64", "lUld", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_7>]>;
defm SVREADZ_ZA128 : ZAReadzSingle<"za128", "csilUcUiUsUlbhfd", "aarch64_sme_readz_q", [ImmCheck<0, ImmCheck0_15>]>;

multiclass ZAReadzArray<string vg_num>{
let SMETargetGuard = "sme2p1" in {
def NAME # _B : SInst<"svreadz_za8_{d}_vg1x" # vg_num, vg_num # "m", "cUc", MergeNone, "aarch64_sme_readz_x" # vg_num, [IsStreaming, IsInOutZA]>;
def NAME # _H : SInst<"svreadz_za16_{d}_vg1x" # vg_num, vg_num # "m", "sUsbh", MergeNone, "aarch64_sme_readz_x" # vg_num, [IsStreaming, IsInOutZA]>;
def NAME # _S : SInst<"svreadz_za32_{d}_vg1x" # vg_num, vg_num # "m", "iUif", MergeNone, "aarch64_sme_readz_x" # vg_num, [IsStreaming, IsInOutZA]>;
def NAME # _D : SInst<"svreadz_za64_{d}_vg1x" # vg_num, vg_num # "m", "lUld", MergeNone, "aarch64_sme_readz_x" # vg_num, [IsStreaming, IsInOutZA]>;
}
}

defm SVREADZ_VG2 : ZAReadzArray<"2">;
defm SVREADZ_VG4 : ZAReadzArray<"4">;
} // let SVETargetGuard = InvalidMode
705 changes: 705 additions & 0 deletions clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c

Large diffs are not rendered by default.

10 changes: 10 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAArch64.td
Original file line number Diff line number Diff line change
Expand Up @@ -2869,6 +2869,16 @@ let TargetPrefix = "aarch64" in {
def int_aarch64_sme_readz_q_horiz : SME_MOVAZ_TileToVector_Intrinsic;
def int_aarch64_sme_readz_q_vert : SME_MOVAZ_TileToVector_Intrinsic;

def int_aarch64_sme_readz_x2
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
[llvm_i32_ty],
[IntrNoMem, IntrHasSideEffects]>;

def int_aarch64_sme_readz_x4
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
[llvm_i32_ty],
[IntrNoMem, IntrHasSideEffects]>;

def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;

class SME_OuterProduct_Intrinsic
Expand Down
32 changes: 26 additions & 6 deletions llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -395,8 +395,9 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
template <unsigned MaxIdx, unsigned Scale>
void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
unsigned Op);
void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs, unsigned Op,
unsigned MaxIdx, unsigned Scale);
void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
unsigned Op, unsigned MaxIdx, unsigned Scale,
unsigned BaseReg = 0);
bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
/// SVE Reg+Imm addressing mode.
template <int64_t Min, int64_t Max>
Expand Down Expand Up @@ -2006,18 +2007,27 @@ void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,

void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
unsigned Op, unsigned MaxIdx,
unsigned Scale) {
unsigned Scale, unsigned BaseReg) {
// Slice can be in different positions
// The array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
// The tile to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
SDValue SliceBase = N->getOperand(2);
if (BaseReg != AArch64::ZA)
SliceBase = N->getOperand(3);

SDValue SliceBase = N->getOperand(3);
SDValue Base, Offset;
if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
return;
// The correct Za tile number is computed in Machine Instruction
// See EmitZAInstr
// DAG cannot select Za tile as an output register with ZReg
SDLoc DL(N);
SDValue Ops[] = {/*TileNum*/ N->getOperand(2), Base, Offset,
/*Chain*/ N->getOperand(0)};
SmallVector<SDValue, 6> Ops;
if (BaseReg != AArch64::ZA )
Ops.push_back(N->getOperand(2));
Ops.push_back(Base);
Ops.push_back(Offset);
Ops.push_back(N->getOperand(0)); //Chain
SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);

EVT VT = N->getValueType(0);
Expand Down Expand Up @@ -5342,6 +5352,16 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
}
break;
}
case Intrinsic::aarch64_sme_readz_x2: {
SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, 7, 1,
AArch64::ZA);
return;
}
case Intrinsic::aarch64_sme_readz_x4: {
SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, 7, 1,
AArch64::ZA);
return;
}
case Intrinsic::swift_async_context_addr: {
SDLoc DL(Node);
SDValue Chain = Node->getOperand(0);
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2995,6 +2995,11 @@ AArch64TargetLowering::EmitZAInstr(unsigned Opc, unsigned BaseReg,
MIB.addReg(BaseReg + MI.getOperand(StartIdx).getImm()); // Input Za Tile
StartIdx++;
} else {
// Avoids all instructions with mnemonic za.<sz>[Reg, Imm,
if (MI.getOperand(0).isReg() && !MI.getOperand(1).isImm()) {
MIB.add(MI.getOperand(StartIdx)); // Output ZPR
++StartIdx;
}
MIB.addReg(BaseReg, RegState::Define).addReg(BaseReg);
}
for (unsigned I = StartIdx; I < MI.getNumOperands(); ++I)
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -818,8 +818,8 @@ defm MOVAZ_ZMI : sme2p1_movaz_tile_to_vec<"movaz", int_aarch64_sme_readz_horiz,
int_aarch64_sme_readz_q_horiz, int_aarch64_sme_readz_q_vert>;
defm MOVAZ_2ZMI : sme2p1_movaz_tile_to_vec_vg2<"movaz">;
defm MOVAZ_4ZMI : sme2p1_movaz_tile_to_vec_vg4<"movaz">;
defm MOVAZ_VG2_2ZM : sme2_mova_array_to_vec_vg2_multi<0b010, "movaz">;
defm MOVAZ_VG4_4ZM : sme2_mova_array_to_vec_vg4_multi<0b1100, "movaz">;
defm MOVAZ_VG2_2ZMXI : sme2_movaz_array_to_vec_vg2_multi<"movaz">;
defm MOVAZ_VG4_4ZMXI : sme2_movaz_array_to_vec_vg4_multi<"movaz">;

defm ZERO_MXI : sme2p1_zero_matrix<"zero">;

Expand Down
23 changes: 21 additions & 2 deletions llvm/lib/Target/AArch64/SMEInstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,15 @@ class sme2_movez_to_tile_pseudo<string name, Operand tile_imm, Operand imm_ty, R
let SMEMatrixType = za_flag;
let usesCustomInserter = 1;
}

class sme2_movaz_array_to_tile_pseudo<string name, Operand index_ty, RegisterOperand multi_vector_ty,
SMEMatrixTypeEnum za_flag>
: SMEPseudo2Instr<name, 0>,
Pseudo<(outs multi_vector_ty:$Zd), (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3), []> {
let SMEMatrixType = za_flag;
let usesCustomInserter = 1;
}

//===----------------------------------------------------------------------===//
// SME pattern match helpers.
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -4287,7 +4296,7 @@ class sme2_mova_array_to_vec_vg24_multi<bits<4>op, RegisterOperand vector_ty,
// move array to vector, two registers.
multiclass sme2_mova_array_to_vec_vg2_multi<bits<3> opc, string mnemonic> {
def NAME : sme2_mova_array_to_vec_vg24_multi<{opc,?}, ZZ_d_mul_r, MatrixOp64,
mnemonic, "vgx2"> {
mnemonic, "vgx2">, SMEPseudo2Instr<NAME, 1>{
bits<4> Zd;
let Inst{4-1} = Zd;
}
Expand Down Expand Up @@ -4359,10 +4368,15 @@ multiclass sme2_mova_array_to_vec_vg2_multi<bits<3> opc, string mnemonic> {
}
}

multiclass sme2_movaz_array_to_vec_vg2_multi<string mnemonic> {
defm NAME : sme2_mova_array_to_vec_vg2_multi<0b010, mnemonic>;
def NAME # _PSEUDO : sme2_movaz_array_to_tile_pseudo<NAME, sme_elm_idx0_7, ZZ_d_mul_r, SMEMatrixArray>;
}

// move array to vector, four registers
multiclass sme2_mova_array_to_vec_vg4_multi<bits<4> opc, string mnemonic> {
def NAME : sme2_mova_array_to_vec_vg24_multi<opc, ZZZZ_d_mul_r, MatrixOp64,
mnemonic, "vgx4"> {
mnemonic, "vgx4">, SMEPseudo2Instr<NAME, 1> {
bits<3> Zd;
let Inst{4-2} = Zd;
}
Expand Down Expand Up @@ -4434,6 +4448,11 @@ multiclass sme2_mova_array_to_vec_vg4_multi<bits<4> opc, string mnemonic> {
}
}

multiclass sme2_movaz_array_to_vec_vg4_multi<string mnemonic> {
defm NAME : sme2_mova_array_to_vec_vg4_multi<0b1100, mnemonic>;
def NAME # _PSEUDO : sme2_movaz_array_to_tile_pseudo<NAME, sme_elm_idx0_7, ZZZZ_d_mul_r, SMEMatrixArray>;
}

//===----------------------------------------------------------------------===//
// SME2 multi-vec saturating shift right narrow
class sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u>
Expand Down
Loading
Loading