Skip to content

Commit 3ef0a71

Browse files
[CLANG][LLVM][AArch64] Add SME2.1 intrinsics for MOVAZ array to vector
According to the specification in ARM-software/acle#309, this adds the intrinsics for "Move and zero multiple ZA single-vector groups to vector registers":

// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
// _za64_s64, _za64_u64 and _za64_f64
svint8x2_t svreadz_za8_s8_vg1x2(uint32_t slice) __arm_streaming __arm_inout("za");

// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
// _za64_s64, _za64_u64 and _za64_f64
svint8x4_t svreadz_za8_s8_vg1x4(uint32_t slice) __arm_streaming __arm_inout("za");
1 parent 4606329 commit 3ef0a71

File tree

9 files changed

+1060
-14
lines changed

9 files changed

+1060
-14
lines changed

clang/include/clang/Basic/arm_sme.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -805,4 +805,16 @@ defm SVREADZ_ZA16 : ZAReadzSingle<"za16", "sUshb", "aarch64_sme_readz", [ImmChe
805805
defm SVREADZ_ZA32 : ZAReadzSingle<"za32", "iUif", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_3>]>;
806806
defm SVREADZ_ZA64 : ZAReadzSingle<"za64", "lUld", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_7>]>;
807807
defm SVREADZ_ZA128 : ZAReadzSingle<"za128", "csilUcUiUsUlbhfd", "aarch64_sme_readz_q", [ImmCheck<0, ImmCheck0_15>]>;
808+
809+
multiclass ZAReadz<string vg_num>{
810+
let TargetGuard = "sme2p1" in {
811+
def NAME # _B : SInst<"svreadz_za8_{d}_vg1x" # vg_num, vg_num # "m", "cUc", MergeNone, "aarch64_sme_readz_x" # vg_num, [IsStreaming, IsInOutZA]>;
812+
def NAME # _H : SInst<"svreadz_za16_{d}_vg1x" # vg_num, vg_num # "m", "sUsbh", MergeNone, "aarch64_sme_readz_x" # vg_num, [IsStreaming, IsInOutZA]>;
813+
def NAME # _S : SInst<"svreadz_za32_{d}_vg1x" # vg_num, vg_num # "m", "iUif", MergeNone, "aarch64_sme_readz_x" # vg_num, [IsStreaming, IsInOutZA]>;
814+
def NAME # _D : SInst<"svreadz_za64_{d}_vg1x" # vg_num, vg_num # "m", "lUld", MergeNone, "aarch64_sme_readz_x" # vg_num, [IsStreaming, IsInOutZA]>;
815+
}
816+
}
817+
818+
defm SVREADZ_VG2 : ZAReadz<"2">;
819+
defm SVREADZ_VG4 : ZAReadz<"4">;
808820
} // let SVETargetGuard = InvalidMode

clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c

Lines changed: 705 additions & 0 deletions
Large diffs are not rendered by default.

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2869,6 +2869,16 @@ let TargetPrefix = "aarch64" in {
28692869
def int_aarch64_sme_readz_q_horiz : SME_MOVAZ_TileToVector_Intrinsic;
28702870
def int_aarch64_sme_readz_q_vert : SME_MOVAZ_TileToVector_Intrinsic;
28712871

2872+
def int_aarch64_sme_readz_x2
2873+
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
2874+
[llvm_i32_ty],
2875+
[IntrNoMem, IntrHasSideEffects]>;
2876+
2877+
def int_aarch64_sme_readz_x4
2878+
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
2879+
[llvm_i32_ty],
2880+
[IntrNoMem, IntrHasSideEffects]>;
2881+
28722882
def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
28732883

28742884
class SME_OuterProduct_Intrinsic

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 59 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -395,8 +395,8 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
395395
template <unsigned MaxIdx, unsigned Scale>
396396
void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
397397
unsigned Op);
398-
void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs, unsigned Op,
399-
unsigned MaxIdx, unsigned Scale);
398+
void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
399+
unsigned Op, unsigned MaxIdx, unsigned Scale, unsigned BaseReg = 0);
400400
bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
401401
/// SVE Reg+Imm addressing mode.
402402
template <int64_t Min, int64_t Max>
@@ -2006,18 +2006,27 @@ void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
20062006

20072007
void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
20082008
unsigned Op, unsigned MaxIdx,
2009-
unsigned Scale) {
2009+
unsigned Scale, unsigned BaseReg) {
2010+
// Slice can be in different positions
2011+
// The array to vector: llvm.aarch64.sme.readz.x<2/4>(slice)
2012+
// The tile to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
2013+
SDValue SliceBase = N->getOperand(2);
2014+
if (BaseReg != AArch64::ZA)
2015+
SliceBase = N->getOperand(3);
20102016

2011-
SDValue SliceBase = N->getOperand(3);
20122017
SDValue Base, Offset;
20132018
if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
20142019
return;
20152020
// The correct Za tile number is computed in Machine Instruction
2016-
// See EmitZAInstr
2021+
// See EmitZAInstr
20172022
// DAG cannot select Za tile as an output register with ZReg
20182023
SDLoc DL(N);
2019-
SDValue Ops[] = {/*TileNum*/ N->getOperand(2), Base, Offset,
2020-
/*Chain*/ N->getOperand(0)};
2024+
SmallVector<SDValue, 6> Ops;
2025+
if (BaseReg != AArch64::ZA )
2026+
Ops.push_back(N->getOperand(2));
2027+
Ops.push_back(Base);
2028+
Ops.push_back(Offset);
2029+
Ops.push_back(N->getOperand(0)); //Chain
20212030
SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
20222031

20232032
EVT VT = N->getValueType(0);
@@ -5286,7 +5295,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
52865295
SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2);
52875296
return;
52885297
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5289-
SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2);
5298+
SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO,0, 2);
52905299
return;
52915300
}
52925301
break;
@@ -5342,6 +5351,48 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
53425351
}
53435352
break;
53445353
}
5354+
case Intrinsic::aarch64_sme_readz_x2: {
5355+
if (VT == MVT::nxv16i8) {
5356+
SelectMultiVectorMoveZ(Node, 2,
5357+
AArch64::MOVAZ_VG2_2ZMXI_B_PSEUDO, 7, 1, AArch64::ZA);
5358+
return;
5359+
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5360+
VT == MVT::nxv8bf16) {
5361+
SelectMultiVectorMoveZ(Node, 2,
5362+
AArch64::MOVAZ_VG2_2ZMXI_H_PSEUDO, 7, 1, AArch64::ZA);
5363+
return;
5364+
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5365+
SelectMultiVectorMoveZ(Node, 2,
5366+
AArch64::MOVAZ_VG2_2ZMXI_S_PSEUDO, 7, 1, AArch64::ZA);
5367+
return;
5368+
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5369+
SelectMultiVectorMoveZ(Node, 2,
5370+
AArch64::MOVAZ_VG2_2ZMXI_D_PSEUDO, 7, 1, AArch64::ZA);
5371+
return;
5372+
}
5373+
break;
5374+
}
5375+
case Intrinsic::aarch64_sme_readz_x4: {
5376+
if (VT == MVT::nxv16i8) {
5377+
SelectMultiVectorMoveZ(Node, 4,
5378+
AArch64::MOVAZ_VG4_4ZMXI_B_PSEUDO, 7, 1, AArch64::ZA);
5379+
return;
5380+
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5381+
VT == MVT::nxv8bf16) {
5382+
SelectMultiVectorMoveZ(Node, 4,
5383+
AArch64::MOVAZ_VG4_4ZMXI_H_PSEUDO, 7, 1, AArch64::ZA);
5384+
return;
5385+
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5386+
SelectMultiVectorMoveZ(Node, 4,
5387+
AArch64::MOVAZ_VG4_4ZMXI_S_PSEUDO, 7, 1, AArch64::ZA);
5388+
return;
5389+
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5390+
SelectMultiVectorMoveZ(Node, 4,
5391+
AArch64::MOVAZ_VG4_4ZMXI_D_PSEUDO, 7, 1, AArch64::ZA);
5392+
return;
5393+
}
5394+
break;
5395+
}
53455396
case Intrinsic::swift_async_context_addr: {
53465397
SDLoc DL(Node);
53475398
SDValue Chain = Node->getOperand(0);

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2943,6 +2943,22 @@ AArch64TargetLowering::EmitTileLoad(unsigned Opc, unsigned BaseReg,
29432943
return BB;
29442944
}
29452945

2946+
MachineBasicBlock *
2947+
AArch64TargetLowering::EmitTileMovaz(unsigned Opc, unsigned BaseReg,
2948+
MachineInstr &MI,
2949+
MachineBasicBlock *BB) const {
2950+
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2951+
MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
2952+
2953+
MIB.add(MI.getOperand(0)); // ZReg
2954+
MIB.addReg(BaseReg, RegState::Define); // add as output
2955+
MIB.addReg(BaseReg); // add as input
2956+
MIB.add(MI.getOperand(1)); // slice index register
2957+
MIB.add(MI.getOperand(2)); // slice index offset
2958+
MI.eraseFromParent(); // The pseudo is gone now.
2959+
return BB;
2960+
}
2961+
29462962
MachineBasicBlock *
29472963
AArch64TargetLowering::EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const {
29482964
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
@@ -3187,6 +3203,16 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
31873203
return EmitZero(MI, BB);
31883204
case AArch64::ZERO_T_PSEUDO:
31893205
return EmitZTInstr(MI, BB, AArch64::ZERO_T, /*Op0IsDef=*/true);
3206+
case AArch64::MOVAZ_VG2_2ZMXI_B_PSEUDO:
3207+
case AArch64::MOVAZ_VG2_2ZMXI_H_PSEUDO:
3208+
case AArch64::MOVAZ_VG2_2ZMXI_S_PSEUDO:
3209+
case AArch64::MOVAZ_VG2_2ZMXI_D_PSEUDO:
3210+
return EmitTileMovaz(AArch64::MOVAZ_VG2_2ZMXI, AArch64::ZA, MI, BB);
3211+
case AArch64::MOVAZ_VG4_4ZMXI_B_PSEUDO:
3212+
case AArch64::MOVAZ_VG4_4ZMXI_H_PSEUDO:
3213+
case AArch64::MOVAZ_VG4_4ZMXI_S_PSEUDO:
3214+
case AArch64::MOVAZ_VG4_4ZMXI_D_PSEUDO:
3215+
return EmitTileMovaz(AArch64::MOVAZ_VG4_4ZMXI, AArch64::ZA, MI, BB);
31903216
}
31913217
}
31923218

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -653,6 +653,9 @@ class AArch64TargetLowering : public TargetLowering {
653653
MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg,
654654
MachineInstr &MI,
655655
MachineBasicBlock *BB) const;
656+
MachineBasicBlock *EmitTileMovaz(unsigned Opc, unsigned BaseReg,
657+
MachineInstr &MI,
658+
MachineBasicBlock *BB) const;
656659
MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const;
657660
MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg,
658661
MachineInstr &MI, MachineBasicBlock *BB) const;

llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -818,8 +818,8 @@ defm MOVAZ_ZMI : sme2p1_movaz_tile_to_vec<"movaz", int_aarch64_sme_readz_horiz,
818818
int_aarch64_sme_readz_q_horiz, int_aarch64_sme_readz_q_vert>;
819819
defm MOVAZ_2ZMI : sme2p1_movaz_tile_to_vec_vg2<"movaz">;
820820
defm MOVAZ_4ZMI : sme2p1_movaz_tile_to_vec_vg4<"movaz">;
821-
defm MOVAZ_VG2_2ZM : sme2_movaz_array_to_vec_vg2_multi<"movaz">;
822-
defm MOVAZ_VG4_4ZM : sme2_mova_array_to_vec_vg4_multi<0b1100, "movaz">;
821+
defm MOVAZ_VG2_2ZMXI : sme2_movaz_array_to_vec_vg2_multi<"movaz">;
822+
defm MOVAZ_VG4_4ZMXI : sme2_movaz_array_to_vec_vg4_multi<"movaz">;
823823

824824
defm ZERO_MXI : sme2p1_zero_matrix<"zero">;
825825

llvm/lib/Target/AArch64/SMEInstrFormats.td

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,15 @@ class sme2_movez_to_tile_pseudo<string name, Operand tile_imm, Operand imm_ty, R
117117
let SMEMatrixType = za_flag;
118118
let usesCustomInserter = 1;
119119
}
120+
121+
class sme2_movaz_array_to_tile_pseudo<string name, Operand index_ty, RegisterOperand multi_vector_ty,
122+
SMEMatrixTypeEnum za_flag>
123+
: SMEPseudo2Instr<name, 0>,
124+
Pseudo<(outs multi_vector_ty:$Zd), (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3), []> {
125+
let SMEMatrixType = za_flag;
126+
let usesCustomInserter = 1;
127+
}
128+
120129
//===----------------------------------------------------------------------===//
121130
// SME pattern match helpers.
122131
//===----------------------------------------------------------------------===//
@@ -4360,8 +4369,11 @@ multiclass sme2_mova_array_to_vec_vg2_multi<bits<3> opc, string mnemonic> {
43604369
}
43614370

43624371
multiclass sme2_movaz_array_to_vec_vg2_multi<string mnemonic> {
4363-
def NAME : sme2_mova_array_to_vec_vg2_multi<0b010, mnemonic>;
4364-
def NAME # _PSEUDO : sme2_movaz_to_tile_multi_pseudo
4372+
defm NAME : sme2_mova_array_to_vec_vg2_multi<0b010, mnemonic>;
4373+
def NAME # _B_PSEUDO : sme2_movaz_array_to_tile_pseudo<NAME, sme_elm_idx0_7, ZZ_b_mul_r, SMEMatrixArray>;
4374+
def NAME # _H_PSEUDO : sme2_movaz_array_to_tile_pseudo<NAME, sme_elm_idx0_7, ZZ_h_mul_r, SMEMatrixArray>;
4375+
def NAME # _S_PSEUDO : sme2_movaz_array_to_tile_pseudo<NAME, sme_elm_idx0_7, ZZ_s_mul_r, SMEMatrixArray>;
4376+
def NAME # _D_PSEUDO : sme2_movaz_array_to_tile_pseudo<NAME, sme_elm_idx0_7, ZZ_d_mul_r, SMEMatrixArray>;
43654377
}
43664378

43674379
// move array to vector, four registers
@@ -4439,6 +4451,14 @@ multiclass sme2_mova_array_to_vec_vg4_multi<bits<4> opc, string mnemonic> {
44394451
}
44404452
}
44414453

4454+
multiclass sme2_movaz_array_to_vec_vg4_multi<string mnemonic> {
4455+
defm NAME : sme2_mova_array_to_vec_vg4_multi<0b1100, mnemonic>;
4456+
def NAME # _B_PSEUDO : sme2_movaz_array_to_tile_pseudo<NAME, sme_elm_idx0_7, ZZZZ_b_mul_r, SMEMatrixArray>;
4457+
def NAME # _H_PSEUDO : sme2_movaz_array_to_tile_pseudo<NAME, sme_elm_idx0_7, ZZZZ_h_mul_r, SMEMatrixArray>;
4458+
def NAME # _S_PSEUDO : sme2_movaz_array_to_tile_pseudo<NAME, sme_elm_idx0_7, ZZZZ_s_mul_r, SMEMatrixArray>;
4459+
def NAME # _D_PSEUDO : sme2_movaz_array_to_tile_pseudo<NAME, sme_elm_idx0_7, ZZZZ_d_mul_r, SMEMatrixArray>;
4460+
}
4461+
44424462
//===----------------------------------------------------------------------===//
44434463
// SME2 multi-vec saturating shift right narrow
44444464
class sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u>

0 commit comments

Comments
 (0)