-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[CLANG][LLVM][AArch64]SME2.1 intrinsics for MOVAZ tile to 2/4 vectors #88710
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
c705f70
6d6de45
ba423c0
189fd87
f7066f4
c384922
4f41014
f74f21f
7098d78
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -395,7 +395,8 @@ class AArch64DAGToDAGISel : public SelectionDAGISel { | |
template <unsigned MaxIdx, unsigned Scale> | ||
void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg, | ||
unsigned Op); | ||
|
||
void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs, unsigned Op, | ||
unsigned MaxIdx, unsigned Scale); | ||
bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm); | ||
/// SVE Reg+Imm addressing mode. | ||
template <int64_t Min, int64_t Max> | ||
|
@@ -2003,6 +2004,34 @@ void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs, | |
CurDAG->RemoveDeadNode(N); | ||
} | ||
|
||
/// Replace the multi-result node \p N with a single machine node of opcode
/// \p Op and rewire the uses of N's results to that node.
///
/// \param N       Node being selected. Operand 0 is used as the chain,
///                operand 2 is forwarded as the tile number and operand 3 is
///                the slice base. Results 0..NumVecs-1 are the vectors and
///                result NumVecs is the chain.
/// \param NumVecs Number of vector results of N to rewire.
/// \param Op      Opcode of the machine node to create.
/// \param MaxIdx  Forwarded unchanged to SelectSMETileSlice.
/// \param Scale   Forwarded unchanged to SelectSMETileSlice.
///
/// NOTE(review): when SelectSMETileSlice fails this function returns without
/// touching N, and the visible callers in Select() return right after the
/// call - confirm that leaving N unselected on that path is intended.
void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The real question is why is this is a function template or, if you want, why There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's a good point. Somehow I assumed we are using the template version of |
||
unsigned Op, unsigned MaxIdx, | ||
unsigned Scale) { | ||
|
||
// Split the slice operand (operand 3) into Base and Offset; bail out if the
// helper cannot match it.
SDValue SliceBase = N->getOperand(3); | ||
SDValue Base, Offset; | ||
if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale)) | ||
return; | ||
// The correct Za tile number is computed in Machine Instruction | ||
// See EmitZAInstr | ||
// DAG cannot select Za tile as an output register with ZReg | ||
SDLoc DL(N); | ||
// Machine-node operands in order: tile number, slice base, slice offset, chain.
SDValue Ops[] = {/*TileNum*/ N->getOperand(2), Base, Offset, | ||
/*Chain*/ N->getOperand(0)}; | ||
// Result 0 of Mov is a single untyped value; result 1 is the chain.
SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops); | ||
|
||
// Rewire each vector result of N to a sub-register extract of Mov's untyped
// result, using the type of N's first result for every extract.
// NOTE(review): relies on the zsub sub-register indices being consecutive
// starting at AArch64::zsub0 - confirm.
EVT VT = N->getValueType(0); | ||
for (unsigned I = 0; I < NumVecs; ++I) | ||
ReplaceUses(SDValue(N, I), | ||
CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT, | ||
SDValue(Mov, 0))); | ||
|
||
// Copy chain | ||
// N's chain result follows its NumVecs vector results.
unsigned ChainIdx = NumVecs; | ||
ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1)); | ||
// All uses of N have been redirected, so drop it from the DAG.
CurDAG->RemoveDeadNode(N); | ||
} | ||
|
||
void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N, | ||
unsigned NumOutVecs, | ||
bool IsTupleInput, | ||
|
@@ -5245,6 +5274,74 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { | |
AArch64::MOVA_VG4_4ZMXI); | ||
return; | ||
} | ||
case Intrinsic::aarch64_sme_readz_horiz_x2: { | ||
if (VT == MVT::nxv16i8) { | ||
SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2); | ||
return; | ||
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || | ||
VT == MVT::nxv8bf16) { | ||
SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2); | ||
return; | ||
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { | ||
SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2); | ||
return; | ||
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { | ||
SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2); | ||
return; | ||
} | ||
break; | ||
} | ||
case Intrinsic::aarch64_sme_readz_vert_x2: { | ||
if (VT == MVT::nxv16i8) { | ||
SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2); | ||
return; | ||
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || | ||
VT == MVT::nxv8bf16) { | ||
SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2); | ||
return; | ||
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { | ||
SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2); | ||
return; | ||
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { | ||
SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2); | ||
return; | ||
} | ||
break; | ||
} | ||
case Intrinsic::aarch64_sme_readz_horiz_x4: { | ||
if (VT == MVT::nxv16i8) { | ||
SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4); | ||
return; | ||
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || | ||
VT == MVT::nxv8bf16) { | ||
SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4); | ||
return; | ||
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { | ||
SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4); | ||
return; | ||
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { | ||
SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4); | ||
return; | ||
} | ||
break; | ||
} | ||
case Intrinsic::aarch64_sme_readz_vert_x4: { | ||
if (VT == MVT::nxv16i8) { | ||
SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4); | ||
return; | ||
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || | ||
VT == MVT::nxv8bf16) { | ||
SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4); | ||
return; | ||
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { | ||
SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4); | ||
return; | ||
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { | ||
SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4); | ||
return; | ||
} | ||
break; | ||
} | ||
case Intrinsic::swift_async_context_addr: { | ||
SDLoc DL(Node); | ||
SDValue Chain = Node->getOperand(0); | ||
|
Uh oh!
There was an error while loading. Please reload this page.