Skip to content

Commit 9d6911f

Browse files
committed
[AArch64] Extend usage of XAR instruction for fixed-length operations
Resolves #139229. In #137162, support for `v2i64` was implemented for the vector rotate transformation. Although types like `v4i32`, `v8i16` and `v16i8` do not have a Neon SHA3 XAR operation, we can use SVE operations for them if sve2-sha3 is available.
1 parent ee91f9b commit 9d6911f

File tree

2 files changed

+422
-23
lines changed

2 files changed

+422
-23
lines changed

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 93 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4606,7 +4606,33 @@ bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
46064606
return false;
46074607
}
46084608

4609-
if (!Subtarget->hasSHA3())
4609+
// We have Neon SHA3 XAR operation for v2i64 but for types
4610+
// v4i32, v8i16, v16i8 we can use SVE operations when SVE2-SHA3
4611+
// is available.
4612+
EVT SVT;
4613+
switch (VT.getSimpleVT().SimpleTy) {
4614+
case MVT::v4i32:
4615+
case MVT::v2i32:
4616+
SVT = MVT::nxv4i32;
4617+
break;
4618+
case MVT::v8i16:
4619+
case MVT::v4i16:
4620+
SVT = MVT::nxv8i16;
4621+
break;
4622+
case MVT::v16i8:
4623+
case MVT::v8i8:
4624+
SVT = MVT::nxv16i8;
4625+
break;
4626+
case MVT::v2i64:
4627+
case MVT::v1i64:
4628+
SVT = Subtarget->hasSHA3() ? MVT::v2i64 : MVT::nxv2i64;
4629+
break;
4630+
default:
4631+
return false;
4632+
}
4633+
4634+
if ((!SVT.isScalableVector() && !Subtarget->hasSHA3()) ||
4635+
(SVT.isScalableVector() && !Subtarget->hasSVE2()))
46104636
return false;
46114637

46124638
if (N0->getOpcode() != AArch64ISD::VSHL ||
@@ -4632,41 +4658,97 @@ bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
46324658
SDValue Imm = CurDAG->getTargetConstant(
46334659
ShAmt, DL, N0.getOperand(1).getValueType(), false);
46344660

4635-
if (ShAmt + HsAmt != 64)
4661+
unsigned VTSizeInBits = VT.getScalarSizeInBits();
4662+
if (ShAmt + HsAmt != VTSizeInBits)
46364663
return false;
46374664

46384665
if (!IsXOROperand) {
46394666
SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4640-
SDNode *MOV =
4641-
CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, MVT::v2i64, Zero);
4667+
SDNode *MOV = CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, SVT, Zero);
46424668
SDValue MOVIV = SDValue(MOV, 0);
4669+
46434670
R1 = N1->getOperand(0);
46444671
R2 = MOVIV;
46454672
}
46464673

4647-
// If the input is a v1i64, widen to a v2i64 to use XAR.
4648-
assert((VT == MVT::v1i64 || VT == MVT::v2i64) && "Unexpected XAR type!");
4649-
if (VT == MVT::v1i64) {
4650-
EVT SVT = MVT::v2i64;
4674+
if (SVT.isScalableVector()) {
4675+
SDValue Undef =
4676+
SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, SVT), 0);
4677+
4678+
if (VT.is64BitVector()) {
4679+
EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4680+
4681+
SDValue UndefQ = SDValue(
4682+
CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, QVT), 0);
4683+
SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4684+
4685+
R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4686+
Undef, R1, DSub),
4687+
0);
4688+
if (R2.getValueType() == VT)
4689+
R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4690+
Undef, R2, DSub),
4691+
0);
4692+
}
4693+
4694+
SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4695+
4696+
R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT, Undef,
4697+
R1, ZSub),
4698+
0);
4699+
R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT, Undef,
4700+
R2, ZSub),
4701+
0);
4702+
}
4703+
4704+
if (!SVT.isScalableVector() && SVT != VT) {
46514705
SDValue Undef =
46524706
SDValue(CurDAG->getMachineNode(AArch64::IMPLICIT_DEF, DL, SVT), 0);
46534707
SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4708+
46544709
R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT, Undef,
46554710
R1, DSub),
46564711
0);
4657-
if (R2.getValueType() == MVT::v1i64)
4712+
if (R2.getValueType() != SVT)
46584713
R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT,
46594714
Undef, R2, DSub),
46604715
0);
46614716
}
46624717

46634718
SDValue Ops[] = {R1, R2, Imm};
4664-
SDNode *XAR = CurDAG->getMachineNode(AArch64::XAR, DL, MVT::v2i64, Ops);
4719+
SDNode *XAR = nullptr;
46654720

4666-
if (VT == MVT::v1i64) {
4721+
if (SVT.isScalableVector()) {
4722+
if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4723+
SVT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4724+
AArch64::XAR_ZZZI_D}))
4725+
XAR = CurDAG->getMachineNode(Opc, DL, SVT, Ops);
4726+
} else {
4727+
XAR = CurDAG->getMachineNode(AArch64::XAR, DL, SVT, Ops);
4728+
}
4729+
4730+
assert(XAR && "Unexpected NULL value for XAR instruction in DAG");
4731+
4732+
if (!SVT.isScalableVector() && SVT != VT) {
46674733
SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
46684734
XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
46694735
SDValue(XAR, 0), DSub);
4736+
} else if (SVT.isScalableVector()) {
4737+
if (VT.is64BitVector()) {
4738+
EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4739+
4740+
SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4741+
SDNode *Q = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, QVT,
4742+
SDValue(XAR, 0), ZSub);
4743+
4744+
SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4745+
XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4746+
SDValue(Q, 0), DSub);
4747+
} else {
4748+
SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4749+
XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4750+
SDValue(XAR, 0), ZSub);
4751+
}
46704752
}
46714753
ReplaceNode(N, XAR);
46724754
return true;

0 commit comments

Comments
 (0)