Skip to content

[AArch64] Expand vector ops when NEON and SVE are unavailable. #90833

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 54 additions & 25 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -360,24 +360,24 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
if (Subtarget->hasNEON()) {
addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
// Someone set us up the NEON.
addDRTypeForNEON(MVT::v2f32);
addDRTypeForNEON(MVT::v8i8);
addDRTypeForNEON(MVT::v4i16);
addDRTypeForNEON(MVT::v2i32);
addDRTypeForNEON(MVT::v1i64);
addDRTypeForNEON(MVT::v1f64);
addDRTypeForNEON(MVT::v4f16);
addDRTypeForNEON(MVT::v4bf16);

addQRTypeForNEON(MVT::v4f32);
addQRTypeForNEON(MVT::v2f64);
addQRTypeForNEON(MVT::v16i8);
addQRTypeForNEON(MVT::v8i16);
addQRTypeForNEON(MVT::v4i32);
addQRTypeForNEON(MVT::v2i64);
addQRTypeForNEON(MVT::v8f16);
addQRTypeForNEON(MVT::v8bf16);

addDRType(MVT::v2f32);
addDRType(MVT::v8i8);
addDRType(MVT::v4i16);
addDRType(MVT::v2i32);
addDRType(MVT::v1i64);
addDRType(MVT::v1f64);
addDRType(MVT::v4f16);
addDRType(MVT::v4bf16);

addQRType(MVT::v4f32);
addQRType(MVT::v2f64);
addQRType(MVT::v16i8);
addQRType(MVT::v8i16);
addQRType(MVT::v4i32);
addQRType(MVT::v2i64);
addQRType(MVT::v8f16);
addQRType(MVT::v8bf16);
}

if (Subtarget->hasSVEorSME()) {
Expand Down Expand Up @@ -1125,7 +1125,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,

setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

if (Subtarget->hasNEON()) {
if (Subtarget->isNeonAvailable()) {
// FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
// silliness like this:
for (auto Op :
Expand Down Expand Up @@ -1337,6 +1337,24 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// FADDP custom lowering
for (MVT VT : { MVT::v16f16, MVT::v8f32, MVT::v4f64 })
setOperationAction(ISD::FADD, VT, Custom);
} else /* !isNeonAvailable */ {
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
setOperationAction(Op, VT, Expand);

if (VT.is128BitVector() || VT.is64BitVector()) {
setOperationAction(ISD::LOAD, VT, Legal);
setOperationAction(ISD::STORE, VT, Legal);
setOperationAction(ISD::BITCAST, VT,
Subtarget->isLittleEndian() ? Legal : Expand);
}
for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
setTruncStoreAction(VT, InnerVT, Expand);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's quite a gap here so perhaps worth adding /* !isNeonAvailable */?

Copy link
Collaborator

@paulwalker-arm paulwalker-arm May 3, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alternatively should this be else if (!Subtarget->useSVEForFixedLengthVectors())? Just thinking there's several instances of "set everything to Expand" going on.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alternatively should this be else if (!Subtarget->useSVEForFixedLengthVectors())? Just thinking there's several instances of "set everything to Expand" going on.

In practice this is not equivalent, because there are things missing from addTypeForFixedLengthSVE. For example, the loop that sets truncating-store actions:

MVT InnerVT = VT.changeVectorElementType(MVT::i8);
while (InnerVT != VT) {
  setTruncStoreAction(VT, InnerVT, Default);
  ...
  InnerVT = InnerVT.changeVectorElementType(
      MVT::getIntegerVT(2 * InnerVT.getScalarSizeInBits()));
}

misses out on truncating store from v4i16 -> v4i1, which we'd want to Expand.

If we'd set it to Default (Custom) lowering, then it would try to lower it with SVE operations because the check useSVEForFixedLengthVectorVT is based on MVT::v4i16, not the MVT::v4i1, and the resulting truncating store would fail to select.

setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
}
}
}

if (Subtarget->hasSME()) {
Expand Down Expand Up @@ -2020,14 +2038,16 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
setOperationAction(ISD::ZERO_EXTEND, VT, Default);
}

void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
void AArch64TargetLowering::addDRType(MVT VT) {
addRegisterClass(VT, &AArch64::FPR64RegClass);
addTypeForNEON(VT);
if (Subtarget->isNeonAvailable())
addTypeForNEON(VT);
}

void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
void AArch64TargetLowering::addQRType(MVT VT) {
addRegisterClass(VT, &AArch64::FPR128RegClass);
addTypeForNEON(VT);
if (Subtarget->isNeonAvailable())
addTypeForNEON(VT);
}

EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &,
Expand Down Expand Up @@ -9445,7 +9465,8 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {

SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
SelectionDAG &DAG) const {
if (!Subtarget->hasNEON())
if (!Subtarget->isNeonAvailable() &&
!Subtarget->useSVEForFixedLengthVectors())
return SDValue();

EVT VT = Op.getValueType();
Expand Down Expand Up @@ -14141,6 +14162,13 @@ SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(AArch64ISD::UZP1, dl, VT, ResultLo, ResultHi);
}

bool AArch64TargetLowering::shouldExpandBuildVectorWithShuffles(
EVT VT, unsigned DefinedValues) const {
if (!Subtarget->isNeonAvailable())
return false;
return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
}

bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
// Currently no fixed length shuffles that require SVE are legal.
if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
Expand Down Expand Up @@ -19838,7 +19866,8 @@ performSVEMulAddSubCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
// help, for example, to produce ssra from sshr+add.
static SDValue performAddSubIntoVectorOp(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
if (VT != MVT::i64)
if (VT != MVT::i64 ||
DAG.getTargetLoweringInfo().isOperationExpand(N->getOpcode(), MVT::v1i64))
return SDValue();
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -1017,8 +1017,10 @@ class AArch64TargetLowering : public TargetLowering {

void addTypeForNEON(MVT VT);
void addTypeForFixedLengthSVE(MVT VT);
void addDRTypeForNEON(MVT VT);
void addQRTypeForNEON(MVT VT);
void addDRType(MVT VT);
void addQRType(MVT VT);

bool shouldExpandBuildVectorWithShuffles(EVT, unsigned) const override;

unsigned allocateLazySaveBuffer(SDValue &Chain, const SDLoc &DL,
SelectionDAG &DAG) const;
Expand Down
Loading
Loading