Skip to content

Commit 651e42f

Browse files
committed
[AArch64] Expand vector ops when NEON and SVE are unavailable.
Unlike with `+noneon`, we must assume that vector types are still available, i.e. it is valid to pass vector arguments to functions and to return vector values from them. However, the compiler must make sure to scalarize any vector operations.
1 parent eb3a671 commit 651e42f

File tree

60 files changed

+49846
-13189
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

60 files changed

+49846
-13189
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3731,8 +3731,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
37313731
}
37323732
case ISD::SUB: {
37333733
EVT VT = Node->getValueType(0);
3734-
assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
3735-
TLI.isOperationLegalOrCustom(ISD::XOR, VT) &&
3734+
assert((VT.isFixedLengthVector() || // fixed length ADD can be expanded to
3735+
// scalar ADD
3736+
(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
3737+
TLI.isOperationLegalOrCustom(ISD::XOR, VT))) &&
37363738
"Don't know how to expand this subtraction!");
37373739
Tmp1 = DAG.getNOT(dl, Node->getOperand(1), VT);
37383740
Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, dl, VT));

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 50 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -357,7 +357,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
357357
addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
358358
}
359359

360-
if (Subtarget->hasNEON()) {
360+
if (Subtarget->isNeonAvailable()) {
361361
addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
362362
addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
363363
// Someone set us up the NEON.
@@ -378,6 +378,27 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
378378
addQRTypeForNEON(MVT::v2i64);
379379
addQRTypeForNEON(MVT::v8f16);
380380
addQRTypeForNEON(MVT::v8bf16);
381+
} else if (Subtarget->hasNEON() || Subtarget->useSVEForFixedLengthVectors()) {
382+
addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
383+
addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
384+
385+
addRegisterClass(MVT::v2f32, &AArch64::FPR64RegClass);
386+
addRegisterClass(MVT::v8i8, &AArch64::FPR64RegClass);
387+
addRegisterClass(MVT::v4i16, &AArch64::FPR64RegClass);
388+
addRegisterClass(MVT::v2i32, &AArch64::FPR64RegClass);
389+
addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass);
390+
addRegisterClass(MVT::v1f64, &AArch64::FPR64RegClass);
391+
addRegisterClass(MVT::v4f16, &AArch64::FPR64RegClass);
392+
addRegisterClass(MVT::v4bf16, &AArch64::FPR64RegClass);
393+
394+
addRegisterClass(MVT::v4f32, &AArch64::FPR128RegClass);
395+
addRegisterClass(MVT::v2f64, &AArch64::FPR128RegClass);
396+
addRegisterClass(MVT::v16i8, &AArch64::FPR128RegClass);
397+
addRegisterClass(MVT::v8i16, &AArch64::FPR128RegClass);
398+
addRegisterClass(MVT::v4i32, &AArch64::FPR128RegClass);
399+
addRegisterClass(MVT::v2i64, &AArch64::FPR128RegClass);
400+
addRegisterClass(MVT::v8f16, &AArch64::FPR128RegClass);
401+
addRegisterClass(MVT::v8bf16, &AArch64::FPR128RegClass);
381402
}
382403

383404
if (Subtarget->hasSVEorSME()) {
@@ -1125,7 +1146,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
11251146

11261147
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
11271148

1128-
if (Subtarget->hasNEON()) {
1149+
if (Subtarget->isNeonAvailable()) {
11291150
// FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
11301151
// silliness like this:
11311152
for (auto Op :
@@ -1328,6 +1349,24 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
13281349
// FADDP custom lowering
13291350
for (MVT VT : { MVT::v16f16, MVT::v8f32, MVT::v4f64 })
13301351
setOperationAction(ISD::FADD, VT, Custom);
1352+
} else {
1353+
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
1354+
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1355+
setOperationAction(Op, VT, Expand);
1356+
1357+
if (VT.is128BitVector() || VT.is64BitVector()) {
1358+
setOperationAction(ISD::LOAD, VT, Legal);
1359+
setOperationAction(ISD::STORE, VT, Legal);
1360+
setOperationAction(ISD::BITCAST, VT,
1361+
Subtarget->isLittleEndian() ? Legal : Expand);
1362+
}
1363+
for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
1364+
setTruncStoreAction(VT, InnerVT, Expand);
1365+
setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
1366+
setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
1367+
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1368+
}
1369+
}
13311370
}
13321371

13331372
if (Subtarget->hasSME()) {
@@ -9377,7 +9416,8 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
93779416

93789417
SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
93799418
SelectionDAG &DAG) const {
9380-
if (!Subtarget->hasNEON())
9419+
if (!Subtarget->isNeonAvailable() &&
9420+
!Subtarget->useSVEForFixedLengthVectors())
93819421
return SDValue();
93829422

93839423
EVT VT = Op.getValueType();
@@ -14110,6 +14150,13 @@ SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
1411014150
return DAG.getNode(AArch64ISD::UZP1, dl, VT, ResultLo, ResultHi);
1411114151
}
1411214152

14153+
// AArch64 override of TargetLowering::shouldExpandBuildVectorWithShuffles.
// Returns false whenever NEON is not available on the subtarget; otherwise
// the decision is delegated unchanged to the generic TargetLowering
// implementation, with the same VT and DefinedValues arguments.
bool AArch64TargetLowering::shouldExpandBuildVectorWithShuffles(
14154+
EVT VT, unsigned DefinedValues) const {
14155+
// NOTE(review): presumably shuffles are rejected here because, without NEON,
// vector operations have to be scalarized anyway -- confirm against the
// commit description.
if (!Subtarget->isNeonAvailable())
14156+
return false;
14157+
// NEON is available: keep the target-independent default behaviour.
return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
14158+
}
14159+
1411314160
bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
1411414161
// Currently no fixed length shuffles that require SVE are legal.
1411514162
if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1020,6 +1020,8 @@ class AArch64TargetLowering : public TargetLowering {
10201020
void addDRTypeForNEON(MVT VT);
10211021
void addQRTypeForNEON(MVT VT);
10221022

1023+
/// Overrides the TargetLowering hook of the same name. The parameters are the
/// vector type and the defined-values count (named VT and DefinedValues in the
/// out-of-line definition). Returns false when NEON is not available on the
/// subtarget and otherwise defers to the base-class implementation.
bool shouldExpandBuildVectorWithShuffles(EVT, unsigned) const override;
1024+
10231025
unsigned allocateLazySaveBuffer(SDValue &Chain, const SDLoc &DL,
10241026
SelectionDAG &DAG) const;
10251027

0 commit comments

Comments
 (0)