Skip to content

Commit 9438023

Browse files
committed
[AArch64] Expand vector ops when NEON and SVE are unavailable.
Unlike with `+noneon`, we must assume that vector types are available, i.e. it is valid to pass vector arguments to functions and to return vectors from them. However, the compiler must make sure to scalarize any vector operations.
1 parent 46a30df commit 9438023

File tree

60 files changed

+49840
-13204
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

60 files changed

+49840
-13204
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3740,8 +3740,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
37403740
}
37413741
case ISD::SUB: {
37423742
EVT VT = Node->getValueType(0);
3743-
assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
3744-
TLI.isOperationLegalOrCustom(ISD::XOR, VT) &&
3743+
assert((VT.isFixedLengthVector() || // fixed length ADD can be expanded to
3744+
// scalar ADD
3745+
(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
3746+
TLI.isOperationLegalOrCustom(ISD::XOR, VT))) &&
37453747
"Don't know how to expand this subtraction!");
37463748
Tmp1 = DAG.getNOT(dl, Node->getOperand(1), VT);
37473749
Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, dl, VT));

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 50 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -357,7 +357,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
357357
addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
358358
}
359359

360-
if (Subtarget->hasNEON()) {
360+
if (Subtarget->isNeonAvailable()) {
361361
addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
362362
addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
363363
// Someone set us up the NEON.
@@ -378,6 +378,27 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
378378
addQRTypeForNEON(MVT::v2i64);
379379
addQRTypeForNEON(MVT::v8f16);
380380
addQRTypeForNEON(MVT::v8bf16);
381+
} else if (Subtarget->hasNEON() || Subtarget->useSVEForFixedLengthVectors()) {
382+
addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
383+
addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
384+
385+
addRegisterClass(MVT::v2f32, &AArch64::FPR64RegClass);
386+
addRegisterClass(MVT::v8i8, &AArch64::FPR64RegClass);
387+
addRegisterClass(MVT::v4i16, &AArch64::FPR64RegClass);
388+
addRegisterClass(MVT::v2i32, &AArch64::FPR64RegClass);
389+
addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass);
390+
addRegisterClass(MVT::v1f64, &AArch64::FPR64RegClass);
391+
addRegisterClass(MVT::v4f16, &AArch64::FPR64RegClass);
392+
addRegisterClass(MVT::v4bf16, &AArch64::FPR64RegClass);
393+
394+
addRegisterClass(MVT::v4f32, &AArch64::FPR128RegClass);
395+
addRegisterClass(MVT::v2f64, &AArch64::FPR128RegClass);
396+
addRegisterClass(MVT::v16i8, &AArch64::FPR128RegClass);
397+
addRegisterClass(MVT::v8i16, &AArch64::FPR128RegClass);
398+
addRegisterClass(MVT::v4i32, &AArch64::FPR128RegClass);
399+
addRegisterClass(MVT::v2i64, &AArch64::FPR128RegClass);
400+
addRegisterClass(MVT::v8f16, &AArch64::FPR128RegClass);
401+
addRegisterClass(MVT::v8bf16, &AArch64::FPR128RegClass);
381402
}
382403

383404
if (Subtarget->hasSVEorSME()) {
@@ -1125,7 +1146,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
11251146

11261147
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
11271148

1128-
if (Subtarget->hasNEON()) {
1149+
if (Subtarget->isNeonAvailable()) {
11291150
// FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
11301151
// silliness like this:
11311152
for (auto Op :
@@ -1337,6 +1358,24 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
13371358
// FADDP custom lowering
13381359
for (MVT VT : { MVT::v16f16, MVT::v8f32, MVT::v4f64 })
13391360
setOperationAction(ISD::FADD, VT, Custom);
1361+
} else {
1362+
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
1363+
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1364+
setOperationAction(Op, VT, Expand);
1365+
1366+
if (VT.is128BitVector() || VT.is64BitVector()) {
1367+
setOperationAction(ISD::LOAD, VT, Legal);
1368+
setOperationAction(ISD::STORE, VT, Legal);
1369+
setOperationAction(ISD::BITCAST, VT,
1370+
Subtarget->isLittleEndian() ? Legal : Expand);
1371+
}
1372+
for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
1373+
setTruncStoreAction(VT, InnerVT, Expand);
1374+
setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
1375+
setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
1376+
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1377+
}
1378+
}
13401379
}
13411380

13421381
if (Subtarget->hasSME()) {
@@ -9445,7 +9484,8 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
94459484

94469485
SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
94479486
SelectionDAG &DAG) const {
9448-
if (!Subtarget->hasNEON())
9487+
if (!Subtarget->isNeonAvailable() &&
9488+
!Subtarget->useSVEForFixedLengthVectors())
94499489
return SDValue();
94509490

94519491
EVT VT = Op.getValueType();
@@ -14141,6 +14181,13 @@ SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
1414114181
return DAG.getNode(AArch64ISD::UZP1, dl, VT, ResultLo, ResultHi);
1414214182
}
1414314183

14184+
bool AArch64TargetLowering::shouldExpandBuildVectorWithShuffles(
14185+
EVT VT, unsigned DefinedValues) const {
14186+
if (!Subtarget->isNeonAvailable())
14187+
return false;
14188+
return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
14189+
}
14190+
1414414191
bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
1414514192
// Currently no fixed length shuffles that require SVE are legal.
1414614193
if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1020,6 +1020,8 @@ class AArch64TargetLowering : public TargetLowering {
10201020
void addDRTypeForNEON(MVT VT);
10211021
void addQRTypeForNEON(MVT VT);
10221022

1023+
bool shouldExpandBuildVectorWithShuffles(EVT, unsigned) const override;
1024+
10231025
unsigned allocateLazySaveBuffer(SDValue &Chain, const SDLoc &DL,
10241026
SelectionDAG &DAG) const;
10251027

0 commit comments

Comments
 (0)