Skip to content

Commit 2270ec1

Browse files
committed
[AArch64] Expand vector ops when NEON and SVE are unavailable.
Unlike `+noneon`, we must assume that vector types are available, i.e. it is valid to pass vector arguments to functions and to return vector values from them. However, the compiler must make sure to scalarize any vector operations.
1 parent 2f8b144 commit 2270ec1

File tree

60 files changed

+49847
-13189
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

60 files changed

+49847
-13189
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3731,8 +3731,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
37313731
}
37323732
case ISD::SUB: {
37333733
EVT VT = Node->getValueType(0);
3734-
assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
3735-
TLI.isOperationLegalOrCustom(ISD::XOR, VT) &&
3734+
assert((VT.isFixedLengthVector() || // fixed length ADD can be expanded to
3735+
// scalar ADD
3736+
(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
3737+
TLI.isOperationLegalOrCustom(ISD::XOR, VT))) &&
37363738
"Don't know how to expand this subtraction!");
37373739
Tmp1 = DAG.getNOT(dl, Node->getOperand(1), VT);
37383740
Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, dl, VT));

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -357,7 +357,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
357357
addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
358358
}
359359

360-
if (Subtarget->hasNEON()) {
360+
if (Subtarget->isNeonAvailable()) {
361361
addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
362362
addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
363363
// Someone set us up the NEON.
@@ -378,6 +378,28 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
378378
addQRTypeForNEON(MVT::v2i64);
379379
addQRTypeForNEON(MVT::v8f16);
380380
addQRTypeForNEON(MVT::v8bf16);
381+
} else if (Subtarget->hasNEON() ||
382+
Subtarget->useSVEForFixedLengthVectors()) {
383+
addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
384+
addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
385+
386+
addRegisterClass(MVT::v2f32, &AArch64::FPR64RegClass);
387+
addRegisterClass(MVT::v8i8, &AArch64::FPR64RegClass);
388+
addRegisterClass(MVT::v4i16, &AArch64::FPR64RegClass);
389+
addRegisterClass(MVT::v2i32, &AArch64::FPR64RegClass);
390+
addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass);
391+
addRegisterClass(MVT::v1f64, &AArch64::FPR64RegClass);
392+
addRegisterClass(MVT::v4f16, &AArch64::FPR64RegClass);
393+
addRegisterClass(MVT::v4bf16, &AArch64::FPR64RegClass);
394+
395+
addRegisterClass(MVT::v4f32, &AArch64::FPR128RegClass);
396+
addRegisterClass(MVT::v2f64, &AArch64::FPR128RegClass);
397+
addRegisterClass(MVT::v16i8, &AArch64::FPR128RegClass);
398+
addRegisterClass(MVT::v8i16, &AArch64::FPR128RegClass);
399+
addRegisterClass(MVT::v4i32, &AArch64::FPR128RegClass);
400+
addRegisterClass(MVT::v2i64, &AArch64::FPR128RegClass);
401+
addRegisterClass(MVT::v8f16, &AArch64::FPR128RegClass);
402+
addRegisterClass(MVT::v8bf16, &AArch64::FPR128RegClass);
381403
}
382404

383405
if (Subtarget->hasSVEorSME()) {
@@ -1125,7 +1147,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
11251147

11261148
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
11271149

1128-
if (Subtarget->hasNEON()) {
1150+
if (Subtarget->isNeonAvailable()) {
11291151
// FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
11301152
// silliness like this:
11311153
for (auto Op :
@@ -1328,6 +1350,24 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
13281350
// FADDP custom lowering
13291351
for (MVT VT : { MVT::v16f16, MVT::v8f32, MVT::v4f64 })
13301352
setOperationAction(ISD::FADD, VT, Custom);
1353+
} else {
1354+
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
1355+
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1356+
setOperationAction(Op, VT, Expand);
1357+
1358+
if (VT.is128BitVector() || VT.is64BitVector()) {
1359+
setOperationAction(ISD::LOAD, VT, Legal);
1360+
setOperationAction(ISD::STORE, VT, Legal);
1361+
setOperationAction(ISD::BITCAST, VT,
1362+
Subtarget->isLittleEndian() ? Legal : Expand);
1363+
}
1364+
for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
1365+
setTruncStoreAction(VT, InnerVT, Expand);
1366+
setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
1367+
setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
1368+
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1369+
}
1370+
}
13311371
}
13321372

13331373
if (Subtarget->hasSME()) {
@@ -9377,7 +9417,8 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
93779417

93789418
SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
93799419
SelectionDAG &DAG) const {
9380-
if (!Subtarget->hasNEON())
9420+
if (!Subtarget->isNeonAvailable() &&
9421+
!Subtarget->useSVEForFixedLengthVectors())
93819422
return SDValue();
93829423

93839424
EVT VT = Op.getValueType();
@@ -14110,6 +14151,13 @@ SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
1411014151
return DAG.getNode(AArch64ISD::UZP1, dl, VT, ResultLo, ResultHi);
1411114152
}
1411214153

14154+
bool AArch64TargetLowering::shouldExpandBuildVectorWithShuffles(
14155+
EVT VT, unsigned DefinedValues) const {
14156+
if (!Subtarget->isNeonAvailable())
14157+
return false;
14158+
return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
14159+
}
14160+
1411314161
bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
1411414162
// Currently no fixed length shuffles that require SVE are legal.
1411514163
if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1020,6 +1020,8 @@ class AArch64TargetLowering : public TargetLowering {
10201020
void addDRTypeForNEON(MVT VT);
10211021
void addQRTypeForNEON(MVT VT);
10221022

1023+
bool shouldExpandBuildVectorWithShuffles(EVT, unsigned) const override;
1024+
10231025
unsigned allocateLazySaveBuffer(SDValue &Chain, const SDLoc &DL,
10241026
SelectionDAG &DAG) const;
10251027

0 commit comments

Comments
 (0)