@@ -1394,9 +1394,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     }
   }

-  // v1i64 -> v1i8 truncstore represents a bsub FPR8 store.
-  setTruncStoreAction(MVT::v1i64, MVT::v1i8, Legal);
-
   for (auto Op :
        {ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC,
         ISD::FROUND, ISD::FROUNDEVEN, ISD::FMAXNUM_IEEE, ISD::FMINNUM_IEEE,
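Note: the truncstore legality removed above let a `v1i64 -> v1i8` truncating store stand in for an FPR8 (`bsub`) store. With `MVT::aarch64mfp8` modelling the 8-bit FP register directly (see the `getFPSubregForVT` change below), that workaround is no longer needed. A minimal sketch of the source-level pattern this serves, assuming an AArch64 target with NEON (the function is illustrative, not part of this patch):

```cpp
// Illustrative only: storing byte lane 0 of a vector should become a single
// FPR8 store ("str b0, [x0]") instead of a GPR round-trip
// ("umov w8, v0.b[0]" + "strb w8, [x0]").
#include <arm_neon.h>

void store_lane0(uint8x16_t v, uint8_t *p) {
  *p = vgetq_lane_u8(v, 0);
}
```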
@@ -23981,6 +23978,8 @@ static SDValue combineI8TruncStore(StoreSDNode *ST, SelectionDAG &DAG,
 static unsigned getFPSubregForVT(EVT VT) {
   assert(VT.isSimple() && "Expected simple VT");
   switch (VT.getSimpleVT().SimpleTy) {
+  case MVT::aarch64mfp8:
+    return AArch64::bsub;
   case MVT::f16:
     return AArch64::hsub;
   case MVT::f32:
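For context, `getFPSubregForVT` maps a scalar memory type to the FP subregister that aliases the low bits of a vector register: `bsub` (8-bit, now reachable via `aarch64mfp8`), `hsub` (16-bit), `ssub` (32-bit), `dsub` (64-bit). A hedged sketch of the widths involved, assuming NEON (illustrative, not part of this patch):

```cpp
// Illustrative only: each scalar below can be stored straight from the low
// subregister of the same 128-bit vector register (b0 / h0 / s0 / d0),
// avoiding a move to a general-purpose register.
#include <arm_neon.h>

void store_lanes(uint8x16_t v, uint8_t *b, uint16_t *h, uint32_t *s,
                 uint64_t *d) {
  *b = vgetq_lane_u8(v, 0);                        // str b0, [x0]
  *h = vgetq_lane_u16(vreinterpretq_u16_u8(v), 0); // str h0, [x1]
  *s = vgetq_lane_u32(vreinterpretq_u32_u8(v), 0); // str s0, [x2]
  *d = vgetq_lane_u64(vreinterpretq_u64_u8(v), 0); // str d0, [x3]
}
```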
@@ -23992,22 +23991,6 @@ static unsigned getFPSubregForVT(EVT VT) {
   }
 }

-static EVT get64BitVector(EVT ElVT) {
-  assert(ElVT.isSimple() && "Expected simple VT");
-  switch (ElVT.getSimpleVT().SimpleTy) {
-  case MVT::i8:
-    return MVT::v8i8;
-  case MVT::i16:
-    return MVT::v4i16;
-  case MVT::i32:
-    return MVT::v2i32;
-  case MVT::i64:
-    return MVT::v1i64;
-  default:
-    llvm_unreachable("Unexpected VT!");
-  }
-}
-
 static SDValue performSTORECombine(SDNode *N,
                                    TargetLowering::DAGCombinerInfo &DCI,
                                    SelectionDAG &DAG,
@@ -24086,72 +24069,63 @@ static SDValue performSTORECombine(SDNode *N,
     SDValue ExtIdx = Value.getOperand(1);
     EVT VectorVT = Vector.getValueType();
     EVT ElemVT = VectorVT.getVectorElementType();
+
     if (!ValueVT.isInteger())
       return SDValue();
     if (ValueVT != MemVT && !ST->isTruncatingStore())
       return SDValue();

-    if (MemVT == MVT::i8) {
-      auto *ExtCst = dyn_cast<ConstantSDNode>(ExtIdx);
-      if (Subtarget->isNeonAvailable() &&
-          (VectorVT == MVT::v8i8 || VectorVT == MVT::v16i8) && ExtCst &&
-          !ExtCst->isZero() && ST->getBasePtr().getOpcode() != ISD::ADD) {
-        // These can lower to st1.b, which is preferable if we're unlikely to
-        // fold the addressing into the store.
-        return SDValue();
-      }
-
-      // Lower as truncstore of v1i64 -> v1i8 (which can lower to a bsub store).
-      SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
-      SDValue ExtVector;
-      EVT VecVT64 = get64BitVector(ElemVT);
-      if (ExtCst && ExtCst->isZero()) {
-        ExtVector =
-            DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT64, Vector, Zero);
-      } else {
-        SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
-                                  Value.getValueType(), Vector, ExtIdx);
-        ExtVector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecVT64,
-                                DAG.getUNDEF(VecVT64), Ext, Zero);
-      }
-
-      SDValue Cast = DAG.getNode(AArch64ISD::NVCAST, DL, MVT::v1i64, ExtVector);
-      return DAG.getTruncStore(ST->getChain(), DL, Cast, ST->getBasePtr(),
-                               MVT::v1i8, ST->getMemOperand());
-    }
-
-    // TODO: Handle storing i8s to wider types.
-    if (ElemVT == MVT::i8)
+    // This could generate an additional extract if the index is non-zero and
+    // the extracted value has multiple uses.
+    auto *ExtCst = dyn_cast<ConstantSDNode>(ExtIdx);
+    if ((!ExtCst || !ExtCst->isZero()) && !Value.hasOneUse())
       return SDValue();

-    // Heuristic: If there are other users of integer scalars extracted from
-    // this vector that won't fold into the store -- abandon folding. Applying
-    // this fold may extend the vector lifetime and disrupt paired stores.
-    for (const auto &Use : Vector->uses()) {
-      if (Use.getResNo() != Vector.getResNo())
-        continue;
-      const SDNode *User = Use.getUser();
-      if (User->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
-          (!User->hasOneUse() ||
-           (*User->user_begin())->getOpcode() != ISD::STORE))
-        return SDValue();
+    if (Subtarget->isNeonAvailable() && ElemVT == MemVT &&
+        (VectorVT.is64BitVector() || VectorVT.is128BitVector()) && ExtCst &&
+        !ExtCst->isZero() && ST->getBasePtr().getOpcode() != ISD::ADD) {
+      // These can lower to st1, which is preferable if we're unlikely to fold
+      // the addressing into the store.
+      return SDValue();
     }

-    EVT FPElemVT = EVT::getFloatingPointVT(ElemVT.getSizeInBits());
-    EVT FPVectorVT = VectorVT.changeVectorElementType(FPElemVT);
-    SDValue Cast = DAG.getNode(ISD::BITCAST, DL, FPVectorVT, Vector);
-    SDValue Ext =
-        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, FPElemVT, Cast, ExtIdx);
+    if (MemVT == MVT::i64 || MemVT == MVT::i32) {
+      // Heuristic: If there are other users of w/x integer scalars extracted
+      // from this vector that won't fold into the store -- abandon folding.
+      // Applying this fold may disrupt paired stores.
+      for (const auto &Use : Vector->uses()) {
+        if (Use.getResNo() != Vector.getResNo())
+          continue;
+        const SDNode *User = Use.getUser();
+        if (User->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+            (!User->hasOneUse() ||
+             (*User->user_begin())->getOpcode() != ISD::STORE))
+          return SDValue();
+      }
+    }

-    EVT FPMemVT = EVT::getFloatingPointVT(MemVT.getSizeInBits());
-    if (ST->isTruncatingStore() && FPMemVT != FPElemVT) {
-      SDValue Trunc = DAG.getTargetExtractSubreg(getFPSubregForVT(FPMemVT), DL,
-                                                 FPMemVT, Ext);
-      return DAG.getStore(ST->getChain(), DL, Trunc, ST->getBasePtr(),
-                          ST->getMemOperand());
+    SDValue ExtVector = Vector;
+    if (!ExtCst || !ExtCst->isZero()) {
+      // Handle extracting from lanes != 0.
+      SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+                                Value.getValueType(), Vector, ExtIdx);
+      // FIXME: Using a fixed-size vector for the insertion should not be
+      // necessary, but SVE ISEL is missing some folds to avoid fmovs.
+      SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
+      EVT InsertVectorVT = EVT::getVectorVT(
+          *DAG.getContext(), ElemVT,
+          VectorVT.getVectorElementCount().getKnownMinValue(), false);
+      ExtVector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, InsertVectorVT,
+                              DAG.getUNDEF(InsertVectorVT), Ext, Zero);
     }

-    return DAG.getStore(ST->getChain(), DL, Ext, ST->getBasePtr(),
+    EVT FPMemVT = MemVT == MVT::i8
+                      ? MVT::aarch64mfp8
+                      : EVT::getFloatingPointVT(MemVT.getSizeInBits());
+    SDValue FPSubreg = DAG.getTargetExtractSubreg(getFPSubregForVT(FPMemVT), DL,
+                                                  FPMemVT, ExtVector);
+
+    return DAG.getStore(ST->getChain(), DL, FPSubreg, ST->getBasePtr(),
                         ST->getMemOperand());
   }

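The rewritten combine generalizes the old i8-only path to all scalar widths: bail out if a non-zero-lane extract has other users, prefer `st1` lane stores when the addressing is unlikely to fold, keep the paired-store heuristic only for w/x-sized (i32/i64) scalars, move non-zero lanes to lane 0 with an extract/insert pair, then store the `bsub`/`hsub`/`ssub`/`dsub` subregister directly. A hedged sketch of the two addressing cases the `st1` heuristic distinguishes, assuming NEON (illustrative, not part of this patch):

```cpp
// Illustrative only. With a plain base pointer, a non-zero lane can be
// stored with a st1 lane store ("st1 { v0.b }[3], [x0]"), so the
// subregister fold is skipped. When the address is computed with an add
// that could fold into the store, the fold is applied instead.
#include <arm_neon.h>

void store_lane3_plain(uint8x16_t v, uint8_t *p) {
  vst1q_lane_u8(p, v, 3); // base pointer used as-is: st1 is fine
}

void store_lane3_offset(uint8x16_t v, uint8_t *p, long i) {
  p[i] = vgetq_lane_u8(v, 3); // base is an ISD::ADD: fold preferred
}
```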
@@ -28878,10 +28852,6 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorStoreToSVE(
   auto Pg = getPredicateForFixedLengthVector(DAG, DL, VT);
   auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());

-  // Can be lowered to a bsub store in ISEL.
-  if (VT == MVT::v1i64 && MemVT == MVT::v1i8)
-    return SDValue();
-
   if (VT.isFloatingPoint() && Store->isTruncatingStore()) {
     EVT TruncVT = ContainerVT.changeVectorElementType(
         Store->getMemoryVT().getVectorElementType());
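The early return removed here was only needed while the `v1i64 -> v1i8` truncstore stood in for a `bsub` store; `performSTORECombine` now emits the `aarch64mfp8` store directly, so the SVE fixed-length path needs no special case. Roughly the pattern the guard covered, assuming NEON (illustrative, not part of this patch):

```cpp
// Illustrative only: truncating the single lane of a 64-bit vector and
// storing the low byte is still expected to lower to one bsub store.
#include <arm_neon.h>

void store_low_byte(uint64x1_t v, uint8_t *p) {
  *p = (uint8_t)vget_lane_u64(v, 0);
}
```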