@@ -1394,9 +1394,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     }
   }
 
-  // v1i64 -> v1i8 truncstore represents a bsub FPR8 store.
-  setTruncStoreAction(MVT::v1i64, MVT::v1i8, Legal);
-
   for (auto Op :
        {ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC,
         ISD::FROUND, ISD::FROUNDEVEN, ISD::FMAXNUM_IEEE, ISD::FMINNUM_IEEE,
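
Note on the hunk above (annotation, not part of the patch): setTruncStoreAction is the TargetLowering hook that marks a truncating store of one value type to a narrower memory type as directly selectable; any pair left unmarked defaults to Expand. A minimal sketch of the call being removed, assuming standard LLVM semantics:

// In a TargetLowering constructor: declare that a truncating store of a
// v1i64 value to v1i8 memory is Legal (previously matched in ISEL as a
// bsub FPR8 store). Deleting this reverts the pair to Expand, which is
// safe because the rewritten combine below no longer builds this
// truncstore and instead stores an aarch64mfp8 scalar directly.
setTruncStoreAction(MVT::v1i64, MVT::v1i8, Legal);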
@@ -23936,6 +23933,8 @@ static SDValue combineI8TruncStore(StoreSDNode *ST, SelectionDAG &DAG,
 static unsigned getFPSubregForVT(EVT VT) {
   assert(VT.isSimple() && "Expected simple VT");
   switch (VT.getSimpleVT().SimpleTy) {
+  case MVT::aarch64mfp8:
+    return AArch64::bsub;
   case MVT::f16:
     return AArch64::hsub;
   case MVT::f32:
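
For orientation, a hedged sketch of the mapping getFPSubregForVT implements after this change; the f32 and f64 arms are inferred from the usual AArch64 subregister indices, since this hunk does not show the full switch:

// Assumed full shape (sketch, not the verbatim function):
//   MVT::aarch64mfp8 -> AArch64::bsub  // 8-bit FPR lane (new in this patch)
//   MVT::f16         -> AArch64::hsub  // 16-bit
//   MVT::f32         -> AArch64::ssub  // 32-bit (assumed)
//   MVT::f64         -> AArch64::dsub  // 64-bit (assumed)
unsigned SubReg = getFPSubregForVT(MVT::aarch64mfp8); // == AArch64::bsub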
@@ -23947,22 +23946,6 @@ static unsigned getFPSubregForVT(EVT VT) {
23947
23946
}
23948
23947
}
23949
23948
23950
- static EVT get64BitVector(EVT ElVT) {
23951
- assert(ElVT.isSimple() && "Expected simple VT");
23952
- switch (ElVT.getSimpleVT().SimpleTy) {
23953
- case MVT::i8:
23954
- return MVT::v8i8;
23955
- case MVT::i16:
23956
- return MVT::v4i16;
23957
- case MVT::i32:
23958
- return MVT::v2i32;
23959
- case MVT::i64:
23960
- return MVT::v1i64;
23961
- default:
23962
- llvm_unreachable("Unexpected VT!");
23963
- }
23964
- }
23965
-
23966
23949
static SDValue performSTORECombine(SDNode *N,
23967
23950
TargetLowering::DAGCombinerInfo &DCI,
23968
23951
SelectionDAG &DAG,
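
get64BitVector becomes dead here because the rewritten combine in the next hunk derives the insertion vector type from the source vector itself rather than forcing a fixed 64-bit NEON type. The replacement computation, quoted from the new code below:

// Fixed-length vector with the source's element type and (minimum)
// element count; scalable-ness is dropped (see the FIXME in the next hunk).
EVT InsertVectorVT = EVT::getVectorVT(
    *DAG.getContext(), ElemVT,
    VectorVT.getVectorElementCount().getKnownMinValue(),
    /*IsScalable=*/false);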
@@ -24041,72 +24024,63 @@ static SDValue performSTORECombine(SDNode *N,
   SDValue ExtIdx = Value.getOperand(1);
   EVT VectorVT = Vector.getValueType();
   EVT ElemVT = VectorVT.getVectorElementType();
+
   if (!ValueVT.isInteger())
     return SDValue();
   if (ValueVT != MemVT && !ST->isTruncatingStore())
     return SDValue();
 
-  if (MemVT == MVT::i8) {
-    auto *ExtCst = dyn_cast<ConstantSDNode>(ExtIdx);
-    if (Subtarget->isNeonAvailable() &&
-        (VectorVT == MVT::v8i8 || VectorVT == MVT::v16i8) && ExtCst &&
-        !ExtCst->isZero() && ST->getBasePtr().getOpcode() != ISD::ADD) {
-      // These can lower to st1.b, which is preferable if we're unlikely to
-      // fold the addressing into the store.
-      return SDValue();
-    }
-
-    // Lower as truncstore of v1i64 -> v1i8 (which can lower to a bsub store).
-    SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
-    SDValue ExtVector;
-    EVT VecVT64 = get64BitVector(ElemVT);
-    if (ExtCst && ExtCst->isZero()) {
-      ExtVector =
-          DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT64, Vector, Zero);
-    } else {
-      SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
-                                Value.getValueType(), Vector, ExtIdx);
-      ExtVector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecVT64,
-                              DAG.getUNDEF(VecVT64), Ext, Zero);
-    }
-
-    SDValue Cast = DAG.getNode(AArch64ISD::NVCAST, DL, MVT::v1i64, ExtVector);
-    return DAG.getTruncStore(ST->getChain(), DL, Cast, ST->getBasePtr(),
-                             MVT::v1i8, ST->getMemOperand());
-  }
-
-  // TODO: Handle storing i8s to wider types.
-  if (ElemVT == MVT::i8)
+  // This could generate an additional extract if the index is non-zero and
+  // the extracted value has multiple uses.
+  auto *ExtCst = dyn_cast<ConstantSDNode>(ExtIdx);
+  if ((!ExtCst || !ExtCst->isZero()) && !Value.hasOneUse())
     return SDValue();
 
-  // Heuristic: If there are other users of integer scalars extracted from
-  // this vector that won't fold into the store -- abandon folding. Applying
-  // this fold may extend the vector lifetime and disrupt paired stores.
-  for (const auto &Use : Vector->uses()) {
-    if (Use.getResNo() != Vector.getResNo())
-      continue;
-    const SDNode *User = Use.getUser();
-    if (User->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
-        (!User->hasOneUse() ||
-         (*User->user_begin())->getOpcode() != ISD::STORE))
-      return SDValue();
+  if (Subtarget->isNeonAvailable() && ElemVT == MemVT &&
+      (VectorVT.is64BitVector() || VectorVT.is128BitVector()) && ExtCst &&
+      !ExtCst->isZero() && ST->getBasePtr().getOpcode() != ISD::ADD) {
+    // These can lower to st1, which is preferable if we're unlikely to fold
+    // the addressing into the store.
+    return SDValue();
   }
 
-  EVT FPElemVT = EVT::getFloatingPointVT(ElemVT.getSizeInBits());
-  EVT FPVectorVT = VectorVT.changeVectorElementType(FPElemVT);
-  SDValue Cast = DAG.getNode(ISD::BITCAST, DL, FPVectorVT, Vector);
-  SDValue Ext =
-      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, FPElemVT, Cast, ExtIdx);
+  if (MemVT == MVT::i64 || MemVT == MVT::i32) {
+    // Heuristic: If there are other users of w/x integer scalars extracted
+    // from this vector that won't fold into the store -- abandon folding.
+    // Applying this fold may disrupt paired stores.
+    for (const auto &Use : Vector->uses()) {
+      if (Use.getResNo() != Vector.getResNo())
+        continue;
+      const SDNode *User = Use.getUser();
+      if (User->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+          (!User->hasOneUse() ||
+           (*User->user_begin())->getOpcode() != ISD::STORE))
+        return SDValue();
+    }
+  }
 
-  EVT FPMemVT = EVT::getFloatingPointVT(MemVT.getSizeInBits());
-  if (ST->isTruncatingStore() && FPMemVT != FPElemVT) {
-    SDValue Trunc = DAG.getTargetExtractSubreg(getFPSubregForVT(FPMemVT), DL,
-                                               FPMemVT, Ext);
-    return DAG.getStore(ST->getChain(), DL, Trunc, ST->getBasePtr(),
-                        ST->getMemOperand());
+  SDValue ExtVector = Vector;
+  if (!ExtCst || !ExtCst->isZero()) {
+    // Handle extracting from lanes != 0.
+    SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+                              Value.getValueType(), Vector, ExtIdx);
+    // FIXME: Using a fixed-size vector for the insertion should not be
+    // necessary, but SVE ISEL is missing some folds to avoid fmovs.
+    SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
+    EVT InsertVectorVT = EVT::getVectorVT(
+        *DAG.getContext(), ElemVT,
+        VectorVT.getVectorElementCount().getKnownMinValue(), false);
+    ExtVector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, InsertVectorVT,
+                            DAG.getUNDEF(InsertVectorVT), Ext, Zero);
   }
 
-  return DAG.getStore(ST->getChain(), DL, Ext, ST->getBasePtr(),
+  EVT FPMemVT = MemVT == MVT::i8
+                    ? MVT::aarch64mfp8
+                    : EVT::getFloatingPointVT(MemVT.getSizeInBits());
+  SDValue FPSubreg = DAG.getTargetExtractSubreg(getFPSubregForVT(FPMemVT), DL,
+                                                FPMemVT, ExtVector);
+
+  return DAG.getStore(ST->getChain(), DL, FPSubreg, ST->getBasePtr(),
                       ST->getMemOperand());
 }
 
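
To see the end-to-end effect of the rewritten combine, a hypothetical example; the function and the expected codegen are illustrative and not taken from this patch's tests:

#include <arm_neon.h>
// Store one lane of a vector as an integer. With this combine the lane is
// stored straight from the SIMD register file, e.g. a bsub byte store such
// as "str b0, [x0]" for lane 0, rather than first being moved to a
// general-purpose register.
void store_lane0(uint8x16_t v, uint8_t *p) { *p = vgetq_lane_u8(v, 0); }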
@@ -28861,10 +28835,6 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorStoreToSVE(
28861
28835
auto Pg = getPredicateForFixedLengthVector(DAG, DL, VT);
28862
28836
auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
28863
28837
28864
- // Can be lowered to a bsub store in ISEL.
28865
- if (VT == MVT::v1i64 && MemVT == MVT::v1i8)
28866
- return SDValue();
28867
-
28868
28838
if (VT.isFloatingPoint() && Store->isTruncatingStore()) {
28869
28839
EVT TruncVT = ContainerVT.changeVectorElementType(
28870
28840
Store->getMemoryVT().getVectorElementType());