@@ -23938,6 +23938,20 @@ static SDValue combineI8TruncStore(StoreSDNode *ST, SelectionDAG &DAG,
23938
23938
return Chain;
23939
23939
}
23940
23940
23941
+ static unsigned getFPSubregForVT(EVT VT) {
23942
+ assert(VT.isSimple() && "Expected simple VT");
23943
+ switch (VT.getSimpleVT().SimpleTy) {
23944
+ case MVT::f16:
23945
+ return AArch64::hsub;
23946
+ case MVT::f32:
23947
+ return AArch64::ssub;
23948
+ case MVT::f64:
23949
+ return AArch64::dsub;
23950
+ default:
23951
+ llvm_unreachable("Unexpected VT!");
23952
+ }
23953
+ }
23954
+
23941
23955
static SDValue performSTORECombine(SDNode *N,
23942
23956
TargetLowering::DAGCombinerInfo &DCI,
23943
23957
SelectionDAG &DAG,
@@ -23998,15 +24012,58 @@ static SDValue performSTORECombine(SDNode *N,
23998
24012
if (SDValue Store = combineBoolVectorAndTruncateStore(DAG, ST))
23999
24013
return Store;
24000
24014
24001
- if (ST->isTruncatingStore()) {
24002
- EVT StoreVT = ST->getMemoryVT();
24003
- if (!isHalvingTruncateOfLegalScalableType(ValueVT, StoreVT))
24004
- return SDValue();
24015
+ if (ST->isTruncatingStore() &&
24016
+ isHalvingTruncateOfLegalScalableType(ValueVT, MemVT)) {
24005
24017
if (SDValue Rshrnb =
24006
24018
trySimplifySrlAddToRshrnb(ST->getOperand(1), DAG, Subtarget)) {
24007
24019
return DAG.getTruncStore(ST->getChain(), ST, Rshrnb, ST->getBasePtr(),
24008
- StoreVT, ST->getMemOperand());
24020
+ MemVT, ST->getMemOperand());
24021
+ }
24022
+ }
24023
+
24024
+ // This is an integer vector_extract_elt followed by a (possibly truncating)
24025
+ // store. We may be able to replace this with a store of an FP subregister.
24026
+ if (DCI.isAfterLegalizeDAG() && ST->isUnindexed() &&
24027
+ Value.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
24028
+
24029
+ SDValue Vector = Value.getOperand(0);
24030
+ SDValue ExtIdx = Value.getOperand(1);
24031
+ EVT VectorVT = Vector.getValueType();
24032
+ EVT ElemVT = VectorVT.getVectorElementType();
24033
+ if (!ValueVT.isInteger() || ElemVT == MVT::i8 || MemVT == MVT::i8)
24034
+ return SDValue();
24035
+ if (ValueVT != MemVT && !ST->isTruncatingStore())
24036
+ return SDValue();
24037
+
24038
+ // Heuristic: If there are other users of integer scalars extracted from
24039
+ // this vector that won't fold into the store -- abandon folding. Applying
24040
+ // this fold may extend the vector lifetime and disrupt paired stores.
24041
+ for (const auto &Use : Vector->uses()) {
24042
+ if (Use.getResNo() != Vector.getResNo())
24043
+ continue;
24044
+ const SDNode *User = Use.getUser();
24045
+ if (User->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
24046
+ (!User->hasOneUse() ||
24047
+ (*User->user_begin())->getOpcode() != ISD::STORE))
24048
+ return SDValue();
24009
24049
}
24050
+
24051
+ EVT FPElemVT = EVT::getFloatingPointVT(ElemVT.getSizeInBits());
24052
+ EVT FPVectorVT = VectorVT.changeVectorElementType(FPElemVT);
24053
+ SDValue Cast = DAG.getNode(ISD::BITCAST, DL, FPVectorVT, Vector);
24054
+ SDValue Ext =
24055
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, FPElemVT, Cast, ExtIdx);
24056
+
24057
+ EVT FPMemVT = EVT::getFloatingPointVT(MemVT.getSizeInBits());
24058
+ if (ST->isTruncatingStore() && FPMemVT != FPElemVT) {
24059
+ SDValue Trunc = DAG.getTargetExtractSubreg(getFPSubregForVT(FPMemVT), DL,
24060
+ FPMemVT, Ext);
24061
+ return DAG.getStore(ST->getChain(), DL, Trunc, ST->getBasePtr(),
24062
+ ST->getMemOperand());
24063
+ }
24064
+
24065
+ return DAG.getStore(ST->getChain(), DL, Ext, ST->getBasePtr(),
24066
+ ST->getMemOperand());
24010
24067
}
24011
24068
24012
24069
return SDValue();
0 commit comments