Skip to content

Commit d8752c7

Browse files
authored
Merge pull request #3177 from apple/eng/revert-1528a4d40022925dcc3e8cb6b8af7dd109ad7075-0726
Revert "[llvm][sve] Lowering for VLS truncating stores"
2 parents afdb120 + 0b6bbe9 commit d8752c7

File tree

9 files changed

+52
-315
lines changed

9 files changed

+52
-315
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1271,14 +1271,6 @@ class TargetLoweringBase {
12711271
getTruncStoreAction(ValVT, MemVT) == Custom);
12721272
}
12731273

1274-
virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT,
1275-
bool LegalOnly) const {
1276-
if (LegalOnly)
1277-
return isTruncStoreLegal(ValVT, MemVT);
1278-
1279-
return isTruncStoreLegalOrCustom(ValVT, MemVT);
1280-
}
1281-
12821274
/// Return how the indexed load should be treated: either it is legal, needs
12831275
/// to be promoted to a larger size, needs to be expanded to some other code
12841276
/// sequence, or the target has a custom expander for it.

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -18089,11 +18089,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
1808918089

1809018090
// If this is an FP_ROUND or TRUNC followed by a store, fold this into a
1809118091
// truncating store. We can do this even if this is already a truncstore.
18092-
if ((Value.getOpcode() == ISD::FP_ROUND ||
18093-
Value.getOpcode() == ISD::TRUNCATE) &&
18094-
Value.getNode()->hasOneUse() && ST->isUnindexed() &&
18095-
TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
18096-
ST->getMemoryVT(), LegalOperations)) {
18092+
if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
18093+
&& Value.getNode()->hasOneUse() && ST->isUnindexed() &&
18094+
TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
18095+
ST->getMemoryVT())) {
1809718096
return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
1809818097
Ptr, ST->getMemoryVT(), ST->getMemOperand());
1809918098
}

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 2 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -1249,13 +1249,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
12491249
}
12501250
}
12511251

1252-
// SVE supports truncating stores of 64 and 128-bit vectors
1253-
setTruncStoreAction(MVT::v2i64, MVT::v2i8, Custom);
1254-
setTruncStoreAction(MVT::v2i64, MVT::v2i16, Custom);
1255-
setTruncStoreAction(MVT::v2i64, MVT::v2i32, Custom);
1256-
setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
1257-
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
1258-
12591252
for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
12601253
MVT::nxv4f32, MVT::nxv2f64}) {
12611254
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
@@ -1503,16 +1496,6 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
15031496
setCondCodeAction(ISD::SETUNE, VT, Expand);
15041497
}
15051498

1506-
// Mark integer truncating stores as having custom lowering
1507-
if (VT.isInteger()) {
1508-
MVT InnerVT = VT.changeVectorElementType(MVT::i8);
1509-
while (InnerVT != VT) {
1510-
setTruncStoreAction(VT, InnerVT, Custom);
1511-
InnerVT = InnerVT.changeVectorElementType(
1512-
MVT::getIntegerVT(2 * InnerVT.getScalarSizeInBits()));
1513-
}
1514-
}
1515-
15161499
// Lower fixed length vector operations to scalable equivalents.
15171500
setOperationAction(ISD::ABS, VT, Custom);
15181501
setOperationAction(ISD::ADD, VT, Custom);
@@ -4559,7 +4542,7 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
45594542
EVT MemVT = StoreNode->getMemoryVT();
45604543

45614544
if (VT.isVector()) {
4562-
if (useSVEForFixedLengthVectorVT(VT, true))
4545+
if (useSVEForFixedLengthVectorVT(VT))
45634546
return LowerFixedLengthVectorStoreToSVE(Op, DAG);
45644547

45654548
unsigned AS = StoreNode->getAddressSpace();
@@ -4571,8 +4554,7 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
45714554
return scalarizeVectorStore(StoreNode, DAG);
45724555
}
45734556

4574-
if (StoreNode->isTruncatingStore() && VT == MVT::v4i16 &&
4575-
MemVT == MVT::v4i8) {
4557+
if (StoreNode->isTruncatingStore()) {
45764558
return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
45774559
}
45784560
// 256 bit non-temporal stores can be lowered to STNP. Do this as part of
@@ -15354,29 +15336,6 @@ static bool performTBISimplification(SDValue Addr,
1535415336
return false;
1535515337
}
1535615338

15357-
static SDValue foldTruncStoreOfExt(SelectionDAG &DAG, SDNode *N) {
15358-
assert(N->getOpcode() == ISD::STORE ||
15359-
N->getOpcode() == ISD::MSTORE && "Expected STORE dag node in input!");
15360-
15361-
if (auto Store = dyn_cast<StoreSDNode>(N)) {
15362-
if (!Store->isTruncatingStore() || Store->isIndexed())
15363-
return SDValue();
15364-
SDValue Ext = Store->getValue();
15365-
auto ExtOpCode = Ext.getOpcode();
15366-
if (ExtOpCode != ISD::ZERO_EXTEND && ExtOpCode != ISD::SIGN_EXTEND &&
15367-
ExtOpCode != ISD::ANY_EXTEND)
15368-
return SDValue();
15369-
SDValue Orig = Ext->getOperand(0);
15370-
if (Store->getMemoryVT() != Orig->getValueType(0))
15371-
return SDValue();
15372-
return DAG.getStore(Store->getChain(), SDLoc(Store), Orig,
15373-
Store->getBasePtr(), Store->getPointerInfo(),
15374-
Store->getAlign());
15375-
}
15376-
15377-
return SDValue();
15378-
}
15379-
1538015339
static SDValue performSTORECombine(SDNode *N,
1538115340
TargetLowering::DAGCombinerInfo &DCI,
1538215341
SelectionDAG &DAG,
@@ -15388,9 +15347,6 @@ static SDValue performSTORECombine(SDNode *N,
1538815347
performTBISimplification(N->getOperand(2), DCI, DAG))
1538915348
return SDValue(N, 0);
1539015349

15391-
if (SDValue Store = foldTruncStoreOfExt(DAG, N))
15392-
return Store;
15393-
1539415350
return SDValue();
1539515351
}
1539615352

llvm/lib/Target/AMDGPU/R600ISelLowering.h

Lines changed: 0 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -54,15 +54,6 @@ class R600TargetLowering final : public AMDGPUTargetLowering {
5454
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
5555
bool *IsFast = nullptr) const override;
5656

57-
virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT,
58-
bool LegalOperations) const override {
59-
// R600 has "custom" lowering for truncating stores despite not supporting
60-
// those instructions. If we allow that custom lowering in the DAG combiner
61-
// then all truncates are merged into truncating stores, giving worse code
62-
// generation. This hook prevents the DAG combiner performing that combine.
63-
return isTruncStoreLegal(ValVT, MemVT);
64-
}
65-
6657
private:
6758
unsigned Gen;
6859
/// Each OpenCL kernel has nine implicit parameters that are stored in the

llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,10 @@ define void @masked_gather_v2i8(<2 x i8>* %a, <2 x i8*>* %b) #0 {
3636
; CHECK-NEXT: cmpne [[MASK:p[0-9]+]].s, [[PG0]]/z, z[[CMP]].s, #0
3737
; CHECK-NEXT: ld1sb { z[[RES:[0-9]+]].d }, [[MASK]]/z, [z[[PTRS]].d]
3838
; CHECK-NEXT: xtn v[[XTN:[0-9]+]].2s, v[[RES]].2d
39-
; CHECK-NEXT: st1b { z[[XTN]].s }, [[PG0]], [x0]
39+
; CHECK-NEXT: mov [[RES_HI:w[0-9]+]], v[[XTN]].s[1]
40+
; CHECK-NEXT: fmov [[RES_LO:w[0-9]+]], s[[XTN]]
41+
; CHECK-NEXT: strb [[RES_LO]], [x0]
42+
; CHECK-NEXT: strb [[RES_HI]], [x0, #1]
4043
; CHECK-NEXT: ret
4144
%cval = load <2 x i8>, <2 x i8>* %a
4245
%ptrs = load <2 x i8*>, <2 x i8*>* %b
@@ -58,7 +61,8 @@ define void @masked_gather_v4i8(<4 x i8>* %a, <4 x i8*>* %b) #0 {
5861
; CHECK-NEXT: ld1sb { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d]
5962
; CHECK-NEXT: uzp1 [[UZP1:z[0-9]+]].s, [[RES]].s, [[RES]].s
6063
; CHECK-NEXT: uzp1 z[[UZP2:[0-9]+]].h, [[UZP1]].h, [[UZP1]].h
61-
; CHECK-NEXT: st1b { z[[UZP2]].h }, [[PG0]], [x0]
64+
; CHECK-NEXT: uzp1 v[[UZP3:[0-9]+]].8b, v[[UZP2]].8b, v[[UZP2]].8b
65+
; CHECK-NEXT: str s[[UZP3]], [x0]
6266
; CHECK-NEXT: ret
6367
%cval = load <4 x i8>, <4 x i8>* %a
6468
%ptrs = load <4 x i8*>, <4 x i8*>* %b
@@ -175,7 +179,10 @@ define void @masked_gather_v2i16(<2 x i16>* %a, <2 x i16*>* %b) #0 {
175179
; CHECK-NEXT: cmpne [[MASK:p[0-9]+]].s, [[PG0]]/z, z[[CMP]].s, #0
176180
; CHECK-NEXT: ld1sh { z[[RES:[0-9]+]].d }, [[MASK]]/z, [z[[PTRS]].d]
177181
; CHECK-NEXT: xtn v[[XTN:[0-9]+]].2s, v[[RES]].2d
178-
; CHECK-NEXT: st1h { z[[RES]].s }, [[PG0]], [x0]
182+
; CHECK-NEXT: mov [[RES_HI:w[0-9]+]], v[[XTN]].s[1]
183+
; CHECK-NEXT: fmov [[RES_LO:w[0-9]+]], s[[XTN]]
184+
; CHECK-NEXT: strh [[RES_LO]], [x0]
185+
; CHECK-NEXT: strh [[RES_HI]], [x0, #2]
179186
; CHECK-NEXT: ret
180187
%cval = load <2 x i16>, <2 x i16>* %a
181188
%ptrs = load <2 x i16*>, <2 x i16*>* %b

llvm/test/CodeGen/AArch64/sve-fixed-length-trunc-stores.ll

Lines changed: 0 additions & 223 deletions
This file was deleted.

0 commit comments

Comments (0)