rust-lang
diff --git a/‎lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Lines changed: 23 additions & 4 deletions b/‎lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Lines changed: 23 additions & 4 deletions
diff --git a/‎lib/Target/SystemZ/SystemZISelLowering.cpp
Lines changed: 26 additions & 10 deletions b/‎lib/Target/SystemZ/SystemZISelLowering.cpp
Lines changed: 26 additions & 10 deletions
diff --git a/‎test/CodeGen/AMDGPU/indirect-addressing-si.ll
Lines changed: 19 additions & 29 deletions b/‎test/CodeGen/AMDGPU/indirect-addressing-si.ll
Lines changed: 19 additions & 29 deletions
diff --git a/‎test/CodeGen/ARM/func-argpassing-endian.ll
Lines changed: 0 additions & 2 deletions b/‎test/CodeGen/ARM/func-argpassing-endian.ll
Lines changed: 0 additions & 2 deletions
@@ -242,7 +242,8 @@ namespace {
     }
 
     bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
-    bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded);
+    bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
+                                    bool AssumeSingleUse = false);
 
     bool CombineToPreIndexedLoadStore(SDNode *N);
     bool CombineToPostIndexedLoadStore(SDNode *N);
@@ -1064,11 +1065,12 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
 /// Check the specified vector node value to see if it can be simplified or
 /// if things it uses can be simplified as it only uses some of the elements.
 /// If so, return true.
-bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
-                                             const APInt &Demanded) {
+bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
+                                             bool AssumeSingleUse) {
   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
   APInt KnownUndef, KnownZero;
-  if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO))
+  if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO,
+                                      0, AssumeSingleUse))
     return false;
 
   // Revisit the node.
@@ -15014,6 +15016,23 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
     }
   }
 
+  // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
+  // simplify it based on the (valid) extraction indices.
+  if (llvm::all_of(InVec->uses(), [&](SDNode *Use) {
+        return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+               Use->getOperand(0) == InVec &&
+               isa<ConstantSDNode>(Use->getOperand(1));
+      })) {
+    APInt DemandedElts = APInt::getNullValue(VT.getVectorNumElements());
+    for (SDNode *Use : InVec->uses()) {
+      auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
+      if (CstElt->getAPIntValue().ult(VT.getVectorNumElements()))
+        DemandedElts.setBit(CstElt->getZExtValue());
+    }
+    if (SimplifyDemandedVectorElts(InVec, DemandedElts, true))
+      return SDValue(N, 0);
+  }
+
   bool BCNumEltsChanged = false;
   EVT ExtVT = VT.getVectorElementType();
   EVT LVT = ExtVT;
 
@@ -3893,20 +3893,34 @@ static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
   return nullptr;
 }
 
-// Convert the mask of the given VECTOR_SHUFFLE into a byte-level mask,
+// Convert the mask of the given shuffle op into a byte-level mask,
 // as if it had type vNi8.
-static void getVPermMask(ShuffleVectorSDNode *VSN,
+static bool getVPermMask(SDValue ShuffleOp,
                          SmallVectorImpl<int> &Bytes) {
-  EVT VT = VSN->getValueType(0);
+  EVT VT = ShuffleOp.getValueType();
   unsigned NumElements = VT.getVectorNumElements();
   unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
-  Bytes.resize(NumElements * BytesPerElement, -1);
-  for (unsigned I = 0; I < NumElements; ++I) {
-    int Index = VSN->getMaskElt(I);
-    if (Index >= 0)
+
+  if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
+    Bytes.resize(NumElements * BytesPerElement, -1);
+    for (unsigned I = 0; I < NumElements; ++I) {
+      int Index = VSN->getMaskElt(I);
+      if (Index >= 0)
+        for (unsigned J = 0; J < BytesPerElement; ++J)
+          Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
+    }
+    return true;
+  }
+  if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
+      isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
+    unsigned Index = ShuffleOp.getConstantOperandVal(1);
+    Bytes.resize(NumElements * BytesPerElement, -1);
+    for (unsigned I = 0; I < NumElements; ++I)
       for (unsigned J = 0; J < BytesPerElement; ++J)
         Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
+    return true;
   }
+  return false;
 }
 
 // Bytes is a VPERM-like permute vector, except that -1 is used for
@@ -4075,7 +4089,8 @@ bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
       // See whether the bytes we need come from a contiguous part of one
       // operand.
       SmallVector<int, SystemZ::VectorBytes> OpBytes;
-      getVPermMask(cast<ShuffleVectorSDNode>(Op), OpBytes);
+      if (!getVPermMask(Op, OpBytes))
+        break;
       int NewByte;
       if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
         break;
@@ -5109,13 +5124,14 @@ SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
     if (Opcode == ISD::BITCAST)
       // Look through bitcasts.
       Op = Op.getOperand(0);
-    else if (Opcode == ISD::VECTOR_SHUFFLE &&
+    else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
              canTreatAsByteVector(Op.getValueType())) {
       // Get a VPERM-like permute mask and see whether the bytes covered
       // by the extracted element are a contiguous sequence from one
       // source operand.
       SmallVector<int, SystemZ::VectorBytes> Bytes;
-      getVPermMask(cast<ShuffleVectorSDNode>(Op), Bytes);
+      if (!getVPermMask(Op, Bytes))
+        break;
       int First;
       if (!getShuffleInput(Bytes, Index * BytesPerElement,
                            BytesPerElement, First))
 
@@ -480,38 +480,28 @@ bb7:                                              ; preds = %bb4, %bb1
 
 ; GCN-LABEL: {{^}}multi_same_block:
 
-; GCN-DAG: v_mov_b32_e32 v[[VEC0_ELT0:[0-9]+]], 0x41880000
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41900000
-; GCN-DAG: v_mov_b32_e32 v[[VEC0_ELT2:[0-9]+]], 0x41980000
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a00000
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a80000
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41b00000
-; GCN-DAG: s_load_dword [[ARG:s[0-9]+]]
-; IDXMODE-DAG: s_add_i32 [[ARG_ADD:s[0-9]+]], [[ARG]], -16
-
-; MOVREL-DAG: s_add_i32 m0, [[ARG]], -16
-; MOVREL: v_movreld_b32_e32 v[[VEC0_ELT0]], 4.0
-; GCN-NOT: m0
-
-; IDXMODE: s_set_gpr_idx_on [[ARG_ADD]], dst
-; IDXMODE: v_mov_b32_e32 v[[VEC0_ELT0]], 4.0
+; GCN: s_load_dword [[ARG:s[0-9]+]]
+
+; MOVREL: v_mov_b32_e32 v{{[0-9]+}}, 0x41900000
+; MOVREL: v_mov_b32_e32 v{{[0-9]+}}, 0x41b0cccd
+; MOVREL: s_waitcnt
+; MOVREL: s_add_i32 m0, [[ARG]], -16
+; MOVREL: v_movreld_b32_e32 v{{[0-9]+}}, 4.0
+; MOVREL: v_movreld_b32_e32 v{{[0-9]+}}, -4.0
+; MOVREL: s_mov_b32 m0, -1
+
+
+; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, 0x41900000
+; IDXMODE: s_waitcnt
+; IDXMODE: s_add_i32 [[ARG]], [[ARG]], -16
+; IDXMODE: s_set_gpr_idx_on [[ARG]], dst
+; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, 4.0
 ; IDXMODE: s_set_gpr_idx_off
-
-; GCN: v_mov_b32_e32 v[[VEC0_ELT2]], 0x4188cccd
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x4190cccd
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x4198cccd
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a0cccd
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a8cccd
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41b0cccd
-
-; MOVREL: v_movreld_b32_e32 v[[VEC0_ELT2]], -4.0
-
-; IDXMODE: s_set_gpr_idx_on [[ARG_ADD]], dst
-; IDXMODE: v_mov_b32_e32 v[[VEC0_ELT2]], -4.0
+; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, 0x41b0cccd
+; IDXMODE: s_set_gpr_idx_on [[ARG]], dst
+; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, -4.0
 ; IDXMODE: s_set_gpr_idx_off
 
-; PREGFX9: s_mov_b32 m0, -1
-; GFX9-NOT: s_mov_b32 m0
 ; GCN: ds_write_b32
 ; GCN: ds_write_b32
 ; GCN: s_endpgm
 
@@ -38,7 +38,6 @@ define void @arg_double( double %val ) {
 define void @arg_v4i32(<4 x i32> %vec ) {
 ; CHECK-LE-LABEL: arg_v4i32:
 ; CHECK-LE:       @ %bb.0:
-; CHECK-LE-NEXT:    vmov d17, r2, r3
 ; CHECK-LE-NEXT:    vmov d16, r0, r1
 ; CHECK-LE-NEXT:    movw r0, :lower16:var32
 ; CHECK-LE-NEXT:    movt r0, :upper16:var32
@@ -47,7 +46,6 @@ define void @arg_v4i32(<4 x i32> %vec ) {
 ;
 ; CHECK-BE-LABEL: arg_v4i32:
 ; CHECK-BE:       @ %bb.0:
-; CHECK-BE-NEXT:    vmov d17, r3, r2
 ; CHECK-BE-NEXT:    vmov d16, r1, r0
 ; CHECK-BE-NEXT:    movw r0, :lower16:var32
 ; CHECK-BE-NEXT:    movt r0, :upper16:var32