llvm
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
Lines changed: 37 additions & 8 deletions
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll b/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll
Lines changed: 184 additions & 184 deletions
diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-extract_subvector.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-extract_subvector.ll
Lines changed: 2 additions & 2 deletions
diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll
Lines changed: 2 additions & 2 deletions
@@ -1326,28 +1326,29 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
       auto *VecSrcTy = cast<VectorType>(Operands[0]->getType());
       int NumSubElts, SubIndex;
 
+      // TODO: move more of this inside improveShuffleKindFromMask.
       if (Shuffle->changesLength()) {
+        ArrayRef<int> Mask = Shuffle->getShuffleMask();
+
         // Treat a 'subvector widening' as a free shuffle.
         if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
           return 0;
 
         if (Shuffle->isExtractSubvectorMask(SubIndex))
           return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
-                                           Shuffle->getShuffleMask(), CostKind,
-                                           SubIndex, VecTy, Operands);
+                                           Mask, CostKind, SubIndex, VecTy,
+                                           Operands);
 
         if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
           return TargetTTI->getShuffleCost(
-              TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(),
-              CostKind, SubIndex,
+              TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex,
               FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
               Operands);
 
         int ReplicationFactor, VF;
         if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
-          APInt DemandedDstElts =
-              APInt::getZero(Shuffle->getShuffleMask().size());
-          for (auto I : enumerate(Shuffle->getShuffleMask())) {
+          APInt DemandedDstElts = APInt::getZero(Mask.size());
+          for (auto I : enumerate(Mask)) {
             if (I.value() != PoisonMaskElem)
               DemandedDstElts.setBit(I.index());
           }
@@ -1356,7 +1357,35 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
               DemandedDstElts, CostKind);
         }
 
-        return CostKind == TTI::TCK_RecipThroughput ? -1 : 1;
+        bool IsUnary = isa<UndefValue>(Operands[1]);
+        NumSubElts = VecSrcTy->getElementCount().getKnownMinValue();
+        SmallVector<int, 16> AdjustMask(Mask.begin(), Mask.end());
+
+        // Widening shuffle - widening the source(s) to the new length
+        // (treated as free - see above), and then perform the adjusted
+        // shuffle at that width.
+        if (Shuffle->increasesLength()) {
+          for (int &M : AdjustMask)
+            M = M >= NumSubElts ? (M + (Mask.size() - NumSubElts)) : M;
+
+          return TargetTTI->getShuffleCost(
+              IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, VecTy,
+              AdjustMask, CostKind, 0, nullptr);
+        }
+
+        // Narrowing shuffle - perform shuffle at original wider width and
+        // then extract the lower elements.
+        AdjustMask.append(NumSubElts - Mask.size(), -1);
+
+        InstructionCost ShuffleCost = TargetTTI->getShuffleCost(
+            IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc,
+            VecSrcTy, AdjustMask, CostKind, 0, nullptr);
+
+        SmallVector<int, 16> ExtractMask(Mask.size(), -1);
+        std::iota(ExtractMask.begin(), ExtractMask.end(), 0);
+        return ShuffleCost + TargetTTI->getShuffleCost(
+                                 TTI::SK_ExtractSubvector, VecTy, ExtractMask,
+                                 CostKind, 0, VecSrcTy, Operands);
       }
 
       if (Shuffle->isIdentity())
 
@@ -19,7 +19,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 5, i32 6, i32 7, i32 poison>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 5, i32 6, i32 7, i32 poison>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; VLEN128-LABEL: 'test_vXf64'
@@ -32,7 +32,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) {
 ; VLEN128-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; VLEN128-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
 ; VLEN128-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; VLEN128-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 5, i32 6, i32 7, i32 poison>
+; VLEN128-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 5, i32 6, i32 7, i32 poison>
 ; VLEN128-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> <i32 0, i32 1>
 
@@ -56,8 +56,8 @@ define <8 x i64> @interleave2_v8i64(<4 x i64> %v0, <4 x i64> %v1) {
 ; TODO: getInstructionCost doesn't call getShuffleCost here because the shuffle changes length
 define {<4 x i8>, <4 x i8>} @deinterleave_2(<8 x i8> %v) {
 ; CHECK-LABEL: 'deinterleave_2'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %v0 = shufflevector <8 x i8> %v, <8 x i8> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %v1 = shufflevector <8 x i8> %v, <8 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v0 = shufflevector <8 x i8> %v, <8 x i8> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v1 = shufflevector <8 x i8> %v, <8 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %res0 = insertvalue { <4 x i8>, <4 x i8> } poison, <4 x i8> %v0, 0
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %res1 = insertvalue { <4 x i8>, <4 x i8> } %res0, <4 x i8> %v1, 1
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret { <4 x i8>, <4 x i8> } %res1