swiftlang
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
Lines changed: 0 additions & 51 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll b/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll
Lines changed: 26 additions & 22 deletions
@@ -519,57 +519,6 @@ InstructionCost GCNTTIImpl::getArithmeticInstrCost(
     TTI::OperandValueProperties Opd1PropInfo,
     TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
     const Instruction *CxtI) {
-  EVT OrigTy = TLI->getValueType(DL, Ty);
-  if (!OrigTy.isSimple()) {
-    // FIXME: We're having to query the throughput cost so that the basic
-    // implementation tries to generate legalize and scalarization costs. Maybe
-    // we could hoist the scalarization code here?
-    if (CostKind != TTI::TCK_CodeSize)
-      return BaseT::getArithmeticInstrCost(Opcode, Ty, TTI::TCK_RecipThroughput,
-                                           Opd1Info, Opd2Info, Opd1PropInfo,
-                                           Opd2PropInfo, Args, CxtI);
-    // Scalarization
-
-    // Check if any of the operands are vector operands.
-    int ISD = TLI->InstructionOpcodeToISD(Opcode);
-    assert(ISD && "Invalid opcode");
-
-    std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
-
-    bool IsFloat = Ty->isFPOrFPVectorTy();
-    // Assume that floating point arithmetic operations cost twice as much as
-    // integer operations.
-    unsigned OpCost = (IsFloat ? 2 : 1);
-
-    if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
-      // The operation is legal. Assume it costs 1.
-      // TODO: Once we have extract/insert subvector cost we need to use them.
-      return LT.first * OpCost;
-    }
-
-    if (!TLI->isOperationExpand(ISD, LT.second)) {
-      // If the operation is custom lowered, then assume that the code is twice
-      // as expensive.
-      return LT.first * 2 * OpCost;
-    }
-
-    // Else, assume that we need to scalarize this op.
-    // TODO: If one of the types get legalized by splitting, handle this
-    // similarly to what getCastInstrCost() does.
-    if (auto *VTy = dyn_cast<VectorType>(Ty)) {
-      unsigned Num = cast<FixedVectorType>(VTy)->getNumElements();
-      InstructionCost Cost = getArithmeticInstrCost(
-          Opcode, VTy->getScalarType(), CostKind, Opd1Info, Opd2Info,
-          Opd1PropInfo, Opd2PropInfo, Args, CxtI);
-      // Return the cost of multiple scalar invocation plus the cost of
-      // inserting and extracting the values.
-      SmallVector<Type *> Tys(Args.size(), Ty);
-      return getScalarizationOverhead(VTy, Args, Tys) + Num * Cost;
-    }
-
-    // We don't know anything about this scalar instruction.
-    return OpCost;
-  }
 
   // Legalize the type.
   std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
 
@@ -12477,6 +12477,6 @@ SITargetLowering::getTypeLegalizationCost(const DataLayout &DL,
   if (Size <= 256)
     return Cost;
 
-  Cost.first = (Size + 255) / 256;
+  Cost.first += (Size + 255) / 256;
   return Cost;
 }
@@ -15,7 +15,7 @@ define amdgpu_kernel void @add_i32() #0 {
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v6i32 = add <6 x i32> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v7i32 = add <7 x i32> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = add <8 x i32> undef, undef
-; ALL-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v32i32 = add <32 x i32> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v9i32 = add <9 x i32> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; ALL-SIZE-LABEL: 'add_i32'
@@ -27,7 +27,7 @@ define amdgpu_kernel void @add_i32() #0 {
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v6i32 = add <6 x i32> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v7i32 = add <7 x i32> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = add <8 x i32> undef, undef
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v32i32 = add <32 x i32> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v9i32 = add <9 x i32> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %i32 = add i32 undef, undef
@@ -38,7 +38,7 @@ define amdgpu_kernel void @add_i32() #0 {
   %v6i32 = add <6 x i32> undef, undef
   %v7i32 = add <7 x i32> undef, undef
   %v8i32 = add <8 x i32> undef, undef
-  %v32i32 = add <32 x i32> undef, undef
+  %v9i32 = add <9 x i32> undef, undef
   ret void
 }
 
@@ -48,34 +48,22 @@ define amdgpu_kernel void @add_i64() #0 {
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = add <2 x i64> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = add <3 x i64> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = add <4 x i64> undef, undef
-; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = add <5 x i64> undef, undef
-; ALL-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v6i64 = add <6 x i64> undef, undef
-; ALL-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v7i64 = add <7 x i64> undef, undef
-; ALL-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v8i64 = add <8 x i64> undef, undef
-; ALL-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v16i64 = add <16 x i64> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = add <5 x i64> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; ALL-SIZE-LABEL: 'add_i64'
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = add i64 undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = add <2 x i64> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = add <3 x i64> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = add <4 x i64> undef, undef
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = add <5 x i64> undef, undef
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v6i64 = add <6 x i64> undef, undef
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v7i64 = add <7 x i64> undef, undef
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v8i64 = add <8 x i64> undef, undef
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v16i64 = add <16 x i64> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = add <5 x i64> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %i64 = add i64 undef, undef
   %v2i64 = add <2 x i64> undef, undef
   %v3i64 = add <3 x i64> undef, undef
   %v4i64 = add <4 x i64> undef, undef
   %v5i64 = add <5 x i64> undef, undef
-  %v6i64 = add <6 x i64> undef, undef
-  %v7i64 = add <7 x i64> undef, undef
-  %v8i64 = add <8 x i64> undef, undef
-  %v16i64 = add <16 x i64> undef, undef
   ret void
 }
 
@@ -87,6 +75,8 @@ define amdgpu_kernel void @add_i16() #0 {
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = add <4 x i16> undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = add <5 x i16> undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v6i16 = add <6 x i16> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i16 = add <16 x i16> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v17i16 = add <17 x i16> undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOW16-LABEL: 'add_i16'
@@ -96,6 +86,8 @@ define amdgpu_kernel void @add_i16() #0 {
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = add <4 x i16> undef, undef
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = add <5 x i16> undef, undef
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v6i16 = add <6 x i16> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i16 = add <16 x i16> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v17i16 = add <17 x i16> undef, undef
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FAST16-SIZE-LABEL: 'add_i16'
@@ -105,6 +97,8 @@ define amdgpu_kernel void @add_i16() #0 {
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = add <4 x i16> undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = add <5 x i16> undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v6i16 = add <6 x i16> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i16 = add <16 x i16> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v17i16 = add <17 x i16> undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLOW16-SIZE-LABEL: 'add_i16'
@@ -114,6 +108,8 @@ define amdgpu_kernel void @add_i16() #0 {
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = add <4 x i16> undef, undef
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = add <5 x i16> undef, undef
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v6i16 = add <6 x i16> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i16 = add <16 x i16> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v17i16 = add <17 x i16> undef, undef
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %i16 = add i16 undef, undef
@@ -122,6 +118,8 @@ define amdgpu_kernel void @add_i16() #0 {
   %v4i16 = add <4 x i16> undef, undef
   %v5i16 = add <5 x i16> undef, undef
   %v6i16 = add <6 x i16> undef, undef
+  %v16i16 = add <16 x i16> undef, undef
+  %v17i16 = add <17 x i16> undef, undef
   ret void
 }
 
@@ -133,6 +131,8 @@ define amdgpu_kernel void @add_i8() #0 {
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = add <4 x i8> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = add <5 x i8> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v6i8 = add <6 x i8> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v32i8 = add <32 x i8> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v33i8 = add <33 x i8> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; ALL-SIZE-LABEL: 'add_i8'
@@ -142,6 +142,8 @@ define amdgpu_kernel void @add_i8() #0 {
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = add <4 x i8> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = add <5 x i8> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v6i8 = add <6 x i8> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v32i8 = add <32 x i8> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v33i8 = add <33 x i8> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %i8 = add i8 undef, undef
@@ -150,12 +152,14 @@ define amdgpu_kernel void @add_i8() #0 {
   %v4i8 = add <4 x i8> undef, undef
   %v5i8 = add <5 x i8> undef, undef
   %v6i8 = add <6 x i8> undef, undef
+  %v32i8 = add <32 x i8> undef, undef
+  %v33i8 = add <33 x i8> undef, undef
   ret void
 }
 
 define amdgpu_kernel void @sub() #0 {
 ; FAST16-LABEL: 'sub'
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i16 undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
@@ -165,7 +169,7 @@ define amdgpu_kernel void @sub() #0 {
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOW16-LABEL: 'sub'
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i16 undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
@@ -175,7 +179,7 @@ define amdgpu_kernel void @sub() #0 {
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FAST16-SIZE-LABEL: 'sub'
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i16 undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
@@ -185,7 +189,7 @@ define amdgpu_kernel void @sub() #0 {
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLOW16-SIZE-LABEL: 'sub'
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i16 undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
@@ -194,7 +198,7 @@ define amdgpu_kernel void @sub() #0 {
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = sub <4 x i16> undef, undef
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
-  %i8 = sub i16 undef, undef
+  %i8 = sub i8 undef, undef
   %i16 = sub i16 undef, undef
   %i32 = sub i32 undef, undef
   %i64 = sub i64 undef, undef
Original file line number	Diff line number	Diff line change
`@@ -12477,6 +12477,6 @@ SITargetLowering::getTypeLegalizationCost(const DataLayout &DL,`
`12477`	`12477`	`if (Size <= 256)`
`12478`	`12478`	`return Cost;`
`12479`	`12479`
`12480`		`- Cost.first = (Size + 255) / 256;`
	`12480`	`+ Cost.first += (Size + 255) / 256;`
`12481`	`12481`	`return Cost;`
`12482`	`12482`	`}`