swiftlang
diff --git a/‎llvm/lib/Target/AArch64/AArch64Subtarget.h
Lines changed: 1 addition & 1 deletion b/‎llvm/lib/Target/AArch64/AArch64Subtarget.h
Lines changed: 1 addition & 1 deletion
diff --git a/‎llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Lines changed: 12 additions & 0 deletions b/‎llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Lines changed: 12 additions & 0 deletions
diff --git a/‎llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
Lines changed: 5 additions & 0 deletions b/‎llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
Lines changed: 5 additions & 0 deletions
diff --git a/‎llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
Lines changed: 8 additions & 8 deletions b/‎llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
Lines changed: 8 additions & 8 deletions
diff --git a/‎llvm/test/Analysis/CostModel/AArch64/arith-overflow.ll
Lines changed: 6 additions & 6 deletions b/‎llvm/test/Analysis/CostModel/AArch64/arith-overflow.ll
Lines changed: 6 additions & 6 deletions
diff --git a/‎llvm/test/Analysis/CostModel/AArch64/bswap.ll
Lines changed: 1 addition & 1 deletion b/‎llvm/test/Analysis/CostModel/AArch64/bswap.ll
Lines changed: 1 addition & 1 deletion
@@ -103,7 +103,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
 #include "AArch64GenSubtargetInfo.inc"
 
   uint8_t MaxInterleaveFactor = 2;
-  uint8_t VectorInsertExtractBaseCost = 3;
+  uint8_t VectorInsertExtractBaseCost = 2;
   uint16_t CacheLineSize = 0;
   uint16_t PrefetchDistance = 0;
   uint16_t MinPrefetchStride = 1;
 
@@ -2560,6 +2560,18 @@ InstructionCost AArch64TTIImpl::getVectorInstrCost(const Instruction &I,
   return getVectorInstrCostHelper(&I, Val, Index, true /* HasRealUse */);
 }
 
+InstructionCost AArch64TTIImpl::getScalarizationOverhead(
+    VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
+    TTI::TargetCostKind CostKind) {
+  if (isa<ScalableVectorType>(Ty))
+    return InstructionCost::getInvalid();
+  if (Ty->getElementType()->isFloatingPointTy())
+    return BaseT::getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
+                                           CostKind);
+  return DemandedElts.popcount() * (Insert + Extract) *
+         ST->getVectorInsertExtractBaseCost();
+}
+
 InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
     unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
     TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
 
@@ -385,6 +385,11 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
                                  VectorType *SubTp,
                                  ArrayRef<const Value *> Args = std::nullopt);
 
+  InstructionCost getScalarizationOverhead(VectorType *Ty,
+                                           const APInt &DemandedElts,
+                                           bool Insert, bool Extract,
+                                           TTI::TargetCostKind CostKind);
+
   /// Return the cost of the scaling factor used in the addressing
   /// mode represented by AM for this target, for a load/store
   /// of the specified type.
 
@@ -198,16 +198,16 @@ define i32 @fdiv(i32 %arg) {
 define i32 @frem(i32 %arg) {
 ; CHECK-LABEL: 'frem'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = frem half undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4F16 = frem <4 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V8F16 = frem <8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 116 for instruction: %V16F16 = frem <16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4F16 = frem <4 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8F16 = frem <8 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V16F16 = frem <16 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2F32 = frem <2 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4F32 = frem <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8F32 = frem <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2F32 = frem <2 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4F32 = frem <4 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8F32 = frem <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2F64 = frem <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4F64 = frem <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = frem <2 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4F64 = frem <4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %F16 = frem half undef, undef
 
@@ -355,9 +355,9 @@ declare {<64 x i8>, <64 x i1>}  @llvm.smul.with.overflow.v64i8(<64 x i8>, <64 x
 define i32 @smul(i32 %arg) {
 ; RECIP-LABEL: 'smul'
 ; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
-; RECIP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; RECIP-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; RECIP-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
 ; RECIP-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; RECIP-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
@@ -437,9 +437,9 @@ declare {<64 x i8>, <64 x i1>}  @llvm.umul.with.overflow.v64i8(<64 x i8>, <64 x
 define i32 @umul(i32 %arg) {
 ; RECIP-LABEL: 'umul'
 ; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
-; RECIP-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; RECIP-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; RECIP-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 132 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
 ; RECIP-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; RECIP-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
 
@@ -44,7 +44,7 @@ define void @neon() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3i32 = call <3 x i32> @llvm.bswap.v3i32(<3 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v4i48 = call <4 x i48> @llvm.bswap.v4i48(<4 x i48> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v4i48 = call <4 x i48> @llvm.bswap.v4i48(<4 x i48> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %v4i16 = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> undef)
Original file line number	Diff line number	Diff line change
`@@ -44,7 +44,7 @@ define void @neon() {`
`44`	`44`	`; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> undef)`
`45`	`45`	`; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> undef)`
`46`	`46`	`; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3i32 = call <3 x i32> @llvm.bswap.v3i32(<3 x i32> undef)`
`47`		`-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4i48 = call <4 x i48> @llvm.bswap.v4i48(<4 x i48> undef)`
	`47`	`+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4i48 = call <4 x i48> @llvm.bswap.v4i48(<4 x i48> undef)`
`48`	`48`	`; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void`
`49`	`49`	`;`
`50`	`50`	`%v4i16 = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> undef)`