llvm
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-codesize.ll b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-codesize.ll
Lines changed: 28 additions & 28 deletions
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-latency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-latency.ll
Lines changed: 28 additions & 28 deletions
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-sizelatency.ll
Lines changed: 28 additions & 28 deletions
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll
Lines changed: 28 additions & 28 deletions
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-transpose-codesize.ll b/llvm/test/Analysis/CostModel/X86/shuffle-transpose-codesize.ll
Lines changed: 28 additions & 28 deletions
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-transpose-latency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-transpose-latency.ll
Lines changed: 28 additions & 28 deletions
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-transpose-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-transpose-sizelatency.ll
Lines changed: 28 additions & 28 deletions
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll b/llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll
Lines changed: 28 additions & 28 deletions
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr94546.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr94546.ll
Lines changed: 13 additions & 30 deletions
@@ -1775,7 +1775,7 @@ InstructionCost X86TTIImpl::getShuffleCost(
   }
 
   // For 2-input shuffles, we must account for splitting the 2 inputs into many.
-  if (Kind == TTI::SK_PermuteTwoSrc && LT.first != 1) {
+  if (Kind == TTI::SK_PermuteTwoSrc && !IsInLaneShuffle && LT.first != 1) {
     // We assume that source and destination have the same vector type.
     InstructionCost NumOfDests = LT.first;
     InstructionCost NumOfShufflesPerDest = LT.first * 2 - 1;
 
@@ -1,32 +1,19 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -mtriple=x86_64-- -mcpu=x86-64    -O3                   -S < %s  | FileCheck %s --check-prefixes=SSE,SSE2
-; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -O3                   -S < %s  | FileCheck %s --check-prefixes=SSE,SSE4
-; RUN: opt -mtriple=x86_64-- -mcpu=btver2    -O3                   -S < %s  | FileCheck %s --check-prefixes=AVX,AVX1
-; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v3 -O3                   -S < %s  | FileCheck %s --check-prefixes=AVX,AVX2
-; RUN: opt -mtriple=x86_64-- -mcpu=x86-64    -passes="default<O3>" -S < %s  | FileCheck %s --check-prefixes=SSE,SSE2
-; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -passes="default<O3>" -S < %s  | FileCheck %s --check-prefixes=SSE,SSE4
-; RUN: opt -mtriple=x86_64-- -mcpu=btver2    -passes="default<O3>" -S < %s  | FileCheck %s --check-prefixes=AVX,AVX1
-; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v3 -passes="default<O3>" -S < %s  | FileCheck %s --check-prefixes=AVX,AVX2
+; RUN: opt -mtriple=x86_64-- -mcpu=x86-64    -O3                   -S < %s  | FileCheck %s
+; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -O3                   -S < %s  | FileCheck %s
+; RUN: opt -mtriple=x86_64-- -mcpu=btver2    -O3                   -S < %s  | FileCheck %s
+; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v3 -O3                   -S < %s  | FileCheck %s
+; RUN: opt -mtriple=x86_64-- -mcpu=x86-64    -passes="default<O3>" -S < %s  | FileCheck %s
+; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -passes="default<O3>" -S < %s  | FileCheck %s
+; RUN: opt -mtriple=x86_64-- -mcpu=btver2    -passes="default<O3>" -S < %s  | FileCheck %s
+; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v3 -passes="default<O3>" -S < %s  | FileCheck %s
 
 define <4 x double> @PR94546(<4 x double> %a, <4 x double> %b) {
-; SSE2-LABEL: @PR94546(
-; SSE2-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 6>
-; SSE2-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 7>
-; SSE2-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
-; SSE2-NEXT:    [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 1>
-; SSE2-NEXT:    ret <4 x double> [[TMP4]]
-;
-; SSE4-LABEL: @PR94546(
-; SSE4-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 poison, i32 poison, i32 6>
-; SSE4-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 poison, i32 poison, i32 7>
-; SSE4-NEXT:    [[TMP3:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
-; SSE4-NEXT:    ret <4 x double> [[TMP3]]
-;
-; AVX-LABEL: @PR94546(
-; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 poison, i32 poison, i32 6>
-; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 poison, i32 poison, i32 7>
-; AVX-NEXT:    [[TMP3:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
-; AVX-NEXT:    ret <4 x double> [[TMP3]]
+; CHECK-LABEL: @PR94546(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 poison, i32 poison, i32 6>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 poison, i32 poison, i32 7>
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret <4 x double> [[TMP3]]
 ;
   %vecext = extractelement <4 x double> %a, i32 0
   %vecext1 = extractelement <4 x double> %a, i32 1
@@ -47,7 +34,3 @@ define <4 x double> @PR94546(<4 x double> %a, <4 x double> %b) {
   %shuffle = shufflevector <4 x double> %vecinit13, <4 x double> %a, <4 x i32> <i32 0, i32 poison, i32 poison, i32 3>
   ret <4 x double> %shuffle
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; AVX1: {{.*}}
-; AVX2: {{.*}}
-; SSE: {{.*}}
Original file line number	Diff line number	Diff line change
`@@ -1775,7 +1775,7 @@ InstructionCost X86TTIImpl::getShuffleCost(`
`1775`	`1775`	`}`
`1776`	`1776`
`1777`	`1777`	`// For 2-input shuffles, we must account for splitting the 2 inputs into many.`
`1778`		`- if (Kind == TTI::SK_PermuteTwoSrc && LT.first != 1) {`
	`1778`	`+ if (Kind == TTI::SK_PermuteTwoSrc && !IsInLaneShuffle && LT.first != 1) {`
`1779`	`1779`	`// We assume that source and destination have the same vector type.`
`1780`	`1780`	`InstructionCost NumOfDests = LT.first;`
`1781`	`1781`	`InstructionCost NumOfShufflesPerDest = LT.first * 2 - 1;`