Skip to content

Commit 718d50d

Browse files
committed
[VectorCombine] foldPermuteOfBinops - prefer the new fold for matching costs.
Minor tweak to #114101 - as we're reducing the instruction count, we should prefer the fold if the old/new costs are the same.
1 parent 8e61aaa commit 718d50d

File tree

3 files changed

+54
-41
lines changed

3 files changed

+54
-41
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1478,7 +1478,9 @@ bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
14781478
LLVM_DEBUG(dbgs() << "Found a shuffle feeding a shuffled binop: " << I
14791479
<< "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
14801480
<< "\n");
1481-
if (NewCost >= OldCost)
1481+
1482+
// If costs are equal, still fold as we reduce instruction count.
1483+
if (NewCost > OldCost)
14821484
return false;
14831485

14841486
Value *Shuf0 = Builder.CreateShuffleVector(Op00, Op01, NewMask0);

llvm/test/Transforms/PhaseOrdering/X86/pr50392.ll

Lines changed: 28 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,39 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt -mtriple=x86_64-- -mcpu=x86-64 -O3 -S < %s | FileCheck %s --check-prefixes=SSE
3-
; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -O3 -S < %s | FileCheck %s --check-prefixes=SSE
2+
; RUN: opt -mtriple=x86_64-- -mcpu=x86-64 -O3 -S < %s | FileCheck %s --check-prefixes=SSE,SSE2
3+
; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -O3 -S < %s | FileCheck %s --check-prefixes=SSE,SSE4
44
; RUN: opt -mtriple=x86_64-- -mcpu=btver2 -O3 -S < %s | FileCheck %s --check-prefixes=AVX,AVX1
55
; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v3 -O3 -S < %s | FileCheck %s --check-prefixes=AVX,AVX2
6-
; RUN: opt -mtriple=x86_64-- -mcpu=x86-64 -passes="default<O3>" -S < %s | FileCheck %s --check-prefixes=SSE
7-
; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -passes="default<O3>" -S < %s | FileCheck %s --check-prefixes=SSE
6+
; RUN: opt -mtriple=x86_64-- -mcpu=x86-64 -passes="default<O3>" -S < %s | FileCheck %s --check-prefixes=SSE,SSE2
7+
; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -passes="default<O3>" -S < %s | FileCheck %s --check-prefixes=SSE,SSE4
88
; RUN: opt -mtriple=x86_64-- -mcpu=btver2 -passes="default<O3>" -S < %s | FileCheck %s --check-prefixes=AVX,AVX1
99
; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v3 -passes="default<O3>" -S < %s | FileCheck %s --check-prefixes=AVX,AVX2
1010

1111
define <4 x double> @PR50392(<4 x double> %a, <4 x double> %b) {
12-
; SSE-LABEL: @PR50392(
13-
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4>
14-
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
15-
; SSE-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
16-
; SSE-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 1, i32 poison>
17-
; SSE-NEXT: [[VECEXT10:%.*]] = extractelement <4 x double> [[B]], i64 2
18-
; SSE-NEXT: [[VECEXT11:%.*]] = extractelement <4 x double> [[B]], i64 3
19-
; SSE-NEXT: [[ADD12:%.*]] = fadd double [[VECEXT10]], [[VECEXT11]]
20-
; SSE-NEXT: [[SHUFFLE:%.*]] = insertelement <4 x double> [[TMP4]], double [[ADD12]], i64 3
21-
; SSE-NEXT: ret <4 x double> [[SHUFFLE]]
12+
; SSE2-LABEL: @PR50392(
13+
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4>
14+
; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
15+
; SSE2-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
16+
; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 1, i32 poison>
17+
; SSE2-NEXT: [[VECEXT10:%.*]] = extractelement <4 x double> [[B]], i64 2
18+
; SSE2-NEXT: [[VECEXT11:%.*]] = extractelement <4 x double> [[B]], i64 3
19+
; SSE2-NEXT: [[ADD12:%.*]] = fadd double [[VECEXT10]], [[VECEXT11]]
20+
; SSE2-NEXT: [[SHUFFLE:%.*]] = insertelement <4 x double> [[TMP4]], double [[ADD12]], i64 3
21+
; SSE2-NEXT: ret <4 x double> [[SHUFFLE]]
22+
;
23+
; SSE4-LABEL: @PR50392(
24+
; SSE4-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 poison, i32 4, i32 poison>
25+
; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 poison, i32 5, i32 poison>
26+
; SSE4-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
27+
; SSE4-NEXT: [[VECEXT10:%.*]] = extractelement <4 x double> [[B]], i64 2
28+
; SSE4-NEXT: [[VECEXT11:%.*]] = extractelement <4 x double> [[B]], i64 3
29+
; SSE4-NEXT: [[ADD12:%.*]] = fadd double [[VECEXT10]], [[VECEXT11]]
30+
; SSE4-NEXT: [[SHUFFLE:%.*]] = insertelement <4 x double> [[TMP3]], double [[ADD12]], i64 3
31+
; SSE4-NEXT: ret <4 x double> [[SHUFFLE]]
2232
;
2333
; AVX1-LABEL: @PR50392(
24-
; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4>
25-
; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
26-
; AVX1-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
27-
; AVX1-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 1, i32 poison>
34+
; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 poison, i32 4, i32 poison>
35+
; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 poison, i32 5, i32 poison>
36+
; AVX1-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
2837
; AVX1-NEXT: [[VECEXT10:%.*]] = extractelement <4 x double> [[B]], i64 2
2938
; AVX1-NEXT: [[VECEXT11:%.*]] = extractelement <4 x double> [[B]], i64 3
3039
; AVX1-NEXT: [[ADD12:%.*]] = fadd double [[VECEXT10]], [[VECEXT11]]
@@ -61,3 +70,4 @@ define <4 x double> @PR50392(<4 x double> %a, <4 x double> %b) {
6170
}
6271
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
6372
; AVX: {{.*}}
73+
; SSE: {{.*}}
Lines changed: 23 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,32 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt -mtriple=x86_64-- -mcpu=x86-64 -O3 -S < %s | FileCheck %s --check-prefixes=SSE
3-
; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -O3 -S < %s | FileCheck %s --check-prefixes=SSE
2+
; RUN: opt -mtriple=x86_64-- -mcpu=x86-64 -O3 -S < %s | FileCheck %s --check-prefixes=SSE,SSE2
3+
; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -O3 -S < %s | FileCheck %s --check-prefixes=SSE,SSE4
44
; RUN: opt -mtriple=x86_64-- -mcpu=btver2 -O3 -S < %s | FileCheck %s --check-prefixes=AVX,AVX1
55
; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v3 -O3 -S < %s | FileCheck %s --check-prefixes=AVX,AVX2
6-
; RUN: opt -mtriple=x86_64-- -mcpu=x86-64 -passes="default<O3>" -S < %s | FileCheck %s --check-prefixes=SSE
7-
; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -passes="default<O3>" -S < %s | FileCheck %s --check-prefixes=SSE
6+
; RUN: opt -mtriple=x86_64-- -mcpu=x86-64 -passes="default<O3>" -S < %s | FileCheck %s --check-prefixes=SSE,SSE2
7+
; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -passes="default<O3>" -S < %s | FileCheck %s --check-prefixes=SSE,SSE4
88
; RUN: opt -mtriple=x86_64-- -mcpu=btver2 -passes="default<O3>" -S < %s | FileCheck %s --check-prefixes=AVX,AVX1
99
; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v3 -passes="default<O3>" -S < %s | FileCheck %s --check-prefixes=AVX,AVX2
1010

1111
define <4 x double> @PR94546(<4 x double> %a, <4 x double> %b) {
12-
; SSE-LABEL: @PR94546(
13-
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 6>
14-
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 7>
15-
; SSE-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
16-
; SSE-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 1>
17-
; SSE-NEXT: ret <4 x double> [[TMP4]]
12+
; SSE2-LABEL: @PR94546(
13+
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 6>
14+
; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 7>
15+
; SSE2-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
16+
; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 1>
17+
; SSE2-NEXT: ret <4 x double> [[TMP4]]
1818
;
19-
; AVX1-LABEL: @PR94546(
20-
; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 6>
21-
; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 7>
22-
; AVX1-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
23-
; AVX1-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 1>
24-
; AVX1-NEXT: ret <4 x double> [[TMP4]]
19+
; SSE4-LABEL: @PR94546(
20+
; SSE4-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 poison, i32 poison, i32 6>
21+
; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 poison, i32 poison, i32 7>
22+
; SSE4-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
23+
; SSE4-NEXT: ret <4 x double> [[TMP3]]
2524
;
26-
; AVX2-LABEL: @PR94546(
27-
; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 poison, i32 poison, i32 6>
28-
; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 poison, i32 poison, i32 7>
29-
; AVX2-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
30-
; AVX2-NEXT: ret <4 x double> [[TMP3]]
25+
; AVX-LABEL: @PR94546(
26+
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 poison, i32 poison, i32 6>
27+
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 poison, i32 poison, i32 7>
28+
; AVX-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
29+
; AVX-NEXT: ret <4 x double> [[TMP3]]
3130
;
3231
%vecext = extractelement <4 x double> %a, i32 0
3332
%vecext1 = extractelement <4 x double> %a, i32 1
@@ -49,4 +48,6 @@ define <4 x double> @PR94546(<4 x double> %a, <4 x double> %b) {
4948
ret <4 x double> %shuffle
5049
}
5150
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
52-
; AVX: {{.*}}
51+
; AVX1: {{.*}}
52+
; AVX2: {{.*}}
53+
; SSE: {{.*}}

0 commit comments

Comments
 (0)