Skip to content

Commit 0baa6a7

Browse files
authored
[VectorCombine] foldShuffleOfShuffles - relax one-use of inner shuffles (#116062)
Allow either of the inner shuffles to have multiple uses, and account for that in the cost comparison: an inner shuffle with more than one use has its cost added to the new cost as well.
1 parent fd8d433 commit 0baa6a7

File tree

2 files changed

+18
-15
lines changed

2 files changed

+18
-15
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1721,11 +1721,11 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
17211721
Value *V0, *V1;
17221722
UndefValue *U0, *U1;
17231723
ArrayRef<int> OuterMask, InnerMask0, InnerMask1;
1724-
if (!match(&I, m_Shuffle(m_OneUse(m_Shuffle(m_Value(V0), m_UndefValue(U0),
1725-
m_Mask(InnerMask0))),
1726-
m_OneUse(m_Shuffle(m_Value(V1), m_UndefValue(U1),
1727-
m_Mask(InnerMask1))),
1728-
m_Mask(OuterMask))))
1724+
if (!match(&I,
1725+
m_Shuffle(
1726+
m_Shuffle(m_Value(V0), m_UndefValue(U0), m_Mask(InnerMask0)),
1727+
m_Shuffle(m_Value(V1), m_UndefValue(U1), m_Mask(InnerMask1)),
1728+
m_Mask(OuterMask))))
17291729
return false;
17301730

17311731
auto *ShufI0 = dyn_cast<Instruction>(I.getOperand(0));
@@ -1769,17 +1769,24 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
17691769
// Try to merge the shuffles if the new shuffle is not costly.
17701770
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
17711771

1772-
InstructionCost OldCost =
1772+
InstructionCost InnerCost0 =
17731773
TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, ShuffleSrcTy,
1774-
InnerMask0, CostKind, 0, nullptr, {V0, U0}, ShufI0) +
1774+
InnerMask0, CostKind, 0, nullptr, {V0, U0}, ShufI0);
1775+
InstructionCost InnerCost1 =
17751776
TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, ShuffleSrcTy,
1776-
InnerMask1, CostKind, 0, nullptr, {V1, U1}, ShufI1) +
1777+
InnerMask1, CostKind, 0, nullptr, {V1, U1}, ShufI1);
1778+
InstructionCost OuterCost =
17771779
TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, ShuffleImmTy,
17781780
OuterMask, CostKind, 0, nullptr, {ShufI0, ShufI1}, &I);
1781+
InstructionCost OldCost = InnerCost0 + InnerCost1 + OuterCost;
17791782

17801783
InstructionCost NewCost =
17811784
TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, ShuffleSrcTy,
17821785
NewMask, CostKind, 0, nullptr, {V0, V1});
1786+
if (!ShufI0->hasOneUse())
1787+
NewCost += InnerCost0;
1788+
if (!ShufI1->hasOneUse())
1789+
NewCost += InnerCost1;
17831790

17841791
LLVM_DEBUG(dbgs() << "Found a shuffle feeding two shuffles: " << I
17851792
<< "\n OldCost: " << OldCost << " vs NewCost: " << NewCost

llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1080,11 +1080,9 @@ define <16 x i64> @operandbundles(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c) {
10801080

10811081
define <8 x i8> @operandbundles_first(<8 x i8> %a) {
10821082
; CHECK-LABEL: @operandbundles_first(
1083-
; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1084-
; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
1083+
; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
10851084
; CHECK-NEXT: [[ABT:%.*]] = call <4 x i8> @llvm.abs.v4i8(<4 x i8> [[AT]], i1 false) [ "jl_roots"(ptr addrspace(10) null, ptr addrspace(10) null) ]
1086-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[AT]], <4 x i8> [[AB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1087-
; CHECK-NEXT: [[R:%.*]] = call <8 x i8> @llvm.abs.v8i8(<8 x i8> [[TMP1]], i1 false)
1085+
; CHECK-NEXT: [[R:%.*]] = call <8 x i8> @llvm.abs.v8i8(<8 x i8> [[A]], i1 false)
10881086
; CHECK-NEXT: ret <8 x i8> [[R]]
10891087
;
10901088
%ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -1098,10 +1096,8 @@ define <8 x i8> @operandbundles_first(<8 x i8> %a) {
10981096
define <8 x i8> @operandbundles_second(<8 x i8> %a) {
10991097
; CHECK-LABEL: @operandbundles_second(
11001098
; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1101-
; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
11021099
; CHECK-NEXT: [[ABB:%.*]] = call <4 x i8> @llvm.abs.v4i8(<4 x i8> [[AB]], i1 false) [ "jl_roots"(ptr addrspace(10) null, ptr addrspace(10) null) ]
1103-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[AT]], <4 x i8> [[AB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1104-
; CHECK-NEXT: [[R:%.*]] = call <8 x i8> @llvm.abs.v8i8(<8 x i8> [[TMP1]], i1 false)
1100+
; CHECK-NEXT: [[R:%.*]] = call <8 x i8> @llvm.abs.v8i8(<8 x i8> [[A]], i1 false)
11051101
; CHECK-NEXT: ret <8 x i8> [[R]]
11061102
;
11071103
%ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>

0 commit comments

Comments
 (0)