-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[SLP][REVEC] Support more mask pattern usage in shufflevector. #106212
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Han-Kuan Chen (HanKuanChen) ChangesFull diff: https://github.com/llvm/llvm-project/pull/106212.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 4da65217070446..7d681c0bdb073c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -272,8 +272,7 @@ static void transformScalarShuffleIndiciesToVector(unsigned VecTyNumElements,
/// A group has the following features
/// 1. All of value in a group are shufflevector.
/// 2. The mask of all shufflevector is isExtractSubvectorMask.
-/// 3. The mask of all shufflevector uses all of the elements of the source (and
-/// the elements are used in order).
+/// 3. The mask of all shufflevector uses all of the elements of the source.
/// e.g., it is 1 group (%0)
/// %1 = shufflevector <16 x i8> %0, <16 x i8> poison,
/// <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -309,7 +308,7 @@ static unsigned getShufflevectorNumGroups(ArrayRef<Value *> VL) {
auto *SV = cast<ShuffleVectorInst>(VL[I]);
Value *Src = SV->getOperand(0);
ArrayRef<Value *> Group = VL.slice(I, GroupSize);
- SmallVector<int> ExtractionIndex(SVNumElements);
+ SmallBitVector UsedIndex(SVNumElements);
if (!all_of(Group, [&](Value *V) {
auto *SV = cast<ShuffleVectorInst>(V);
// From the same source.
@@ -318,12 +317,11 @@ static unsigned getShufflevectorNumGroups(ArrayRef<Value *> VL) {
int Index;
if (!SV->isExtractSubvectorMask(Index))
return false;
- for (int I : seq<int>(Index, Index + SV->getShuffleMask().size()))
- ExtractionIndex.push_back(I);
+ UsedIndex.set(Index, Index + SV->getShuffleMask().size());
return true;
}))
return 0;
- if (!is_sorted(ExtractionIndex))
+ if (!UsedIndex.all())
return 0;
++NumGroup;
}
@@ -10195,11 +10193,9 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
return VecCost;
};
if (SLPReVec && !E->isAltShuffle())
- return GetCostDiff(GetScalarCost, [](InstructionCost) {
- // shufflevector will be eliminated by instcombine because the
- // shufflevector masks are used in order (guaranteed by
- // getShufflevectorNumGroups). The vector cost is 0.
- return TTI::TCC_Free;
+ return GetCostDiff(GetScalarCost, [&](InstructionCost) {
+ return ::getShuffleCost(*TTI, TargetTransformInfo::SK_PermuteSingleSrc,
+ VecTy, calculateShufflevectorMask(E->Scalars));
});
return GetCostDiff(GetScalarCost, GetVectorCost);
}
@@ -13971,9 +13967,16 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
- // The current shufflevector usage always duplicate the source.
- V = Builder.CreateShuffleVector(Src,
- calculateShufflevectorMask(E->Scalars));
+ assert(isa<ShuffleVectorInst>(Src) &&
+ "Not supported shufflevector usage.");
+ ShuffleVectorInst *SVSrc = cast<ShuffleVectorInst>(Src);
+ assert(isa<PoisonValue>(SVSrc->getOperand(1)) &&
+ "Not supported shufflevector usage.");
+ SmallVector<int> ThisMask(calculateShufflevectorMask(E->Scalars));
+ SmallVector<int> NewMask(ThisMask.size());
+ transform(ThisMask, NewMask.begin(),
+ [&SVSrc](int Mask) { return SVSrc->getShuffleMask()[Mask]; });
+ V = Builder.CreateShuffleVector(SVSrc->getOperand(0), NewMask);
propagateIRFlags(V, E->Scalars, VL0);
} else {
assert(E->isAltShuffle() &&
diff --git a/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll b/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll
index 6028a8b918941c..1fc0b0306d1194 100644
--- a/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll
@@ -34,17 +34,9 @@ define void @test2(ptr %in, ptr %out) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, ptr [[IN:%.*]], align 1
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[TMP1]] to <4 x i64>
-; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64>
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[OUT:%.*]], i64 16
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[OUT]], i64 32
-; CHECK-NEXT: store <2 x i64> [[TMP5]], ptr [[OUT]], align 8
-; CHECK-NEXT: store <2 x i64> [[TMP6]], ptr [[TMP7]], align 8
-; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[TMP8]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i32> [[TMP0]] to <8 x i64>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> poison, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: store <8 x i64> [[TMP2]], ptr [[OUT:%.*]], align 8
; CHECK-NEXT: ret void
;
entry:
@@ -67,3 +59,26 @@ entry:
store <2 x i64> %8, ptr %12, align 8
ret void
}
+
+define void @test3(<16 x i32> %0, ptr %out) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0:%.*]], <16 x i32> poison, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: store <16 x i32> [[TMP1]], ptr [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %1 = shufflevector <16 x i32> %0, <16 x i32> poison, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+ %2 = shufflevector <16 x i32> %0, <16 x i32> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+ %3 = shufflevector <16 x i32> %0, <16 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %4 = shufflevector <16 x i32> %0, <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %5 = getelementptr inbounds i32, ptr %out, i64 0
+ %6 = getelementptr inbounds i32, ptr %out, i64 4
+ %7 = getelementptr inbounds i32, ptr %out, i64 8
+ %8 = getelementptr inbounds i32, ptr %out, i64 12
+ store <4 x i32> %1, ptr %5, align 4
+ store <4 x i32> %2, ptr %6, align 4
+ store <4 x i32> %3, ptr %7, align 4
+ store <4 x i32> %4, ptr %8, align 4
+ ret void
+}
|
bool IsIdentity = true; | ||
auto *SV = cast<ShuffleVectorInst>(VL.front()); | ||
unsigned SVNumElements = | ||
cast<FixedVectorType>(SV->getOperand(0)->getType()) | ||
->getNumElements(); | ||
unsigned GroupSize = SVNumElements / SV->getShuffleMask().size(); | ||
for (size_t I = 0, E = VL.size(); I != E; I += GroupSize) { | ||
ArrayRef<Value *> Group = VL.slice(I, GroupSize); | ||
SmallVector<int> ExtractionIndex(SVNumElements); | ||
for_each(Group, [&](Value *V) { | ||
auto *SV = cast<ShuffleVectorInst>(V); | ||
int Index; | ||
SV->isExtractSubvectorMask(Index); | ||
for (int I : | ||
seq<int>(Index, Index + SV->getShuffleMask().size())) | ||
ExtractionIndex.push_back(I); | ||
}); | ||
if (!is_sorted(ExtractionIndex)) { | ||
IsIdentity = false; | ||
break; | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I rather doubt you need all this, just a heck for identity should be enough
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can vectorize the following instructions and eliminate shufflevector.
%5 = shufflevector <4 x i64> %3, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
%6 = shufflevector <4 x i64> %3, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
%7 = shufflevector <4 x i64> %4, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
%8 = shufflevector <4 x i64> %4, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
But this cannot.
%5 = shufflevector <4 x i64> %3, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
%6 = shufflevector <4 x i64> %3, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
%7 = shufflevector <4 x i64> %4, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
%8 = shufflevector <4 x i64> %4, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
We need to check all shufflevector and know whether there is an identity mask.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
But you don't need to check for sorted array, just need to check that each next extract mask starting index > than the previous one
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You are right. Fixed.
SV->isExtractSubvectorMask(Index); | ||
if (NextIndex != Index) | ||
return false; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually, I think better just to include the cost of the vector extract here, rather than consider it free. Better to request the cost from TTI. Also, check that SV->isExtractSubvectorMask(Index);
returns true
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When do we need to calculate the vector extract here? Vectorization will merge multiple shufflevector into 1 shufflevector. Vector extract is not used.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In line 10212 you're calling SV->isExtractSubvectorMask(). Check its return value
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fix in 25b9018.
return true; | ||
})) | ||
return 0; | ||
if (!is_sorted(ExtractionIndex)) | ||
if (!UsedIndex.all()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What if we just reuse the same group several times but some other group do not use at all?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you have an example? I am not sure what you mean.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I mean, <0,1> extracted several time, while <2,3> is not extracted at all
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Cannot. We had a getShufflevectorNumGroups
in getScalarsVectorizationState
. If getShufflevectorNumGroups
returns 0, then buildTree_rec
will stop (a TreeEntry::NeedToGather
will return).
getShufflevectorNumGroups
use
SmallVector<int> ExtractionIndex(SVNumElements);
if (!all_of(Group, [&](Value *V) {
auto *SV = cast<ShuffleVectorInst>(V);
// From the same source.
if (SV->getOperand(0) != Src)
return false;
int Index;
if (!SV->isExtractSubvectorMask(Index))
return false;
for (int I : seq<int>(Index, Index + SV->getShuffleMask().size()))
ExtractionIndex.push_back(I);
return true;
}))
return 0;
if (!is_sorted(ExtractionIndex))
return 0;
to make sure each elements will be accessed.
@@ -309,7 +308,7 @@ static unsigned getShufflevectorNumGroups(ArrayRef<Value *> VL) { | |||
auto *SV = cast<ShuffleVectorInst>(VL[I]); | |||
Value *Src = SV->getOperand(0); | |||
ArrayRef<Value *> Group = VL.slice(I, GroupSize); | |||
SmallVector<int> ExtractionIndex(SVNumElements); | |||
SmallBitVector UsedIndex(SVNumElements); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You don't need to account all elements here, just group indices should be enough, no?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right. I will fix it.
SmallVector<int> ExpectedIndex( | ||
createStrideMask(0, ShuffleMaskSize, GroupSize)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can still use bitvector, just mark not elements in the groups, but the groups themselves.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed.
IsIdentity = false; | ||
break; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do a return here, no need to modify IsIdentity
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good catch.
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/144/builds/6060 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/185/builds/4492 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/137/builds/4521 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/175/builds/4478 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/180/builds/4401 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/3/builds/3965 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/160/builds/4403 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/65/builds/3837 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/190/builds/4952 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/154/builds/3754 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/153/builds/7702 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/46/builds/4276 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/33/builds/2293 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/56/builds/6355 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/16/builds/4614 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/60/builds/6532 Here is the relevant piece of the build log for the reference
|
No description provided.