-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[Instructions] cache computed shufflevector properties #115536
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[Instructions] cache computed shufflevector properties #115536
Conversation
@llvm/pr-subscribers-llvm-ir Author: Princeton Ferro (Prince781) ChangesCache computed properties of a shufflevector mask. Full diff: https://github.com/llvm/llvm-project/pull/115536.diff 2 Files Affected:
diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
index b6575d4c85724c..c60f2f021b3c89 100644
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -1848,6 +1848,39 @@ class ShuffleVectorInst : public Instruction {
SmallVector<int, 4> ShuffleMask;
Constant *ShuffleMaskForBitcode;
+ struct ShuffleProperties {
+ bool isSingleSource : 1;
+ bool isSingleSource_set : 1;
+ bool isIdentityMask : 1;
+ bool isIdentityMask_set : 1;
+ bool isIdentityWithPadding : 1;
+ bool isIdentityWithPadding_set : 1;
+ bool isIdentityWithExtract : 1;
+ bool isIdentityWithExtract_set : 1;
+ bool isConcat : 1;
+ bool isConcat_set : 1;
+ bool isSelect : 1;
+ bool isSelect_set : 1;
+ bool isReverse : 1;
+ bool isReverse_set : 1;
+ bool isZeroEltSplat : 1;
+ bool isZeroEltSplat_set : 1;
+ bool isTranspose : 1;
+ bool isTranspose_set : 1;
+
+ void unset() {
+ isSingleSource_set = false;
+ isIdentityMask_set = false;
+ isIdentityWithPadding_set = false;
+ isIdentityWithExtract_set = false;
+ isConcat_set = false;
+ isSelect_set = false;
+ isReverse_set = false;
+ isZeroEltSplat_set = false;
+ isTranspose_set = false;
+ }
+ };
+ mutable ShuffleProperties CachedShuffleProperties = {};
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
@@ -1959,8 +1992,13 @@ class ShuffleVectorInst : public Instruction {
/// Example: shufflevector <4 x n> A, <4 x n> B, <3,0,undef,3>
/// TODO: Optionally allow length-changing shuffles.
bool isSingleSource() const {
- return !changesLength() &&
- isSingleSourceMask(ShuffleMask, ShuffleMask.size());
+ if (CachedShuffleProperties.isSingleSource_set)
+ return CachedShuffleProperties.isSingleSource;
+
+ CachedShuffleProperties.isSingleSource_set = true;
+ return CachedShuffleProperties.isSingleSource =
+ !changesLength() &&
+ isSingleSourceMask(ShuffleMask, ShuffleMask.size());
}
/// Return true if this shuffle mask chooses elements from exactly one source
@@ -1987,12 +2025,18 @@ class ShuffleVectorInst : public Instruction {
/// from its input vectors.
/// Example: shufflevector <4 x n> A, <4 x n> B, <4,undef,6,undef>
bool isIdentity() const {
+ if (CachedShuffleProperties.isIdentityMask_set)
+ return CachedShuffleProperties.isIdentityMask;
+
+ CachedShuffleProperties.isIdentityMask_set = true;
// Not possible to express a shuffle mask for a scalable vector for this
// case.
if (isa<ScalableVectorType>(getType()))
- return false;
+ return CachedShuffleProperties.isIdentityMask = false;
- return !changesLength() && isIdentityMask(ShuffleMask, ShuffleMask.size());
+ return CachedShuffleProperties.isIdentityMask =
+ !changesLength() &&
+ isIdentityMask(ShuffleMask, ShuffleMask.size());
}
/// Return true if this shuffle lengthens exactly one source vector with
@@ -2033,7 +2077,13 @@ class ShuffleVectorInst : public Instruction {
/// In that case, the shuffle is better classified as an identity shuffle.
/// TODO: Optionally allow length-changing shuffles.
bool isSelect() const {
- return !changesLength() && isSelectMask(ShuffleMask, ShuffleMask.size());
+ if (CachedShuffleProperties.isSelect_set)
+ return CachedShuffleProperties.isSelect;
+
+ CachedShuffleProperties.isSelect_set = true;
+ return CachedShuffleProperties.isSelect =
+ !changesLength() &&
+ isSelectMask(ShuffleMask, ShuffleMask.size());
}
/// Return true if this shuffle mask swaps the order of elements from exactly
@@ -2054,7 +2104,13 @@ class ShuffleVectorInst : public Instruction {
/// Example: shufflevector <4 x n> A, <4 x n> B, <3,undef,1,undef>
/// TODO: Optionally allow length-changing shuffles.
bool isReverse() const {
- return !changesLength() && isReverseMask(ShuffleMask, ShuffleMask.size());
+ if (CachedShuffleProperties.isReverse_set)
+ return CachedShuffleProperties.isReverse;
+
+ CachedShuffleProperties.isReverse_set = true;
+ return CachedShuffleProperties.isReverse =
+ !changesLength() &&
+ isReverseMask(ShuffleMask, ShuffleMask.size());
}
/// Return true if this shuffle mask chooses all elements with the same value
@@ -2077,8 +2133,13 @@ class ShuffleVectorInst : public Instruction {
/// TODO: Optionally allow length-changing shuffles.
/// TODO: Optionally allow splats from other elements.
bool isZeroEltSplat() const {
- return !changesLength() &&
- isZeroEltSplatMask(ShuffleMask, ShuffleMask.size());
+ if (CachedShuffleProperties.isZeroEltSplat_set)
+ return CachedShuffleProperties.isZeroEltSplat;
+
+ CachedShuffleProperties.isZeroEltSplat_set = true;
+ return CachedShuffleProperties.isZeroEltSplat =
+ !changesLength() &&
+ isZeroEltSplatMask(ShuffleMask, ShuffleMask.size());
}
/// Return true if this shuffle mask is a transpose mask.
@@ -2127,7 +2188,13 @@ class ShuffleVectorInst : public Instruction {
/// exact specification.
/// Example: shufflevector <4 x n> A, <4 x n> B, <0,4,2,6>
bool isTranspose() const {
- return !changesLength() && isTransposeMask(ShuffleMask, ShuffleMask.size());
+ if (CachedShuffleProperties.isTranspose_set)
+ return CachedShuffleProperties.isTranspose;
+
+ CachedShuffleProperties.isTranspose_set = true;
+ return CachedShuffleProperties.isTranspose =
+ !changesLength() &&
+ isTransposeMask(ShuffleMask, ShuffleMask.size());
}
/// Return true if this shuffle mask is a splice mask, concatenating the two
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index 009e0c03957c97..987bf2a031fd2d 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -1810,6 +1810,7 @@ void ShuffleVectorInst::getShuffleMask(const Constant *Mask,
}
void ShuffleVectorInst::setShuffleMask(ArrayRef<int> Mask) {
+ CachedShuffleProperties.unset();
ShuffleMask.assign(Mask.begin(), Mask.end());
ShuffleMaskForBitcode = convertShuffleMaskForBitcode(Mask, getType());
}
@@ -2100,63 +2101,76 @@ bool ShuffleVectorInst::isInsertSubvectorMask(ArrayRef<int> Mask,
}
bool ShuffleVectorInst::isIdentityWithPadding() const {
+ if (CachedShuffleProperties.isIdentityWithPadding_set)
+ return CachedShuffleProperties.isIdentityWithPadding;
+
+ CachedShuffleProperties.isIdentityWithPadding_set = true;
// FIXME: Not currently possible to express a shuffle mask for a scalable
// vector for this case.
if (isa<ScalableVectorType>(getType()))
- return false;
+ return CachedShuffleProperties.isIdentityWithPadding = false;
int NumOpElts = cast<FixedVectorType>(Op<0>()->getType())->getNumElements();
int NumMaskElts = cast<FixedVectorType>(getType())->getNumElements();
if (NumMaskElts <= NumOpElts)
- return false;
+ return CachedShuffleProperties.isIdentityWithPadding = false;
// The first part of the mask must choose elements from exactly 1 source op.
ArrayRef<int> Mask = getShuffleMask();
if (!isIdentityMaskImpl(Mask, NumOpElts))
- return false;
+ return CachedShuffleProperties.isIdentityWithPadding = false;
// All extending must be with undef elements.
for (int i = NumOpElts; i < NumMaskElts; ++i)
if (Mask[i] != -1)
- return false;
+ return CachedShuffleProperties.isIdentityWithPadding = false;
- return true;
+ return CachedShuffleProperties.isIdentityWithPadding = true;
}
bool ShuffleVectorInst::isIdentityWithExtract() const {
+ if (CachedShuffleProperties.isIdentityWithExtract_set)
+ return CachedShuffleProperties.isIdentityWithExtract;
+
+ CachedShuffleProperties.isIdentityWithExtract_set = true;
// FIXME: Not currently possible to express a shuffle mask for a scalable
// vector for this case.
if (isa<ScalableVectorType>(getType()))
- return false;
+ return CachedShuffleProperties.isIdentityWithExtract = false;
int NumOpElts = cast<FixedVectorType>(Op<0>()->getType())->getNumElements();
int NumMaskElts = cast<FixedVectorType>(getType())->getNumElements();
if (NumMaskElts >= NumOpElts)
- return false;
+ return CachedShuffleProperties.isIdentityWithExtract = false;
- return isIdentityMaskImpl(getShuffleMask(), NumOpElts);
+ return CachedShuffleProperties.isIdentityWithExtract = isIdentityMaskImpl(getShuffleMask(), NumOpElts);
}
bool ShuffleVectorInst::isConcat() const {
+ if (CachedShuffleProperties.isConcat_set)
+ return CachedShuffleProperties.isConcat;
+
+ CachedShuffleProperties.isConcat_set = true;
// Vector concatenation is differentiated from identity with padding.
if (isa<UndefValue>(Op<0>()) || isa<UndefValue>(Op<1>()))
- return false;
+ return CachedShuffleProperties.isConcat = false;
// FIXME: Not currently possible to express a shuffle mask for a scalable
// vector for this case.
if (isa<ScalableVectorType>(getType()))
- return false;
+ return CachedShuffleProperties.isConcat = false;
int NumOpElts = cast<FixedVectorType>(Op<0>()->getType())->getNumElements();
int NumMaskElts = cast<FixedVectorType>(getType())->getNumElements();
if (NumMaskElts != NumOpElts * 2)
- return false;
+ return CachedShuffleProperties.isConcat = false;
// Use the mask length rather than the operands' vector lengths here. We
// already know that the shuffle returns a vector twice as long as the inputs,
// and neither of the inputs are undef vectors. If the mask picks consecutive
// elements from both inputs, then this is a concatenation of the inputs.
- return isIdentityMaskImpl(getShuffleMask(), NumMaskElts);
+ return CachedShuffleProperties.isConcat =
+ isIdentityMaskImpl(getShuffleMask(), NumMaskElts);
}
static bool isReplicationMaskWithParams(ArrayRef<int> Mask,
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
a44d200
to
6a332ad
Compare
@nikic If I'm not misreading these build logs, the errors look unrelated. Would it be possible for you to assign someone to review this? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I didn't see any compile-time impact when testing this, but probably this needs a fairly specific workload to see benefits. Could you maybe provide some numbers on what you see?
In terms of implementation, I do wonder whether it would make sense to always compute the full properties on construction / mask change, as the lazy initialization makes this somewhat ugly.
Hi @nikic, We have some internal kernels that make heavy use of very large vectors (256, 512, 1024 elements). We find a 20-40% compile time reduction on these by caching shufflevector mask properties. I think this impact is hard to see with smaller sizes. |
The lazy initialization is helpful for our compile time cases, so I'd prefer to keep it. I can refine this patch to be easier to read. |
We could consider creating a TTI::ShuffleKindSet mask type and create the entire set in one call in the ShuffleVectorInst constructor - the ShuffleVectorInst::is*Mask calls are all very similar, it shouldn't take much to merge them all into a single analysis loop. |
@RKSimon interesting idea. I'll try that out. |
aa7849a
to
18ac1aa
Compare
@RKSimon why would it be in TTI though? Aren't these target-independent properties? |
Nevermind, I see that ShuffleVectorInst methods are used in |
bfa9cb1
to
8e12ade
Compare
Hi all, Updated the PR to merge shufflevector attribute analyses into a single pass over the mask. I've left other users (like BasicTTIImpl) alone for now, as I don't have a real-world example to motivate changing them. |
Improvements on our (@NVIDIA's) end: before:
after this change:
after this change with
|
Ping. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It may be cleaner to separate out the scalable case into a separate code path, because there is only a single supported scalable shufflevector operation (which is the zero index splat).
04b15aa
to
b6fd581
Compare
- Cache computed properties of a shufflevector mask. - Merge several shuffle mask property analyses into a single analysis and introduce ShuffleMaskAttrs. - Compute the properties on shufflevector construction.
This reverts commit 24a39050595b7f32e7374f67d281d91bfd3cf303.
Simplify implementation
b6fd581
to
3fd0f67
Compare
Ping reviewers. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
|
||
// (Re)compute the shuffle mask attributes. | ||
void computeShuffleAttrs() { | ||
ShuffleAttrs = {}; // reset |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not needed, as you're reassigning all members anyway.
Just in case, please hold off on merging as I need to do some more compile time testing. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM cheers - @Prince781 hows the compile time checking going?
Hi @RKSimon While this improves the original case I was interested in, I'm finding with a newer kernel we recently added that the 8b size increase for ShuffleVectorInst is causing (CPU) caching problems. So I'd like to still hold off on merging. |
Cache computed properties of a shufflevector mask.