-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[SLP][REVEC] Make getAltInstrMask and getGatherCost vectorize vector instructions. #99461
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Han-Kuan Chen (HanKuanChen) Changes: Full diff: https://github.com/llvm/llvm-project/pull/99461.diff (1 file affected):
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index ccb6734d5618c..ef9bcb2c515a2 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1032,10 +1032,13 @@ static void fixupOrderingIndices(MutableArrayRef<unsigned> Order) {
/// Opcode1.
SmallBitVector getAltInstrMask(ArrayRef<Value *> VL, unsigned Opcode0,
unsigned Opcode1) {
- SmallBitVector OpcodeMask(VL.size(), false);
+ Type *ScalarTy = VL[0]->getType();
+ unsigned ScalarTyNumElements = getNumElements(ScalarTy);
+ SmallBitVector OpcodeMask(VL.size() * ScalarTyNumElements, false);
for (unsigned Lane : seq<unsigned>(VL.size()))
if (cast<Instruction>(VL[Lane])->getOpcode() == Opcode1)
- OpcodeMask.set(Lane);
+ for (unsigned I = 0; I != ScalarTyNumElements; ++I)
+ OpcodeMask.set(Lane * ScalarTyNumElements + I);
return OpcodeMask;
}
@@ -11355,7 +11358,8 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
// Find the cost of inserting/extracting values from the vector.
// Check if the same elements are inserted several times and count them as
// shuffle candidates.
- APInt ShuffledElements = APInt::getZero(VL.size());
+ unsigned ScalarTyNumElements = getNumElements(ScalarTy);
+ APInt ShuffledElements = APInt::getZero(VecTy->getNumElements());
DenseMap<Value *, unsigned> UniqueElements;
constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
InstructionCost Cost;
@@ -11375,7 +11379,8 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
Value *V = VL[I];
// No need to shuffle duplicates for constants.
if ((ForPoisonSrc && isConstant(V)) || isa<UndefValue>(V)) {
- ShuffledElements.setBit(I);
+ for (unsigned J = 0; J != ScalarTyNumElements; ++J)
+ ShuffledElements.setBit(I * ScalarTyNumElements + J);
ShuffleMask[I] = isa<PoisonValue>(V) ? PoisonMaskElem : I;
continue;
}
@@ -11388,7 +11393,8 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
}
DuplicateNonConst = true;
- ShuffledElements.setBit(I);
+ for (unsigned J = 0; J != ScalarTyNumElements; ++J)
+ ShuffledElements.setBit(I * ScalarTyNumElements + J);
ShuffleMask[I] = Res.first->second;
}
if (ForPoisonSrc)
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add a test
4585efa
to
3ab8fe4
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LG
3ab8fe4
to
d04b0f8
Compare
Fix "Mask and VecTy are incompatible".
Fix "Vector size mismatch".
d04b0f8
to
dd30da7
Compare
No description provided.