-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[SLP][REVEC] Make SLP support revectorization (-slp-revec) and add simple test. #98269
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Han-Kuan Chen (HanKuanChen) ChangesFull implementation is in https://github.com/HanKuanChen/llvm-project/tree/slp-revec. Full diff: https://github.com/llvm/llvm-project/pull/98269.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 1e9dd8c1e2287..a948f7629c705 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -113,6 +113,10 @@ static cl::opt<bool>
RunSLPVectorization("vectorize-slp", cl::init(true), cl::Hidden,
cl::desc("Run the SLP vectorization passes"));
+static cl::opt<bool>
+ RunSLPReVectorization("revectorize-slp", cl::init(false), cl::Hidden,
+ cl::desc("Run the SLP revectorization passes"));
+
static cl::opt<int>
SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
cl::desc("Only vectorize if you gain more than this "
@@ -227,15 +231,26 @@ static const unsigned MaxPHINumOperands = 128;
/// avoids spending time checking the cost model and realizing that they will
/// be inevitably scalarized.
static bool isValidElementType(Type *Ty) {
+ if (RunSLPReVectorization && isa<FixedVectorType>(Ty))
+ Ty = Ty->getScalarType();
return VectorType::isValidElementType(Ty) && !Ty->isX86_FP80Ty() &&
!Ty->isPPC_FP128Ty();
}
/// \returns the vector type of ScalarTy based on vectorization factor.
static FixedVectorType *getWidenedType(Type *ScalarTy, unsigned VF) {
+ if (auto *VecTy = dyn_cast<FixedVectorType>(ScalarTy))
+ return FixedVectorType::get(VecTy->getElementType(),
+ VF * VecTy->getNumElements());
return FixedVectorType::get(ScalarTy, VF);
}
+static unsigned getNumElements(Type *Ty) {
+ if (auto *VecTy = dyn_cast<FixedVectorType>(Ty))
+ return VecTy->getNumElements();
+ return 1;
+}
+
/// \returns True if the value is a constant (but not globals/constant
/// expressions).
static bool isConstant(Value *V) {
@@ -6779,21 +6794,23 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
return;
}
- // Don't handle vectors.
- if (S.OpValue->getType()->isVectorTy() &&
- !isa<InsertElementInst>(S.OpValue)) {
- LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
- newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);
- return;
- }
-
- if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue))
- if (SI->getValueOperand()->getType()->isVectorTy()) {
- LLVM_DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n");
+ if (!RunSLPReVectorization) {
+ // Don't handle vectors.
+ if (S.OpValue->getType()->isVectorTy() &&
+ !isa<InsertElementInst>(S.OpValue)) {
+ LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);
return;
}
+ if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue))
+ if (SI->getValueOperand()->getType()->isVectorTy()) {
+ LLVM_DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n");
+ newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);
+ return;
+ }
+ }
+
// If all of the operands are identical or constant we have a simple solution.
// If we deal with insert/extract instructions, they all must have constant
// indices, otherwise we should gather them, not try to vectorize.
@@ -11807,10 +11824,10 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
Value *castToScalarTyElem(Value *V,
std::optional<bool> IsSigned = std::nullopt) {
auto *VecTy = cast<VectorType>(V->getType());
- if (VecTy->getElementType() == ScalarTy)
+ if (VecTy->getElementType() == ScalarTy->getScalarType())
return V;
return Builder.CreateIntCast(
- V, VectorType::get(ScalarTy, VecTy->getElementCount()),
+ V, VectorType::get(ScalarTy->getScalarType(), VecTy->getElementCount()),
IsSigned.value_or(!isKnownNonNegative(V, SimplifyQuery(*R.DL))));
}
@@ -12195,7 +12212,8 @@ Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx,
return ShuffleBuilder.finalize(std::nullopt);
};
Value *V = vectorizeTree(VE, PostponedPHIs);
- if (VF != cast<FixedVectorType>(V->getType())->getNumElements()) {
+ if ((VF * getNumElements(VL[0]->getType())) !=
+ cast<FixedVectorType>(V->getType())->getNumElements()) {
if (!VE->ReuseShuffleIndices.empty()) {
// Reshuffle to get only unique values.
// If some of the scalars are duplicated in the vectorization
diff --git a/llvm/test/Transforms/SLPVectorizer/revec.ll b/llvm/test/Transforms/SLPVectorizer/revec.ll
new file mode 100644
index 0000000000000..3b9b242eef7ad
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/revec.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=slp-vectorizer -S -revectorize-slp -slp-max-reg-size=1024 -slp-threshold=-100 %s | FileCheck %s
+
+define void @test1(ptr %a, ptr %b, ptr %c) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr [[B:%.*]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = add <16 x i32> [[TMP1]], [[TMP0]]
+; CHECK-NEXT: store <16 x i32> [[TMP2]], ptr [[C:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %arrayidx3 = getelementptr inbounds i32, ptr %a, i64 4
+ %arrayidx7 = getelementptr inbounds i32, ptr %a, i64 8
+ %arrayidx11 = getelementptr inbounds i32, ptr %a, i64 12
+ %0 = load <4 x i32>, ptr %a, align 4
+ %1 = load <4 x i32>, ptr %arrayidx3, align 4
+ %2 = load <4 x i32>, ptr %arrayidx7, align 4
+ %3 = load <4 x i32>, ptr %arrayidx11, align 4
+ %arrayidx19 = getelementptr inbounds i32, ptr %b, i64 4
+ %arrayidx23 = getelementptr inbounds i32, ptr %b, i64 8
+ %arrayidx27 = getelementptr inbounds i32, ptr %b, i64 12
+ %4 = load <4 x i32>, ptr %b, align 4
+ %5 = load <4 x i32>, ptr %arrayidx19, align 4
+ %6 = load <4 x i32>, ptr %arrayidx23, align 4
+ %7 = load <4 x i32>, ptr %arrayidx27, align 4
+ %add.i = add <4 x i32> %4, %0
+ %add.i63 = add <4 x i32> %5, %1
+ %add.i64 = add <4 x i32> %6, %2
+ %add.i65 = add <4 x i32> %7, %3
+ %arrayidx36 = getelementptr inbounds i32, ptr %c, i64 4
+ %arrayidx39 = getelementptr inbounds i32, ptr %c, i64 8
+ %arrayidx42 = getelementptr inbounds i32, ptr %c, i64 12
+ store <4 x i32> %add.i, ptr %c, align 4
+ store <4 x i32> %add.i63, ptr %arrayidx36, align 4
+ store <4 x i32> %add.i64, ptr %arrayidx39, align 4
+ store <4 x i32> %add.i65, ptr %arrayidx42, align 4
+ ret void
+}
|
if (RunSLPReVectorization && isa<FixedVectorType>(Ty)) | ||
Ty = Ty->getScalarType(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you need the check here? Better just use Ty->getScalarTy() directly here and check for RunSLPReVectorization only where the vectorization is requested
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should keep SLPReVec
since some FixedVectorType are accidentally passed into isValidElementType
.
return FixedVectorType::get(ScalarTy, VF); | ||
} | ||
|
||
static unsigned getNumElements(Type *Ty) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add a comment with the function description. Also, will it return 1 for scalable vectors too?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Scalable vectors are not supported right now.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add the assertion for scalable vectors
if (!RunSLPReVectorization) { | ||
// Don't handle vectors. | ||
if (S.OpValue->getType()->isVectorTy() && | ||
!isa<InsertElementInst>(S.OpValue)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if (!RunSLPReVectorization) { | |
// Don't handle vectors. | |
if (S.OpValue->getType()->isVectorTy() && | |
!isa<InsertElementInst>(S.OpValue)) { | |
// Don't handle vectors. | |
if (!RunSLPReVectorization && S.OpValue->getType()->isVectorTy() && | |
!isa<InsertElementInst>(S.OpValue)) { |
if (!RunSLPReVectorization) { | |
// Don't handle vectors. | |
if (S.OpValue->getType()->isVectorTy() && | |
!isa<InsertElementInst>(S.OpValue)) { | |
if (!RunSLPReVectorization) { | |
// Don't handle vectors. | |
if (S.OpValue->getType()->isVectorTy() && | |
!isa<InsertElementInst>(S.OpValue)) { |
@@ -11807,10 +11824,10 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis { | |||
Value *castToScalarTyElem(Value *V, | |||
std::optional<bool> IsSigned = std::nullopt) { | |||
auto *VecTy = cast<VectorType>(V->getType()); | |||
if (VecTy->getElementType() == ScalarTy) | |||
if (VecTy->getElementType() == ScalarTy->getScalarType()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What if the number of elements in ScalarTy and in VecTy is different, is this possible?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Possible. They may have different number of elements.
The number of elements for VecTy
is a multiple of ScalarTy
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Then you need to handle it here or add the assertion that the number of elements expected to be the same only
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe my previous description cause misleading.
The V
here is a vectorized result. The number of elements for V
cannot be equal to the number of elements for ScalarTy
.
For example,
%0 = load <4 x i32>, ptr %a, align 4
%1 = load <4 x i32>, ptr %arrayidx3, align 4
%2 = load <4 x i32>, ptr %arrayidx7, align 4
%3 = load <4 x i32>, ptr %arrayidx11, align 4
The ScalarTy
here is <4 x i32>
but VecTy
is <16 x i32>
. (from FinalShuffle -> ShuffleBuilder.addOrdered -> add -> castToScalarTyElem
)
It also happend when REVEC is disabled. See llvm/test/Transforms/SLPVectorizer/reschedule.ll
, ScalarTy
is double
but VecTy
is <2 x double>
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add the assertion, that ScalarTy is either the scalar or has the same number of elements as VecTy?
4b12398
to
3d610df
Compare
@@ -113,6 +113,10 @@ static cl::opt<bool> | |||
RunSLPVectorization("vectorize-slp", cl::init(true), cl::Hidden, | |||
cl::desc("Run the SLP vectorization passes")); | |||
|
|||
static cl::opt<bool> | |||
RunSLPReVectorization("revectorize-slp", cl::init(false), cl::Hidden, | |||
cl::desc("Run the SLP revectorization passes")); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I rather doubt it runs a new pass. Better to say something like "Enable vectorization for wider vector utilization" or something like this.
@@ -227,13 +231,24 @@ static const unsigned MaxPHINumOperands = 128; | |||
/// avoids spending time checking the cost model and realizing that they will | |||
/// be inevitably scalarized. | |||
static bool isValidElementType(Type *Ty) { | |||
// TODO: Support ScalableVectorType. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add the assertion for scalable types for now
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add assert in isValidElementType
will make SLP cannot work. But we can add assert inside getNumElements
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why? We do not support scalable types in SLP vectorizer, so it should be fine (at least for now) that SLP does not work with scalable typesбгт less it crashes the compiler. The crashes should be fixed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Because ScalableVectorType
is accidentally passed into isValidElementType
.
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 8925e3b2d91a..12bc491381fe 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7512,7 +7512,7 @@ unsigned BoUpSLP::canMapToVector(Type *T) const {
}
}
- if (!isValidElementType(EltTy))
+ if (isa<ScalableVectorType>(EltTy) || !isValidElementType(EltTy))
return 0;
uint64_t VTSize = DL->getTypeStoreSizeInBits(getWidenedType(EltTy, N));
if (VTSize < MinVecRegSize || VTSize > MaxVecRegSize ||
@@ -18901,6 +18901,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
// No need to analyze deleted, vectorized and non-vectorizable
// instructions.
if (!VisitedInstrs.count(P) && !R.isDeleted(P) &&
+ !isa<ScalableVectorType>(P->getType()) &&
isValidElementType(P->getType()))
Incoming.push_back(P);
}
@@ -19058,6 +19059,8 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
}
if (TryToVectorizeRoot) {
for (auto *V : It->operand_values()) {
+ if (isa<ScalableVectorType>(V->getType()))
+ continue;
// Postponed instructions should not be vectorized here, delay their
// vectorization.
if (auto *VI = dyn_cast<Instruction>(V);
The first one is triggered by llvm/test/Transforms/SLPVectorizer/RISCV/scalable-type-to-vect.ll
.
The second and the third one is triggered by llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll
.
if (!RunSLPReVectorization) | ||
if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue)) | ||
if (SI->getValueOperand()->getType()->isVectorTy()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if (!RunSLPReVectorization) | |
if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue)) | |
if (SI->getValueOperand()->getType()->isVectorTy()) { | |
if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue)) | |
if (!RunSLPReVectorization && SI->getValueOperand()->getType()->isVectorTy()) { |
@@ -11807,10 +11824,10 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis { | |||
Value *castToScalarTyElem(Value *V, | |||
std::optional<bool> IsSigned = std::nullopt) { | |||
auto *VecTy = cast<VectorType>(V->getType()); | |||
if (VecTy->getElementType() == ScalarTy) | |||
if (VecTy->getElementType() == ScalarTy->getScalarType()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Then you need to handle it here or add the assertion that the number of elements expected to be the same only
@@ -12195,7 +12211,8 @@ Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx, | |||
return ShuffleBuilder.finalize(std::nullopt); | |||
}; | |||
Value *V = vectorizeTree(VE, PostponedPHIs); | |||
if (VF != cast<FixedVectorType>(V->getType())->getNumElements()) { | |||
if ((VF * getNumElements(VL[0]->getType())) != |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Drop extra parens
3d610df
to
595fce7
Compare
Give better title and subject description |
More description can only be modified by squash and merge. |
GitHub uses the PR description when merging the commit, updating the PR description should be suffficient |
595fce7
to
75ca074
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LG
Hi @HanKuanChen, we have several internal tests that are hitting an assertion failure which I bisected back to this change. I have filed PR #99411 for the issue, can you take a look? |
we hit an assert from this patch in chromium code reduced c++ repro with
|
…mple test. (#98269) This PR will make SLP support revectorization. Add an option -slp-revec to control the functionality. reference: https://discourse.llvm.org/t/rfc-make-slp-vectorizer-revectorize-vector-instructions/79436
This PR will make SLP support revectorization. Add an option -slp-revec
to control the functionality.
reference:
https://discourse.llvm.org/t/rfc-make-slp-vectorizer-revectorize-vector-instructions/79436