Skip to content

[SLP][REVEC] Make SLP support revectorization (-slp-revec) and add simple test. #98269

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jul 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 26 additions & 6 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,10 @@ static cl::opt<bool>
RunSLPVectorization("vectorize-slp", cl::init(true), cl::Hidden,
cl::desc("Run the SLP vectorization passes"));

static cl::opt<bool>
SLPReVec("slp-revec", cl::init(false), cl::Hidden,
cl::desc("Enable vectorization for wider vector utilization"));

static cl::opt<int>
SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
cl::desc("Only vectorize if you gain more than this "
Expand Down Expand Up @@ -227,13 +231,26 @@ static const unsigned MaxPHINumOperands = 128;
/// avoids spending time checking the cost model and realizing that they will
/// be inevitably scalarized.
static bool isValidElementType(Type *Ty) {
// TODO: Support ScalableVectorType.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add the assertion for scalable types for now

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add assert in isValidElementType will make SLP cannot work. But we can add assert inside getNumElements.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why? We do not support scalable types in SLP vectorizer, so it should be fine (at least for now) that SLP does not work with scalable typesбгт less it crashes the compiler. The crashes should be fixed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because ScalableVectorType is accidentally passed into isValidElementType.

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 8925e3b2d91a..12bc491381fe 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7512,7 +7512,7 @@ unsigned BoUpSLP::canMapToVector(Type *T) const {
     }
   }

-  if (!isValidElementType(EltTy))
+  if (isa<ScalableVectorType>(EltTy) || !isValidElementType(EltTy))
     return 0;
   uint64_t VTSize = DL->getTypeStoreSizeInBits(getWidenedType(EltTy, N));
   if (VTSize < MinVecRegSize || VTSize > MaxVecRegSize ||
@@ -18901,6 +18901,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
       // No need to analyze deleted, vectorized and non-vectorizable
       // instructions.
       if (!VisitedInstrs.count(P) && !R.isDeleted(P) &&
+          !isa<ScalableVectorType>(P->getType()) &&
           isValidElementType(P->getType()))
         Incoming.push_back(P);
     }
@@ -19058,6 +19059,8 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
       }
       if (TryToVectorizeRoot) {
         for (auto *V : It->operand_values()) {
+          if (isa<ScalableVectorType>(V->getType()))
+            continue;
           // Postponed instructions should not be vectorized here, delay their
           // vectorization.
           if (auto *VI = dyn_cast<Instruction>(V);

The first one is triggered by llvm/test/Transforms/SLPVectorizer/RISCV/scalable-type-to-vect.ll.
The second and the third one is triggered by llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll.

if (SLPReVec && isa<FixedVectorType>(Ty))
Ty = Ty->getScalarType();
return VectorType::isValidElementType(Ty) && !Ty->isX86_FP80Ty() &&
!Ty->isPPC_FP128Ty();
}

/// \returns the number of elements for Ty.
static unsigned getNumElements(Type *Ty) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a comment with the function description. Also, will it return 1 for scalable vectors too?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Scalable vectors are not supported right now.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add the assertion for scalable vectors

assert(!isa<ScalableVectorType>(Ty) &&
"ScalableVectorType is not supported.");
if (auto *VecTy = dyn_cast<FixedVectorType>(Ty))
return VecTy->getNumElements();
return 1;
}

/// \returns the vector type of ScalarTy based on vectorization factor.
static FixedVectorType *getWidenedType(Type *ScalarTy, unsigned VF) {
return FixedVectorType::get(ScalarTy, VF);
return FixedVectorType::get(ScalarTy->getScalarType(),
VF * getNumElements(ScalarTy));
}

/// \returns True if the value is a constant (but not globals/constant
Expand Down Expand Up @@ -6780,15 +6797,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}

// Don't handle vectors.
if (S.OpValue->getType()->isVectorTy() &&
if (!SLPReVec && S.OpValue->getType()->isVectorTy() &&
!isa<InsertElementInst>(S.OpValue)) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);
return;
}

if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue))
if (SI->getValueOperand()->getType()->isVectorTy()) {
if (!SLPReVec && SI->getValueOperand()->getType()->isVectorTy()) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n");
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);
return;
Expand Down Expand Up @@ -11807,10 +11824,12 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
Value *castToScalarTyElem(Value *V,
std::optional<bool> IsSigned = std::nullopt) {
auto *VecTy = cast<VectorType>(V->getType());
if (VecTy->getElementType() == ScalarTy)
assert(getNumElements(ScalarTy) < getNumElements(VecTy) &&
(getNumElements(VecTy) % getNumElements(ScalarTy) == 0));
if (VecTy->getElementType() == ScalarTy->getScalarType())
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if the number of elements in ScalarTy and in VecTy is different, is this possible?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Possible. They may have different number of elements.
The number of elements for VecTy is a multiple of ScalarTy.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then you need to handle it here or add the assertion that the number of elements expected to be the same only

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe my previous description cause misleading.
The V here is a vectorized result. The number of elements for V cannot be equal to the number of elements for ScalarTy.
For example,

  %0 = load <4 x i32>, ptr %a, align 4
  %1 = load <4 x i32>, ptr %arrayidx3, align 4
  %2 = load <4 x i32>, ptr %arrayidx7, align 4
  %3 = load <4 x i32>, ptr %arrayidx11, align 4

The ScalarTy here is <4 x i32> but VecTy is <16 x i32>. (from FinalShuffle -> ShuffleBuilder.addOrdered -> add -> castToScalarTyElem)
It also happend when REVEC is disabled. See llvm/test/Transforms/SLPVectorizer/reschedule.ll, ScalarTy is double but VecTy is <2 x double>.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add the assertion, that ScalarTy is either the scalar or has the same number of elements as VecTy?

return V;
return Builder.CreateIntCast(
V, VectorType::get(ScalarTy, VecTy->getElementCount()),
V, VectorType::get(ScalarTy->getScalarType(), VecTy->getElementCount()),
IsSigned.value_or(!isKnownNonNegative(V, SimplifyQuery(*R.DL))));
}

Expand Down Expand Up @@ -12195,7 +12214,8 @@ Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx,
return ShuffleBuilder.finalize(std::nullopt);
};
Value *V = vectorizeTree(VE, PostponedPHIs);
if (VF != cast<FixedVectorType>(V->getType())->getNumElements()) {
if (VF * getNumElements(VL[0]->getType()) !=
cast<FixedVectorType>(V->getType())->getNumElements()) {
if (!VE->ReuseShuffleIndices.empty()) {
// Reshuffle to get only unique values.
// If some of the scalars are duplicated in the vectorization
Expand Down
40 changes: 40 additions & 0 deletions llvm/test/Transforms/SLPVectorizer/revec.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-100 %s | FileCheck %s

define void @test1(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr [[B:%.*]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = add <16 x i32> [[TMP1]], [[TMP0]]
; CHECK-NEXT: store <16 x i32> [[TMP2]], ptr [[C:%.*]], align 4
; CHECK-NEXT: ret void
;
entry:
%arrayidx3 = getelementptr inbounds i32, ptr %a, i64 4
%arrayidx7 = getelementptr inbounds i32, ptr %a, i64 8
%arrayidx11 = getelementptr inbounds i32, ptr %a, i64 12
%0 = load <4 x i32>, ptr %a, align 4
%1 = load <4 x i32>, ptr %arrayidx3, align 4
%2 = load <4 x i32>, ptr %arrayidx7, align 4
%3 = load <4 x i32>, ptr %arrayidx11, align 4
%arrayidx19 = getelementptr inbounds i32, ptr %b, i64 4
%arrayidx23 = getelementptr inbounds i32, ptr %b, i64 8
%arrayidx27 = getelementptr inbounds i32, ptr %b, i64 12
%4 = load <4 x i32>, ptr %b, align 4
%5 = load <4 x i32>, ptr %arrayidx19, align 4
%6 = load <4 x i32>, ptr %arrayidx23, align 4
%7 = load <4 x i32>, ptr %arrayidx27, align 4
%add.i = add <4 x i32> %4, %0
%add.i63 = add <4 x i32> %5, %1
%add.i64 = add <4 x i32> %6, %2
%add.i65 = add <4 x i32> %7, %3
%arrayidx36 = getelementptr inbounds i32, ptr %c, i64 4
%arrayidx39 = getelementptr inbounds i32, ptr %c, i64 8
%arrayidx42 = getelementptr inbounds i32, ptr %c, i64 12
store <4 x i32> %add.i, ptr %c, align 4
store <4 x i32> %add.i63, ptr %arrayidx36, align 4
store <4 x i32> %add.i64, ptr %arrayidx39, align 4
store <4 x i32> %add.i65, ptr %arrayidx42, align 4
ret void
}
Loading