[SLP][REVEC] Make SLP support revectorization (-slp-revec) and add simple test. #98269

HanKuanChen · 2024-07-10T04:22:57Z

This PR will make SLP support revectorization. Add an option -slp-revec
to control the functionality.

reference:
https://discourse.llvm.org/t/rfc-make-slp-vectorizer-revectorize-vector-instructions/79436

llvmbot · 2024-07-10T04:23:27Z

@llvm/pr-subscribers-llvm-transforms

Author: Han-Kuan Chen (HanKuanChen)

Changes

Full implementation is in https://github.com/HanKuanChen/llvm-project/tree/slp-revec.

Full diff: https://github.com/llvm/llvm-project/pull/98269.diff

2 Files Affected:

(modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+32-14)
(added) llvm/test/Transforms/SLPVectorizer/revec.ll (+40)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 1e9dd8c1e2287..a948f7629c705 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -113,6 +113,10 @@ static cl::opt<bool>
     RunSLPVectorization("vectorize-slp", cl::init(true), cl::Hidden,
                         cl::desc("Run the SLP vectorization passes"));
 
+static cl::opt<bool>
+    RunSLPReVectorization("revectorize-slp", cl::init(false), cl::Hidden,
+                          cl::desc("Run the SLP revectorization passes"));
+
 static cl::opt<int>
     SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
                      cl::desc("Only vectorize if you gain more than this "
@@ -227,15 +231,26 @@ static const unsigned MaxPHINumOperands = 128;
 /// avoids spending time checking the cost model and realizing that they will
 /// be inevitably scalarized.
 static bool isValidElementType(Type *Ty) {
+  if (RunSLPReVectorization && isa<FixedVectorType>(Ty))
+    Ty = Ty->getScalarType();
   return VectorType::isValidElementType(Ty) && !Ty->isX86_FP80Ty() &&
          !Ty->isPPC_FP128Ty();
 }
 
 /// \returns the vector type of ScalarTy based on vectorization factor.
 static FixedVectorType *getWidenedType(Type *ScalarTy, unsigned VF) {
+  if (auto *VecTy = dyn_cast<FixedVectorType>(ScalarTy))
+    return FixedVectorType::get(VecTy->getElementType(),
+                                VF * VecTy->getNumElements());
   return FixedVectorType::get(ScalarTy, VF);
 }
 
+static unsigned getNumElements(Type *Ty) {
+  if (auto *VecTy = dyn_cast<FixedVectorType>(Ty))
+    return VecTy->getNumElements();
+  return 1;
+}
+
 /// \returns True if the value is a constant (but not globals/constant
 /// expressions).
 static bool isConstant(Value *V) {
@@ -6779,21 +6794,23 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
     return;
   }
 
-  // Don't handle vectors.
-  if (S.OpValue->getType()->isVectorTy() &&
-      !isa<InsertElementInst>(S.OpValue)) {
-    LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
-    newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);
-    return;
-  }
-
-  if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue))
-    if (SI->getValueOperand()->getType()->isVectorTy()) {
-      LLVM_DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n");
+  if (!RunSLPReVectorization) {
+    // Don't handle vectors.
+    if (S.OpValue->getType()->isVectorTy() &&
+        !isa<InsertElementInst>(S.OpValue)) {
+      LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
       newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);
       return;
     }
 
+    if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue))
+      if (SI->getValueOperand()->getType()->isVectorTy()) {
+        LLVM_DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n");
+        newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);
+        return;
+      }
+  }
+
   // If all of the operands are identical or constant we have a simple solution.
   // If we deal with insert/extract instructions, they all must have constant
   // indices, otherwise we should gather them, not try to vectorize.
@@ -11807,10 +11824,10 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
   Value *castToScalarTyElem(Value *V,
                             std::optional<bool> IsSigned = std::nullopt) {
     auto *VecTy = cast<VectorType>(V->getType());
-    if (VecTy->getElementType() == ScalarTy)
+    if (VecTy->getElementType() == ScalarTy->getScalarType())
       return V;
     return Builder.CreateIntCast(
-        V, VectorType::get(ScalarTy, VecTy->getElementCount()),
+        V, VectorType::get(ScalarTy->getScalarType(), VecTy->getElementCount()),
         IsSigned.value_or(!isKnownNonNegative(V, SimplifyQuery(*R.DL))));
   }
 
@@ -12195,7 +12212,8 @@ Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx,
         return ShuffleBuilder.finalize(std::nullopt);
       };
       Value *V = vectorizeTree(VE, PostponedPHIs);
-      if (VF != cast<FixedVectorType>(V->getType())->getNumElements()) {
+      if ((VF * getNumElements(VL[0]->getType())) !=
+          cast<FixedVectorType>(V->getType())->getNumElements()) {
         if (!VE->ReuseShuffleIndices.empty()) {
           // Reshuffle to get only unique values.
           // If some of the scalars are duplicated in the vectorization
diff --git a/llvm/test/Transforms/SLPVectorizer/revec.ll b/llvm/test/Transforms/SLPVectorizer/revec.ll
new file mode 100644
index 0000000000000..3b9b242eef7ad
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/revec.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=slp-vectorizer -S -revectorize-slp -slp-max-reg-size=1024 -slp-threshold=-100 %s | FileCheck %s
+
+define void @test1(ptr %a, ptr %b, ptr %c) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr [[B:%.*]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = add <16 x i32> [[TMP1]], [[TMP0]]
+; CHECK-NEXT:    store <16 x i32> [[TMP2]], ptr [[C:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %arrayidx3 = getelementptr inbounds i32, ptr %a, i64 4
+  %arrayidx7 = getelementptr inbounds i32, ptr %a, i64 8
+  %arrayidx11 = getelementptr inbounds i32, ptr %a, i64 12
+  %0 = load <4 x i32>, ptr %a, align 4
+  %1 = load <4 x i32>, ptr %arrayidx3, align 4
+  %2 = load <4 x i32>, ptr %arrayidx7, align 4
+  %3 = load <4 x i32>, ptr %arrayidx11, align 4
+  %arrayidx19 = getelementptr inbounds i32, ptr %b, i64 4
+  %arrayidx23 = getelementptr inbounds i32, ptr %b, i64 8
+  %arrayidx27 = getelementptr inbounds i32, ptr %b, i64 12
+  %4 = load <4 x i32>, ptr %b, align 4
+  %5 = load <4 x i32>, ptr %arrayidx19, align 4
+  %6 = load <4 x i32>, ptr %arrayidx23, align 4
+  %7 = load <4 x i32>, ptr %arrayidx27, align 4
+  %add.i = add <4 x i32> %4, %0
+  %add.i63 = add <4 x i32> %5, %1
+  %add.i64 = add <4 x i32> %6, %2
+  %add.i65 = add <4 x i32> %7, %3
+  %arrayidx36 = getelementptr inbounds i32, ptr %c, i64 4
+  %arrayidx39 = getelementptr inbounds i32, ptr %c, i64 8
+  %arrayidx42 = getelementptr inbounds i32, ptr %c, i64 12
+  store <4 x i32> %add.i, ptr %c, align 4
+  store <4 x i32> %add.i63, ptr %arrayidx36, align 4
+  store <4 x i32> %add.i64, ptr %arrayidx39, align 4
+  store <4 x i32> %add.i65, ptr %arrayidx42, align 4
+  ret void
+}

alexey-bataev · 2024-07-10T12:54:15Z

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

+  if (RunSLPReVectorization && isa<FixedVectorType>(Ty))
+    Ty = Ty->getScalarType();


Do you need the check here? Better just use Ty->getScalarTy() directly here and check for RunSLPReVectorization only where the vectorization is requested

We should keep SLPReVec since some FixedVectorType are accidentally passed into isValidElementType.

alexey-bataev · 2024-07-10T12:55:03Z

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

  return FixedVectorType::get(ScalarTy, VF);
 }

+static unsigned getNumElements(Type *Ty) {


Add a comment with the function description. Also, will it return 1 for scalable vectors too?

Scalable vectors are not supported right now.

Add the assertion for scalable vectors

alexey-bataev · 2024-07-10T12:56:22Z

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

+  if (!RunSLPReVectorization) {
+    // Don't handle vectors.
+    if (S.OpValue->getType()->isVectorTy() &&
+        !isa<InsertElementInst>(S.OpValue)) {


Suggested change

if (!RunSLPReVectorization) {

// Don't handle vectors.

if (S.OpValue->getType()->isVectorTy() &&

!isa<InsertElementInst>(S.OpValue)) {

// Don't handle vectors.

if (!RunSLPReVectorization && S.OpValue->getType()->isVectorTy() &&

!isa<InsertElementInst>(S.OpValue)) {

Suggested change

if (!RunSLPReVectorization) {

// Don't handle vectors.

if (S.OpValue->getType()->isVectorTy() &&

!isa<InsertElementInst>(S.OpValue)) {

if (!RunSLPReVectorization) {

// Don't handle vectors.

if (S.OpValue->getType()->isVectorTy() &&

!isa<InsertElementInst>(S.OpValue)) {

alexey-bataev · 2024-07-10T12:57:55Z

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

@@ -11807,10 +11824,10 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
  Value *castToScalarTyElem(Value *V,
                            std::optional<bool> IsSigned = std::nullopt) {
    auto *VecTy = cast<VectorType>(V->getType());
-    if (VecTy->getElementType() == ScalarTy)
+    if (VecTy->getElementType() == ScalarTy->getScalarType())


What if the number of elements in ScalarTy and in VecTy is different, is this possible?

Possible. They may have different number of elements.
The number of elements for VecTy is a multiple of ScalarTy.

Then you need to handle it here or add the assertion that the number of elements expected to be the same only

Maybe my previous description cause misleading.
The V here is a vectorized result. The number of elements for V cannot be equal to the number of elements for ScalarTy.
For example,

%0 = load <4 x i32>, ptr %a, align 4 %1 = load <4 x i32>, ptr %arrayidx3, align 4 %2 = load <4 x i32>, ptr %arrayidx7, align 4 %3 = load <4 x i32>, ptr %arrayidx11, align 4

The ScalarTy here is <4 x i32> but VecTy is <16 x i32>. (from FinalShuffle -> ShuffleBuilder.addOrdered -> add -> castToScalarTyElem)
It also happend when REVEC is disabled. See llvm/test/Transforms/SLPVectorizer/reschedule.ll, ScalarTy is double but VecTy is <2 x double>.

Can you add the assertion, that ScalarTy is either the scalar or has the same number of elements as VecTy?

alexey-bataev · 2024-07-10T18:06:07Z

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

@@ -113,6 +113,10 @@ static cl::opt<bool>
    RunSLPVectorization("vectorize-slp", cl::init(true), cl::Hidden,
                        cl::desc("Run the SLP vectorization passes"));

+static cl::opt<bool>
+    RunSLPReVectorization("revectorize-slp", cl::init(false), cl::Hidden,
+                          cl::desc("Run the SLP revectorization passes"));


I rather doubt it runs a new pass. Better to say something like "Enable vectorization for wider vector utilization" or something like this.

alexey-bataev · 2024-07-10T18:06:29Z

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

@@ -227,13 +231,24 @@ static const unsigned MaxPHINumOperands = 128;
 /// avoids spending time checking the cost model and realizing that they will
 /// be inevitably scalarized.
 static bool isValidElementType(Type *Ty) {
+  // TODO: Support ScalableVectorType.


Add the assertion for scalable types for now

Add assert in isValidElementType will make SLP cannot work. But we can add assert inside getNumElements.

Why? We do not support scalable types in SLP vectorizer, so it should be fine (at least for now) that SLP does not work with scalable typesбгт less it crashes the compiler. The crashes should be fixed.

Because ScalableVectorType is accidentally passed into isValidElementType.

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 8925e3b2d91a..12bc491381fe 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -7512,7 +7512,7 @@ unsigned BoUpSLP::canMapToVector(Type *T) const { } } - if (!isValidElementType(EltTy)) + if (isa<ScalableVectorType>(EltTy) || !isValidElementType(EltTy)) return 0; uint64_t VTSize = DL->getTypeStoreSizeInBits(getWidenedType(EltTy, N)); if (VTSize < MinVecRegSize || VTSize > MaxVecRegSize || @@ -18901,6 +18901,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { // No need to analyze deleted, vectorized and non-vectorizable // instructions. if (!VisitedInstrs.count(P) && !R.isDeleted(P) && + !isa<ScalableVectorType>(P->getType()) && isValidElementType(P->getType())) Incoming.push_back(P); } @@ -19058,6 +19059,8 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { } if (TryToVectorizeRoot) { for (auto *V : It->operand_values()) { + if (isa<ScalableVectorType>(V->getType())) + continue; // Postponed instructions should not be vectorized here, delay their // vectorization. if (auto *VI = dyn_cast<Instruction>(V);

The first one is triggered by llvm/test/Transforms/SLPVectorizer/RISCV/scalable-type-to-vect.ll.
The second and the third one is triggered by llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll.

alexey-bataev · 2024-07-10T18:07:21Z

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

+  if (!RunSLPReVectorization)
+    if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue))
+      if (SI->getValueOperand()->getType()->isVectorTy()) {


Suggested change

if (!RunSLPReVectorization)

if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue))

if (SI->getValueOperand()->getType()->isVectorTy()) {

if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue))

if (!RunSLPReVectorization && SI->getValueOperand()->getType()->isVectorTy()) {

alexey-bataev · 2024-07-10T18:08:06Z

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

@@ -11807,10 +11824,10 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
  Value *castToScalarTyElem(Value *V,
                            std::optional<bool> IsSigned = std::nullopt) {
    auto *VecTy = cast<VectorType>(V->getType());
-    if (VecTy->getElementType() == ScalarTy)
+    if (VecTy->getElementType() == ScalarTy->getScalarType())


Then you need to handle it here or add the assertion that the number of elements expected to be the same only

alexey-bataev · 2024-07-10T18:08:47Z

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

@@ -12195,7 +12211,8 @@ Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx,
        return ShuffleBuilder.finalize(std::nullopt);
      };
      Value *V = vectorizeTree(VE, PostponedPHIs);
-      if (VF != cast<FixedVectorType>(V->getType())->getNumElements()) {
+      if ((VF * getNumElements(VL[0]->getType())) !=


Drop extra parens

alexey-bataev · 2024-07-12T18:26:52Z

Give better title and subject description

HanKuanChen · 2024-07-15T07:08:43Z

Give better title and subject description

More description can only be modified by squash and merge.

fhahn · 2024-07-15T07:21:16Z

GitHub uses the PR description when merging the commit, updating the PR description should be suffficient

alexey-bataev

LG

dyung · 2024-07-18T00:03:55Z

Hi @HanKuanChen, we have several internal tests that are hitting an assertion failure which I bisected back to this change. I have filed PR #99411 for the issue, can you take a look?

amykhuang · 2024-07-18T00:07:19Z

we hit an assert from this patch in chromium code

reduced c++ repro with clang -cc1 -triple arm64-apple-macosx10.15.0 -O3 -vectorize-slp -emit-obj t.c -target-cpu apple-m1

typedef __attribute__((neon_vector_type(8))) unsigned char a;
a b;
void c(long *g) {
  a d, e, f = __builtin_neon_vpadd_v(e, d, 16);
  long bitmap = ({ __builtin_neon_vget_lane_i64(b, 0); });
  g[0] = ~bitmap;
  bitmap = ({ __builtin_neon_vget_lane_i64(f, 0); });
  g[1] = ~bitmap;
}

…mple test. (#98269) This PR will make SLP support revectorization. Add an option -slp-revec to control the functionality. reference: https://discourse.llvm.org/t/rfc-make-slp-vectorizer-revectorize-vector-instructions/79436

HanKuanChen requested review from fhahn, preames, hiraditya, RKSimon and alexey-bataev July 10, 2024 04:22

llvmbot added vectorizers llvm:transforms labels Jul 10, 2024

alexey-bataev reviewed Jul 10, 2024

View reviewed changes

HanKuanChen force-pushed the slp-revec-initial branch from 4b12398 to 3d610df Compare July 10, 2024 17:12

HanKuanChen requested a review from alexey-bataev July 10, 2024 17:13

alexey-bataev reviewed Jul 10, 2024

View reviewed changes

HanKuanChen added 2 commits July 10, 2024 20:41

[SLP][REVEC] Add an option to control SLP revectorization. NFC.

bb26a10

[SLP][REVEC] Pre-commit test.

42de8a8

HanKuanChen force-pushed the slp-revec-initial branch from 3d610df to 595fce7 Compare July 11, 2024 04:18

HanKuanChen requested a review from alexey-bataev July 11, 2024 04:18

HanKuanChen changed the title ~~[SLP][REVEC] Initial commits.~~ [SLP][REVEC] Make SLP support revectorization (-slp-revec) and add simple test. Jul 15, 2024

HanKuanChen added 5 commits July 15, 2024 21:43

[SLP][REVEC] Apply RunSLPReVectorization to the existing code.

a80030a

[SLP][REVEC] Make castToScalarTyElem support vector instructions.

aeec2ee

[SLP][REVEC] NFC. Provide an universal interface for getNumElements.

8abad7a

[SLP][REVEC] Make vectorizeOperand support vector instructions.

0b2e709

[SLP][REVEC] Update test1.

75ca074

HanKuanChen force-pushed the slp-revec-initial branch from 595fce7 to 75ca074 Compare July 16, 2024 04:46

alexey-bataev approved these changes Jul 17, 2024

View reviewed changes

HanKuanChen merged commit 1813ffd into llvm:main Jul 17, 2024
7 checks passed

HanKuanChen deleted the slp-revec-initial branch July 17, 2024 12:14

		if (RunSLPReVectorization && isa<FixedVectorType>(Ty))
		Ty = Ty->getScalarType();

[SLP][REVEC] Make SLP support revectorization (-slp-revec) and add simple test. #98269

[SLP][REVEC] Make SLP support revectorization (-slp-revec) and add simple test. #98269

Uh oh!

Conversation

HanKuanChen commented Jul 10, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Jul 10, 2024

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

alexey-bataev commented Jul 12, 2024

Uh oh!

HanKuanChen commented Jul 15, 2024

Uh oh!

fhahn commented Jul 15, 2024

Uh oh!

alexey-bataev left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

dyung commented Jul 18, 2024

Uh oh!

amykhuang commented Jul 18, 2024

Uh oh!

Uh oh!

HanKuanChen commented Jul 10, 2024 •

edited

Loading