Skip to content

[SLP][REVEC] Fix scalar mask is passed to getScalarizationOverhead but the type is vector. #128476

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Feb 24, 2025

Conversation

HanKuanChen
Copy link
Contributor

Fix "Vector size mismatch".

@llvmbot
Copy link
Member

llvmbot commented Feb 24, 2025

@llvm/pr-subscribers-vectorizers

@llvm/pr-subscribers-backend-systemz

Author: Han-Kuan Chen (HanKuanChen)

Changes

Fix "Vector size mismatch".


Full diff: https://github.com/llvm/llvm-project/pull/128476.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+20-4)
  • (added) llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll (+45)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index bf256d82ae17d..181fee5adbd10 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13963,15 +13963,31 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
     ShuffledElements.setBit(I);
     ShuffleMask[I] = Res.first->second;
   }
-  if (!DemandedElements.isZero())
-    Cost +=
-        TTI->getScalarizationOverhead(VecTy, DemandedElements, /*Insert=*/true,
-                                      /*Extract=*/false, CostKind, VL);
+  if (!DemandedElements.isZero()) {
+    if (isa<FixedVectorType>(ScalarTy)) {
+      assert(SLPReVec && "Only supported by REVEC.");
+      // We don't need to insert elements one by one. Instead, we can insert the
+      // entire vector into the destination.
+      Cost = 0;
+      unsigned ScalarTyNumElements = getNumElements(ScalarTy);
+      for (unsigned I : seq<unsigned>(VL.size()))
+        if (DemandedElements[I])
+          Cost += TTI->getShuffleCost(
+              TTI::SK_InsertSubvector, VecTy, std::nullopt, CostKind,
+              I * ScalarTyNumElements, cast<FixedVectorType>(ScalarTy));
+    } else {
+      Cost += TTI->getScalarizationOverhead(VecTy, DemandedElements,
+                                            /*Insert=*/true,
+                                            /*Extract=*/false, CostKind, VL);
+    }
+  }
   if (ForPoisonSrc) {
     if (isa<FixedVectorType>(ScalarTy)) {
       assert(SLPReVec && "Only supported by REVEC.");
       // We don't need to insert elements one by one. Instead, we can insert the
       // entire vector into the destination.
+      assert(DemandedElements.isZero() &&
+             "Need to consider the cost from DemandedElements.");
       Cost = 0;
       unsigned ScalarTyNumElements = getNumElements(ScalarTy);
       for (unsigned I : seq<unsigned>(VL.size()))
diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll
new file mode 100644
index 0000000000000..be42207e207a0
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=s390x-unknown-linux-gnu -mcpu=arch15 -passes=slp-vectorizer -S -slp-revec %s | FileCheck %s
+
+define void @e(<4 x i16> %0) {
+; CHECK-LABEL: @e(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ zeroinitializer, [[ENTRY:%.*]] ], [ zeroinitializer, [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[ENTRY]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i16> [[VEC_IND]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i16> [[VEC_IND]], zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt <4 x i16> [[TMP0:%.*]], zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = zext <4 x i1> [[TMP4]] to <4 x i32>
+; CHECK-NEXT:    [[TMP6:%.*]] = or <4 x i32> [[TMP2]], [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp sgt <4 x i16> [[TMP3]], zeroinitializer
+; CHECK-NEXT:    [[TMP8:%.*]] = zext <4 x i1> [[TMP7]] to <4 x i32>
+; CHECK-NEXT:    [[TMP9:%.*]] = or <4 x i32> [[TMP6]], [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp sgt <4 x i16> zeroinitializer, zeroinitializer
+; CHECK-NEXT:    [[TMP11:%.*]] = zext <4 x i1> [[TMP10]] to <4 x i32>
+; CHECK-NEXT:    [[TMP12]] = or <4 x i32> [[TMP9]], [[TMP11]]
+; CHECK-NEXT:    br label [[VECTOR_BODY]]
+;
+entry:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %vec.ind = phi <4 x i16> [ zeroinitializer, %entry ], [ zeroinitializer, %vector.body ]
+  %vec.phi = phi <4 x i32> [ zeroinitializer, %entry ], [ %13, %vector.body ]
+  %1 = icmp sgt <4 x i16> %vec.ind, zeroinitializer
+  %2 = zext <4 x i1> %1 to <4 x i32>
+  %3 = add <4 x i16> %vec.ind, zeroinitializer
+  %4 = icmp sgt <4 x i16> %0, zeroinitializer
+  %5 = zext <4 x i1> %4 to <4 x i32>
+  %6 = or <4 x i32> %2, %5
+  %7 = add <4 x i16> zeroinitializer, zeroinitializer
+  %8 = icmp sgt <4 x i16> %3, zeroinitializer
+  %9 = zext <4 x i1> %8 to <4 x i32>
+  %10 = or <4 x i32> %6, %9
+  %11 = icmp sgt <4 x i16> %7, zeroinitializer
+  %12 = zext <4 x i1> %11 to <4 x i32>
+  %13 = or <4 x i32> %10, %12
+  br label %vector.body
+}

@HanKuanChen HanKuanChen merged commit 3a6108b into llvm:main Feb 24, 2025
11 checks passed
@HanKuanChen HanKuanChen deleted the slp-revec-fix-128169 branch February 24, 2025 15:43
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants