[SLP][REVEC] Fix cost model for getGatherCost with FixedVectorType ScalarTy. #109369

HanKuanChen · 2024-09-20T03:58:58Z

No description provided.

llvmbot · 2024-09-20T03:59:34Z

@llvm/pr-subscribers-llvm-transforms

Author: Han-Kuan Chen (HanKuanChen)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/109369.diff

2 Files Affected:

(modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+22-10)
(added) llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll (+31)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index ddeb97463281c9..490d40bb182d34 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -11732,8 +11732,7 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
   // Find the cost of inserting/extracting values from the vector.
   // Check if the same elements are inserted several times and count them as
   // shuffle candidates.
-  unsigned ScalarTyNumElements = getNumElements(ScalarTy);
-  APInt ShuffledElements = APInt::getZero(VecTy->getNumElements());
+  APInt ShuffledElements = APInt::getZero(VL.size());
   DenseMap<Value *, unsigned> UniqueElements;
   constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
   InstructionCost Cost;
@@ -11753,8 +11752,7 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
     Value *V = VL[I];
     // No need to shuffle duplicates for constants.
     if ((ForPoisonSrc && isConstant(V)) || isa<UndefValue>(V)) {
-      ShuffledElements.setBits(I * ScalarTyNumElements,
-                               I * ScalarTyNumElements + ScalarTyNumElements);
+      ShuffledElements.setBit(I);
       ShuffleMask[I] = isa<PoisonValue>(V) ? PoisonMaskElem : I;
       continue;
     }
@@ -11767,14 +11765,28 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
     }
 
     DuplicateNonConst = true;
-    ShuffledElements.setBits(I * ScalarTyNumElements,
-                             I * ScalarTyNumElements + ScalarTyNumElements);
+    ShuffledElements.setBit(I);
     ShuffleMask[I] = Res.first->second;
   }
-  if (ForPoisonSrc)
-    Cost =
-        TTI->getScalarizationOverhead(VecTy, ~ShuffledElements, /*Insert*/ true,
-                                      /*Extract*/ false, CostKind);
+  if (ForPoisonSrc) {
+    if (isa<FixedVectorType>(ScalarTy)) {
+      assert(SLPReVec && "Only supported by REVEC.");
+      // We don't need to insert elements one by one. Instead, we can insert the
+      // entire vector into the destination.
+      Cost = 0;
+      unsigned ScalarTyNumElements = getNumElements(ScalarTy);
+      for (unsigned I = 0, E = VL.size(); I != E; ++I)
+        if (!ShuffledElements[I])
+          Cost += TTI->getShuffleCost(TTI::SK_InsertSubvector, VecTy,
+                                      std::nullopt, TTI::TCK_RecipThroughput,
+                                      I * ScalarTyNumElements,
+                                      cast<FixedVectorType>(ScalarTy));
+    } else {
+      Cost = TTI->getScalarizationOverhead(VecTy, ~ShuffledElements,
+                                           /*Insert*/ true,
+                                           /*Extract*/ false, CostKind);
+    }
+  }
   if (DuplicateNonConst)
     Cost += ::getShuffleCost(*TTI, TargetTransformInfo::SK_PermuteSingleSrc,
                              VecTy, ShuffleMask);
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll
new file mode 100644
index 00000000000000..648eb5bc5119dc
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=riscv64 -mcpu=sifive-x280 -passes=slp-vectorizer -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-10 %s | FileCheck %s
+
+define void @test(<4 x float> %load6, <4 x float> %load7, <4 x float> %load8, <4 x float> %load17, <4 x float> %fmuladd7, <4 x float> %fmuladd16, ptr %out_ptr) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VEXT165_I:%.*]] = shufflevector <4 x float> [[LOAD6:%.*]], <4 x float> [[LOAD7:%.*]], <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; CHECK-NEXT:    [[VEXT309_I:%.*]] = shufflevector <4 x float> [[LOAD7]], <4 x float> [[LOAD8:%.*]], <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; CHECK-NEXT:    [[TMP0:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> [[VEXT165_I]], i64 0)
+; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP0]], <4 x float> [[VEXT309_I]], i64 4)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> poison, i64 4)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP2]], <4 x float> [[LOAD17:%.*]], i64 0)
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP5:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> [[FMULADD7:%.*]], i64 0)
+; CHECK-NEXT:    [[TMP6:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP5]], <4 x float> [[FMULADD16:%.*]], i64 4)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> [[TMP1]], <8 x float> [[TMP4]], <8 x float> [[TMP6]])
+; CHECK-NEXT:    store <8 x float> [[TMP7]], ptr [[OUT_PTR:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %vext165.i = shufflevector <4 x float> %load6, <4 x float> %load7, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+  %vext309.i = shufflevector <4 x float> %load7, <4 x float> %load8, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+  %fmuladd8 = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> %vext165.i, <4 x float> %load17, <4 x float> %fmuladd7)
+  %fmuladd17 = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> %vext309.i, <4 x float> %load17, <4 x float> %fmuladd16)
+  %add.ptr.i.i = getelementptr inbounds i8, ptr %out_ptr, i64 16
+  store <4 x float> %fmuladd8, ptr %out_ptr, align 4
+  store <4 x float> %fmuladd17, ptr %add.ptr.i.i, align 4
+  ret void
+}
+
+declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>)

ScalarTy.

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

alexey-bataev · 2024-09-20T14:23:00Z

llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll

@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=riscv64 -mcpu=sifive-x280 -passes=slp-vectorizer -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-10 %s | FileCheck %s


Would be good to add remarks check to see cost changes

alexey-bataev

LG

[SLP][REVEC] Pre-commit test.

95191e0

HanKuanChen requested a review from alexey-bataev September 20, 2024 03:58

llvmbot added vectorizers llvm:transforms labels Sep 20, 2024

[SLP][REVEC] Fix cost model for getGatherCost with FixedVectorType

5915170

ScalarTy.

HanKuanChen force-pushed the slp-revec-getGatherCost branch 2 times, most recently from 14101df to 5915170 Compare September 20, 2024 07:06

alexey-bataev reviewed Sep 20, 2024

View reviewed changes

HanKuanChen added 2 commits September 22, 2024 00:06

[SLP][REVEC] Apply comment.

eee7a62

[SLP][REVEC] Apply comment.

eda7eff

HanKuanChen requested a review from alexey-bataev September 22, 2024 07:35

alexey-bataev approved these changes Sep 23, 2024

View reviewed changes

HanKuanChen merged commit 0062975 into llvm:main Sep 24, 2024
8 checks passed

HanKuanChen deleted the slp-revec-getGatherCost branch September 24, 2024 00:08

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[SLP][REVEC] Fix cost model for getGatherCost with FixedVectorType ScalarTy. #109369

[SLP][REVEC] Fix cost model for getGatherCost with FixedVectorType ScalarTy. #109369

Uh oh!

HanKuanChen commented Sep 20, 2024

Uh oh!

llvmbot commented Sep 20, 2024

Uh oh!

Uh oh!

alexey-bataev Sep 20, 2024

Uh oh!

HanKuanChen Sep 22, 2024

Uh oh!

alexey-bataev left a comment

Uh oh!

Uh oh!

Uh oh!

		@@ -0,0 +1,31 @@
		; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
		; RUN: opt -mtriple=riscv64 -mcpu=sifive-x280 -passes=slp-vectorizer -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-10 %s \| FileCheck %s

[SLP][REVEC] Fix cost model for getGatherCost with FixedVectorType ScalarTy. #109369

[SLP][REVEC] Fix cost model for getGatherCost with FixedVectorType ScalarTy. #109369

Uh oh!

Conversation

HanKuanChen commented Sep 20, 2024

Uh oh!

llvmbot commented Sep 20, 2024

Uh oh!

Uh oh!

alexey-bataev Sep 20, 2024

Choose a reason for hiding this comment

Uh oh!

HanKuanChen Sep 22, 2024

Choose a reason for hiding this comment

Uh oh!

alexey-bataev left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!