-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[SLP][REVEC] getNumElements should not be used as VF when REVEC is enabled. #134031
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SLP][REVEC] getNumElements should not be used as VF when REVEC is enabled. #134031
Conversation
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-llvm-transforms Author: Han-Kuan Chen (HanKuanChen) ChangesFull diff: https://github.com/llvm/llvm-project/pull/134031.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 18c896767b6d2..6201541d905bd 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -11298,8 +11298,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
if (!E2 && InVectors.size() == 1) {
unsigned VF = E1.getVectorFactor();
if (Value *V1 = dyn_cast<Value *>(InVectors.front())) {
- VF = std::max(VF,
- cast<FixedVectorType>(V1->getType())->getNumElements());
+ VF = std::max(VF, getVF(V1));
} else {
const auto *E = cast<const TreeEntry *>(InVectors.front());
VF = std::max(VF, E->getVectorFactor());
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/revec-estimateNodesPermuteCost.ll b/llvm/test/Transforms/SLPVectorizer/X86/revec-estimateNodesPermuteCost.ll
new file mode 100644
index 0000000000000..c69d9eaa572b0
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/revec-estimateNodesPermuteCost.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 -passes=slp-vectorizer -S -slp-revec < %s | FileCheck %s
+
+define i32 @test1(<4 x float> %0, <4 x float> %1) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr null, i64 288
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr null, i64 304
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr null, i64 416
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr null, i64 432
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr null, i64 256
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr null, i64 272
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr null, i64 288
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr null, i64 304
+; CHECK-NEXT: [[TMP10:%.*]] = load <4 x float>, ptr [[TMP2]], align 16
+; CHECK-NEXT: [[TMP11:%.*]] = load <4 x float>, ptr [[TMP3]], align 16
+; CHECK-NEXT: [[TMP12:%.*]] = load <4 x float>, ptr [[TMP4]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = load <4 x float>, ptr [[TMP5]], align 16
+; CHECK-NEXT: [[TMP14:%.*]] = fmul <4 x float> [[TMP10]], [[TMP0:%.*]]
+; CHECK-NEXT: [[TMP15:%.*]] = fmul <4 x float> [[TMP11]], [[TMP0]]
+; CHECK-NEXT: [[TMP16:%.*]] = fmul <4 x float> [[TMP12]], [[TMP0]]
+; CHECK-NEXT: [[TMP17:%.*]] = fmul <4 x float> [[TMP13]], [[TMP0]]
+; CHECK-NEXT: [[TMP18:%.*]] = fsub <4 x float> [[TMP14]], [[TMP1:%.*]]
+; CHECK-NEXT: [[TMP19:%.*]] = fsub <4 x float> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP20:%.*]] = fsub <4 x float> [[TMP16]], [[TMP1]]
+; CHECK-NEXT: [[TMP21:%.*]] = fsub <4 x float> [[TMP17]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = fmul <4 x float> [[TMP11]], zeroinitializer
+; CHECK-NEXT: [[TMP23:%.*]] = fmul <4 x float> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP24:%.*]] = fadd <4 x float> [[TMP18]], [[TMP0]]
+; CHECK-NEXT: [[TMP25:%.*]] = fadd <4 x float> [[TMP19]], zeroinitializer
+; CHECK-NEXT: [[TMP26:%.*]] = fadd <4 x float> [[TMP20]], [[TMP0]]
+; CHECK-NEXT: [[TMP27:%.*]] = fadd <4 x float> [[TMP21]], zeroinitializer
+; CHECK-NEXT: store <4 x float> [[TMP24]], ptr [[TMP6]], align 16
+; CHECK-NEXT: store <4 x float> [[TMP25]], ptr [[TMP7]], align 16
+; CHECK-NEXT: store <4 x float> [[TMP26]], ptr [[TMP8]], align 16
+; CHECK-NEXT: store <4 x float> [[TMP27]], ptr [[TMP9]], align 16
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %2 = getelementptr i8, ptr null, i64 288
+ %3 = getelementptr i8, ptr null, i64 304
+ %4 = getelementptr i8, ptr null, i64 416
+ %5 = getelementptr i8, ptr null, i64 432
+ %6 = getelementptr i8, ptr null, i64 256
+ %7 = getelementptr i8, ptr null, i64 272
+ %8 = getelementptr i8, ptr null, i64 288
+ %9 = getelementptr i8, ptr null, i64 304
+ %10 = load <4 x float>, ptr %2, align 16
+ %11 = load <4 x float>, ptr %3, align 16
+ %12 = load <4 x float>, ptr %4, align 16
+ %13 = load <4 x float>, ptr %5, align 16
+ %14 = fmul <4 x float> %10, %0
+ %15 = fmul <4 x float> %11, %0
+ %16 = fmul <4 x float> %12, %0
+ %17 = fmul <4 x float> %13, %0
+ %18 = fsub <4 x float> %14, %1
+ %19 = fsub <4 x float> %15, zeroinitializer
+ %20 = fsub <4 x float> %16, %1
+ %21 = fsub <4 x float> %17, zeroinitializer
+ %22 = fmul <4 x float> %11, zeroinitializer
+ %23 = fmul <4 x float> %13, zeroinitializer
+ %24 = fadd <4 x float> %18, %0
+ %25 = fadd <4 x float> %19, zeroinitializer
+ %26 = fadd <4 x float> %20, %0
+ %27 = fadd <4 x float> %21, zeroinitializer
+ store <4 x float> %24, ptr %6, align 16
+ store <4 x float> %25, ptr %7, align 16
+ store <4 x float> %26, ptr %8, align 16
+ store <4 x float> %27, ptr %9, align 16
+ ret i32 0
+}
|
store <4 x float> %25, ptr %7, align 16 | ||
store <4 x float> %26, ptr %8, align 16 | ||
store <4 x float> %27, ptr %9, align 16 | ||
ret i32 0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Checking: Did you intend to pre-commit the test without CHECK lines? Does the opt
output differ after the change in SLPVectorizer.cpp
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Checking: Did you intend to pre-commit the test without CHECK lines?
opt would crash. That is why it does not have CHECK lines.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LG
No description provided.