-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[SLP]Add debug counter support #110734
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SLP]Add debug counter support #110734
Conversation
Created using spr 1.3.5
@llvm/pr-subscribers-llvm-transforms — Author: Alexey Bataev (alexey-bataev). Changes: Patch is 20.09 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/110734.diff — 2 files affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 15e798bd6c98f9..b68dfbaa6c3075 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -79,6 +79,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DOTGraphTraits.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/InstructionCost.h"
@@ -109,6 +110,9 @@ using namespace slpvectorizer;
STATISTIC(NumVectorInstructions, "Number of vector instructions generated");
+DEBUG_COUNTER(VectorizedGraphs, "slp-vectorized",
+ "Controls which SLP graphs should be vectorized.");
+
static cl::opt<bool>
RunSLPVectorization("vectorize-slp", cl::init(true), cl::Hidden,
cl::desc("Run the SLP vectorization passes"));
@@ -11175,6 +11179,9 @@ bool BoUpSLP::isLoadCombineCandidate(ArrayRef<Value *> Stores) const {
}
bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
+ if (!DebugCounter::shouldExecute(VectorizedGraphs))
+ return true;
+
// No need to vectorize inserts of gathered values.
if (VectorizableTree.size() == 2 &&
isa<InsertElementInst>(VectorizableTree[0]->Scalars[0]) &&
diff --git a/llvm/test/Transforms/SLPVectorizer/debug-counter.ll b/llvm/test/Transforms/SLPVectorizer/debug-counter.ll
new file mode 100644
index 00000000000000..006b6ef33964e5
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/debug-counter.ll
@@ -0,0 +1,235 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -passes=slp-vectorizer -debug-counter=slp-vectorized=0 -slp-threshold=-99999 < %s | FileCheck %s --check-prefix=COUNT0
+; RUN: opt -S -passes=slp-vectorizer -debug-counter=slp-vectorized=1 -slp-threshold=-99999 < %s | FileCheck %s --check-prefix=COUNT1
+; RUN: opt -S -passes=slp-vectorizer -debug-counter=slp-vectorized=2 -slp-threshold=-99999 < %s | FileCheck %s --check-prefix=COUNT2
+; RUN: opt -S -passes=slp-vectorizer -debug-counter=slp-vectorized=0-1 -slp-threshold=-99999 < %s | FileCheck %s --check-prefix=COUNT-1
+
+define void @blam(ptr %arg, double %load2, i1 %fcmp3) {
+; CHECK-LABEL: define void @blam
+; CHECK-SAME: (ptr [[ARG:%.*]], double [[LOAD2:%.*]], i1 [[FCMP3:%.*]]) {
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[GETELEMENTPTR13:%.*]] = getelementptr double, ptr [[ARG]], i64 3
+; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[ARG]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i1> poison, i1 [[FCMP3]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i1> [[TMP1]], <2 x i1> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x double> zeroinitializer, <2 x double> [[TMP0]]
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP0]], double [[LOAD2]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = fcmp olt <2 x double> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = select <2 x i1> [[TMP5]], <2 x double> zeroinitializer, <2 x double> [[TMP0]]
+; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt <2 x double> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP7]], <2 x double> <double 0.000000e+00, double 1.000000e+00>, <2 x double> <double 1.000000e+00, double 0.000000e+00>
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: [[TMP10:%.*]] = fcmp olt <2 x double> [[TMP9]], [[TMP6]]
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> <double poison, double 0.000000e+00>, <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> <double 0.000000e+00, double poison>, <2 x i32> <i32 2, i32 0>
+; CHECK-NEXT: [[TMP13:%.*]] = select <2 x i1> [[TMP10]], <2 x double> [[TMP11]], <2 x double> [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = fcmp olt <2 x double> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = select <2 x i1> [[TMP14]], <2 x double> zeroinitializer, <2 x double> <double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT: [[TMP16:%.*]] = fcmp ogt <2 x double> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP18:%.*]] = select <2 x i1> [[TMP16]], <2 x double> zeroinitializer, <2 x double> [[TMP17]]
+; CHECK-NEXT: [[TMP19:%.*]] = fcmp olt <2 x double> [[TMP18]], zeroinitializer
+; CHECK-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP19]], <2 x double> <double 1.000000e+00, double 1.000000e+00>, <2 x double> zeroinitializer
+; CHECK-NEXT: store <2 x double> [[TMP20]], ptr [[GETELEMENTPTR13]], align 8
+; CHECK-NEXT: ret void
+;
+; COUNT0-LABEL: define void @blam
+; COUNT0-SAME: (ptr [[ARG:%.*]], double [[LOAD2:%.*]], i1 [[FCMP3:%.*]]) {
+; COUNT0-NEXT: bb:
+; COUNT0-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr double, ptr [[ARG]], i64 1
+; COUNT0-NEXT: [[LOAD:%.*]] = load double, ptr [[GETELEMENTPTR]], align 8
+; COUNT0-NEXT: [[FCMP:%.*]] = fcmp olt double [[LOAD]], 0.000000e+00
+; COUNT0-NEXT: [[SELECT3:%.*]] = select i1 [[FCMP]], double 0.000000e+00, double [[LOAD]]
+; COUNT0-NEXT: [[SELECT4:%.*]] = select i1 [[FCMP3]], double 0.000000e+00, double [[LOAD]]
+; COUNT0-NEXT: [[LOAD7:%.*]] = load double, ptr [[ARG]], align 8
+; COUNT0-NEXT: [[SELECT10:%.*]] = select i1 [[FCMP3]], double 0.000000e+00, double [[LOAD7]]
+; COUNT0-NEXT: [[GETELEMENTPTR13:%.*]] = getelementptr double, ptr [[ARG]], i64 3
+; COUNT0-NEXT: [[GETELEMENTPTR21:%.*]] = getelementptr double, ptr [[ARG]], i64 4
+; COUNT0-NEXT: [[FCMP23:%.*]] = fcmp olt double [[SELECT10]], 0.000000e+00
+; COUNT0-NEXT: [[SELECT24:%.*]] = select i1 [[FCMP23]], double 0.000000e+00, double 1.000000e+00
+; COUNT0-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[LOAD2]], i32 1
+; COUNT0-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[SELECT4]], i32 0
+; COUNT0-NEXT: [[TMP2:%.*]] = fcmp olt <2 x double> [[TMP1]], zeroinitializer
+; COUNT0-NEXT: [[TMP3:%.*]] = insertelement <2 x double> <double 0.000000e+00, double poison>, double [[LOAD7]], i32 1
+; COUNT0-NEXT: [[TMP4:%.*]] = select <2 x i1> [[TMP2]], <2 x double> <double 1.000000e+00, double 0.000000e+00>, <2 x double> [[TMP3]]
+; COUNT0-NEXT: [[FCMP33:%.*]] = fcmp olt double [[SELECT24]], [[SELECT3]]
+; COUNT0-NEXT: [[SELECT34:%.*]] = select i1 [[FCMP33]], double 0.000000e+00, double [[LOAD2]]
+; COUNT0-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0
+; COUNT0-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1
+; COUNT0-NEXT: [[FCMP39:%.*]] = fcmp olt double [[TMP5]], [[TMP6]]
+; COUNT0-NEXT: [[SELECT40:%.*]] = select i1 [[FCMP39]], double [[LOAD2]], double 0.000000e+00
+; COUNT0-NEXT: [[FCMP62:%.*]] = fcmp olt double [[SELECT34]], 0.000000e+00
+; COUNT0-NEXT: [[SELECT639:%.*]] = select i1 [[FCMP62]], double 0.000000e+00, double 1.000000e+00
+; COUNT0-NEXT: [[FCMP76:%.*]] = fcmp olt double [[SELECT40]], 0.000000e+00
+; COUNT0-NEXT: [[SELECT77:%.*]] = select i1 [[FCMP76]], double 0.000000e+00, double 1.000000e+00
+; COUNT0-NEXT: [[FCMP90:%.*]] = fcmp ogt double [[SELECT639]], 0.000000e+00
+; COUNT0-NEXT: [[SELECT91:%.*]] = select i1 [[FCMP90]], double 0.000000e+00, double [[LOAD2]]
+; COUNT0-NEXT: [[FCMP92:%.*]] = fcmp ogt double [[SELECT77]], 0.000000e+00
+; COUNT0-NEXT: [[SELECT93:%.*]] = select i1 [[FCMP92]], double 0.000000e+00, double [[LOAD2]]
+; COUNT0-NEXT: [[FCMP108:%.*]] = fcmp olt double [[SELECT93]], 0.000000e+00
+; COUNT0-NEXT: [[SELECT109:%.*]] = select i1 [[FCMP108]], double 1.000000e+00, double 0.000000e+00
+; COUNT0-NEXT: [[FCMP110:%.*]] = fcmp olt double [[SELECT91]], 0.000000e+00
+; COUNT0-NEXT: [[SELECT111:%.*]] = select i1 [[FCMP110]], double 1.000000e+00, double 0.000000e+00
+; COUNT0-NEXT: store double [[SELECT111]], ptr [[GETELEMENTPTR21]], align 8
+; COUNT0-NEXT: store double [[SELECT109]], ptr [[GETELEMENTPTR13]], align 8
+; COUNT0-NEXT: ret void
+;
+; COUNT1-LABEL: define void @blam
+; COUNT1-SAME: (ptr [[ARG:%.*]], double [[LOAD2:%.*]], i1 [[FCMP3:%.*]]) {
+; COUNT1-NEXT: bb:
+; COUNT1-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr double, ptr [[ARG]], i64 1
+; COUNT1-NEXT: [[LOAD:%.*]] = load double, ptr [[GETELEMENTPTR]], align 8
+; COUNT1-NEXT: [[SELECT4:%.*]] = select i1 [[FCMP3]], double 0.000000e+00, double [[LOAD]]
+; COUNT1-NEXT: [[LOAD7:%.*]] = load double, ptr [[ARG]], align 8
+; COUNT1-NEXT: [[SELECT10:%.*]] = select i1 [[FCMP3]], double 0.000000e+00, double [[LOAD7]]
+; COUNT1-NEXT: [[FCMP11:%.*]] = fcmp olt double [[LOAD2]], 0.000000e+00
+; COUNT1-NEXT: [[SELECT128:%.*]] = select i1 [[FCMP11]], double 0.000000e+00, double [[LOAD7]]
+; COUNT1-NEXT: [[GETELEMENTPTR13:%.*]] = getelementptr double, ptr [[ARG]], i64 3
+; COUNT1-NEXT: [[GETELEMENTPTR21:%.*]] = getelementptr double, ptr [[ARG]], i64 4
+; COUNT1-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[SELECT10]], i32 0
+; COUNT1-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[LOAD]], i32 1
+; COUNT1-NEXT: [[TMP2:%.*]] = fcmp olt <2 x double> [[TMP1]], zeroinitializer
+; COUNT1-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> <double 1.000000e+00, double poison>, <2 x i32> <i32 2, i32 1>
+; COUNT1-NEXT: [[TMP4:%.*]] = select <2 x i1> [[TMP2]], <2 x double> zeroinitializer, <2 x double> [[TMP3]]
+; COUNT1-NEXT: [[FCMP29:%.*]] = fcmp olt double [[SELECT4]], 0.000000e+00
+; COUNT1-NEXT: [[SELECT30:%.*]] = select i1 [[FCMP29]], double 1.000000e+00, double 0.000000e+00
+; COUNT1-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0
+; COUNT1-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1
+; COUNT1-NEXT: [[FCMP33:%.*]] = fcmp olt double [[TMP5]], [[TMP6]]
+; COUNT1-NEXT: [[SELECT34:%.*]] = select i1 [[FCMP33]], double 0.000000e+00, double [[LOAD2]]
+; COUNT1-NEXT: [[FCMP39:%.*]] = fcmp olt double [[SELECT30]], [[SELECT128]]
+; COUNT1-NEXT: [[SELECT40:%.*]] = select i1 [[FCMP39]], double [[LOAD2]], double 0.000000e+00
+; COUNT1-NEXT: [[FCMP62:%.*]] = fcmp olt double [[SELECT34]], 0.000000e+00
+; COUNT1-NEXT: [[SELECT639:%.*]] = select i1 [[FCMP62]], double 0.000000e+00, double 1.000000e+00
+; COUNT1-NEXT: [[FCMP76:%.*]] = fcmp olt double [[SELECT40]], 0.000000e+00
+; COUNT1-NEXT: [[SELECT77:%.*]] = select i1 [[FCMP76]], double 0.000000e+00, double 1.000000e+00
+; COUNT1-NEXT: [[FCMP90:%.*]] = fcmp ogt double [[SELECT639]], 0.000000e+00
+; COUNT1-NEXT: [[SELECT91:%.*]] = select i1 [[FCMP90]], double 0.000000e+00, double [[LOAD2]]
+; COUNT1-NEXT: [[FCMP92:%.*]] = fcmp ogt double [[SELECT77]], 0.000000e+00
+; COUNT1-NEXT: [[SELECT93:%.*]] = select i1 [[FCMP92]], double 0.000000e+00, double [[LOAD2]]
+; COUNT1-NEXT: [[FCMP108:%.*]] = fcmp olt double [[SELECT93]], 0.000000e+00
+; COUNT1-NEXT: [[SELECT109:%.*]] = select i1 [[FCMP108]], double 1.000000e+00, double 0.000000e+00
+; COUNT1-NEXT: [[FCMP110:%.*]] = fcmp olt double [[SELECT91]], 0.000000e+00
+; COUNT1-NEXT: [[SELECT111:%.*]] = select i1 [[FCMP110]], double 1.000000e+00, double 0.000000e+00
+; COUNT1-NEXT: store double [[SELECT111]], ptr [[GETELEMENTPTR21]], align 8
+; COUNT1-NEXT: store double [[SELECT109]], ptr [[GETELEMENTPTR13]], align 8
+; COUNT1-NEXT: ret void
+;
+; COUNT2-LABEL: define void @blam
+; COUNT2-SAME: (ptr [[ARG:%.*]], double [[LOAD2:%.*]], i1 [[FCMP3:%.*]]) {
+; COUNT2-NEXT: bb:
+; COUNT2-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr double, ptr [[ARG]], i64 1
+; COUNT2-NEXT: [[LOAD:%.*]] = load double, ptr [[GETELEMENTPTR]], align 8
+; COUNT2-NEXT: [[FCMP:%.*]] = fcmp olt double [[LOAD]], 0.000000e+00
+; COUNT2-NEXT: [[SELECT3:%.*]] = select i1 [[FCMP]], double 0.000000e+00, double [[LOAD]]
+; COUNT2-NEXT: [[SELECT4:%.*]] = select i1 [[FCMP3]], double 0.000000e+00, double [[LOAD]]
+; COUNT2-NEXT: [[LOAD7:%.*]] = load double, ptr [[ARG]], align 8
+; COUNT2-NEXT: [[SELECT10:%.*]] = select i1 [[FCMP3]], double 0.000000e+00, double [[LOAD7]]
+; COUNT2-NEXT: [[FCMP11:%.*]] = fcmp olt double [[LOAD2]], 0.000000e+00
+; COUNT2-NEXT: [[SELECT128:%.*]] = select i1 [[FCMP11]], double 0.000000e+00, double [[LOAD7]]
+; COUNT2-NEXT: [[GETELEMENTPTR13:%.*]] = getelementptr double, ptr [[ARG]], i64 3
+; COUNT2-NEXT: [[GETELEMENTPTR21:%.*]] = getelementptr double, ptr [[ARG]], i64 4
+; COUNT2-NEXT: [[FCMP23:%.*]] = fcmp olt double [[SELECT10]], 0.000000e+00
+; COUNT2-NEXT: [[SELECT24:%.*]] = select i1 [[FCMP23]], double 0.000000e+00, double 1.000000e+00
+; COUNT2-NEXT: [[FCMP29:%.*]] = fcmp olt double [[SELECT4]], 0.000000e+00
+; COUNT2-NEXT: [[SELECT30:%.*]] = select i1 [[FCMP29]], double 1.000000e+00, double 0.000000e+00
+; COUNT2-NEXT: [[FCMP33:%.*]] = fcmp olt double [[SELECT24]], [[SELECT3]]
+; COUNT2-NEXT: [[SELECT34:%.*]] = select i1 [[FCMP33]], double 0.000000e+00, double [[LOAD2]]
+; COUNT2-NEXT: [[FCMP39:%.*]] = fcmp olt double [[SELECT30]], [[SELECT128]]
+; COUNT2-NEXT: [[SELECT40:%.*]] = select i1 [[FCMP39]], double [[LOAD2]], double 0.000000e+00
+; COUNT2-NEXT: [[FCMP62:%.*]] = fcmp olt double [[SELECT34]], 0.000000e+00
+; COUNT2-NEXT: [[SELECT639:%.*]] = select i1 [[FCMP62]], double 0.000000e+00, double 1.000000e+00
+; COUNT2-NEXT: [[FCMP76:%.*]] = fcmp olt double [[SELECT40]], 0.000000e+00
+; COUNT2-NEXT: [[SELECT77:%.*]] = select i1 [[FCMP76]], double 0.000000e+00, double 1.000000e+00
+; COUNT2-NEXT: [[FCMP90:%.*]] = fcmp ogt double [[SELECT639]], 0.000000e+00
+; COUNT2-NEXT: [[SELECT91:%.*]] = select i1 [[FCMP90]], double 0.000000e+00, double [[LOAD2]]
+; COUNT2-NEXT: [[FCMP92:%.*]] = fcmp ogt double [[SELECT77]], 0.000000e+00
+; COUNT2-NEXT: [[SELECT93:%.*]] = select i1 [[FCMP92]], double 0.000000e+00, double [[LOAD2]]
+; COUNT2-NEXT: [[FCMP108:%.*]] = fcmp olt double [[SELECT93]], 0.000000e+00
+; COUNT2-NEXT: [[SELECT109:%.*]] = select i1 [[FCMP108]], double 1.000000e+00, double 0.000000e+00
+; COUNT2-NEXT: [[FCMP110:%.*]] = fcmp olt double [[SELECT91]], 0.000000e+00
+; COUNT2-NEXT: [[SELECT111:%.*]] = select i1 [[FCMP110]], double 1.000000e+00, double 0.000000e+00
+; COUNT2-NEXT: store double [[SELECT111]], ptr [[GETELEMENTPTR21]], align 8
+; COUNT2-NEXT: store double [[SELECT109]], ptr [[GETELEMENTPTR13]], align 8
+; COUNT2-NEXT: ret void
+;
+; COUNT-1-LABEL: define void @blam
+; COUNT-1-SAME: (ptr [[ARG:%.*]], double [[LOAD2:%.*]], i1 [[FCMP3:%.*]]) {
+; COUNT-1-NEXT: bb:
+; COUNT-1-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr double, ptr [[ARG]], i64 1
+; COUNT-1-NEXT: [[LOAD:%.*]] = load double, ptr [[GETELEMENTPTR]], align 8
+; COUNT-1-NEXT: [[SELECT4:%.*]] = select i1 [[FCMP3]], double 0.000000e+00, double [[LOAD]]
+; COUNT-1-NEXT: [[LOAD7:%.*]] = load double, ptr [[ARG]], align 8
+; COUNT-1-NEXT: [[SELECT10:%.*]] = select i1 [[FCMP3]], double 0.000000e+00, double [[LOAD7]]
+; COUNT-1-NEXT: [[GETELEMENTPTR13:%.*]] = getelementptr double, ptr [[ARG]], i64 3
+; COUNT-1-NEXT: [[GETELEMENTPTR21:%.*]] = getelementptr double, ptr [[ARG]], i64 4
+; COUNT-1-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[SELECT10]], i32 0
+; COUNT-1-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[LOAD]], i32 1
+; COUNT-1-NEXT: [[TMP2:%.*]] = fcmp olt <2 x double> [[TMP1]], zeroinitializer
+; COUNT-1-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> <double 1.000000e+00, double poison>, <2 x i32> <i32 2, i32 1>
+; COUNT-1-NEXT: [[TMP4:%.*]] = select <2 x i1> [[TMP2]], <2 x double> zeroinitializer, <2 x double> [[TMP3]]
+; COUNT-1-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[LOAD2]], i32 1
+; COUNT-1-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[SELECT4]], i32 0
+; COUNT-1-NEXT: [[TMP7:%.*]] = fcmp olt <2 x double> [[TMP6]], zeroinitializer
+; COUNT-1-NEXT: [[TMP8:%.*]] = insertelement <2 x double> <double 0.000000e+00, double poison>, double [[LOAD7]], i32 1
+; COUNT-1-NEXT: [[TMP9:%.*]] = select <2 x i1> [[TMP7]], <2 x double> <double 1.000000e+00, double 0.000000e+00>, <2 x double> [[TMP8]]
+; COUNT-1-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP4]], i32 0
+; COUNT-1-NEXT: [[TMP11:%.*]] = extractelement <2 x double> [[TMP4]], i32 1
+; COUNT-1-NEXT: [[FCMP33:%.*]] = fcmp olt double [[TMP10]], [[TMP11]]
+; COUNT-1-NEXT: [[SELECT34:%.*]] = select i1 [[FCMP33]], double 0.000000e+00, double [[LOAD2]]
+; COUNT-1-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP9]], i32 0
+; COUNT-1-NEXT: [[TMP13:%.*]] = extractelement <2 x double> [[TMP9]], i32 1
+; COUNT-1-NEXT: [[FCMP39:%.*]] = fcmp olt double [[TMP12]], [[TMP13]]
+; COUNT-1-NEXT: [[SELECT40:%.*]] = select i1 [[FCMP39]], double [[LOAD2]], double 0.000000e+00
+; COUNT-1-NEXT: [[FCMP62:%.*]] = fcmp olt double [[SELECT34]], 0.000000e+00
+; COUNT-1-NEXT: [[SELECT639:%.*]] = select i1 [[FCMP62]], double 0.000000e+00, double 1.000000e+00
+; COUNT-1-NEXT: [[FCMP76:%.*]] = fcmp olt double [[SELECT40]], 0.000000e+00
+; COUNT-1-NEXT: [[SELECT77:%.*]] = select i1 [[FCMP76]], double 0.000000e+00, double 1.000000e+00
+; COUNT-1-NEXT: [[FCMP90:%.*]] = fcmp ogt double [[SELECT639]], 0.000000e+00
+; COUNT-1-NEXT: [[SELECT91:%.*]] = select i1 [[FCMP90]], double 0.000000e+00, double [[LOAD2]]
+; COUNT-1-NEXT: [[FCMP92:%.*]] = fcmp ogt double [[SELECT77]], 0.000000e+00
+; COUNT-1-NEXT: [[SELECT93:%.*]] = select i1 [[FCMP92]], double 0.000000e+00, double [[LOAD2]]
+; COUNT-1-NEXT: [[FCMP108:%.*]] = fcmp olt double [[SELECT93]], 0.000000e+00
+; COUNT-1-NEXT: [[SELECT109:%.*]] = select i1 [[FCMP108]], double 1.000000e+00, double 0.000000e+00
+; COUNT-1-NEXT: [[FCMP110:%.*]] = fcmp olt double [[SELECT91]], 0.000000e+00
+; COUNT-1-NEXT: [[SELECT111:%.*]] = select i1 [[FCMP110]], double 1.000000e+00, double 0.000000e+00
+; COUNT-1-NEXT: store double [[SELECT111]], ptr [[GETELEMENTPTR21]], align 8
+; COUNT-1-NEXT: store double [[SELECT109]], ptr [[GETELEMENTPTR13]], align 8
+; COUNT-1-NEXT: ret void
+;
+bb:
+ %getelementptr = getelementptr double, ptr %arg, i64 1
+ %load = load double, ptr %getelementptr, align 8
+ %fcmp = fcmp olt double %load, 0.000000e+00
+ %select3 = select i1 %fcmp, double 0.000000e+00, double %load
+ %select4 = select i1 %fcmp3, double 0.000000e+00, double %load
+ %load7 = load double, ptr %arg, align 8
+ %select10 = select i1 %fcmp3, double 0.000000e+00, double %load7
+ %fcmp11 = fcmp olt double %load2, 0.000000e+00
+ %select128 = select i1 %fcmp11, double 0.000000e+00, double %load7
+ %getelementptr13 = getelementptr double, ptr %arg, i64 3
+ %getelementptr21 = getelementptr double, ptr %arg, i64 4
+ %fcmp23 = fcmp olt double %select10, 0.000000e+00
+ %select24 = select i1 %fcmp23, double 0.000000e+00, double 1.000000e+00
+ %fcmp29 = fcmp olt double %select4, 0.000000e+00
+ %select30 = select i1 %fcmp29, double 1.000000e+00, double 0.000000e+00
+ %fcmp33 = fcmp olt double %select24, %select3
+ %select34 = select i1 %fcmp33, double 0.000000e+00, double %load2
+ %fcmp39 = fcmp olt double %select30, %select128
+ %select40 = select i1 %fcmp39, double %load2, double 0.000000e+00
+ %fcmp62 = fcmp olt double %select34, 0.000000e+00
+ %select639 = select i1 %fcmp62, double 0.000000e+00, double 1.000000e+00
+ %fcmp76 = fcmp olt double %select40, 0.000000e+00
+ %select77 = select i1 %fcmp76, double 0.000000e+00, double 1.000000e+00
+ %fcmp90 = fcmp ogt double %select639, 0.000000e+00
+ %select91 = select i1 %fcmp90, double 0.000000e+00, double %load2
+ %fcmp92 = fcmp ogt double %select77, 0.000000e+00
+ %select93 = select i1 %fcmp92, double 0.000000e+00, double %load2
+ %fcmp108 = fcmp olt double %select93, 0.000000e+00
+ %select109 = select i1 %fcmp108, double 1.000000e+00, double 0.000000e+00
+ %fcmp110 = fcmp olt double %select91, 0.000000e+00
+ %select111 = select i1 %fcmp110, double 1.000000e+00, double 0.000000e+00
+ store double %select111, ptr %g...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the very quick PR! Can you add `Fixes #110725` in the description?
@@ -11175,6 +11179,9 @@ bool BoUpSLP::isLoadCombineCandidate(ArrayRef<Value *> Stores) const {
}

bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think ideally we'd only increment the counter and force `return true` in the cases where this function would otherwise return `false`, but ultimately it doesn't matter too much; it just makes the counter ranges larger.
Fixes #110725 Reviewers: aeubanks Reviewed By: aeubanks Pull Request: llvm/llvm-project#110734
Fixes llvm#110725 Reviewers: aeubanks Reviewed By: aeubanks Pull Request: llvm#110734
Fixes llvm#110725 Reviewers: aeubanks Reviewed By: aeubanks Pull Request: llvm#110734
Fixes llvm#110725 Reviewers: aeubanks Reviewed By: aeubanks Pull Request: llvm#110734
Fixes #110725