-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[SandboxVec][Legality] Don't vectorize when instructions repeat #124479
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This patch adds a legality check that looks for repeated instructions in a bundle and won't vectorize if such a pattern is found.
@llvm/pr-subscribers-llvm-transforms Author: vporpo (vporpo) Changes: This patch adds a legality check that looks for repeated instructions in a bundle and won't vectorize if such a pattern is found. Full diff: https://github.com/llvm/llvm-project/pull/124479.diff 4 Files Affected:
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h
index 156b788d8a2038..132b12a7b4e6c0 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h
@@ -92,6 +92,7 @@ enum class ResultReason {
DiffMathFlags,
DiffWrapFlags,
DiffBBs,
+ RepeatedInstrs,
NotConsecutive,
CantSchedule,
Unimplemented,
@@ -130,6 +131,8 @@ struct ToStr {
return "DiffWrapFlags";
case ResultReason::DiffBBs:
return "DiffBBs";
+ case ResultReason::RepeatedInstrs:
+ return "RepeatedInstrs";
case ResultReason::NotConsecutive:
return "NotConsecutive";
case ResultReason::CantSchedule:
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp
index 48bc246e4b56a9..62be90aee4e0e0 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp
@@ -219,6 +219,10 @@ const LegalityResult &LegalityAnalysis::canVectorize(ArrayRef<Value *> Bndl,
if (any_of(drop_begin(Bndl),
[BB](auto *V) { return cast<Instruction>(V)->getParent() != BB; }))
return createLegalityResult<Pack>(ResultReason::DiffBBs);
+ // Pack if instructions repeat, i.e., require some sort of broadcast.
+ SmallPtrSet<Value *, 8> Unique(Bndl.begin(), Bndl.end());
+ if (Unique.size() != Bndl.size())
+ return createLegalityResult<Pack>(ResultReason::RepeatedInstrs);
auto CollectDescrs = getHowToCollectValues(Bndl);
if (CollectDescrs.hasVectorInputs()) {
diff --git a/llvm/test/Transforms/SandboxVectorizer/repeated_instrs.ll b/llvm/test/Transforms/SandboxVectorizer/repeated_instrs.ll
new file mode 100644
index 00000000000000..6026e92ef9a824
--- /dev/null
+++ b/llvm/test/Transforms/SandboxVectorizer/repeated_instrs.ll
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="bottom-up-vec<>" %s -S | FileCheck %s
+
+define i32 @repeated_splat(ptr %ptr, i32 %v) #0 {
+; CHECK-LABEL: define i32 @repeated_splat(
+; CHECK-SAME: ptr [[PTR:%.*]], i32 [[V:%.*]]) {
+; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 0
+; CHECK-NEXT: [[VECL:%.*]] = load <2 x i32>, ptr [[GEP0]], align 4
+; CHECK-NEXT: [[SPLAT:%.*]] = add i32 [[V]], 0
+; CHECK-NEXT: [[PACK:%.*]] = insertelement <2 x i32> poison, i32 [[SPLAT]], i32 0
+; CHECK-NEXT: [[PACK1:%.*]] = insertelement <2 x i32> [[PACK]], i32 [[SPLAT]], i32 1
+; CHECK-NEXT: [[VEC:%.*]] = mul <2 x i32> [[VECL]], [[PACK1]]
+; CHECK-NEXT: store <2 x i32> [[VEC]], ptr [[GEP0]], align 4
+; CHECK-NEXT: ret i32 0
+;
+ %gep0 = getelementptr inbounds i32, ptr %ptr, i64 0
+ %gep1 = getelementptr inbounds i32, ptr %ptr, i64 1
+ %ld0 = load i32, ptr %gep0, align 4
+ %ld1 = load i32, ptr %gep1, align 4
+ %splat = add i32 %v, 0
+ %add0 = mul i32 %ld0, %splat
+ %add1 = mul i32 %ld1, %splat
+ store i32 %add0, ptr %gep0, align 4
+ store i32 %add1, ptr %gep1, align 4
+ ret i32 0
+}
+
+define i32 @repeated_partial(ptr %ptr, i32 %v) #0 {
+; CHECK-LABEL: define i32 @repeated_partial(
+; CHECK-SAME: ptr [[PTR:%.*]], i32 [[V:%.*]]) {
+; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 0
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 1
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 3
+; CHECK-NEXT: [[LD0:%.*]] = load i32, ptr [[GEP0]], align 4
+; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[GEP1]], align 4
+; CHECK-NEXT: [[LD3:%.*]] = load i32, ptr [[GEP3]], align 4
+; CHECK-NEXT: [[PACK:%.*]] = insertelement <4 x i32> poison, i32 [[LD0]], i32 0
+; CHECK-NEXT: [[PACK1:%.*]] = insertelement <4 x i32> [[PACK]], i32 [[LD1]], i32 1
+; CHECK-NEXT: [[PACK2:%.*]] = insertelement <4 x i32> [[PACK1]], i32 [[LD1]], i32 2
+; CHECK-NEXT: [[PACK3:%.*]] = insertelement <4 x i32> [[PACK2]], i32 [[LD3]], i32 3
+; CHECK-NEXT: [[VECL:%.*]] = load <4 x i32>, ptr [[GEP0]], align 4
+; CHECK-NEXT: [[SPLAT:%.*]] = add i32 [[V]], 0
+; CHECK-NEXT: [[VEC:%.*]] = mul <4 x i32> [[VECL]], [[PACK3]]
+; CHECK-NEXT: store <4 x i32> [[VEC]], ptr [[GEP0]], align 4
+; CHECK-NEXT: ret i32 0
+;
+ %gep0 = getelementptr inbounds i32, ptr %ptr, i64 0
+ %gep1 = getelementptr inbounds i32, ptr %ptr, i64 1
+ %gep2 = getelementptr inbounds i32, ptr %ptr, i64 2
+ %gep3 = getelementptr inbounds i32, ptr %ptr, i64 3
+ %ld0 = load i32, ptr %gep0, align 4
+ %ld1 = load i32, ptr %gep1, align 4
+ %ld2 = load i32, ptr %gep2, align 4
+ %ld3 = load i32, ptr %gep3, align 4
+ %splat = add i32 %v, 0
+ %add0 = mul i32 %ld0, %ld0
+ %add1 = mul i32 %ld1, %ld1
+ %add2 = mul i32 %ld2, %ld1
+ %add3 = mul i32 %ld3, %ld3
+ store i32 %add0, ptr %gep0, align 4
+ store i32 %add1, ptr %gep1, align 4
+ store i32 %add2, ptr %gep2, align 4
+ store i32 %add3, ptr %gep3, align 4
+ ret i32 0
+}
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp
index acc887f9dc6c1d..3c24214f0d87f2 100644
--- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp
@@ -225,6 +225,22 @@ define void @foo(ptr %ptr, <2 x float> %vec2, <3 x float> %vec3, i8 %arg, float
Legality.canVectorize({Ld0, Ld1}, /*SkipScheduling=*/true);
EXPECT_TRUE(isa<sandboxir::Widen>(Result));
}
+ {
+ // Check Repeated instructions (splat)
+ const auto &Result =
+ Legality.canVectorize({Ld0, Ld0}, /*SkipScheduling=*/true);
+ EXPECT_TRUE(isa<sandboxir::Pack>(Result));
+ EXPECT_EQ(cast<sandboxir::Pack>(Result).getReason(),
+ sandboxir::ResultReason::RepeatedInstrs);
+ }
+ {
+ // Check Repeated instructions (not splat)
+ const auto &Result =
+ Legality.canVectorize({Ld0, Ld1, Ld0}, /*SkipScheduling=*/true);
+ EXPECT_TRUE(isa<sandboxir::Pack>(Result));
+ EXPECT_EQ(cast<sandboxir::Pack>(Result).getReason(),
+ sandboxir::ResultReason::RepeatedInstrs);
+ }
}
TEST_F(LegalityTest, LegalitySchedule) {
|
@llvm/pr-subscribers-vectorizers Author: vporpo (vporpo) Changes: This patch adds a legality check that looks for repeated instructions in a bundle and won't vectorize if such a pattern is found. Full diff: https://github.com/llvm/llvm-project/pull/124479.diff 4 Files Affected:
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h
index 156b788d8a2038..132b12a7b4e6c0 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h
@@ -92,6 +92,7 @@ enum class ResultReason {
DiffMathFlags,
DiffWrapFlags,
DiffBBs,
+ RepeatedInstrs,
NotConsecutive,
CantSchedule,
Unimplemented,
@@ -130,6 +131,8 @@ struct ToStr {
return "DiffWrapFlags";
case ResultReason::DiffBBs:
return "DiffBBs";
+ case ResultReason::RepeatedInstrs:
+ return "RepeatedInstrs";
case ResultReason::NotConsecutive:
return "NotConsecutive";
case ResultReason::CantSchedule:
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp
index 48bc246e4b56a9..62be90aee4e0e0 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp
@@ -219,6 +219,10 @@ const LegalityResult &LegalityAnalysis::canVectorize(ArrayRef<Value *> Bndl,
if (any_of(drop_begin(Bndl),
[BB](auto *V) { return cast<Instruction>(V)->getParent() != BB; }))
return createLegalityResult<Pack>(ResultReason::DiffBBs);
+ // Pack if instructions repeat, i.e., require some sort of broadcast.
+ SmallPtrSet<Value *, 8> Unique(Bndl.begin(), Bndl.end());
+ if (Unique.size() != Bndl.size())
+ return createLegalityResult<Pack>(ResultReason::RepeatedInstrs);
auto CollectDescrs = getHowToCollectValues(Bndl);
if (CollectDescrs.hasVectorInputs()) {
diff --git a/llvm/test/Transforms/SandboxVectorizer/repeated_instrs.ll b/llvm/test/Transforms/SandboxVectorizer/repeated_instrs.ll
new file mode 100644
index 00000000000000..6026e92ef9a824
--- /dev/null
+++ b/llvm/test/Transforms/SandboxVectorizer/repeated_instrs.ll
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="bottom-up-vec<>" %s -S | FileCheck %s
+
+define i32 @repeated_splat(ptr %ptr, i32 %v) #0 {
+; CHECK-LABEL: define i32 @repeated_splat(
+; CHECK-SAME: ptr [[PTR:%.*]], i32 [[V:%.*]]) {
+; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 0
+; CHECK-NEXT: [[VECL:%.*]] = load <2 x i32>, ptr [[GEP0]], align 4
+; CHECK-NEXT: [[SPLAT:%.*]] = add i32 [[V]], 0
+; CHECK-NEXT: [[PACK:%.*]] = insertelement <2 x i32> poison, i32 [[SPLAT]], i32 0
+; CHECK-NEXT: [[PACK1:%.*]] = insertelement <2 x i32> [[PACK]], i32 [[SPLAT]], i32 1
+; CHECK-NEXT: [[VEC:%.*]] = mul <2 x i32> [[VECL]], [[PACK1]]
+; CHECK-NEXT: store <2 x i32> [[VEC]], ptr [[GEP0]], align 4
+; CHECK-NEXT: ret i32 0
+;
+ %gep0 = getelementptr inbounds i32, ptr %ptr, i64 0
+ %gep1 = getelementptr inbounds i32, ptr %ptr, i64 1
+ %ld0 = load i32, ptr %gep0, align 4
+ %ld1 = load i32, ptr %gep1, align 4
+ %splat = add i32 %v, 0
+ %add0 = mul i32 %ld0, %splat
+ %add1 = mul i32 %ld1, %splat
+ store i32 %add0, ptr %gep0, align 4
+ store i32 %add1, ptr %gep1, align 4
+ ret i32 0
+}
+
+define i32 @repeated_partial(ptr %ptr, i32 %v) #0 {
+; CHECK-LABEL: define i32 @repeated_partial(
+; CHECK-SAME: ptr [[PTR:%.*]], i32 [[V:%.*]]) {
+; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 0
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 1
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 3
+; CHECK-NEXT: [[LD0:%.*]] = load i32, ptr [[GEP0]], align 4
+; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[GEP1]], align 4
+; CHECK-NEXT: [[LD3:%.*]] = load i32, ptr [[GEP3]], align 4
+; CHECK-NEXT: [[PACK:%.*]] = insertelement <4 x i32> poison, i32 [[LD0]], i32 0
+; CHECK-NEXT: [[PACK1:%.*]] = insertelement <4 x i32> [[PACK]], i32 [[LD1]], i32 1
+; CHECK-NEXT: [[PACK2:%.*]] = insertelement <4 x i32> [[PACK1]], i32 [[LD1]], i32 2
+; CHECK-NEXT: [[PACK3:%.*]] = insertelement <4 x i32> [[PACK2]], i32 [[LD3]], i32 3
+; CHECK-NEXT: [[VECL:%.*]] = load <4 x i32>, ptr [[GEP0]], align 4
+; CHECK-NEXT: [[SPLAT:%.*]] = add i32 [[V]], 0
+; CHECK-NEXT: [[VEC:%.*]] = mul <4 x i32> [[VECL]], [[PACK3]]
+; CHECK-NEXT: store <4 x i32> [[VEC]], ptr [[GEP0]], align 4
+; CHECK-NEXT: ret i32 0
+;
+ %gep0 = getelementptr inbounds i32, ptr %ptr, i64 0
+ %gep1 = getelementptr inbounds i32, ptr %ptr, i64 1
+ %gep2 = getelementptr inbounds i32, ptr %ptr, i64 2
+ %gep3 = getelementptr inbounds i32, ptr %ptr, i64 3
+ %ld0 = load i32, ptr %gep0, align 4
+ %ld1 = load i32, ptr %gep1, align 4
+ %ld2 = load i32, ptr %gep2, align 4
+ %ld3 = load i32, ptr %gep3, align 4
+ %splat = add i32 %v, 0
+ %add0 = mul i32 %ld0, %ld0
+ %add1 = mul i32 %ld1, %ld1
+ %add2 = mul i32 %ld2, %ld1
+ %add3 = mul i32 %ld3, %ld3
+ store i32 %add0, ptr %gep0, align 4
+ store i32 %add1, ptr %gep1, align 4
+ store i32 %add2, ptr %gep2, align 4
+ store i32 %add3, ptr %gep3, align 4
+ ret i32 0
+}
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp
index acc887f9dc6c1d..3c24214f0d87f2 100644
--- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp
@@ -225,6 +225,22 @@ define void @foo(ptr %ptr, <2 x float> %vec2, <3 x float> %vec3, i8 %arg, float
Legality.canVectorize({Ld0, Ld1}, /*SkipScheduling=*/true);
EXPECT_TRUE(isa<sandboxir::Widen>(Result));
}
+ {
+ // Check Repeated instructions (splat)
+ const auto &Result =
+ Legality.canVectorize({Ld0, Ld0}, /*SkipScheduling=*/true);
+ EXPECT_TRUE(isa<sandboxir::Pack>(Result));
+ EXPECT_EQ(cast<sandboxir::Pack>(Result).getReason(),
+ sandboxir::ResultReason::RepeatedInstrs);
+ }
+ {
+ // Check Repeated instructions (not splat)
+ const auto &Result =
+ Legality.canVectorize({Ld0, Ld1, Ld0}, /*SkipScheduling=*/true);
+ EXPECT_TRUE(isa<sandboxir::Pack>(Result));
+ EXPECT_EQ(cast<sandboxir::Pack>(Result).getReason(),
+ sandboxir::ResultReason::RepeatedInstrs);
+ }
}
TEST_F(LegalityTest, LegalitySchedule) {
|
%gep3 = getelementptr inbounds i32, ptr %ptr, i64 3 | ||
%ld0 = load i32, ptr %gep0, align 4 | ||
%ld1 = load i32, ptr %gep1, align 4 | ||
%ld2 = load i32, ptr %gep2, align 4 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ld2 is not dead but is never generated in the IR, what am I missing?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah I got it, okay.
%ld1 = load i32, ptr %gep1, align 4 | ||
%ld2 = load i32, ptr %gep2, align 4 | ||
%ld3 = load i32, ptr %gep3, align 4 | ||
%splat = add i32 %v, 0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why not just remove the splat?
LLVM Buildbot has detected a new failure on a builder. Full details are available at: <https://lab.llvm.org/buildbot/#/builders/50/builds/9597>. Here is the relevant piece of the build log for reference:
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: <https://lab.llvm.org/buildbot/#/builders/73/builds/12693>. Here is the relevant piece of the build log for reference:
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: <https://lab.llvm.org/buildbot/#/builders/76/builds/6528>. Here is the relevant piece of the build log for reference:
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: <https://lab.llvm.org/buildbot/#/builders/72/builds/7706>. Here is the relevant piece of the build log for reference:
|
This patch adds a legality check that looks for repeated instructions in a bundle and won't vectorize if such a pattern is found.