-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[VPlan] Support VPWidenCastRecipe in narrowToSingleScalarRecipes. #141080
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-llvm-transforms Author: Mel Chen (Mel-Chen) ChangesNarrow VPWidenCastRecipe if the operand being cast is uniform. Based on #141074 Full diff: https://github.com/llvm/llvm-project/pull/141080.diff 3 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 34633cd748eb1..4655e911ab3a1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1175,7 +1175,7 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) {
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
- if (!RepR && !isa<VPWidenRecipe>(&R))
+ if (!RepR && !isa<VPWidenRecipe, VPWidenCastRecipe>(&R))
continue;
if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
continue;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 28c1a6af2570b..5fb87b25c22cf 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -10,6 +10,7 @@
#define LLVM_TRANSFORMS_VECTORIZE_VPLANUTILS_H
#include "VPlan.h"
+#include "llvm/ADT/TypeSwitch.h"
namespace llvm {
class ScalarEvolution;
@@ -59,29 +60,37 @@ inline bool isSingleScalar(const VPValue *VPV) {
if (VPV->isLiveIn())
return true;
- if (auto *Rep = dyn_cast<VPReplicateRecipe>(VPV)) {
- const VPRegionBlock *RegionOfR = Rep->getParent()->getParent();
- // Don't consider recipes in replicate regions as uniform yet; their first
- // lane cannot be accessed when executing the replicate region for other
- // lanes.
- if (RegionOfR && RegionOfR->isReplicator())
- return false;
- return Rep->isSingleScalar() || (PreservesUniformity(Rep->getOpcode()) &&
- all_of(Rep->operands(), isSingleScalar));
- }
- if (isa<VPWidenGEPRecipe, VPDerivedIVRecipe, VPBlendRecipe>(VPV))
- return all_of(VPV->getDefiningRecipe()->operands(), isSingleScalar);
- if (auto *WidenR = dyn_cast<VPWidenRecipe>(VPV)) {
- return PreservesUniformity(WidenR->getOpcode()) &&
- all_of(WidenR->operands(), isSingleScalar);
- }
- if (auto *VPI = dyn_cast<VPInstruction>(VPV))
- return VPI->isSingleScalar() || VPI->isVectorToScalar() ||
- (PreservesUniformity(VPI->getOpcode()) &&
- all_of(VPI->operands(), isSingleScalar));
-
- // VPExpandSCEVRecipes must be placed in the entry and are alway uniform.
- return isa<VPExpandSCEVRecipe>(VPV);
+ return TypeSwitch<const VPValue *, bool>(VPV)
+ .Case<VPReplicateRecipe>([&](const auto *Rep) {
+ const VPRegionBlock *RegionOfR = Rep->getParent()->getParent();
+ // Don't consider recipes in replicate regions as uniform yet; their
+ // first lane cannot be accessed when executing the replicate region for
+ // other lanes.
+ if (RegionOfR && RegionOfR->isReplicator())
+ return false;
+ return Rep->isSingleScalar() ||
+ (PreservesUniformity(Rep->getOpcode()) &&
+ all_of(Rep->operands(), isSingleScalar));
+ })
+ .Case<VPWidenGEPRecipe, VPDerivedIVRecipe, VPBlendRecipe>(
+ [&](const auto *R) {
+ return all_of(R->getDefiningRecipe()->operands(), isSingleScalar);
+ })
+ .Case<VPWidenRecipe, VPWidenCastRecipe>([&](const auto *WidenR) {
+ return PreservesUniformity(WidenR->getOpcode()) &&
+ all_of(WidenR->operands(), isSingleScalar);
+ })
+ .Case<VPInstruction>([&](const auto *VPI) {
+ return VPI->isSingleScalar() || VPI->isVectorToScalar() ||
+ (PreservesUniformity(VPI->getOpcode()) &&
+ all_of(VPI->operands(), isSingleScalar));
+ })
+ .Case<VPExpandSCEVRecipe>([](const VPValue *) {
+ // VPExpandSCEVRecipes must be placed in the entry and are alway
+ // uniform.
+ return true;
+ })
+ .Default([](const VPValue *) { return false; });
}
/// Return true if \p V is a header mask in \p Plan.
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
index 7c42c3d9cd52e..fca0caa5cce31 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
@@ -378,8 +378,8 @@ define void @multi_exit(ptr %dst, ptr %src.1, ptr %src.2, i64 %A, i64 %B) #0 {
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <2 x i64> [[BROADCAST_SPLAT]], zeroinitializer
; CHECK-NEXT: [[TMP16:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT10]], zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = and <2 x i1> [[TMP16]], [[TMP15]]
-; CHECK-NEXT: [[TMP18:%.*]] = zext <2 x i1> [[TMP17]] to <2 x i8>
-; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i8> [[TMP18]], i32 1
+; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x i1> [[TMP17]], i32 0
+; CHECK-NEXT: [[TMP19:%.*]] = zext i1 [[TMP18]] to i8
; CHECK-NEXT: store i8 [[TMP19]], ptr [[DST]], align 1, !alias.scope [[META10:![0-9]+]], !noalias [[META12:![0-9]+]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not sure if this can be extended to WidenIntrinsic as well, perhaps as a follow-up?
c88e10b
to
d0cd550
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@Mel-Chen I think this one should be good to merge?
I suspect
|
d0cd550
to
515771b
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
515771b
to
b6a1865
Compare
b6a1865
to
67b238c
Compare
Ah yes, I remember seeing that when I originally added |
Narrow VPWidenCastRecipe if the operand being cast is uniform.