-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[DSE] Add predicated vector length store support for masked store elimination #134175
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Michael Berg (mcberg2021) Changes…mination In isMaskedStoreOverwrite we process two stores that fully overwrite one another, here we add support for predicated vector length stores so that DSE will eliminate this variant of masked stores. This is the following up installment mentioned in: https://reviews.llvm.org/D132700 Full diff: https://github.com/llvm/llvm-project/pull/134175.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 935f21fd484f3..22eaeeafcf786 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -269,6 +269,42 @@ static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
return OW_Unknown;
return OW_Complete;
}
+ if (KillingII->getIntrinsicID() == Intrinsic::vp_store) {
+ // Operands {0 , 1 , 2 , 3 }
+ // {StoredVal, VecPtr, Mask, VL}
+ // Types.
+ VectorType *KillingTy =
+ cast<VectorType>(KillingII->getArgOperand(0)->getType());
+ VectorType *DeadTy = cast<VectorType>(DeadII->getArgOperand(0)->getType());
+ if (KillingTy->getScalarSizeInBits() != DeadTy->getScalarSizeInBits())
+ return OW_Unknown;
+ // Element count.
+ if (KillingTy->getElementCount() != DeadTy->getElementCount())
+ return OW_Unknown;
+ // Pointers.
+ Value *KillingPtr = KillingII->getArgOperand(1)->stripPointerCasts();
+ Value *DeadPtr = DeadII->getArgOperand(1)->stripPointerCasts();
+ if (KillingPtr != DeadPtr && !AA.isMustAlias(KillingPtr, DeadPtr))
+ return OW_Unknown;
+ // Masks.
+ // TODO: check that KillingII's mask is a superset of the DeadII's mask.
+ if (KillingII->getArgOperand(2) != DeadII->getArgOperand(2))
+ return OW_Unknown;
+ // Lengths.
+ if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
+ return OW_Unknown;
+ AAMDNodes KillingAA = KillingII->getAAMetadata();
+ AAMDNodes DeadAA = DeadII->getAAMetadata();
+ // There must be scoped noalias metadata on both stores.
+ if (!KillingAA.Scope || !DeadAA.Scope ||
+ !KillingAA.NoAlias || !DeadAA.NoAlias)
+ return OW_Unknown;
+ // Check that both stores have the same scope and noalias metadata.
+ if (KillingAA.Scope != DeadAA.Scope ||
+ KillingAA.NoAlias != DeadAA.NoAlias)
+ return OW_Unknown;
+ return OW_Complete;
+ }
return OW_Unknown;
}
diff --git a/llvm/test/Transforms/DeadStoreElimination/dead-vp.store.ll b/llvm/test/Transforms/DeadStoreElimination/dead-vp.store.ll
new file mode 100644
index 0000000000000..825acd16a9d2a
--- /dev/null
+++ b/llvm/test/Transforms/DeadStoreElimination/dead-vp.store.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=dse -S < %s | FileCheck %s
+target triple = "riscv64-unknown-linux-gnu"
+
+; Test predicated vector length masked stores for elimination
+
+define void @foo(ptr %a, i32 %vl, <vscale x 8 x i32> %v1, <vscale x 8 x i32> %v2) {
+;
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: [[VP_OP:%.*]] = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> [[V1:%.*]], <vscale x 8 x i32> [[V2:%.*]], <vscale x 8 x i1> splat (i1 true), i32 [[VL:%.*]])
+; CHECK-NEXT: call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> [[VP_OP]], ptr nonnull [[A:%.*]], <vscale x 8 x i1> splat (i1 true), i32 [[VL]]), !alias.scope [[META0:![0-9]+]], !noalias [[META5:![0-9]+]]
+; CHECK-NEXT: ret void
+;
+ call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %v1, ptr nonnull %a, <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i32 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 %vl), !tbaa !16, !alias.scope !34, !noalias !37
+ %vp.op = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %v1, <vscale x 8 x i32> %v2, <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i32 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 %vl)
+ call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %vp.op, ptr nonnull %a, <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i32 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 %vl), !alias.scope !34, !noalias !37
+ ret void
+}
+
+declare <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, i32)
+declare void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32>, ptr nocapture, <vscale x 8 x i1>, i32)
+
+!11 = !{!"omnipotent char", !12, i64 0}
+!12 = !{!"Simple C/C++ TBAA"}
+!13 = !{!"int", !11, i64 0}
+!16 = !{!13, !13, i64 0}
+!28 = distinct !{!28, !"LVerDomain"}
+!30 = distinct !{!30, !"LVerDomain"}
+!34 = !{!35, !36}
+!35 = distinct !{!35, !28}
+!36 = distinct !{!36, !30}
+!37 = !{!38, !39}
+!38 = distinct !{!38, !28}
+!39 = distinct !{!39, !28}
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
c0a08ae
to
5d1d22a
Compare
@@ -0,0 +1,34 @@ | |||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | |||
; RUN: opt -passes=dse -S < %s | FileCheck %s | |||
target triple = "riscv64-unknown-linux-gnu" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Drop triple.
; | ||
call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %v1, ptr nonnull %a, <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i32 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 %vl), !tbaa !16, !alias.scope !34, !noalias !37 | ||
%vp.op = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %v1, <vscale x 8 x i32> %v2, <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i32 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 %vl) | ||
call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %vp.op, ptr nonnull %a, <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i32 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 %vl), !alias.scope !34, !noalias !37 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you please convert these to use splat
expressions? I find it very hard to understand what is going on in this test.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, the test is pretty old...
// Operands {0 , 1 , 2 , 3 } | ||
// {StoredVal, VecPtr, Mask, VL} | ||
// Types. | ||
VectorType *KillingTy = |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
VectorType *KillingTy = | |
auto *KillingTy = |
Use auto with cast.
VectorType *KillingTy = | ||
cast<VectorType>(KillingII->getArgOperand(0)->getType()); | ||
VectorType *DeadTy = cast<VectorType>(DeadII->getArgOperand(0)->getType()); | ||
if (KillingTy->getScalarSizeInBits() != DeadTy->getScalarSizeInBits()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This won't work correctly for vectors of pointers. Need to use DL.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess the masked store handling earlier in this function has the same issue?
return OW_Unknown; | ||
// Pointers. | ||
Value *KillingPtr = KillingII->getArgOperand(1)->stripPointerCasts(); | ||
Value *DeadPtr = DeadII->getArgOperand(1)->stripPointerCasts(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
stripPointerCasts not needed.
return OW_Unknown; | ||
// Check that both stores have the same scope and noalias metadata. | ||
if (KillingAA.Scope != DeadAA.Scope || KillingAA.NoAlias != DeadAA.NoAlias) | ||
return OW_Unknown; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm really confused by what you're trying to do with the AA metadata here. Why do they need to have matching noalias/alias.scope metadata?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It was mostly a constraint for the same store, I will try it without this part (internally).
5d1d22a
to
020b5fe
Compare
I updated the code to share between the two intrinsics, the segment in question is removed, my testing shows its safe not to have it. The test was simplified too. |
020b5fe
to
a691e5d
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, this looks better.
Value *KillingPtr = KillingII->getArgOperand(1)->stripPointerCasts(); | ||
Value *DeadPtr = DeadII->getArgOperand(1)->stripPointerCasts(); | ||
auto *KillingPtr = KillingII->getArgOperand(1); | ||
auto *DeadPtr = DeadII->getArgOperand(1); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can keep Value
here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok
%vp.op = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %v1, <vscale x 8 x i32> %v2, <vscale x 8 x i1> splat (i1 true), i32 %vl) | ||
call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %vp.op, ptr nonnull %a, <vscale x 8 x i1> splat (i1 true), i32 %vl) | ||
ret void | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should have a negative test for different vl at least (but probably best to also do different mask, different pointer, different element size, even if those are the same as for masked stores).
…mination In isMaskedStoreOverwrite we process two stores that fully overwrite one another, here we add support for predicated vector length stores so that DSE will eliminate this variant of masked stores.
a691e5d
to
526a862
Compare
Additional comments? Or ready to approve? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks!
…mination (llvm#134175) In isMaskedStoreOverwrite we process two stores that fully overwrite one another, here we add support for predicated vector length stores so that DSE will eliminate this variant of masked stores. This is the follow up installment mentioned in: https://reviews.llvm.org/D132700
…mination (llvm#134175) In isMaskedStoreOverwrite we process two stores that fully overwrite one another, here we add support for predicated vector length stores so that DSE will eliminate this variant of masked stores. This is the follow up installment mentioned in: https://reviews.llvm.org/D132700
In isMaskedStoreOverwrite we process two stores that fully overwrite one another, here we add support for predicated vector length stores so that DSE will eliminate this variant of masked stores.
This is the follow up installment mentioned in: https://reviews.llvm.org/D132700