Skip to content

Commit 020b5fe

Browse files
committed
[DSE] Add predicated vector length store support for masked store elimination
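isMaskedStoreOverwrite determines whether one masked store completely overwrites another. This commit extends it to handle predicated vector-length stores (llvm.vp.store), so that DSE also eliminates this variant of masked store when the killing store uses the same mask and the same vector length as the dead store.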
1 parent 616f447 commit 020b5fe

File tree

2 files changed

+62
-14
lines changed

2 files changed

+62
-14
lines changed

llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -248,28 +248,43 @@ static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
248248
return OW_Unknown;
249249
if (KillingII->getIntrinsicID() != DeadII->getIntrinsicID())
250250
return OW_Unknown;
251-
if (KillingII->getIntrinsicID() == Intrinsic::masked_store) {
252-
// Type size.
253-
VectorType *KillingTy =
254-
cast<VectorType>(KillingII->getArgOperand(0)->getType());
255-
VectorType *DeadTy = cast<VectorType>(DeadII->getArgOperand(0)->getType());
256-
if (KillingTy->getScalarSizeInBits() != DeadTy->getScalarSizeInBits())
251+
252+
switch (KillingII->getIntrinsicID()) {
253+
case Intrinsic::masked_store:
254+
case Intrinsic::vp_store: {
255+
const DataLayout &DL = KillingII->getDataLayout();
256+
auto *KillingTy = KillingII->getArgOperand(0)->getType();
257+
auto *DeadTy = DeadII->getArgOperand(0)->getType();
258+
if (DL.getTypeSizeInBits(KillingTy) != DL.getTypeSizeInBits(DeadTy))
257259
return OW_Unknown;
258260
// Element count.
259-
if (KillingTy->getElementCount() != DeadTy->getElementCount())
261+
if (cast<VectorType>(KillingTy)->getElementCount() !=
262+
cast<VectorType>(DeadTy)->getElementCount())
260263
return OW_Unknown;
261264
// Pointers.
262-
Value *KillingPtr = KillingII->getArgOperand(1)->stripPointerCasts();
263-
Value *DeadPtr = DeadII->getArgOperand(1)->stripPointerCasts();
265+
auto *KillingPtr = KillingII->getArgOperand(1);
266+
auto *DeadPtr = DeadII->getArgOperand(1);
264267
if (KillingPtr != DeadPtr && !AA.isMustAlias(KillingPtr, DeadPtr))
265268
return OW_Unknown;
266-
// Masks.
267-
// TODO: check that KillingII's mask is a superset of the DeadII's mask.
268-
if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
269-
return OW_Unknown;
269+
if (KillingII->getIntrinsicID() == Intrinsic::masked_store) {
270+
// Masks.
271+
// TODO: check that KillingII's mask is a superset of the DeadII's mask.
272+
if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
273+
return OW_Unknown;
274+
} else if (KillingII->getIntrinsicID() == Intrinsic::vp_store) {
275+
// Masks.
276+
// TODO: check that KillingII's mask is a superset of the DeadII's mask.
277+
if (KillingII->getArgOperand(2) != DeadII->getArgOperand(2))
278+
return OW_Unknown;
279+
// Lengths.
280+
if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
281+
return OW_Unknown;
282+
}
270283
return OW_Complete;
271284
}
272-
return OW_Unknown;
285+
default:
286+
return OW_Unknown;
287+
}
273288
}
274289

275290
/// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -passes=dse -S < %s | FileCheck %s
3+
4+
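; Test that DSE eliminates a predicated vector-length store (llvm.vp.store) that is completely overwritten by a later llvm.vp.store to the same pointer with the same mask and vector length.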
5+
6+
define void @foo(ptr %a, i32 %vl, <vscale x 8 x i32> %v1, <vscale x 8 x i32> %v2) {
7+
;
8+
; CHECK-LABEL: @foo(
9+
; CHECK-NEXT: [[VP_OP:%.*]] = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> [[V1:%.*]], <vscale x 8 x i32> [[V2:%.*]], <vscale x 8 x i1> splat (i1 true), i32 [[VL:%.*]])
10+
; CHECK-NEXT: call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> [[VP_OP]], ptr nonnull [[A:%.*]], <vscale x 8 x i1> splat (i1 true), i32 [[VL]]), !alias.scope [[META0:![0-9]+]], !noalias [[META5:![0-9]+]]
11+
; CHECK-NEXT: ret void
12+
;
13+
call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %v1, ptr nonnull %a, <vscale x 8 x i1> splat (i1 true), i32 %vl), !alias.scope !34, !noalias !37
14+
%vp.op = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %v1, <vscale x 8 x i32> %v2, <vscale x 8 x i1> splat (i1 true), i32 %vl)
15+
call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %vp.op, ptr nonnull %a, <vscale x 8 x i1> splat (i1 true), i32 %vl), !alias.scope !34, !noalias !37
16+
ret void
17+
}
18+
19+
declare <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, i32)
20+
declare void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32>, ptr nocapture, <vscale x 8 x i1>, i32)
21+
22+
!11 = !{!"omnipotent char", !12, i64 0}
23+
!12 = !{!"Simple C/C++ TBAA"}
24+
!13 = !{!"int", !11, i64 0}
25+
!16 = !{!13, !13, i64 0}
26+
!28 = distinct !{!28, !"LVerDomain"}
27+
!30 = distinct !{!30, !"LVerDomain"}
28+
!34 = !{!35, !36}
29+
!35 = distinct !{!35, !28}
30+
!36 = distinct !{!36, !30}
31+
!37 = !{!38, !39}
32+
!38 = distinct !{!38, !28}
33+
!39 = distinct !{!39, !28}

0 commit comments

Comments
 (0)