Skip to content

Commit c0a08ae

Browse files
committed
[DSE] Add predicated vector length store support for masked store elimination
isMaskedStoreOverwrite handles pairs of masked stores in which the later (killing) store fully overwrites the earlier (dead) one. This change adds support for predicated vector length stores (llvm.vp.store), so that DSE also eliminates this variant of masked store.
1 parent 616f447 commit c0a08ae

File tree

2 files changed

+70
-0
lines changed

2 files changed

+70
-0
lines changed

llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,42 @@ static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
269269
return OW_Unknown;
270270
return OW_Complete;
271271
}
272+
if (KillingII->getIntrinsicID() == Intrinsic::vp_store) {
273+
// Operands {0 , 1 , 2 , 3 }
274+
// {StoredVal, VecPtr, Mask, VL}
275+
// Types.
276+
VectorType *KillingTy =
277+
cast<VectorType>(KillingII->getArgOperand(0)->getType());
278+
VectorType *DeadTy = cast<VectorType>(DeadII->getArgOperand(0)->getType());
279+
if (KillingTy->getScalarSizeInBits() != DeadTy->getScalarSizeInBits())
280+
return OW_Unknown;
281+
// Element count.
282+
if (KillingTy->getElementCount() != DeadTy->getElementCount())
283+
return OW_Unknown;
284+
// Pointers.
285+
Value *KillingPtr = KillingII->getArgOperand(1)->stripPointerCasts();
286+
Value *DeadPtr = DeadII->getArgOperand(1)->stripPointerCasts();
287+
if (KillingPtr != DeadPtr && !AA.isMustAlias(KillingPtr, DeadPtr))
288+
return OW_Unknown;
289+
// Masks.
290+
// TODO: check that KillingII's mask is a superset of the DeadII's mask.
291+
if (KillingII->getArgOperand(2) != DeadII->getArgOperand(2))
292+
return OW_Unknown;
293+
// Lengths.
294+
if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
295+
return OW_Unknown;
296+
AAMDNodes KillingAA = KillingII->getAAMetadata();
297+
AAMDNodes DeadAA = DeadII->getAAMetadata();
298+
// There must be scoped noalias metadata on both stores.
299+
if (!KillingAA.Scope || !DeadAA.Scope ||
300+
!KillingAA.NoAlias || !DeadAA.NoAlias)
301+
return OW_Unknown;
302+
// Check that both stores have the same scope and noalias metadata.
303+
if (KillingAA.Scope != DeadAA.Scope ||
304+
KillingAA.NoAlias != DeadAA.NoAlias)
305+
return OW_Unknown;
306+
return OW_Complete;
307+
}
272308
return OW_Unknown;
273309
}
274310

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -passes=dse -S < %s | FileCheck %s
3+
target triple = "riscv64-unknown-linux-gnu"
4+
5+
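; Test that DSE eliminates a predicated vector length store (llvm.vp.store) that is fully overwritten by a later vp.store to the same pointer with the same mask and vector length.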
6+
7+
define void @foo(ptr %a, i32 %vl, <vscale x 8 x i32> %v1, <vscale x 8 x i32> %v2) {
8+
;
9+
; CHECK-LABEL: @foo(
10+
; CHECK-NEXT: [[VP_OP:%.*]] = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> [[V1:%.*]], <vscale x 8 x i32> [[V2:%.*]], <vscale x 8 x i1> splat (i1 true), i32 [[VL:%.*]])
11+
; CHECK-NEXT: call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> [[VP_OP]], ptr nonnull [[A:%.*]], <vscale x 8 x i1> splat (i1 true), i32 [[VL]]), !alias.scope [[META0:![0-9]+]], !noalias [[META5:![0-9]+]]
12+
; CHECK-NEXT: ret void
13+
;
14+
call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %v1, ptr nonnull %a, <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i32 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 %vl), !tbaa !16, !alias.scope !34, !noalias !37
15+
%vp.op = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %v1, <vscale x 8 x i32> %v2, <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i32 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 %vl)
16+
call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %vp.op, ptr nonnull %a, <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i32 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 %vl), !alias.scope !34, !noalias !37
17+
ret void
18+
}
19+
20+
declare <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, i32)
21+
declare void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32>, ptr nocapture, <vscale x 8 x i1>, i32)
22+
23+
!11 = !{!"omnipotent char", !12, i64 0}
24+
!12 = !{!"Simple C/C++ TBAA"}
25+
!13 = !{!"int", !11, i64 0}
26+
!16 = !{!13, !13, i64 0}
27+
!28 = distinct !{!28, !"LVerDomain"}
28+
!30 = distinct !{!30, !"LVerDomain"}
29+
!34 = !{!35, !36}
30+
!35 = distinct !{!35, !28}
31+
!36 = distinct !{!36, !30}
32+
!37 = !{!38, !39}
33+
!38 = distinct !{!38, !28}
34+
!39 = distinct !{!39, !28}

0 commit comments

Comments
 (0)