Skip to content

Commit de176d8

Browse files
authored
[SCEV][LV] Invalidate LCSSA exit phis more thoroughly (#69909)
This is an alternative to #69886. The basic problem is that SCEV can look through trivial LCSSA phis. When the phi node later becomes non-trivial, we do invalidate it, but this doesn't catch uses that are not covered by the IR use-def walk, such as those in BECounts. Fix this by adding a special invalidation method for LCSSA phis, which will also invalidate all the SCEVUnknowns/SCEVAddRecExprs used by the LCSSA phi node and defined in the loop. We should probably also use this invalidation method in other places that add predecessors to exit blocks, such as loop unrolling and loop peeling. Fixes #69097. Fixes #66616. Fixes #63970.
1 parent 47b8763 commit de176d8

File tree

4 files changed

+138
-1
lines changed

4 files changed

+138
-1
lines changed

llvm/include/llvm/Analysis/ScalarEvolution.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -943,6 +943,10 @@ class ScalarEvolution {
943943
/// def-use chain linking it to a loop.
944944
void forgetValue(Value *V);
945945

946+
/// Forget LCSSA phi node V of loop L to which a new predecessor was added,
947+
/// such that it may no longer be trivial.
///
/// Besides the regular forgetValue(V) invalidation, this also drops the
/// memoized results of every SCEVUnknown (whose value is an instruction in
/// L) and SCEVAddRecExpr (whose loop is contained in L) reachable from the
/// SCEV currently cached for V. Does nothing if V's type is not SCEVable.
948+
void forgetLcssaPhiWithNewPredecessor(Loop *L, PHINode *V);
949+
946950
/// Called when the client has changed the disposition of values in
947951
/// this loop.
948952
///

llvm/lib/Analysis/ScalarEvolution.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8410,6 +8410,44 @@ void ScalarEvolution::forgetValue(Value *V) {
84108410
forgetMemoizedResults(ToForget);
84118411
}
84128412

8413+
/// Invalidate cached SCEV state for the LCSSA phi \p V of loop \p L after a
/// new predecessor has been added to its block.
///
/// While the phi was trivial, SCEV may have looked through it, so cached
/// expressions can refer directly to SCEVUnknowns / SCEVAddRecExprs that are
/// defined inside \p L. Those are collected from the SCEV currently cached for
/// \p V and their memoized results are dropped, after which the regular
/// forgetValue() invalidation runs. Nothing happens for non-SCEVable types.
void ScalarEvolution::forgetLcssaPhiWithNewPredecessor(Loop *L, PHINode *V) {
  // Visitor handed to visitAll(): records each sub-expression that is defined
  // inside the loop.
  struct LoopDefinedRootFinder {
    Loop *TheLoop;
    SmallVector<const SCEV *, 8> Found;

    explicit LoopDefinedRootFinder(Loop *TheLoop) : TheLoop(TheLoop) {}

    // True for a SCEVUnknown wrapping an instruction inside the loop, or for
    // an AddRec whose loop is contained in the loop.
    bool isDefinedInLoop(const SCEV *Expr) const {
      if (auto *Unknown = dyn_cast<SCEVUnknown>(Expr)) {
        auto *Inst = dyn_cast<Instruction>(Unknown->getValue());
        return Inst && TheLoop->contains(Inst);
      }
      if (auto *Rec = dyn_cast<SCEVAddRecExpr>(Expr))
        return TheLoop->contains(Rec->getLoop());
      return false;
    }

    bool follow(const SCEV *Expr) {
      if (isDefinedInLoop(Expr))
        Found.push_back(Expr);
      return true;
    }

    bool isDone() const { return false; }
  };

  if (!isSCEVable(V->getType()))
    return;

  // Only an already-cached SCEV can have leaked loop-defined values; if there
  // is none, only the normal invalidation below is needed.
  if (const SCEV *Existing = getExistingSCEV(V)) {
    LoopDefinedRootFinder Finder(L);
    visitAll(Existing, Finder);
    forgetMemoizedResults(Finder.Found);
  }

  // The ordinary per-value invalidation is still required.
  forgetValue(V);
}
8450+
84138451
void ScalarEvolution::forgetLoopDispositions() { LoopDispositions.clear(); }
84148452

84158453
void ScalarEvolution::forgetBlockAndLoopDispositions(Value *V) {

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3599,7 +3599,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
35993599
OrigLoop->getExitBlocks(ExitBlocks);
36003600
for (BasicBlock *Exit : ExitBlocks)
36013601
for (PHINode &PN : Exit->phis())
3602-
PSE.getSE()->forgetValue(&PN);
3602+
PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN);
36033603

36043604
VPBasicBlock *LatchVPBB = Plan.getVectorLoopRegion()->getExitingBasicBlock();
36053605
Loop *VectorLoop = LI->getLoopFor(State.CFG.VPBB2IRBB[LatchVPBB]);
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
2+
; RUN: opt -passes="print<scalar-evolution>,loop-vectorize" --verify-scev -S < %s -force-vector-width=4 2>/dev/null | FileCheck %s
3+
4+
; Make sure users of SCEVUnknowns from the scalar loop are invalidated.
5+
6+
define void @pr66616(ptr %ptr) {
7+
; CHECK-LABEL: define void @pr66616(
8+
; CHECK-SAME: ptr [[PTR:%.*]]) {
9+
; CHECK-NEXT: entry:
10+
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
11+
; CHECK: vector.ph:
12+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
13+
; CHECK: vector.body:
14+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
15+
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[PTR]], align 4
16+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0
17+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
18+
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 1, i32 1, i32 1, i32 1>
19+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
20+
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
21+
; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
22+
; CHECK: middle.block:
23+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
24+
; CHECK-NEXT: br i1 true, label [[PREHEADER:%.*]], label [[SCALAR_PH]]
25+
; CHECK: scalar.ph:
26+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
27+
; CHECK-NEXT: br label [[LOOP_1:%.*]]
28+
; CHECK: loop.1:
29+
; CHECK-NEXT: [[IV_1:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[LOOP_1]] ]
30+
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[PTR]], align 4
31+
; CHECK-NEXT: [[ADD3:%.*]] = add i32 [[LOAD]], 1
32+
; CHECK-NEXT: [[INC]] = add i8 [[IV_1]], 1
33+
; CHECK-NEXT: [[COND1:%.*]] = icmp eq i8 [[INC]], 0
34+
; CHECK-NEXT: br i1 [[COND1]], label [[PREHEADER]], label [[LOOP_1]], !llvm.loop [[LOOP3:![0-9]+]]
35+
; CHECK: preheader:
36+
; CHECK-NEXT: [[ADD3_LCSSA:%.*]] = phi i32 [ [[ADD3]], [[LOOP_1]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
37+
; CHECK-NEXT: [[TMP4:%.*]] = sub i32 0, [[ADD3_LCSSA]]
38+
; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
39+
; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP5]], 1
40+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP6]], 4
41+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH2:%.*]], label [[VECTOR_PH3:%.*]]
42+
; CHECK: vector.ph3:
43+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP6]], 4
44+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP6]], [[N_MOD_VF]]
45+
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32
46+
; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[ADD3_LCSSA]], [[DOTCAST]]
47+
; CHECK-NEXT: [[IND_END5:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[N_VEC]]
48+
; CHECK-NEXT: br label [[VECTOR_BODY7:%.*]]
49+
; CHECK: vector.body7:
50+
; CHECK-NEXT: [[INDEX8:%.*]] = phi i64 [ 0, [[VECTOR_PH3]] ], [ [[INDEX_NEXT9:%.*]], [[VECTOR_BODY7]] ]
51+
; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX8]], 4
52+
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC]]
53+
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK1:%.*]], label [[VECTOR_BODY7]], !llvm.loop [[LOOP4:![0-9]+]]
54+
; CHECK: middle.block1:
55+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP6]], [[N_VEC]]
56+
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH2]]
57+
; CHECK: scalar.ph2:
58+
; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK1]] ], [ [[ADD3_LCSSA]], [[PREHEADER]] ]
59+
; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[IND_END5]], [[MIDDLE_BLOCK1]] ], [ [[PTR]], [[PREHEADER]] ]
60+
; CHECK-NEXT: br label [[LOOP_2:%.*]]
61+
; CHECK: loop.2:
62+
; CHECK-NEXT: [[IV_2:%.*]] = phi i32 [ [[IV_2_I:%.*]], [[LOOP_2]] ], [ [[BC_RESUME_VAL4]], [[SCALAR_PH2]] ]
63+
; CHECK-NEXT: [[IV_3:%.*]] = phi ptr [ [[IV_3_I:%.*]], [[LOOP_2]] ], [ [[BC_RESUME_VAL6]], [[SCALAR_PH2]] ]
64+
; CHECK-NEXT: [[IV_2_I]] = add i32 [[IV_2]], 1
65+
; CHECK-NEXT: [[IV_3_I]] = getelementptr i8, ptr [[IV_3]], i64 1
66+
; CHECK-NEXT: [[COND2:%.*]] = icmp eq i32 [[IV_2]], 0
67+
; CHECK-NEXT: br i1 [[COND2]], label [[EXIT]], label [[LOOP_2]], !llvm.loop [[LOOP5:![0-9]+]]
68+
; CHECK: exit:
69+
; CHECK-NEXT: ret void
70+
;
71+
entry:
72+
br label %loop.1
73+
74+
; First loop: %iv.1 is an i8 counter that starts at 0 and leaves the loop once
; %inc wraps back to 0. %add3 is computed inside the loop from a load of %ptr.
loop.1:
75+
%iv.1 = phi i8 [ 0, %entry ], [ %inc, %loop.1 ]
76+
%load = load i32, ptr %ptr, align 4
77+
%add3 = add i32 %load, 1
78+
%inc = add i8 %iv.1, 1
79+
%cond1 = icmp eq i8 %inc, 0
80+
br i1 %cond1, label %preheader, label %loop.1
81+
82+
; Exit block of loop.1 and preheader of loop.2. The input has no phi for %add3
; here; the expected output above has a two-input phi for it in this block.
preheader:
83+
br label %loop.2
84+
85+
; Second loop: %iv.2 starts at %add3 (defined in loop.1) and is incremented
; until it equals 0; %iv.3 is a pointer induction that starts at %ptr.
loop.2:
86+
%iv.2 = phi i32 [ %iv.2.i, %loop.2 ], [ %add3, %preheader ]
87+
%iv.3 = phi ptr [ %iv.3.i, %loop.2 ], [ %ptr, %preheader ]
88+
%iv.2.i = add i32 %iv.2, 1
89+
%iv.3.i = getelementptr i8, ptr %iv.3, i64 1
90+
%cond2 = icmp eq i32 %iv.2, 0
91+
br i1 %cond2, label %exit, label %loop.2
92+
93+
exit:
94+
ret void
95+
}

0 commit comments

Comments
 (0)