Skip to content

Commit 80e8025

Browse files
committed
[LV] Workaround PR49900 (a crash due to analyzing partially mutated IR)
LoopVectorize has a fairly deeply baked-in design problem where it will try to query analyses (primarily SCEV, but also ValueTracking) in the midst of mutating IR. This is a problem because the intermediate IR state does not represent the semantics of the original (or final) program. Fixing this for real is hard, but all of the cases seen so far share a common symptom: the analysis being queried is the computation of the original loop's trip count. We can fix this particular instance of the issue by simply computing the trip count early and caching it. I want to be really clear that this is nothing but a workaround. It does nothing to fix the root issue and, at best, delays the time until we have to fix this for real. Florian and I have discussed an eventual solution in the review comments for https://reviews.llvm.org/D100663, but it's a lot of work. Test taken from https://reviews.llvm.org/D100663. Differential Revision: https://reviews.llvm.org/D101487
1 parent 9586121 commit 80e8025

File tree

2 files changed

+86
-0
lines changed

2 files changed

+86
-0
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3633,6 +3633,15 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
36333633
// Get the metadata of the original loop before it gets modified.
36343634
MDNode *OrigLoopID = OrigLoop->getLoopID();
36353635

3636+
// Workaround! Compute the trip count of the original loop and cache it
3637+
// before we start modifying the CFG. This code has a systemic problem
3638+
// wherein it tries to run analysis over partially constructed IR; this is
3639+
// wrong, and not simply for SCEV. The trip count of the original loop
3640+
// simply happens to be prone to hitting this in practice. In theory, we
3641+
// can hit the same issue for any SCEV or ValueTracking query done during
3642+
// mutation. See PR49900.
3643+
getOrCreateTripCount(OrigLoop);
3644+
36363645
// Create an empty vector loop, and prepare basic blocks for the runtime
36373646
// checks.
36383647
Loop *Lp = createVectorLoopSkeleton("");
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-interleave=2 -scev-verify-ir -S %s | FileCheck %s
2+
3+
; Make sure SCEV is not queried while the IR is temporarily invalid. The tests
4+
; deliberately do not check for details of the vectorized IR, because that's
5+
; not the focus of the test.
6+
7+
; Two-level loop nest with no memory accesses: %loop.0 is the outer loop and
; %loop.1 is the inner loop nested inside it.
define void @pr49538() {
8+
; CHECK-LABEL: @pr49538
9+
; CHECK: vector.body:
10+
;
11+
entry:
12+
br label %loop.0
13+
14+
loop.0: ; outer loop header
15+
%iv.0 = phi i16 [ -1, %entry ], [ %iv.0.next, %loop.0.latch ] ; outer IV, starts at -1
16+
br label %loop.1
17+
18+
loop.1: ; inner loop: single block that is both header and latch
19+
%iv.1 = phi i16 [ -1, %loop.0 ], [ %iv.1.next, %loop.1 ] ; inner IV, restarts at -1 on each outer iteration
20+
%iv.1.next = add nsw i16 %iv.1, 1
21+
%i6 = icmp eq i16 %iv.1.next, %iv.0 ; inner exit test compares against the outer IV
22+
br i1 %i6, label %loop.0.latch, label %loop.1
23+
24+
loop.0.latch: ; outer loop latch, reached only from %loop.1
25+
%i8 = phi i16 [ 1, %loop.1 ] ; single-incoming phi whose only value is the constant 1
26+
%iv.0.next = add nsw i16 %iv.0, 1
27+
%ec.0 = icmp eq i16 %iv.0.next, %i8 ; outer exit test goes through the phi rather than a literal 1
28+
br i1 %ec.0, label %exit, label %loop.0
29+
30+
exit:
31+
ret void
32+
}
33+
34+
; A self-looping block (%loop.0) followed by an outer loop (%loop.1) that runs
; two inner loops back to back (%loop.2, then %loop.3). The %ptr argument is
; not used anywhere in the body.
define void @pr49900(i32 %x, i64* %ptr) {
35+
; CHECK-LABEL: @pr49900
36+
; CHECK: vector.body{{.*}}:
37+
; CHECK: vector.body{{.*}}:
38+
;
39+
entry:
40+
br label %loop.0
41+
42+
loop.0: ; preds = %loop.0, %entry
43+
%ec.0 = icmp slt i32 %x, 0 ; condition depends only on the argument %x
44+
br i1 %ec.0, label %loop.0, label %loop.1.ph
45+
46+
loop.1.ph: ; preds = %loop.0
47+
br label %loop.1
48+
49+
loop.1: ; preds = %loop.1.latch, %loop.1.ph
50+
%iv.1 = phi i32 [ 0, %loop.1.ph ], [ %iv.3.next, %loop.1.latch ] ; backedge value is produced by %loop.3
51+
br label %loop.2
52+
53+
loop.2: ; first inner loop: starts at %iv.1 and steps by 13
54+
%iv.2 = phi i32 [ %iv.1, %loop.1 ], [ %iv.2.next, %loop.2 ]
55+
%tmp54 = add i32 %iv.2, 12
56+
%iv.2.next = add i32 %iv.2, 13
57+
%ext = zext i32 %iv.2.next to i64 ; only used by %tmp56 below
58+
%tmp56 = add nuw nsw i64 %ext, 1 ; result has no users in this function
59+
%C6 = icmp sle i32 %tmp54, 65536 ; signed compare on %iv.2 + 12
60+
br i1 %C6, label %loop.2, label %loop.3.ph
61+
62+
loop.3.ph:
63+
br label %loop.3
64+
65+
loop.3: ; second inner loop: continues from %iv.2.next and steps by 13
66+
%iv.3 = phi i32 [ %iv.2.next, %loop.3.ph ], [ %iv.3.next, %loop.3 ]
67+
%iv.3.next = add i32 %iv.3 , 13
68+
%C1 = icmp ult i32 %iv.3.next, 65536 ; unsigned compare, unlike the signed one in %loop.2
69+
br i1 %C1, label %loop.3, label %loop.1.latch
70+
71+
loop.1.latch: ; outer loop latch
72+
%ec = icmp ne i32 %iv.1, 9999 ; outer loop repeats while %iv.1 != 9999
73+
br i1 %ec, label %loop.1, label %exit
74+
75+
exit:
76+
ret void
77+
}

0 commit comments

Comments
 (0)