Skip to content

Commit 166937b

Browse files
committed
[LV] Clean up after expanding SCEV predicate to constant.
In some cases, SCEV isn't able to prove that no wrap checks are needed, while constant folding in SCEVExpander can. In those cases, expanding the predicate may leave behind IR for computing the trip count. That IR is unused at this point, but it may be re-used by a later expansion, which triggers an assertion when trying to clean up SCEVExp after vectorization. Run the cleaner directly after the predicate expands to a constant, to prevent any of the generated code from being re-used. Fixes #131281.
1 parent 887cf1f commit 166937b

File tree

2 files changed

+87
-0
lines changed

2 files changed

+87
-0
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1912,6 +1912,12 @@ class GeneratedRTChecks {
19121912

19131913
SCEVCheckCond = SCEVExp.expandCodeForPredicate(
19141914
&UnionPred, SCEVCheckBlock->getTerminator());
1915+
if (isa<Constant>(SCEVCheckCond)) {
1916+
// Clean up directly after expanding the predicate to a constant, to
1917+
// avoid further expansions re-using anything left over from SCEVExp.
1918+
SCEVExpanderCleaner SCEVCleaner(SCEVExp);
1919+
SCEVCleaner.cleanup();
1920+
}
19151921
}
19161922

19171923
const auto &RtPtrChecking = *LAI.getRuntimePointerChecking();

llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,3 +241,84 @@ loop:
241241
exit:
242242
ret void
243243
}
244+
245+
declare i1 @cond()
246+
247+
; Test case for https://github.com/llvm/llvm-project/issues/131281.
248+
; %add2 is known to not wrap via BTC.
249+
define void @no_signed_wrap_iv_via_btc(ptr %dst, i32 %N) mustprogress {
250+
; CHECK-LABEL: define void @no_signed_wrap_iv_via_btc
251+
; CHECK-SAME: (ptr [[DST:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
252+
; CHECK-NEXT: entry:
253+
; CHECK-NEXT: [[SUB:%.*]] = add i32 [[N]], -100
254+
; CHECK-NEXT: [[SUB4:%.*]] = add i32 [[N]], -99
255+
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], 1
256+
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[SUB4]], i32 [[TMP0]])
257+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SMAX]], 100
258+
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], [[N]]
259+
; CHECK-NEXT: br label [[OUTER:%.*]]
260+
; CHECK: outer.loopexit:
261+
; CHECK-NEXT: br label [[OUTER]]
262+
; CHECK: outer:
263+
; CHECK-NEXT: [[C:%.*]] = call i1 @cond()
264+
; CHECK-NEXT: br i1 [[C]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
265+
; CHECK: loop.preheader:
266+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 4
267+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
268+
; CHECK: vector.ph:
269+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 4
270+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF]]
271+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
272+
; CHECK: vector.body:
273+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
274+
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0
275+
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SUB4]], [[TMP3]]
276+
; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
277+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP5]]
278+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0
279+
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP7]], align 4
280+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
281+
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
282+
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
283+
; CHECK: middle.block:
284+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]]
285+
; CHECK-NEXT: br i1 [[CMP_N]], label [[OUTER_LOOPEXIT:%.*]], label [[SCALAR_PH]]
286+
; CHECK: scalar.ph:
287+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ]
288+
; CHECK-NEXT: br label [[LOOP:%.*]]
289+
; CHECK: loop:
290+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
291+
; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[SUB4]], [[IV]]
292+
; CHECK-NEXT: [[ADD_EXT:%.*]] = sext i32 [[ADD2]] to i64
293+
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i32, ptr [[DST]], i64 [[ADD_EXT]]
294+
; CHECK-NEXT: store i32 0, ptr [[GEP_DST]], align 4
295+
; CHECK-NEXT: [[INC]] = add i32 [[IV]], 1
296+
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[INC]]
297+
; CHECK-NEXT: [[EC:%.*]] = icmp sgt i32 [[ADD]], [[N]]
298+
; CHECK-NEXT: br i1 [[EC]], label [[OUTER_LOOPEXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
299+
; CHECK: exit:
300+
; CHECK-NEXT: ret void
301+
;
302+
entry:
303+
%sub = add i32 %N, -100
304+
%sub4 = add i32 %N, -99
305+
br label %outer
306+
307+
outer:
308+
%c = call i1 @cond()
309+
br i1 %c, label %loop, label %exit
310+
311+
loop:
312+
%iv = phi i32 [ 0, %outer ], [ %inc, %loop ]
313+
%add2 = add i32 %sub4, %iv
314+
%add.ext = sext i32 %add2 to i64
315+
%gep.dst = getelementptr i32, ptr %dst, i64 %add.ext
316+
store i32 0, ptr %gep.dst, align 4
317+
%inc = add i32 %iv, 1
318+
%add = add i32 %sub, %inc
319+
%ec = icmp sgt i32 %add, %N
320+
br i1 %ec, label %outer, label %loop
321+
322+
exit:
323+
ret void
324+
}

0 commit comments

Comments
 (0)