Skip to content

Commit 0fcb3af

Browse files
author
Sjoerd Meijer
committed
[LV] Forced vectorization with runtime checks and OptForSize
When vectorisation is forced with a pragma, we optimise for minimum size, and we need to emit runtime memory checks, then allow this code growth instead of running into an assert as we currently do. This is the result of D65197 and D66803, and was a use-case not really considered before. If this now happens, we emit an optimisation remark warning about the code-size expansion, which can be avoided by not forcing vectorisation or possibly by source-code modifications. Differential Revision: https://reviews.llvm.org/D67764 llvm-svn: 372694
1 parent 54b78f3 commit 0fcb3af

File tree

2 files changed

+44
-3
lines changed

2 files changed

+44
-3
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2730,8 +2730,19 @@ void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) {
27302730
if (!MemRuntimeCheck)
27312731
return;
27322732

2733-
assert(!BB->getParent()->hasOptSize() &&
2734-
"Cannot emit memory checks when optimizing for size");
2733+
if (BB->getParent()->hasOptSize()) {
2734+
assert(Cost->Hints->getForce() == LoopVectorizeHints::FK_Enabled &&
2735+
"Cannot emit memory checks when optimizing for size, unless forced "
2736+
"to vectorize.");
2737+
ORE->emit([&]() {
2738+
return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationCodeSize",
2739+
L->getStartLoc(), L->getHeader())
2740+
<< "Code-size may be reduced by not forcing "
2741+
"vectorization, or by source-code modifications "
2742+
"eliminating the need for runtime checks "
2743+
"(e.g., adding 'restrict').";
2744+
});
2745+
}
27352746

27362747
// Create a new block containing the memory check.
27372748
BB->setName("vector.memcheck");

llvm/test/Transforms/LoopVectorize/runtime-check.ll

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
2+
; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
3+
; RUN: opt < %s -loop-vectorize -disable-basicaa -S -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s -check-prefix=FORCED_OPTSIZE
34

45
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
56

@@ -159,6 +160,32 @@ loopexit:
159160

160161
; CHECK: !9 = !DILocation(line: 101, column: 1, scope: !{{.*}})
161162

163+
define dso_local void @forced_optsize(i64* noalias nocapture readonly %x_p, i64* noalias nocapture readonly %y_p, i64* noalias nocapture %z_p) minsize optsize {
164+
;
165+
; FORCED_OPTSIZE: remark: <unknown>:0:0: Code-size may be reduced by not forcing vectorization, or by source-code modifications eliminating the need for runtime checks (e.g., adding 'restrict').
166+
; FORCED_OPTSIZE-LABEL: @forced_optsize(
167+
; FORCED_OPTSIZE: vector.body:
168+
;
169+
entry:
170+
br label %for.body
171+
172+
for.cond.cleanup:
173+
ret void
174+
175+
for.body:
176+
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
177+
%arrayidx = getelementptr inbounds i64, i64* %x_p, i64 %indvars.iv
178+
%0 = load i64, i64* %arrayidx, align 8
179+
%arrayidx2 = getelementptr inbounds i64, i64* %y_p, i64 %indvars.iv
180+
%1 = load i64, i64* %arrayidx2, align 8
181+
%add = add nsw i64 %1, %0
182+
%arrayidx4 = getelementptr inbounds i64, i64* %z_p, i64 %indvars.iv
183+
store i64 %add, i64* %arrayidx4, align 8
184+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
185+
%exitcond = icmp eq i64 %indvars.iv.next, 128
186+
br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !12
187+
}
188+
162189
!llvm.module.flags = !{!0, !1}
163190
!llvm.dbg.cu = !{!9}
164191
!0 = !{i32 2, !"Dwarf Version", i32 4}
@@ -177,3 +204,6 @@ loopexit:
177204
splitDebugFilename: "abc.debug", emissionKind: 2)
178205
!10 = !DIFile(filename: "path/to/file", directory: "/path/to/dir")
179206
!11 = !{i32 2, !"Debug Info Version", i32 3}
207+
!12 = distinct !{!12, !13, !14}
208+
!13 = !{!"llvm.loop.vectorize.width", i32 2}
209+
!14 = !{!"llvm.loop.vectorize.enable", i1 true}

0 commit comments

Comments
 (0)