Skip to content

Commit bda8fbe

Browse files
author
Sjoerd Meijer
committed
[LV] Fallback strategies if tail-folding fails
This implements 2 different vectorisation fallback strategies if tail-folding fails: 1) don't vectorise at all, or 2) vectorise using a scalar epilogue. This can be controlled with option -prefer-predicate-over-epilogue, which has been changed to take a named value (scalar-epilogue, predicate-else-scalar-epilogue, or predicate-dont-vectorize) corresponding to the tail-folding preference and preferred fallback. Patch by: Pierre van Houtryve, Sjoerd Meijer. Differential Revision: https://reviews.llvm.org/D79783
1 parent 60db26a commit bda8fbe

15 files changed

+323
-37
lines changed

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,7 @@ class LoopVectorizationLegality {
230230

231231
/// Return true if we can vectorize this loop while folding its tail by
232232
/// masking, and mark all respective loads/stores for masking.
233+
/// This object's state is only modified if this function returns true.
233234
bool prepareToFoldTailByMasking();
234235

235236
/// Returns the primary induction variable.
@@ -370,8 +371,14 @@ class LoopVectorizationLegality {
370371
/// its original trip-count, under a proper guard, which should be preserved.
371372
/// \p SafePtrs is a list of addresses that are known to be legal and we know
372373
/// that we can read from them without segfault.
374+
/// \p MaskedOp is a list of instructions that have to be transformed into
375+
/// calls to the appropriate masked intrinsic when the loop is vectorized.
376+
/// \p ConditionalAssumes is a list of assume instructions in predicated
377+
/// blocks that must be dropped if the CFG gets flattened.
373378
bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
374-
bool PreserveGuards = false);
379+
SmallPtrSetImpl<const Instruction *> &MaskedOp,
380+
SmallPtrSetImpl<Instruction *> &ConditionalAssumes,
381+
bool PreserveGuards = false) const;
375382

376383
/// Updates the vectorization state by adding \p Phi to the inductions list.
377384
/// This can set \p Phi as the main induction of the loop if \p Phi is a

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -919,7 +919,10 @@ bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
919919
}
920920

921921
bool LoopVectorizationLegality::blockCanBePredicated(
922-
BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, bool PreserveGuards) {
922+
BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
923+
SmallPtrSetImpl<const Instruction *> &MaskedOp,
924+
SmallPtrSetImpl<Instruction *> &ConditionalAssumes,
925+
bool PreserveGuards) const {
923926
const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
924927

925928
for (Instruction &I : *BB) {
@@ -1026,7 +1029,8 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
10261029

10271030
// We must be able to predicate all blocks that need to be predicated.
10281031
if (blockNeedsPredication(BB)) {
1029-
if (!blockCanBePredicated(BB, SafePointers)) {
1032+
if (!blockCanBePredicated(BB, SafePointers, MaskedOp,
1033+
ConditionalAssumes)) {
10301034
reportVectorizationFailure(
10311035
"Control flow cannot be substituted for a select",
10321036
"control flow cannot be substituted for a select",
@@ -1253,31 +1257,37 @@ bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
12531257
Instruction *UI = cast<Instruction>(U);
12541258
if (TheLoop->contains(UI))
12551259
continue;
1256-
reportVectorizationFailure(
1257-
"Cannot fold tail by masking, loop has an outside user for",
1258-
"Cannot fold tail by masking in the presence of live outs.",
1259-
"LiveOutFoldingTailByMasking", ORE, TheLoop, UI);
1260+
LLVM_DEBUG(
1261+
dbgs()
1262+
<< "LV: Cannot fold tail by masking, loop has an outside user for "
1263+
<< *UI << "\n");
12601264
return false;
12611265
}
12621266
}
12631267

12641268
// The list of pointers that we can safely read and write to remains empty.
12651269
SmallPtrSet<Value *, 8> SafePointers;
12661270

1271+
SmallPtrSet<const Instruction *, 8> TmpMaskedOp;
1272+
SmallPtrSet<Instruction *, 8> TmpConditionalAssumes;
1273+
12671274
// Check and mark all blocks for predication, including those that ordinarily
12681275
// do not need predication such as the header block.
12691276
for (BasicBlock *BB : TheLoop->blocks()) {
1270-
if (!blockCanBePredicated(BB, SafePointers, /* MaskAllLoads= */ true)) {
1271-
reportVectorizationFailure(
1272-
"Cannot fold tail by masking as required",
1273-
"control flow cannot be substituted for a select",
1274-
"NoCFGForSelect", ORE, TheLoop,
1275-
BB->getTerminator());
1277+
if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp,
1278+
TmpConditionalAssumes,
1279+
/* PreserveGuards= */ true)) {
1280+
LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking as requested.\n");
12761281
return false;
12771282
}
12781283
}
12791284

12801285
LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1286+
1287+
MaskedOp.insert(TmpMaskedOp.begin(), TmpMaskedOp.end());
1288+
ConditionalAssumes.insert(TmpConditionalAssumes.begin(),
1289+
TmpConditionalAssumes.end());
1290+
12811291
return true;
12821292
}
12831293

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 48 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -178,13 +178,36 @@ static cl::opt<unsigned> TinyTripCountVectorThreshold(
178178
"value are vectorized only if no scalar iteration overheads "
179179
"are incurred."));
180180

181-
// Indicates that an epilogue is undesired, predication is preferred.
182-
// This means that the vectorizer will try to fold the loop-tail (epilogue)
183-
// into the loop and predicate the loop body accordingly.
184-
static cl::opt<bool> PreferPredicateOverEpilog(
185-
"prefer-predicate-over-epilog", cl::init(false), cl::Hidden,
186-
cl::desc("Indicate that an epilogue is undesired, predication should be "
187-
"used instead."));
181+
// Option prefer-predicate-over-epilogue indicates that an epilogue is undesired
182+
// and that predication is preferred; the enum below lists all its values. I.e.,
183+
// the vectorizer will try to fold the tail-loop (epilogue) into the vector body
184+
// and predicate the instructions accordingly. If tail-folding fails, there are
185+
// different fallback strategies depending on these values:
186+
namespace PreferPredicateTy {
187+
enum Option {
188+
ScalarEpilogue = 0,
189+
PredicateElseScalarEpilogue,
190+
PredicateOrDontVectorize
191+
};
192+
}
193+
194+
static cl::opt<PreferPredicateTy::Option> PreferPredicateOverEpilogue(
195+
"prefer-predicate-over-epilogue",
196+
cl::init(PreferPredicateTy::ScalarEpilogue),
197+
cl::Hidden,
198+
cl::desc("Tail-folding and predication preferences over creating a scalar "
199+
"epilogue loop."),
200+
cl::values(clEnumValN(PreferPredicateTy::ScalarEpilogue,
201+
"scalar-epilogue",
202+
"Don't tail-predicate loops, create scalar epilogue"),
203+
clEnumValN(PreferPredicateTy::PredicateElseScalarEpilogue,
204+
"predicate-else-scalar-epilogue",
205+
"prefer tail-folding, create scalar epilogue if tail "
206+
"folding fails."),
207+
clEnumValN(PreferPredicateTy::PredicateOrDontVectorize,
208+
"predicate-dont-vectorize",
209+
"prefers tail-folding, don't attempt vectorization if "
210+
"tail-folding fails.")));
188211

189212
static cl::opt<bool> MaximizeBandwidth(
190213
"vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden,
@@ -196,7 +219,7 @@ static cl::opt<bool> EnableInterleavedMemAccesses(
196219
cl::desc("Enable vectorization on interleaved memory accesses in a loop"));
197220

198221
/// An interleave-group may need masking if it resides in a block that needs
199-
/// predication, or in order to mask away gaps.
222+
/// predication, or in order to mask away gaps.
200223
static cl::opt<bool> EnableMaskedInterleavedMemAccesses(
201224
"enable-masked-interleaved-mem-accesses", cl::init(false), cl::Hidden,
202225
cl::desc("Enable vectorization on masked interleaved memory accesses in a loop"));
@@ -5241,6 +5264,19 @@ Optional<unsigned> LoopVectorizationCostModel::computeMaxVF(unsigned UserVF,
52415264
return MaxVF;
52425265
}
52435266

5267+
// If there was a tail-folding hint/switch, but we can't fold the tail by
5268+
// masking, don't vectorize or fall back to a scalar epilogue, as requested.
5269+
if (ScalarEpilogueStatus == CM_ScalarEpilogueNotNeededUsePredicate) {
5270+
if (PreferPredicateOverEpilogue == PreferPredicateTy::PredicateOrDontVectorize) {
5271+
LLVM_DEBUG(dbgs() << "LV: Can't fold tail by masking: don't vectorize\n");
5272+
return None;
5273+
}
5274+
LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking: vectorize with a "
5275+
"scalar epilogue instead.\n");
5276+
ScalarEpilogueStatus = CM_ScalarEpilogueAllowed;
5277+
return MaxVF;
5278+
}
5279+
52445280
if (TC == 0) {
52455281
reportVectorizationFailure(
52465282
"Unable to calculate the loop count due to complex control flow",
@@ -8055,8 +8091,8 @@ static ScalarEpilogueLowering getScalarEpilogueLowering(
80558091
Hints.getForce() != LoopVectorizeHints::FK_Enabled))
80568092
return CM_ScalarEpilogueNotAllowedOptSize;
80578093

8058-
bool PredicateOptDisabled = PreferPredicateOverEpilog.getNumOccurrences() &&
8059-
!PreferPredicateOverEpilog;
8094+
bool PredicateOptDisabled = PreferPredicateOverEpilogue.getNumOccurrences() &&
8095+
!PreferPredicateOverEpilogue;
80608096

80618097
// 2) Next, if disabling predication is requested on the command line, honour
80628098
// this and request a scalar epilogue.
@@ -8065,8 +8101,8 @@ static ScalarEpilogueLowering getScalarEpilogueLowering(
80658101

80668102
// 3) and 4) look if enabling predication is requested on the command line,
80678103
// with a loop hint, or if the TTI hook indicates this is profitable, request
8068-
// predication .
8069-
if (PreferPredicateOverEpilog ||
8104+
// predication.
8105+
if (PreferPredicateOverEpilogue ||
80708106
Hints.getPredicate() == LoopVectorizeHints::FK_Enabled ||
80718107
(TTI->preferPredicateOverEpilogue(L, LI, *SE, *AC, TLI, DT,
80728108
LVL.getLAI()) &&

llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,13 @@
3131
; RUN: FileCheck %s -check-prefixes=CHECK,PREFER-FOLDING
3232

3333
; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=+mve.fp \
34-
; RUN: -prefer-predicate-over-epilog=false \
34+
; RUN: -prefer-predicate-over-epilogue=scalar-epilogue \
3535
; RUN: -tail-predication=enabled -loop-vectorize \
3636
; RUN: -enable-arm-maskedldst=true -S < %s | \
3737
; RUN: FileCheck %s -check-prefixes=CHECK,NO-FOLDING
3838

3939
; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=+mve.fp \
40-
; RUN: -prefer-predicate-over-epilog=true \
40+
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
4141
; RUN: -tail-predication=enabled -loop-vectorize \
4242
; RUN: -enable-arm-maskedldst=true -S < %s | \
4343
; RUN: FileCheck %s -check-prefixes=CHECK,FOLDING-OPT

llvm/test/Transforms/LoopVectorize/ARM/tail-folding-counting-down.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; RUN: opt < %s -loop-vectorize -S | FileCheck %s --check-prefixes=COMMON,DEFAULT
2-
; RUN: opt < %s -loop-vectorize -tail-predication=enabled -prefer-predicate-over-epilog -S | FileCheck %s --check-prefixes=COMMON,CHECK-TF,CHECK-PREFER
2+
; RUN: opt < %s -loop-vectorize -tail-predication=enabled -prefer-predicate-over-epilogue=predicate-dont-vectorize -S | FileCheck %s --check-prefixes=COMMON,CHECK-TF,CHECK-PREFER
3+
; RUN: opt < %s -loop-vectorize -tail-predication=enabled -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -S | FileCheck %s --check-prefixes=COMMON,CHECK-TF,CHECK-PREFER
34
; RUN: opt < %s -loop-vectorize -tail-predication=enabled -S | FileCheck %s --check-prefixes=COMMON,CHECK-TF,CHECK-ENABLE-TP
45

56
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"

llvm/test/Transforms/LoopVectorize/ARM/tail-folding-prefer-flag.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=+mve.fp -loop-vectorize -tail-predication=enabled -S < %s | \
22
; RUN: FileCheck %s -check-prefix=CHECK
33

4-
; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=+mve.fp -loop-vectorize -tail-predication=enabled -prefer-predicate-over-epilog -S < %s | \
5-
; RUN: FileCheck -check-prefix=PREDFLAG %s
4+
; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=+mve.fp -loop-vectorize -tail-predication=enabled \
5+
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s | \
6+
; RUN: FileCheck -check-prefix=PREDFLAG %s
67

78
; This test has a loop hint "predicate.predicate" set to false, so shouldn't
89
; get tail-folded, except with -prefer-predicate-over-epilogue which then

llvm/test/Transforms/LoopVectorize/ARM/tail-folding-reduces-vf.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; RUN: opt < %s -mattr=+mve,+mve.fp -loop-vectorize -S | FileCheck %s --check-prefixes=DEFAULT
2-
; RUN: opt < %s -mattr=+mve,+mve.fp -loop-vectorize -prefer-predicate-over-epilog -S | FileCheck %s --check-prefixes=TAILPRED
2+
; RUN: opt < %s -mattr=+mve,+mve.fp -loop-vectorize -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -S | FileCheck %s --check-prefixes=TAILPRED
33

44
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
55
target triple = "thumbv8.1m.main-arm-none-eabi"
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -S -loop-vectorize -mattr=+armv8.1-m.main,+mve.fp -tail-predication=disabled< %s | FileCheck %s
3+
; RUN: opt -S -loop-vectorize -mattr=+armv8.1-m.main,+mve.fp -tail-predication=enabled < %s | FileCheck %s
4+
5+
; This test should produce the same result (vectorized loop + scalar epilogue) with
6+
; default options and when MVE Tail Predication is enabled, as this loop's tail cannot be folded
7+
; by masking due to an outside user of %incdec.ptr in %end.
8+
9+
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
10+
target triple = "thumbv8.1m.main-arm-unknown-eabihf"
11+
12+
define void @outside_user_blocks_tail_folding(i8* nocapture readonly %ptr, i32 %size, i8** %pos) {
13+
; CHECK-LABEL: @outside_user_blocks_tail_folding(
14+
; CHECK-NEXT: header:
15+
; CHECK-NEXT: [[PTR0:%.*]] = load i8*, i8** [[POS:%.*]], align 4
16+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[SIZE:%.*]], 16
17+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
18+
; CHECK: vector.ph:
19+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[SIZE]], 16
20+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[SIZE]], [[N_MOD_VF]]
21+
; CHECK-NEXT: [[IND_END:%.*]] = sub i32 [[SIZE]], [[N_VEC]]
22+
; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, i8* [[PTR:%.*]], i32 [[N_VEC]]
23+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
24+
; CHECK: vector.body:
25+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
26+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[SIZE]], [[INDEX]]
27+
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0
28+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0
29+
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[PTR]], i32 [[TMP1]]
30+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP]], i32 1
31+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 0
32+
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <16 x i8>*
33+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, <16 x i8>* [[TMP4]], align 1
34+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[NEXT_GEP]], i32 0
35+
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to <16 x i8>*
36+
; CHECK-NEXT: store <16 x i8> [[WIDE_LOAD]], <16 x i8>* [[TMP6]], align 1
37+
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 16
38+
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
39+
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
40+
; CHECK: middle.block:
41+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[SIZE]], [[N_VEC]]
42+
; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]]
43+
; CHECK: scalar.ph:
44+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[SIZE]], [[HEADER:%.*]] ]
45+
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i8* [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[PTR]], [[HEADER]] ]
46+
; CHECK-NEXT: br label [[BODY:%.*]]
47+
; CHECK: body:
48+
; CHECK-NEXT: [[DEC66:%.*]] = phi i32 [ [[DEC:%.*]], [[BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
49+
; CHECK-NEXT: [[BUFF:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
50+
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[BUFF]], i32 1
51+
; CHECK-NEXT: [[DEC]] = add nsw i32 [[DEC66]], -1
52+
; CHECK-NEXT: [[TMP8:%.*]] = load i8, i8* [[INCDEC_PTR]], align 1
53+
; CHECK-NEXT: store i8 [[TMP8]], i8* [[BUFF]], align 1
54+
; CHECK-NEXT: [[TOBOOL11:%.*]] = icmp eq i32 [[DEC]], 0
55+
; CHECK-NEXT: br i1 [[TOBOOL11]], label [[END]], label [[BODY]], !llvm.loop !2
56+
; CHECK: end:
57+
; CHECK-NEXT: [[INCDEC_PTR_LCSSA:%.*]] = phi i8* [ [[INCDEC_PTR]], [[BODY]] ], [ [[IND_END2]], [[MIDDLE_BLOCK]] ]
58+
; CHECK-NEXT: store i8* [[INCDEC_PTR_LCSSA]], i8** [[POS]], align 4
59+
; CHECK-NEXT: ret void
60+
;
61+
header:
62+
%ptr0 = load i8*, i8** %pos, align 4
63+
br label %body
64+
65+
body:
66+
%dec66 = phi i32 [ %dec, %body ], [ %size, %header ]
67+
%buff = phi i8* [ %incdec.ptr, %body ], [ %ptr, %header ]
68+
%incdec.ptr = getelementptr inbounds i8, i8* %buff, i32 1
69+
%dec = add nsw i32 %dec66, -1
70+
%0 = load i8, i8* %incdec.ptr, align 1
71+
store i8 %0, i8* %buff, align 1
72+
%tobool11 = icmp eq i32 %dec, 0
73+
br i1 %tobool11, label %end, label %body
74+
75+
end:
76+
store i8* %incdec.ptr, i8** %pos, align 4
77+
ret void
78+
}

llvm/test/Transforms/LoopVectorize/Hexagon/invalidate-cm-after-invalidating-interleavegroups.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt -loop-vectorize -hexagon-autohvx=1 -force-vector-width=64 -prefer-predicate-over-epilog -S %s | FileCheck %s
1+
; RUN: opt -loop-vectorize -hexagon-autohvx=1 -force-vector-width=64 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S %s | FileCheck %s
22

33
target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
44
target triple = "hexagon"

llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
22
; RUN: opt < %s -loop-vectorize -S | FileCheck %s
3-
; RUN: opt < %s -loop-vectorize -prefer-predicate-over-epilog -S | FileCheck %s
3+
; RUN: opt < %s -loop-vectorize -prefer-predicate-over-epilogue=predicate-dont-vectorize -S | FileCheck %s
44

55
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
66
target triple = "x86_64-unknown-linux-gnu"

llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt < %s -loop-vectorize -vectorize-num-stores-pred=2 -prefer-predicate-over-epilog -S | FileCheck %s
2+
; RUN: opt < %s -loop-vectorize -vectorize-num-stores-pred=2 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S | FileCheck %s
33

44
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
55

llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt -loop-vectorize -force-vector-width=2 -S -prefer-predicate-over-epilog %s | FileCheck %s
2+
; RUN: opt -loop-vectorize -force-vector-width=2 -S -prefer-predicate-over-epilogue=predicate-dont-vectorize %s | FileCheck %s
33

44

55
; Test case for PR46525. There are two candidates to pick for

llvm/test/Transforms/LoopVectorize/reduction-predselect.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -prefer-predicate-over-epilog -prefer-predicated-reduction-select -force-reduction-intrinsics -dce -instcombine -S | FileCheck %s
2+
; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -prefer-predicated-reduction-select -force-reduction-intrinsics -dce -instcombine -S | FileCheck %s
33

44
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
55

llvm/test/Transforms/LoopVectorize/tail-folding-counting-down.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt < %s -loop-vectorize -prefer-predicate-over-epilog -force-vector-width=4 -S | FileCheck %s
1+
; RUN: opt < %s -loop-vectorize -prefer-predicate-over-epilogue=predicate-dont-vectorize -force-vector-width=4 -S | FileCheck %s
22

33
; Check that a counting-down loop which has no primary induction variable
44
; is vectorized with preferred predication.

0 commit comments

Comments
 (0)