Skip to content

Commit fdd60c7

Browse files
[LV][NFC] Preselect folding style while choosing the max VF, NFC.
Selects the tail-folding style while choosing the max vectorization factor and stores it in a data member, rather than recalculating it on every getTailFoldingStyle call. Part of #76172. Reviewers: ayalz, fhahn. Reviewed By: fhahn. Pull Request: #81885
1 parent 680c780 commit fdd60c7

File tree

1 file changed

+30
-12
lines changed

1 file changed

+30
-12
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1510,19 +1510,36 @@ class LoopVectorizationCostModel {
15101510
}
15111511

15121512
/// Returns the TailFoldingStyle that is best for the current loop.
1513-
TailFoldingStyle
1514-
getTailFoldingStyle(bool IVUpdateMayOverflow = true) const {
1515-
if (!CanFoldTailByMasking)
1516-
return TailFoldingStyle::None;
1513+
TailFoldingStyle getTailFoldingStyle(bool IVUpdateMayOverflow = true) const {
1514+
return IVUpdateMayOverflow ? ChosenTailFoldingStyle.first
1515+
: ChosenTailFoldingStyle.second;
1516+
}
1517+
1518+
/// Selects and saves TailFoldingStyle for 2 options - if IV update may
1519+
/// overflow or not.
1520+
void setTailFoldinStyles() {
1521+
assert(ChosenTailFoldingStyle.first == TailFoldingStyle::None &&
1522+
ChosenTailFoldingStyle.second == TailFoldingStyle::None &&
1523+
"Tail folding must not be selected yet.");
1524+
if (!Legal->prepareToFoldTailByMasking())
1525+
return;
15171526

1518-
if (ForceTailFoldingStyle.getNumOccurrences())
1519-
return ForceTailFoldingStyle;
1527+
if (ForceTailFoldingStyle.getNumOccurrences()) {
1528+
ChosenTailFoldingStyle.first = ChosenTailFoldingStyle.second =
1529+
ForceTailFoldingStyle;
1530+
return;
1531+
}
15201532

1521-
return TTI.getPreferredTailFoldingStyle(IVUpdateMayOverflow);
1533+
ChosenTailFoldingStyle.first =
1534+
TTI.getPreferredTailFoldingStyle(/*IVUpdateMayOverflow=*/true);
1535+
ChosenTailFoldingStyle.second =
1536+
TTI.getPreferredTailFoldingStyle(/*IVUpdateMayOverflow=*/false);
15221537
}
15231538

15241539
/// Returns true if all loop blocks should be masked to fold tail loop.
15251540
bool foldTailByMasking() const {
1541+
// TODO: check if it is possible to check for None style independent of
1542+
// IVUpdateMayOverflow flag in getTailFoldingStyle.
15261543
return getTailFoldingStyle() != TailFoldingStyle::None;
15271544
}
15281545

@@ -1675,8 +1692,10 @@ class LoopVectorizationCostModel {
16751692
/// iterations to execute in the scalar loop.
16761693
ScalarEpilogueLowering ScalarEpilogueStatus = CM_ScalarEpilogueAllowed;
16771694

1678-
/// All blocks of loop are to be masked to fold tail of scalar iterations.
1679-
bool CanFoldTailByMasking = false;
1695+
/// Controls the finally chosen tail-folding style. The first element is used if
1696+
/// the IV update may overflow, the second element - if it does not.
1697+
std::pair<TailFoldingStyle, TailFoldingStyle> ChosenTailFoldingStyle =
1698+
std::make_pair(TailFoldingStyle::None, TailFoldingStyle::None);
16801699

16811700
/// A map holding scalar costs for different vectorization factors. The
16821701
/// presence of a cost for an instruction in the mapping indicates that the
@@ -4633,10 +4652,9 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
46334652
// found modulo the vectorization factor is not zero, try to fold the tail
46344653
// by masking.
46354654
// FIXME: look for a smaller MaxVF that does divide TC rather than masking.
4636-
if (Legal->prepareToFoldTailByMasking()) {
4637-
CanFoldTailByMasking = true;
4655+
setTailFoldinStyles();
4656+
if (foldTailByMasking())
46384657
return MaxFactors;
4639-
}
46404658

46414659
// If there was a tail-folding hint/switch, but we can't fold the tail by
46424660
// masking, fallback to a vectorization with a scalar epilogue.

0 commit comments

Comments
 (0)