@@ -1510,19 +1510,36 @@ class LoopVectorizationCostModel {
1510
1510
}
1511
1511
1512
1512
/// Returns the TailFoldingStyle that is best for the current loop.
1513
- TailFoldingStyle
1514
- getTailFoldingStyle (bool IVUpdateMayOverflow = true ) const {
1515
- if (!CanFoldTailByMasking)
1516
- return TailFoldingStyle::None;
1513
+ TailFoldingStyle getTailFoldingStyle (bool IVUpdateMayOverflow = true ) const {
1514
+ return IVUpdateMayOverflow ? ChosenTailFoldingStyle.first
1515
+ : ChosenTailFoldingStyle.second ;
1516
+ }
1517
+
1518
+ // / Selects and saves TailFoldingStyle for 2 options - if IV update may
1519
+ // / overflow or not.
1520
+ void setTailFoldinStyles () {
1521
+ assert (ChosenTailFoldingStyle.first == TailFoldingStyle::None &&
1522
+ ChosenTailFoldingStyle.second == TailFoldingStyle::None &&
1523
+ " Tail folding must not be selected yet." );
1524
+ if (!Legal->prepareToFoldTailByMasking ())
1525
+ return ;
1517
1526
1518
- if (ForceTailFoldingStyle.getNumOccurrences ())
1519
- return ForceTailFoldingStyle;
1527
+ if (ForceTailFoldingStyle.getNumOccurrences ()) {
1528
+ ChosenTailFoldingStyle.first = ChosenTailFoldingStyle.second =
1529
+ ForceTailFoldingStyle;
1530
+ return ;
1531
+ }
1520
1532
1521
- return TTI.getPreferredTailFoldingStyle (IVUpdateMayOverflow);
1533
+ ChosenTailFoldingStyle.first =
1534
+ TTI.getPreferredTailFoldingStyle (/* IVUpdateMayOverflow=*/ true );
1535
+ ChosenTailFoldingStyle.second =
1536
+ TTI.getPreferredTailFoldingStyle (/* IVUpdateMayOverflow=*/ false );
1522
1537
}
1523
1538
1524
1539
// / Returns true if all loop blocks should be masked to fold tail loop.
1525
1540
bool foldTailByMasking () const {
1541
+ // TODO: check if it is possible to check for None style independent of
1542
+ // IVUpdateMayOverflow flag in getTailFoldingStyle.
1526
1543
return getTailFoldingStyle () != TailFoldingStyle::None;
1527
1544
}
1528
1545
@@ -1675,8 +1692,10 @@ class LoopVectorizationCostModel {
1675
1692
/// iterations to execute in the scalar loop.
1676
1693
ScalarEpilogueLowering ScalarEpilogueStatus = CM_ScalarEpilogueAllowed;
1677
1694
1678
- // / All blocks of loop are to be masked to fold tail of scalar iterations.
1679
- bool CanFoldTailByMasking = false ;
1695
+ // / Control finally chosen tail folding style. The first element is used if
1696
+ // / the IV update may overflow, the second element - if it does not.
1697
+ std::pair<TailFoldingStyle, TailFoldingStyle> ChosenTailFoldingStyle =
1698
+ std::make_pair (TailFoldingStyle::None, TailFoldingStyle::None);
1680
1699
1681
1700
/// A map holding scalar costs for different vectorization factors. The
1682
1701
/// presence of a cost for an instruction in the mapping indicates that the
@@ -4633,10 +4652,9 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
4633
4652
// found modulo the vectorization factor is not zero, try to fold the tail
4634
4653
// by masking.
4635
4654
// FIXME: look for a smaller MaxVF that does divide TC rather than masking.
4636
- if (Legal-> prepareToFoldTailByMasking ()) {
4637
- CanFoldTailByMasking = true ;
4655
+ setTailFoldinStyles ();
4656
+ if ( foldTailByMasking ())
4638
4657
return MaxFactors;
4639
- }
4640
4658
4641
4659
// If there was a tail-folding hint/switch, but we can't fold the tail by
4642
4660
// masking, fallback to a vectorization with a scalar epilogue.
0 commit comments