@@ -4586,21 +4586,17 @@ static bool shouldUnrollLoopWithInstruction(Instruction &I,
4586
4586
}
4587
4587
4588
4588
// This function returns true if the loop:
4589
- // 1. Contains only those instructions that should be unrolled,
4590
- // 2. Has a valid cost,
4591
- // 3. Has a cost within the supplied budget.
4589
+ // 1. Has a valid cost, and
4590
+ // 2. Has a cost within the supplied budget.
4592
4591
// Otherwise it returns false.
4593
- static bool canUnrollLoopWithinBudget (Loop *L, AArch64TTIImpl &TTI,
4594
- InstructionCost Budget,
4595
- unsigned *FinalSize) {
4592
+ static bool isLoopSizeWithinBudget (Loop *L, AArch64TTIImpl &TTI,
4593
+ InstructionCost Budget,
4594
+ unsigned *FinalSize) {
4596
4595
// Estimate the size of the loop.
4597
4596
InstructionCost LoopCost = 0 ;
4598
4597
4599
4598
for (auto *BB : L->getBlocks ()) {
4600
4599
for (auto &I : *BB) {
4601
- if (!shouldUnrollLoopWithInstruction (I, TTI))
4602
- return false ;
4603
-
4604
4600
SmallVector<const Value *, 4 > Operands (I.operand_values ());
4605
4601
InstructionCost Cost =
4606
4602
TTI.getInstructionCost (&I, Operands, TTI::TCK_CodeSize);
@@ -4635,11 +4631,8 @@ static bool shouldUnrollMultiExitLoop(Loop *L, ScalarEvolution &SE,
4635
4631
if (MaxTC > 0 && MaxTC <= 32 )
4636
4632
return false ;
4637
4633
4638
- if (findStringMetadataForLoop (L, " llvm.loop.isvectorized" ))
4639
- return false ;
4640
-
4641
- // Estimate the size of the loop.
4642
- if (!canUnrollLoopWithinBudget (L, TTI, 5 , nullptr ))
4634
+ // Make sure the loop size is <= 5.
4635
+ if (!isLoopSizeWithinBudget (L, TTI, 5 , nullptr ))
4643
4636
return false ;
4644
4637
4645
4638
// Small search loops with multiple exits can be highly beneficial to unroll.
@@ -4671,7 +4664,7 @@ getAppleRuntimeUnrollPreferences(Loop *L, ScalarEvolution &SE,
4671
4664
if (!L->isInnermost () || L->getNumBlocks () > 8 )
4672
4665
return ;
4673
4666
4674
- // This is handled by common code.
4667
+ // Loops with multiple exits are handled by common code.
4675
4668
if (!L->getExitBlock ())
4676
4669
return ;
4677
4670
@@ -4696,7 +4689,7 @@ getAppleRuntimeUnrollPreferences(Loop *L, ScalarEvolution &SE,
4696
4689
if (Header == L->getLoopLatch ()) {
4697
4690
// Estimate the size of the loop.
4698
4691
unsigned Size;
4699
- if (!canUnrollLoopWithinBudget (L, TTI, 8 , &Size))
4692
+ if (!isLoopSizeWithinBudget (L, TTI, 8 , &Size))
4700
4693
return ;
4701
4694
4702
4695
SmallPtrSet<Value *, 8 > LoadedValues;
@@ -4793,6 +4786,16 @@ void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
4793
4786
// Disable partial & runtime unrolling on -Os.
4794
4787
UP.PartialOptSizeThreshold = 0 ;
4795
4788
4789
+ // Scan the loop: don't unroll loops with calls as this could prevent
4790
+ // inlining. Don't unroll vector loops either, as they don't benefit much from
4791
+ // unrolling.
4792
+ for (auto *BB : L->getBlocks ()) {
4793
+ for (auto &I : *BB) {
4794
+ if (!shouldUnrollLoopWithInstruction (I, *this ))
4795
+ return ;
4796
+ }
4797
+ }
4798
+
4796
4799
// Apply subtarget-specific unrolling preferences.
4797
4800
switch (ST->getProcFamily ()) {
4798
4801
case AArch64Subtarget::AppleA14:
@@ -4822,16 +4825,6 @@ void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
4822
4825
return ;
4823
4826
}
4824
4827
4825
- // Scan the loop: don't unroll loops with calls as this could prevent
4826
- // inlining. Don't unroll vector loops either, as they don't benefit much from
4827
- // unrolling.
4828
- for (auto *BB : L->getBlocks ()) {
4829
- for (auto &I : *BB) {
4830
- if (!shouldUnrollLoopWithInstruction (I, *this ))
4831
- return ;
4832
- }
4833
- }
4834
-
4835
4828
// Enable runtime unrolling for in-order models
4836
4829
// If mcpu is omitted, getProcFamily() returns AArch64Subtarget::Others, so by
4837
4830
// checking for that case, we can ensure that the default behaviour is
0 commit comments