@@ -4054,17 +4054,24 @@ getAppleRuntimeUnrollPreferences(Loop *L, ScalarEvolution &SE,
4054
4054
}
4055
4055
4056
4056
// Try to find an unroll count that maximizes the use of the instruction
4057
- // window.
4058
- unsigned UC = std::max (16ll / Size, 2ll );
4059
- unsigned BestUC = 0 ;
4060
- while (UC <= 8 && UC * Size <= 48 ) {
4061
- if ((UC * Size % 16 ) == 0 || (BestUC * Size % 16 ) < (UC * Size % 16 ) % 16 ) {
4057
+ // window, i.e. trying to fetch as many instructions per cycle as possible.
4058
+ unsigned MaxInstsPerLine = 16 ;
4059
+ unsigned UC = 1 ;
4060
+ unsigned BestUC = 1 ;
4061
+ unsigned SizeWithBestUC = BestUC * Size;
4062
+ while (UC <= 8 ) {
4063
+ unsigned SizeWithUC = UC * Size;
4064
+ if (SizeWithUC > 48 )
4065
+ break ;
4066
+ if ((SizeWithUC % MaxInstsPerLine) == 0 ||
4067
+ (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {
4062
4068
BestUC = UC;
4069
+ SizeWithBestUC = BestUC * Size;
4063
4070
}
4064
4071
UC++;
4065
4072
}
4066
4073
4067
- if (BestUC == 0 || none_of (Stores, [&LoadedValues](StoreInst *SI) {
4074
+ if (BestUC == 1 || none_of (Stores, [&LoadedValues](StoreInst *SI) {
4068
4075
return LoadedValues.contains (SI->getOperand (0 ));
4069
4076
}))
4070
4077
return ;
@@ -4090,15 +4097,21 @@ void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
4090
4097
// Disable partial & runtime unrolling on -Os.
4091
4098
UP.PartialOptSizeThreshold = 0 ;
4092
4099
4093
- if (ST->getProcFamily () == AArch64Subtarget::Falkor &&
4094
- EnableFalkorHWPFUnrollFix)
4095
- getFalkorUnrollingPreferences (L, SE, UP);
4096
-
4097
- if (ST->getProcFamily () == AArch64Subtarget::AppleA14 ||
4098
- ST->getProcFamily () == AArch64Subtarget::AppleA15 ||
4099
- ST->getProcFamily () == AArch64Subtarget::AppleA16 ||
4100
- ST->getProcFamily () == AArch64Subtarget::AppleM4)
4100
+ // Apply subtarget-specific unrolling preferences.
4101
+ switch (ST->getProcFamily ()) {
4102
+ case AArch64Subtarget::AppleA14:
4103
+ case AArch64Subtarget::AppleA15:
4104
+ case AArch64Subtarget::AppleA16:
4105
+ case AArch64Subtarget::AppleM4:
4101
4106
getAppleRuntimeUnrollPreferences (L, SE, UP, *this );
4107
+ break ;
4108
+ case AArch64Subtarget::Falkor:
4109
+ if (EnableFalkorHWPFUnrollFix)
4110
+ getFalkorUnrollingPreferences (L, SE, UP);
4111
+ break ;
4112
+ default :
4113
+ break ;
4114
+ }
4102
4115
4103
4116
// Scan the loop: don't unroll loops with calls as this could prevent
4104
4117
// inlining. Don't unroll vector loops either, as they don't benefit much from
0 commit comments