Skip to content

Commit 2c29c0d

Browse files
committed
!fixup address latest comments, thanks!
1 parent 3c83402 commit 2c29c0d

File tree

1 file changed

+27
-14
lines changed

1 file changed

+27
-14
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 27 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -4054,17 +4054,24 @@ getAppleRuntimeUnrollPreferences(Loop *L, ScalarEvolution &SE,
40544054
}
40554055

40564056
// Try to find an unroll count that maximizes the use of the instruction
4057-
// window.
4058-
unsigned UC = std::max(16ll / Size, 2ll);
4059-
unsigned BestUC = 0;
4060-
while (UC <= 8 && UC * Size <= 48) {
4061-
if ((UC * Size % 16) == 0 || (BestUC * Size % 16) < (UC * Size % 16) % 16) {
4057+
// window, i.e. trying to fetch as many instructions per cycle as possible.
4058+
unsigned MaxInstsPerLine = 16;
4059+
unsigned UC = 1;
4060+
unsigned BestUC = 1;
4061+
unsigned SizeWithBestUC = BestUC * Size;
4062+
while (UC <= 8) {
4063+
unsigned SizeWithUC = UC * Size;
4064+
if (SizeWithUC > 48)
4065+
break;
4066+
if ((SizeWithUC % MaxInstsPerLine) == 0 ||
4067+
(SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {
40624068
BestUC = UC;
4069+
SizeWithBestUC = BestUC * Size;
40634070
}
40644071
UC++;
40654072
}
40664073

4067-
if (BestUC == 0 || none_of(Stores, [&LoadedValues](StoreInst *SI) {
4074+
if (BestUC == 1 || none_of(Stores, [&LoadedValues](StoreInst *SI) {
40684075
return LoadedValues.contains(SI->getOperand(0));
40694076
}))
40704077
return;
@@ -4090,15 +4097,21 @@ void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
40904097
// Disable partial & runtime unrolling on -Os.
40914098
UP.PartialOptSizeThreshold = 0;
40924099

4093-
if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
4094-
EnableFalkorHWPFUnrollFix)
4095-
getFalkorUnrollingPreferences(L, SE, UP);
4096-
4097-
if (ST->getProcFamily() == AArch64Subtarget::AppleA14 ||
4098-
ST->getProcFamily() == AArch64Subtarget::AppleA15 ||
4099-
ST->getProcFamily() == AArch64Subtarget::AppleA16 ||
4100-
ST->getProcFamily() == AArch64Subtarget::AppleM4)
4100+
// Apply subtarget-specific unrolling preferences.
4101+
switch (ST->getProcFamily()) {
4102+
case AArch64Subtarget::AppleA14:
4103+
case AArch64Subtarget::AppleA15:
4104+
case AArch64Subtarget::AppleA16:
4105+
case AArch64Subtarget::AppleM4:
41014106
getAppleRuntimeUnrollPreferences(L, SE, UP, *this);
4107+
break;
4108+
case AArch64Subtarget::Falkor:
4109+
if (EnableFalkorHWPFUnrollFix)
4110+
getFalkorUnrollingPreferences(L, SE, UP);
4111+
break;
4112+
default:
4113+
break;
4114+
}
41024115

41034116
// Scan the loop: don't unroll loops with calls as this could prevent
41044117
// inlining. Don't unroll vector loops either, as they don't benefit much from

0 commit comments

Comments
 (0)