Skip to content

Commit fb755c0

Browse files
committed
Address review comments
1 parent b1b95ed commit fb755c0

File tree

3 files changed

+32
-37
lines changed

3 files changed

+32
-37
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 30 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ static cl::opt<bool> EnableLSRCostOpt("enable-aarch64-lsr-cost-opt",
6969
cl::init(true), cl::Hidden);
7070

7171
static cl::opt<unsigned> SmallMultiExitLoopUF(
72-
"small-multi-exit-loop-unroll-factor", cl::init(0), cl::Hidden,
72+
"aarch64-small-multi-exit-loop-unroll", cl::init(0), cl::Hidden,
7373
cl::desc(
7474
"Force unrolling of small multi-exit loops with given unroll factor"));
7575

@@ -4386,35 +4386,45 @@ static bool shouldUnrollLoopWithInstruction(Instruction &I,
43864386
if (I.getType()->isVectorTy())
43874387
return false;
43884388

4389-
if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
4390-
if (const Function *F = cast<CallBase>(I).getCalledFunction())
4391-
if (!TTI.isLoweredToCall(F))
4392-
return true;
4389+
if (isa<CallBase>(I)) {
4390+
if (isa<CallInst>(I) || isa<InvokeInst>(I))
4391+
if (const Function *F = cast<CallBase>(I).getCalledFunction())
4392+
if (!TTI.isLoweredToCall(F))
4393+
return true;
43934394
return false;
43944395
}
43954396

43964397
return true;
43974398
}
43984399

4399-
static InstructionCost getSizeOfLoop(Loop *L, AArch64TTIImpl &TTI) {
4400+
static unsigned getLoopSize(Loop *L, AArch64TTIImpl &TTI,
4401+
InstructionCost Budget) {
44004402
// Estimate the size of the loop.
4401-
InstructionCost Size = 0;
4403+
InstructionCost LoopCost = 0;
4404+
4405+
if (findStringMetadataForLoop(L, "llvm.loop.isvectorized"))
4406+
return 0;
4407+
44024408
for (auto *BB : L->getBlocks()) {
44034409
for (auto &I : *BB) {
44044410
if (!shouldUnrollLoopWithInstruction(I, TTI))
4405-
return InstructionCost::getInvalid();
4411+
return 0;
44064412

44074413
SmallVector<const Value *, 4> Operands(I.operand_values());
44084414
InstructionCost Cost =
44094415
TTI.getInstructionCost(&I, Operands, TTI::TCK_CodeSize);
44104416
// This can happen with intrinsics that don't currently have a cost model
44114417
// or for some operations that require SVE.
44124418
if (!Cost.isValid())
4413-
return InstructionCost::getInvalid();
4414-
Size += *Cost.getValue();
4419+
return 0;
4420+
4421+
LoopCost += Cost;
4422+
if (LoopCost > Budget)
4423+
return 0;
44154424
}
44164425
}
4417-
return Size;
4426+
4427+
return *LoopCost.getValue();
44184428
}
44194429

44204430
static bool shouldUnrollMultiExitLoop(Loop *L, ScalarEvolution &SE,
@@ -4432,12 +4442,8 @@ static bool shouldUnrollMultiExitLoop(Loop *L, ScalarEvolution &SE,
44324442
if (MaxTC > 0 && MaxTC <= 32)
44334443
return false;
44344444

4435-
if (findStringMetadataForLoop(L, "llvm.loop.isvectorized"))
4436-
return false;
4437-
44384445
// Estimate the size of the loop.
4439-
InstructionCost Size = getSizeOfLoop(L, TTI);
4440-
if (!Size.isValid())
4446+
if (!getLoopSize(L, TTI, 5))
44414447
return false;
44424448

44434449
// Small search loops with multiple exits can be highly beneficial to unroll.
@@ -4452,7 +4458,7 @@ static bool shouldUnrollMultiExitLoop(Loop *L, ScalarEvolution &SE,
44524458
}))
44534459
return false;
44544460

4455-
return *Size.getValue() < 6;
4461+
return true;
44564462
}
44574463

44584464
/// For Apple CPUs, we want to runtime-unroll loops to make better use of the
@@ -4469,28 +4475,15 @@ getAppleRuntimeUnrollPreferences(Loop *L, ScalarEvolution &SE,
44694475
if (!L->isInnermost() || L->getNumBlocks() > 8)
44704476
return;
44714477

4478+
// This is handled by common code.
4479+
if (!L->getExitBlock())
4480+
return;
4481+
44724482
const SCEV *BTC = SE.getSymbolicMaxBackedgeTakenCount(L);
44734483
if (isa<SCEVConstant>(BTC) || isa<SCEVCouldNotCompute>(BTC) ||
44744484
(SE.getSmallConstantMaxTripCount(L) > 0 &&
44754485
SE.getSmallConstantMaxTripCount(L) <= 32))
44764486
return;
4477-
if (findStringMetadataForLoop(L, "llvm.loop.isvectorized"))
4478-
return;
4479-
4480-
int64_t Size = 0;
4481-
for (auto *BB : L->getBlocks()) {
4482-
for (auto &I : *BB) {
4483-
if (!isa<IntrinsicInst>(&I) && isa<CallBase>(&I))
4484-
return;
4485-
SmallVector<const Value *, 4> Operands(I.operand_values());
4486-
Size +=
4487-
*TTI.getInstructionCost(&I, Operands, TTI::TCK_CodeSize).getValue();
4488-
}
4489-
}
4490-
4491-
// This is handled by common code.
4492-
if (!L->getExitBlock())
4493-
return;
44944487

44954488
if (SE.getSymbolicMaxBackedgeTakenCount(L) != SE.getBackedgeTakenCount(L))
44964489
return;
@@ -4502,7 +4495,9 @@ getAppleRuntimeUnrollPreferences(Loop *L, ScalarEvolution &SE,
45024495
// dependencies, to expose more parallel memory access streams.
45034496
BasicBlock *Header = L->getHeader();
45044497
if (Header == L->getLoopLatch()) {
4505-
if (Size > 8)
4498+
// Estimate the size of the loop.
4499+
unsigned Size = getLoopSize(L, TTI, 8);
4500+
if (!Size)
45064501
return;
45074502

45084503
SmallPtrSet<Value *, 8> LoadedValues;

llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling-multi-exit.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
; RUN: opt -p loop-unroll -mcpu=apple-m2 -S %s | FileCheck --check-prefix=APPLE %s
44
; RUN: opt -p loop-unroll -mcpu=apple-m3 -S %s | FileCheck --check-prefix=APPLE %s
55
; RUN: opt -p loop-unroll -mcpu=apple-m4 -S %s | FileCheck --check-prefix=APPLE %s
6-
; RUN: opt -p loop-unroll -mcpu=apple-m4 -small-multi-exit-loop-unroll-factor=2 -S %s | FileCheck --check-prefix=UNROLL2 %s
6+
; RUN: opt -p loop-unroll -mcpu=apple-m4 -aarch64-small-multi-exit-loop-unroll=2 -S %s | FileCheck --check-prefix=UNROLL2 %s
77
; RUN: opt -p loop-unroll -mcpu=cortex-a57 -S %s | FileCheck --check-prefix=OTHER %s
88

99
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"

llvm/test/Transforms/LoopUnroll/AArch64/unrolling-multi-exit.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2-
; RUN: opt -p loop-unroll -mcpu=generic -small-multi-exit-loop-unroll-factor=2 -S %s | FileCheck --check-prefixes=COMMON,UNROLL2 %s
2+
; RUN: opt -p loop-unroll -mcpu=generic -aarch64-small-multi-exit-loop-unroll=2 -S %s | FileCheck --check-prefixes=COMMON,UNROLL2 %s
33
; RUN: opt -p loop-unroll -mcpu=generic -S %s | FileCheck --check-prefixes=COMMON,GENERIC %s
44

55
target triple = "aarch64-linux-gnu"

0 commit comments

Comments
 (0)