Skip to content

Commit c5558a3

Browse files
committed
Revert "[LoopVectorize] Refine runtime memory check costs when there is an outer loop"
This reverts commit a152314.
1 parent ea7c58b commit c5558a3

File tree

2 files changed

+10
-39
lines changed

2 files changed

+10
-39
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 4 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -2055,7 +2055,7 @@ class GeneratedRTChecks {
20552055
}
20562056
}
20572057

2058-
InstructionCost getCost(Loop *OuterLoop) {
2058+
InstructionCost getCost() {
20592059
if (SCEVCheckBlock || MemCheckBlock)
20602060
LLVM_DEBUG(dbgs() << "Calculating cost of runtime checks:\n");
20612061

@@ -2076,45 +2076,16 @@ class GeneratedRTChecks {
20762076
LLVM_DEBUG(dbgs() << " " << C << " for " << I << "\n");
20772077
RTCheckCost += C;
20782078
}
2079-
if (MemCheckBlock) {
2080-
InstructionCost MemCheckCost = 0;
2079+
if (MemCheckBlock)
20812080
for (Instruction &I : *MemCheckBlock) {
20822081
if (MemCheckBlock->getTerminator() == &I)
20832082
continue;
20842083
InstructionCost C =
20852084
TTI->getInstructionCost(&I, TTI::TCK_RecipThroughput);
20862085
LLVM_DEBUG(dbgs() << " " << C << " for " << I << "\n");
2087-
MemCheckCost += C;
2088-
}
2089-
2090-
// If the runtime memory checks are being created inside an outer loop
2091-
// we should find out if these checks are outer loop invariant. If so,
2092-
// the checks will be hoisted out and so the effective cost will reduce
2093-
// according to the outer loop trip count.
2094-
if (OuterLoop) {
2095-
ScalarEvolution *SE = MemCheckExp.getSE();
2096-
const SCEV *Cond = SE->getSCEV(MemRuntimeCheckCond);
2097-
if (SE->isLoopInvariant(Cond, OuterLoop)) {
2098-
if (std::optional<unsigned> OuterTC =
2099-
getSmallBestKnownTC(*SE, OuterLoop))
2100-
MemCheckCost /= *OuterTC;
2101-
else {
2102-
// It seems reasonable to assume that we can reduce the effective
2103-
// cost of the checks even when we know nothing about the trip
2104-
// count. Here I've assumed that the outer loop executes at least
2105-
// twice.
2106-
MemCheckCost /= 2;
2107-
}
2108-
2109-
// Let's ensure the cost is always at least 1.
2110-
if (MemCheckCost == 0)
2111-
MemCheckCost = 1;
2112-
}
2086+
RTCheckCost += C;
21132087
}
21142088

2115-
RTCheckCost += MemCheckCost;
2116-
}
2117-
21182089
if (SCEVCheckBlock || MemCheckBlock)
21192090
LLVM_DEBUG(dbgs() << "Total cost of runtime checks: " << RTCheckCost
21202091
<< "\n");
@@ -9680,7 +9651,7 @@ static bool areRuntimeChecksProfitable(GeneratedRTChecks &Checks,
96809651
std::optional<unsigned> VScale, Loop *L,
96819652
ScalarEvolution &SE,
96829653
ScalarEpilogueLowering SEL) {
9683-
InstructionCost CheckCost = Checks.getCost(L->getParentLoop());
9654+
InstructionCost CheckCost = Checks.getCost();
96849655
if (!CheckCost.isValid())
96859656
return false;
96869657

llvm/test/Transforms/LoopVectorize/AArch64/low_trip_memcheck_cost.ll

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ target triple = "aarch64-unknown-linux-gnu"
77
define void @outer_no_tc(ptr nocapture noundef %a, ptr nocapture noundef readonly %b, i64 noundef %m, i64 noundef %n) {
88
; CHECK-LABEL: LV: Checking a loop in 'outer_no_tc'
99
; CHECK: Calculating cost of runtime checks:
10-
; CHECK: Total cost of runtime checks: 3
10+
; CHECK: Total cost of runtime checks: 6
1111
; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
1212
entry:
1313
br label %outer.loop
@@ -43,7 +43,7 @@ outer.exit:
4343
define void @outer_known_tc3(ptr nocapture noundef %a, ptr nocapture noundef readonly %b, i64 noundef %n) {
4444
; CHECK-LABEL: LV: Checking a loop in 'outer_known_tc3'
4545
; CHECK: Calculating cost of runtime checks:
46-
; CHECK: Total cost of runtime checks: 2
46+
; CHECK: Total cost of runtime checks: 6
4747
; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
4848
entry:
4949
br label %outer.loop
@@ -79,7 +79,7 @@ outer.exit:
7979
define void @outer_known_tc64(ptr nocapture noundef %a, ptr nocapture noundef readonly %b, i64 noundef %n) {
8080
; CHECK-LABEL: LV: Checking a loop in 'outer_known_tc64'
8181
; CHECK: Calculating cost of runtime checks:
82-
; CHECK: Total cost of runtime checks: 1
82+
; CHECK: Total cost of runtime checks: 6
8383
; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
8484
entry:
8585
br label %outer.loop
@@ -115,7 +115,7 @@ outer.exit:
115115
define void @outer_pgo_3(ptr nocapture noundef %a, ptr nocapture noundef readonly %b, i64 noundef %m, i64 noundef %n) {
116116
; CHECK-LABEL: LV: Checking a loop in 'outer_pgo_3'
117117
; CHECK: Calculating cost of runtime checks:
118-
; CHECK: Total cost of runtime checks: 2
118+
; CHECK: Total cost of runtime checks: 6
119119
; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
120120
entry:
121121
br label %outer.loop
@@ -151,8 +151,8 @@ outer.exit:
151151
define void @outer_known_tc3_full_range_checks(ptr nocapture noundef %dst, ptr nocapture noundef readonly %src, i64 noundef %n) {
152152
; CHECK-LABEL: LV: Checking a loop in 'outer_known_tc3_full_range_checks'
153153
; CHECK: Calculating cost of runtime checks:
154-
; CHECK: Total cost of runtime checks: 2
155-
; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:4
154+
; CHECK: Total cost of runtime checks: 6
155+
; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:8
156156
entry:
157157
br label %outer.loop
158158

0 commit comments

Comments
 (0)