Skip to content

Commit dce52c9

Browse files
committed
Address review comments
1 parent 8a0393b commit dce52c9

File tree

3 files changed

+13
-16
lines changed

3 files changed

+13
-16
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10194,8 +10194,7 @@ static InstructionCost calculateEarlyExitCost(LoopVectorizationCostModel &CM,
1019410194
// vector.early.exit block, which may contain work to calculate the exit
1019510195
// values of variables used outside the loop.
1019610196
if (PredVPBB != Plan.getMiddleBlock())
10197-
for (auto &R : *(cast<VPBasicBlock>(PredVPBB)))
10198-
Cost += R.cost(VF, CostCtx);
10197+
Cost += PredVPBB->cost(VF, CostCtx);
1019910198
}
1020010199
}
1020110200
return Cost;
@@ -10218,10 +10217,9 @@ static bool isOutsideLoopWorkProfitable(GeneratedRTChecks &Checks,
1021810217
if (!TotalCost.isValid())
1021910218
return false;
1022010219

10221-
// Add on the cost of work required in the vector early exit block, if one
10222-
// exists.
10223-
if (CM.Legal->hasUncountableEarlyExit())
10224-
TotalCost += calculateEarlyExitCost(CM, Plan, VF.Width);
10220+
// Add on the cost of any work required in the vector early exit block, if
10221+
// one exists.
10222+
TotalCost += calculateEarlyExitCost(CM, Plan, VF.Width);
1022510223

1022610224
// When interleaving only, scalar and vector cost will be equal, which in turn
1022710225
// would lead to a divide by 0. Fall back to hard threshold.

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -752,9 +752,8 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
752752
InstructionCost Cost = Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);
753753
// Add on the cost of extracting the element.
754754
auto *VecTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
755-
Cost += Ctx.TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy,
756-
Ctx.CostKind);
757-
return Cost;
755+
return Cost + Ctx.TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy,
756+
Ctx.CostKind);
758757
}
759758
default:
760759
// TODO: Compute cost of other VPInstructions once the legacy cost model has

llvm/test/Transforms/LoopVectorize/AArch64/low_trip_memcheck_cost.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ define void @no_outer_loop(ptr nocapture noundef %a, ptr nocapture noundef reado
88
; CHECK: Calculating cost of runtime checks:
99
; CHECK-NOT: We expect runtime memory checks to be hoisted out of the outer loop.
1010
; CHECK: Total cost of runtime checks: 4
11-
; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
11+
; CHECK: LV: Minimum required TC for runtime checks to be profitable:16
1212
entry:
1313
br label %inner.loop
1414

@@ -34,7 +34,7 @@ define void @outer_no_tc(ptr nocapture noundef %a, ptr nocapture noundef readonl
3434
; CHECK: Calculating cost of runtime checks:
3535
; CHECK: We expect runtime memory checks to be hoisted out of the outer loop. Cost reduced from 6 to 3
3636
; CHECK: Total cost of runtime checks: 3
37-
; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
37+
; CHECK: LV: Minimum required TC for runtime checks to be profitable:16
3838
entry:
3939
br label %outer.loop
4040

@@ -71,7 +71,7 @@ define void @outer_known_tc3(ptr nocapture noundef %a, ptr nocapture noundef rea
7171
; CHECK: Calculating cost of runtime checks:
7272
; CHECK: We expect runtime memory checks to be hoisted out of the outer loop. Cost reduced from 6 to 2
7373
; CHECK: Total cost of runtime checks: 2
74-
; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
74+
; CHECK: LV: Minimum required TC for runtime checks to be profitable:16
7575
entry:
7676
br label %outer.loop
7777

@@ -108,7 +108,7 @@ define void @outer_known_tc64(ptr nocapture noundef %a, ptr nocapture noundef re
108108
; CHECK: Calculating cost of runtime checks:
109109
; CHECK: We expect runtime memory checks to be hoisted out of the outer loop. Cost reduced from 6 to 1
110110
; CHECK: Total cost of runtime checks: 1
111-
; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
111+
; CHECK: LV: Minimum required TC for runtime checks to be profitable:16
112112
entry:
113113
br label %outer.loop
114114

@@ -145,7 +145,7 @@ define void @outer_pgo_3(ptr nocapture noundef %a, ptr nocapture noundef readonl
145145
; CHECK: Calculating cost of runtime checks:
146146
; CHECK: We expect runtime memory checks to be hoisted out of the outer loop. Cost reduced from 6 to 2
147147
; CHECK: Total cost of runtime checks: 2
148-
; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
148+
; CHECK: LV: Minimum required TC for runtime checks to be profitable:16
149149
entry:
150150
br label %outer.loop
151151

@@ -182,7 +182,7 @@ define void @outer_pgo_minus1(ptr nocapture noundef %a, ptr nocapture noundef re
182182
; CHECK: Calculating cost of runtime checks:
183183
; CHECK: We expect runtime memory checks to be hoisted out of the outer loop. Cost reduced from 6 to 3
184184
; CHECK: Total cost of runtime checks: 3
185-
; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
185+
; CHECK: LV: Minimum required TC for runtime checks to be profitable:16
186186
entry:
187187
br label %outer.loop
188188

@@ -219,7 +219,7 @@ define void @outer_known_tc3_full_range_checks(ptr nocapture noundef %dst, ptr n
219219
; CHECK: Calculating cost of runtime checks:
220220
; CHECK: We expect runtime memory checks to be hoisted out of the outer loop. Cost reduced from 6 to 2
221221
; CHECK: Total cost of runtime checks: 2
222-
; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:4
222+
; CHECK: LV: Minimum required TC for runtime checks to be profitable:4
223223
entry:
224224
br label %outer.loop
225225

0 commit comments

Comments
 (0)