
Commit 38cadab

[WIP] Use CodeSize cost kind for optsize
1 parent 86779da commit 38cadab
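
This work-in-progress change threads a TargetTransformInfo::TargetCostKind through the VPlan-based cost model so that loops in functions optimizing for size can be costed with TTI::TCK_CodeSize instead of the previously hard-coded TTI::TCK_RecipThroughput. The LoopVectorize.cpp hunk that actually selects the cost kind is collapsed below, so the selection site is not visible here; what follows is a minimal sketch of what such a selection could look like under that assumption (only the two TCK_* enumerators appear in the rendered diff; the surrounding names are illustrative):

    // Hypothetical selection site: cost for code size when the enclosing
    // function is marked optsize, for reciprocal throughput otherwise.
    TTI::TargetCostKind CostKind = F->hasOptSize()
                                       ? TTI::TCK_CodeSize
                                       : TTI::TCK_RecipThroughput;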

File tree: 11 files changed (+641, −419 lines)


llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 67 additions & 68 deletions
Large diffs are not rendered by default.

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 1 addition & 1 deletion
@@ -779,7 +779,7 @@ InstructionCost VPRegionBlock::cost(ElementCount VF, VPCostContext &Ctx) {
   InstructionCost BackedgeCost =
       ForceTargetInstructionCost.getNumOccurrences()
           ? InstructionCost(ForceTargetInstructionCost.getNumOccurrences())
-          : Ctx.TTI.getCFInstrCost(Instruction::Br, TTI::TCK_RecipThroughput);
+          : Ctx.TTI.getCFInstrCost(Instruction::Br, Ctx.CostKind);
   LLVM_DEBUG(dbgs() << "Cost of " << BackedgeCost << " for VF " << VF
                     << ": vector loop backedge\n");
   Cost += BackedgeCost;

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 4 additions & 2 deletions
@@ -692,11 +692,13 @@ struct VPCostContext {
   LLVMContext &LLVMCtx;
   LoopVectorizationCostModel &CM;
   SmallPtrSet<Instruction *, 8> SkipCostComputation;
+  TargetTransformInfo::TargetCostKind CostKind;

   VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI,
-                Type *CanIVTy, LoopVectorizationCostModel &CM)
+                Type *CanIVTy, LoopVectorizationCostModel &CM,
+                TargetTransformInfo::TargetCostKind CostKind)
       : TTI(TTI), TLI(TLI), Types(CanIVTy), LLVMCtx(CanIVTy->getContext()),
-        CM(CM) {}
+        CM(CM), CostKind(CostKind) {}

   /// Return the cost for \p UI with \p VF using the legacy cost model as
   /// fallback until computing the cost of all recipes migrates to VPlan.
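
With the extra member, recipes can read the cost kind from the shared context instead of choosing one themselves. A sketch of constructing the context under the new signature; TTI, TLI, CanIVTy, and CM name the parameters from the hunk above, while CostKind is whatever the caller selected:

    // The cost kind is now fixed once at context-construction time and
    // reused by every recipe's computeCost.
    VPCostContext CostCtx(TTI, TLI, CanIVTy, CM, CostKind);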

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 14 additions & 15 deletions
@@ -924,7 +924,7 @@ void VPWidenCallRecipe::execute(VPTransformState &State) {

 InstructionCost VPWidenCallRecipe::computeCost(ElementCount VF,
                                                VPCostContext &Ctx) const {
-  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+  TTI::TargetCostKind CostKind = Ctx.CostKind;
   return Ctx.TTI.getCallInstrCost(nullptr, Variant->getReturnType(),
                                   Variant->getFunctionType()->params(),
                                   CostKind);
@@ -1004,7 +1004,7 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {

 InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
                                                     VPCostContext &Ctx) const {
-  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+  TTI::TargetCostKind CostKind = Ctx.CostKind;

   // Some backends analyze intrinsic arguments to determine cost. Use the
   // underlying value for the operand if it has one. Otherwise try to use the
@@ -1144,8 +1144,7 @@ InstructionCost VPHistogramRecipe::computeCost(ElementCount VF,
                                      {PtrTy, IncTy, MaskTy});

   // Add the costs together with the add/sub operation.
-  return Ctx.TTI.getIntrinsicInstrCost(
-             ICA, TargetTransformInfo::TCK_RecipThroughput) +
+  return Ctx.TTI.getIntrinsicInstrCost(ICA, Ctx.CostKind) +
          MulCost + Ctx.TTI.getArithmeticInstrCost(Opcode, VTy);
 }

@@ -1207,7 +1206,7 @@ InstructionCost VPWidenSelectRecipe::computeCost(ElementCount VF,
   bool ScalarCond = getOperand(0)->isDefinedOutsideLoopRegions();
   Type *ScalarTy = Ctx.Types.inferScalarType(this);
   Type *VectorTy = ToVectorTy(Ctx.Types.inferScalarType(this), VF);
-  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+  TTI::TargetCostKind CostKind = Ctx.CostKind;

   VPValue *Op0, *Op1;
   using namespace llvm::VPlanPatternMatch;
@@ -1380,7 +1379,7 @@ void VPWidenRecipe::execute(VPTransformState &State) {

 InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
                                            VPCostContext &Ctx) const {
-  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+  TTI::TargetCostKind CostKind = Ctx.CostKind;
   switch (Opcode) {
   case Instruction::FNeg: {
     Type *VectorTy = ToVectorTy(Ctx.Types.inferScalarType(this), VF);
@@ -1572,7 +1571,7 @@ InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF,
   auto *DestTy = cast<VectorType>(ToVectorTy(getResultType(), VF));
   // Arm TTI will use the underlying instruction to determine the cost.
   return Ctx.TTI.getCastInstrCost(
-      Opcode, DestTy, SrcTy, CCH, TTI::TCK_RecipThroughput,
+      Opcode, DestTy, SrcTy, CCH, Ctx.CostKind,
       dyn_cast_if_present<Instruction>(getUnderlyingValue()));
 }

@@ -1590,7 +1589,7 @@ void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,

 InstructionCost VPHeaderPHIRecipe::computeCost(ElementCount VF,
                                                VPCostContext &Ctx) const {
-  return Ctx.TTI.getCFInstrCost(Instruction::PHI, TTI::TCK_RecipThroughput);
+  return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
 }

 /// This function adds
@@ -2081,7 +2080,7 @@ void VPBlendRecipe::execute(VPTransformState &State) {

 InstructionCost VPBlendRecipe::computeCost(ElementCount VF,
                                            VPCostContext &Ctx) const {
-  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+  TTI::TargetCostKind CostKind = Ctx.CostKind;

   // Handle cases where only the first lane is used the same way as the legacy
   // cost model.
@@ -2211,7 +2210,7 @@ InstructionCost VPReductionRecipe::computeCost(ElementCount VF,
   RecurKind RdxKind = RdxDesc.getRecurrenceKind();
   Type *ElementTy = Ctx.Types.inferScalarType(this);
   auto *VectorTy = cast<VectorType>(ToVectorTy(ElementTy, VF));
-  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+  TTI::TargetCostKind CostKind = Ctx.CostKind;
   unsigned Opcode = RdxDesc.getOpcode();

   // TODO: Support any-of and in-loop reductions.
@@ -2466,7 +2465,7 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
       getLoadStoreAlignment(const_cast<Instruction *>(&Ingredient));
   unsigned AS =
       getLoadStoreAddressSpace(const_cast<Instruction *>(&Ingredient));
-  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+  TTI::TargetCostKind CostKind = Ctx.CostKind;

   if (!Consecutive) {
     // TODO: Using the original IR may not be accurate.
@@ -2613,7 +2612,7 @@ InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF,
       getLoadStoreAlignment(const_cast<Instruction *>(&Ingredient));
   unsigned AS =
       getLoadStoreAddressSpace(const_cast<Instruction *>(&Ingredient));
-  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+  TTI::TargetCostKind CostKind = Ctx.CostKind;
   InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost(
       Ingredient.getOpcode(), Ty, Alignment, AS, CostKind);
   if (!Reverse)
@@ -2734,7 +2733,7 @@ InstructionCost VPWidenStoreEVLRecipe::computeCost(ElementCount VF,
       getLoadStoreAlignment(const_cast<Instruction *>(&Ingredient));
   unsigned AS =
       getLoadStoreAddressSpace(const_cast<Instruction *>(&Ingredient));
-  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+  TTI::TargetCostKind CostKind = Ctx.CostKind;
   InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost(
       Ingredient.getOpcode(), Ty, Alignment, AS, CostKind);
   if (!Reverse)
@@ -3099,7 +3098,7 @@ InstructionCost VPInterleaveRecipe::computeCost(ElementCount VF,
                            : getStoredValues()[InsertPosIdx]);
   auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
   unsigned AS = getLoadStoreAddressSpace(InsertPos);
-  enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+  enum TTI::TargetCostKind CostKind = Ctx.CostKind;

   unsigned InterleaveFactor = IG->getFactor();
   auto *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor);
@@ -3336,7 +3335,7 @@ void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
 InstructionCost
 VPFirstOrderRecurrencePHIRecipe::computeCost(ElementCount VF,
                                              VPCostContext &Ctx) const {
-  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+  TTI::TargetCostKind CostKind = Ctx.CostKind;
   if (VF.isScalar())
     return Ctx.TTI.getCFInstrCost(Instruction::PHI, CostKind);
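
Every hunk above makes the same substitution: the hard-coded TTI::TCK_RecipThroughput becomes Ctx.CostKind. The substitution matters because a single TTI query can answer differently per cost kind; a hedged sketch (the relative values are target-dependent and not taken from this commit):

    // Same query, two cost kinds: a branch that is nearly free by
    // throughput may still cost an instruction of code size, which is
    // what optsize should be minimizing.
    InstructionCost Throughput =
        Ctx.TTI.getCFInstrCost(Instruction::Br, TTI::TCK_RecipThroughput);
    InstructionCost Size =
        Ctx.TTI.getCFInstrCost(Instruction::Br, TTI::TCK_CodeSize);

The test updates below are consistent with this: under a size-based cost model the tail-folded vector loop for the optsize function is no longer emitted, so its CHECK lines are removed.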

llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll

Lines changed: 5 additions & 5 deletions
@@ -1,22 +1,22 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; REQUIRES: asserts
 ; RUN: opt < %s -passes=loop-vectorize -disable-output -debug-only=loop-vectorize 2>&1 | FileCheck %s --check-prefix=COST
-; RUN: opt < %s -passes=loop-vectorize,instcombine,simplifycfg -force-vector-width=2 -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s
+; RUN: opt < %s -passes=loop-vectorize,instcombine,simplifycfg -force-vector-width=2 -force-vector-interleave=1 -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s

 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64--linux-gnu"

 ; This test checks that we correctly compute the scalarized operands for a
 ; user-specified vectorization factor when interleaving is disabled. We use the
-; "optsize" attribute to disable all interleaving calculations. A cost of 4
-; for %var4 indicates that we would scalarize it's operand (%var3), giving
-; %var4 a lower scalarization overhead.
+; -force-vector-interleave=1 option to disable all interleaving calculations.
+; A cost of 4 for %var4 indicates that we would scalarize it's operand (%var3),
+; giving %var4 a lower scalarization overhead.
 ;
 ; COST-LABEL: predicated_udiv_scalarized_operand
 ; COST: Cost of 5 for VF 2: profitable to scalarize %var4 = udiv i64 %var2, %var3
 ;
 ;
-define i64 @predicated_udiv_scalarized_operand(ptr %a, i64 %x) optsize {
+define i64 @predicated_udiv_scalarized_operand(ptr %a, i64 %x) {
 ; CHECK-LABEL: @predicated_udiv_scalarized_operand(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]

llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll

Lines changed: 4 additions & 98 deletions
@@ -1592,54 +1592,9 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) optsize {
 ; DEFAULT-LABEL: define void @redundant_branch_and_tail_folding(
 ; DEFAULT-SAME: ptr [[DST:%.*]], i1 [[C:%.*]]) #[[ATTR4:[0-9]+]] {
 ; DEFAULT-NEXT: entry:
-; DEFAULT-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; DEFAULT: vector.ph:
-; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]]
-; DEFAULT: vector.body:
-; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
-; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
-; DEFAULT-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], splat (i64 20)
-; DEFAULT-NEXT: [[TMP1:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1)
-; DEFAULT-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i32>
-; DEFAULT-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
-; DEFAULT-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
-; DEFAULT: pred.store.if:
-; DEFAULT-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0
-; DEFAULT-NEXT: store i32 [[TMP4]], ptr [[DST]], align 4
-; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE]]
-; DEFAULT: pred.store.continue:
-; DEFAULT-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
-; DEFAULT-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
-; DEFAULT: pred.store.if1:
-; DEFAULT-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1
-; DEFAULT-NEXT: store i32 [[TMP6]], ptr [[DST]], align 4
-; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE2]]
-; DEFAULT: pred.store.continue2:
-; DEFAULT-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
-; DEFAULT-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
-; DEFAULT: pred.store.if3:
-; DEFAULT-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2
-; DEFAULT-NEXT: store i32 [[TMP8]], ptr [[DST]], align 4
-; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE4]]
-; DEFAULT: pred.store.continue4:
-; DEFAULT-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
-; DEFAULT-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
-; DEFAULT: pred.store.if5:
-; DEFAULT-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
-; DEFAULT-NEXT: store i32 [[TMP10]], ptr [[DST]], align 4
-; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE6]]
-; DEFAULT: pred.store.continue6:
-; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
-; DEFAULT-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24
-; DEFAULT-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
-; DEFAULT: middle.block:
-; DEFAULT-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
-; DEFAULT: scalar.ph:
-; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 24, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
 ; DEFAULT-NEXT: br label [[LOOP_HEADER:%.*]]
 ; DEFAULT: loop.header:
-; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
 ; DEFAULT-NEXT: br i1 [[C]], label [[LOOP_LATCH]], label [[THEN:%.*]]
 ; DEFAULT: then:
 ; DEFAULT-NEXT: br label [[LOOP_LATCH]]
@@ -1648,61 +1603,16 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) optsize {
 ; DEFAULT-NEXT: [[T:%.*]] = trunc nuw nsw i64 [[IV_NEXT]] to i32
 ; DEFAULT-NEXT: store i32 [[T]], ptr [[DST]], align 4
 ; DEFAULT-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 21
-; DEFAULT-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP29:![0-9]+]]
+; DEFAULT-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP_HEADER]]
 ; DEFAULT: exit:
 ; DEFAULT-NEXT: ret void
 ;
 ; PRED-LABEL: define void @redundant_branch_and_tail_folding(
 ; PRED-SAME: ptr [[DST:%.*]], i1 [[C:%.*]]) #[[ATTR4:[0-9]+]] {
 ; PRED-NEXT: entry:
-; PRED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; PRED: vector.ph:
-; PRED-NEXT: br label [[VECTOR_BODY:%.*]]
-; PRED: vector.body:
-; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
-; PRED-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
-; PRED-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], splat (i64 20)
-; PRED-NEXT: [[TMP1:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1)
-; PRED-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i32>
-; PRED-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
-; PRED-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
-; PRED: pred.store.if:
-; PRED-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0
-; PRED-NEXT: store i32 [[TMP4]], ptr [[DST]], align 4
-; PRED-NEXT: br label [[PRED_STORE_CONTINUE]]
-; PRED: pred.store.continue:
-; PRED-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
-; PRED-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
-; PRED: pred.store.if1:
-; PRED-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1
-; PRED-NEXT: store i32 [[TMP6]], ptr [[DST]], align 4
-; PRED-NEXT: br label [[PRED_STORE_CONTINUE2]]
-; PRED: pred.store.continue2:
-; PRED-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
-; PRED-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
-; PRED: pred.store.if3:
-; PRED-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2
-; PRED-NEXT: store i32 [[TMP8]], ptr [[DST]], align 4
-; PRED-NEXT: br label [[PRED_STORE_CONTINUE4]]
-; PRED: pred.store.continue4:
-; PRED-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
-; PRED-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
-; PRED: pred.store.if5:
-; PRED-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
-; PRED-NEXT: store i32 [[TMP10]], ptr [[DST]], align 4
-; PRED-NEXT: br label [[PRED_STORE_CONTINUE6]]
-; PRED: pred.store.continue6:
-; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
-; PRED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24
-; PRED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
-; PRED: middle.block:
-; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
-; PRED: scalar.ph:
-; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 24, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
 ; PRED-NEXT: br label [[LOOP_HEADER:%.*]]
 ; PRED: loop.header:
-; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; PRED-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
 ; PRED-NEXT: br i1 [[C]], label [[LOOP_LATCH]], label [[THEN:%.*]]
 ; PRED: then:
 ; PRED-NEXT: br label [[LOOP_LATCH]]
@@ -1711,7 +1621,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) optsize {
 ; PRED-NEXT: [[T:%.*]] = trunc nuw nsw i64 [[IV_NEXT]] to i32
 ; PRED-NEXT: store i32 [[T]], ptr [[DST]], align 4
 ; PRED-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 21
-; PRED-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP26:![0-9]+]]
+; PRED-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP_HEADER]]
 ; PRED: exit:
 ; PRED-NEXT: ret void
 ;
@@ -1771,8 +1681,6 @@ attributes #2 = { vscale_range(2,2) "target-cpu"="neoverse-512tvb" }
 ; DEFAULT: [[LOOP25]] = distinct !{[[LOOP25]], [[META2]], [[META1]]}
 ; DEFAULT: [[LOOP26]] = distinct !{[[LOOP26]], [[META1]], [[META2]]}
 ; DEFAULT: [[LOOP27]] = distinct !{[[LOOP27]], [[META1]]}
-; DEFAULT: [[LOOP28]] = distinct !{[[LOOP28]], [[META1]], [[META2]]}
-; DEFAULT: [[LOOP29]] = distinct !{[[LOOP29]], [[META2]], [[META1]]}
 ;.
 ; PRED: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
 ; PRED: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
@@ -1799,6 +1707,4 @@ attributes #2 = { vscale_range(2,2) "target-cpu"="neoverse-512tvb" }
 ; PRED: [[LOOP22]] = distinct !{[[LOOP22]], [[META2]], [[META1]]}
 ; PRED: [[LOOP23]] = distinct !{[[LOOP23]], [[META1]], [[META2]]}
 ; PRED: [[LOOP24]] = distinct !{[[LOOP24]], [[META1]]}
-; PRED: [[LOOP25]] = distinct !{[[LOOP25]], [[META1]], [[META2]]}
-; PRED: [[LOOP26]] = distinct !{[[LOOP26]], [[META2]], [[META1]]}
 ;.
