Skip to content

Commit 0f82469

Browse files
authored
[Passes] Run SimpleLoopUnswitch after introducing invariant branches. (#81271)
IndVars may be able to replace a loop-dependent condition with a loop-invariant one, but loop-unswitch runs before IndVars, so the invariant check remains in the loop. For example, consider a read-only loop with a bounds check: https://godbolt.org/z/8cdj4qhbG This patch uses an approach similar to the way extra cleanup passes are run on demand after vectorization (added in acea6e9). It introduces a new ShouldRunExtraSimpleLoopUnswitch analysis marker, which IndVars can use to indicate that extra unswitching is beneficial. ExtraSimpleLoopUnswitchPassManager uses this analysis to determine whether to run its passes on a loop. Compile-time impact (geomean) ranges from +0.0% to +0.02%: https://llvm-compile-time-tracker.com/compare.php?from=138c0beb109ffe47f75a0fe8c4dc2cdabe8a6532&to=19e6e99eeb280d426907ea73a21b139ba7225627&stat=instructions%3Au Compile-time impact (geomean) of unconditionally running SimpleLoopUnswitch ranges from +0.05% to +0.16%: https://llvm-compile-time-tracker.com/compare.php?from=138c0beb109ffe47f75a0fe8c4dc2cdabe8a6532&to=2930dfd5accdce2e6f8d5146ae4d626add2065a2&stat=instructions:u Unconditionally running SimpleLoopUnswitch seems to indicate that there are multiple other scenarios where we fail to run unswitching when opportunities remain. Fixes #85551. PR: #81271
1 parent 8a4b7de commit 0f82469

File tree

8 files changed

+169
-37
lines changed

8 files changed

+169
-37
lines changed

llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "llvm/ADT/STLFunctionalExtras.h"
1313
#include "llvm/Analysis/LoopAnalysisManager.h"
1414
#include "llvm/IR/PassManager.h"
15+
#include "llvm/Transforms/Scalar/LoopPassManager.h"
1516

1617
namespace llvm {
1718

@@ -20,6 +21,40 @@ class Loop;
2021
class StringRef;
2122
class raw_ostream;
2223

24+
/// Marker loop analysis used to request an extra run of SimpleLoopUnswitch.
/// The result carries no data; what matters is whether a result is cached for
/// a given loop. IndVarSimplify computes it when it has introduced new
/// unswitching opportunities, and ExtraSimpleLoopUnswitchPassManager checks
/// for the cached result to decide whether to run its passes on that loop.
struct ShouldRunExtraSimpleLoopUnswitch
25+
: public AnalysisInfoMixin<ShouldRunExtraSimpleLoopUnswitch> {
26+
static AnalysisKey Key;
27+
/// Empty result type; it only defines how the marker is invalidated.
struct Result {
28+
/// Returns true (i.e. the marker is invalidated) unless the checker for
/// this analysis reports it as preserved via preservedWhenStateless().
bool invalidate(Loop &L, const PreservedAnalyses &PA,
29+
LoopAnalysisManager::Invalidator &) {
30+
// Check whether the analysis has been explicitly invalidated. Otherwise,
31+
// it remains preserved.
32+
auto PAC = PA.getChecker<ShouldRunExtraSimpleLoopUnswitch>();
33+
return !PAC.preservedWhenStateless();
34+
}
35+
};
36+
37+
/// Produces the (empty) marker result. None of the arguments are inspected.
Result run(Loop &L, LoopAnalysisManager &AM,
38+
LoopStandardAnalysisResults &AR) {
39+
return Result();
40+
}
41+
42+
// NOTE(review): presumably keeps pass instrumentation from skipping this
// entity — confirm against the pass manager's isRequired() contract.
static bool isRequired() { return true; }
43+
};
44+
45+
/// A LoopPassManager whose nested passes only run on loops for which a
/// ShouldRunExtraSimpleLoopUnswitch result is currently cached. The marker is
/// abandoned in the returned PreservedAnalyses whether or not the nested
/// passes ran.
struct ExtraSimpleLoopUnswitchPassManager : public LoopPassManager {
46+
/// Runs the nested loop passes on \p L if the marker analysis is cached for
/// it; otherwise does nothing to the loop.
///
/// \returns all analyses preserved, intersected with what the nested passes
/// preserved (when they ran), with ShouldRunExtraSimpleLoopUnswitch
/// abandoned.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
47+
LoopStandardAnalysisResults &AR, LPMUpdater &U) {
48+
// Start from "everything preserved"; this is returned (minus the marker)
// when the nested passes are skipped.
auto PA = PreservedAnalyses::all();
49+
// Only a cached result counts: getCachedResult does not compute the
// analysis, so loops nobody flagged are skipped.
if (AM.getCachedResult<ShouldRunExtraSimpleLoopUnswitch>(L))
50+
PA.intersect(LoopPassManager::run(L, AM, AR, U));
51+
// Abandon the marker unconditionally.
// NOTE(review): presumably so one request triggers at most one extra run —
// confirm against PreservedAnalyses::abandon semantics.
PA.abandon<ShouldRunExtraSimpleLoopUnswitch>();
52+
return PA;
53+
}
54+
55+
// NOTE(review): presumably ensures this wrapper itself is never skipped by
// pass instrumentation, so the marker is always cleaned up — confirm.
static bool isRequired() { return true; }
56+
};
57+
2358
/// This pass transforms loops that contain branches or switches on loop-
2459
/// invariant conditions to have multiple loops. For example, it turns the left
2560
/// into the right code:

llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
#ifndef LLVM_TRANSFORMS_UTILS_SIMPLIFYINDVAR_H
1616
#define LLVM_TRANSFORMS_UTILS_SIMPLIFYINDVAR_H
1717

18+
#include <utility>
19+
1820
namespace llvm {
1921

2022
class Type;
@@ -46,11 +48,16 @@ class IVVisitor {
4648
};
4749

4850
/// simplifyUsersOfIV - Simplify instructions that use this induction variable
49-
/// by using ScalarEvolution to analyze the IV's recurrence.
50-
bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT,
51-
LoopInfo *LI, const TargetTransformInfo *TTI,
52-
SmallVectorImpl<WeakTrackingVH> &Dead,
53-
SCEVExpander &Rewriter, IVVisitor *V = nullptr);
51+
/// by using ScalarEvolution to analyze the IV's recurrence. Returns a pair
52+
/// where the first entry indicates that the function makes changes and the
53+
/// second entry indicates that it introduced new opportunities for loop
54+
/// unswitching.
55+
std::pair<bool, bool> simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE,
56+
DominatorTree *DT, LoopInfo *LI,
57+
const TargetTransformInfo *TTI,
58+
SmallVectorImpl<WeakTrackingVH> &Dead,
59+
SCEVExpander &Rewriter,
60+
IVVisitor *V = nullptr);
5461

5562
/// SimplifyLoopIVs - Simplify users of induction variables within this
5663
/// loop. This does not actually change or add IVs.

llvm/lib/Passes/PassBuilderPipelines.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -651,6 +651,13 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
651651
LPM2.addPass(LoopIdiomRecognizePass());
652652
LPM2.addPass(IndVarSimplifyPass());
653653

654+
{
655+
ExtraSimpleLoopUnswitchPassManager ExtraPasses;
656+
ExtraPasses.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
657+
OptimizationLevel::O3));
658+
LPM2.addPass(std::move(ExtraPasses));
659+
}
660+
654661
invokeLateLoopOptimizationsEPCallbacks(LPM2, Level);
655662

656663
LPM2.addPass(LoopDeletionPass());

llvm/lib/Passes/PassRegistry.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -587,6 +587,9 @@ LOOP_ANALYSIS("ddg", DDGAnalysis())
587587
LOOP_ANALYSIS("iv-users", IVUsersAnalysis())
588588
LOOP_ANALYSIS("no-op-loop", NoOpLoopAnalysis())
589589
LOOP_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
590+
LOOP_ANALYSIS("should-run-extra-simple-loop-unswitch",
591+
ShouldRunExtraSimpleLoopUnswitch())
592+
590593
#undef LOOP_ANALYSIS
591594

592595
#ifndef LOOP_PASS

llvm/lib/Transforms/Scalar/IndVarSimplify.cpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
#include "llvm/Support/Debug.h"
7171
#include "llvm/Support/MathExtras.h"
7272
#include "llvm/Support/raw_ostream.h"
73+
#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
7374
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
7475
#include "llvm/Transforms/Utils/Local.h"
7576
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -137,6 +138,8 @@ class IndVarSimplify {
137138
SmallVector<WeakTrackingVH, 16> DeadInsts;
138139
bool WidenIndVars;
139140

141+
bool RunUnswitching = false;
142+
140143
bool handleFloatingPointIV(Loop *L, PHINode *PH);
141144
bool rewriteNonIntegerIVs(Loop *L);
142145

@@ -170,6 +173,8 @@ class IndVarSimplify {
170173
}
171174

172175
bool run(Loop *L);
176+
177+
bool runUnswitching() const { return RunUnswitching; }
173178
};
174179

175180
} // end anonymous namespace
@@ -615,9 +620,11 @@ bool IndVarSimplify::simplifyAndExtend(Loop *L,
615620
// Information about sign/zero extensions of CurrIV.
616621
IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
617622

618-
Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
619-
&Visitor);
623+
const auto &[C, U] = simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts,
624+
Rewriter, &Visitor);
620625

626+
Changed |= C;
627+
RunUnswitching |= U;
621628
if (Visitor.WI.WidestNativeType) {
622629
WideIVs.push_back(Visitor.WI);
623630
}
@@ -1874,6 +1881,7 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
18741881
if (OldCond->use_empty())
18751882
DeadInsts.emplace_back(OldCond);
18761883
Changed = true;
1884+
RunUnswitching = true;
18771885
}
18781886

18791887
return Changed;
@@ -2059,6 +2067,11 @@ PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
20592067

20602068
auto PA = getLoopPassPreservedAnalyses();
20612069
PA.preserveSet<CFGAnalyses>();
2070+
if (IVS.runUnswitching()) {
2071+
AM.getResult<ShouldRunExtraSimpleLoopUnswitch>(L, AR);
2072+
PA.preserve<ShouldRunExtraSimpleLoopUnswitch>();
2073+
}
2074+
20622075
if (AR.MSSA)
20632076
PA.preserve<MemorySSAAnalysis>();
20642077
return PA;

llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ static cl::opt<unsigned> InjectInvariantConditionHotnesThreshold(
133133
"not-taken 1/<this option> times or less."),
134134
cl::init(16));
135135

136+
AnalysisKey ShouldRunExtraSimpleLoopUnswitch::Key;
136137
namespace {
137138
struct CompareDesc {
138139
BranchInst *Term;

llvm/lib/Transforms/Utils/SimplifyIndVar.cpp

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ namespace {
6060
SmallVectorImpl<WeakTrackingVH> &DeadInsts;
6161

6262
bool Changed = false;
63+
bool RunUnswitching = false;
6364

6465
public:
6566
SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT,
@@ -72,6 +73,7 @@ namespace {
7273
}
7374

7475
bool hasChanged() const { return Changed; }
76+
bool runUnswitching() const { return RunUnswitching; }
7577

7678
/// Iteratively perform simplification on a worklist of users of the
7779
/// specified induction variable. This is the top-level driver that applies
@@ -233,6 +235,7 @@ bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp,
233235
ICmp->setPredicate(InvariantPredicate);
234236
ICmp->setOperand(0, NewLHS);
235237
ICmp->setOperand(1, NewRHS);
238+
RunUnswitching = true;
236239
return true;
237240
}
238241

@@ -993,14 +996,18 @@ void IVVisitor::anchor() { }
993996

994997
/// Simplify instructions that use this induction variable
995998
/// by using ScalarEvolution to analyze the IV's recurrence.
996-
bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT,
997-
LoopInfo *LI, const TargetTransformInfo *TTI,
998-
SmallVectorImpl<WeakTrackingVH> &Dead,
999-
SCEVExpander &Rewriter, IVVisitor *V) {
999+
/// Returns a pair where the first entry indicates that the function makes
1000+
/// changes and the second entry indicates that it introduced new opportunities
1001+
/// for loop unswitching.
1002+
std::pair<bool, bool> simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE,
1003+
DominatorTree *DT, LoopInfo *LI,
1004+
const TargetTransformInfo *TTI,
1005+
SmallVectorImpl<WeakTrackingVH> &Dead,
1006+
SCEVExpander &Rewriter, IVVisitor *V) {
10001007
SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, TTI,
10011008
Rewriter, Dead);
10021009
SIV.simplifyUsers(CurrIV, V);
1003-
return SIV.hasChanged();
1010+
return {SIV.hasChanged(), SIV.runUnswitching()};
10041011
}
10051012

10061013
/// Simplify users of induction variables within this
@@ -1014,8 +1021,9 @@ bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
10141021
#endif
10151022
bool Changed = false;
10161023
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1017-
Changed |=
1024+
const auto &[C, _] =
10181025
simplifyUsersOfIV(cast<PHINode>(I), SE, DT, LI, TTI, Dead, Rewriter);
1026+
Changed |= C;
10191027
}
10201028
return Changed;
10211029
}

llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll

Lines changed: 82 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -14,24 +14,50 @@ define i32 @read_only_loop_with_runtime_check(ptr noundef %array, i32 noundef %c
1414
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
1515
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[N]], -1
1616
; CHECK-NEXT: [[DOTNOT_NOT:%.*]] = icmp ult i32 [[TMP1]], [[COUNT]]
17+
; CHECK-NEXT: br i1 [[DOTNOT_NOT]], label [[FOR_BODY_PREHEADER10:%.*]], label [[IF_THEN:%.*]]
18+
; CHECK: for.body.preheader10:
19+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 8
20+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER13:%.*]], label [[VECTOR_PH:%.*]]
21+
; CHECK: vector.ph:
22+
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 4294967288
23+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
24+
; CHECK: vector.body:
25+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
26+
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
27+
; CHECK-NEXT: [[VEC_PHI11:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
28+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[INDEX]]
29+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 16
30+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
31+
; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
32+
; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
33+
; CHECK-NEXT: [[TMP5]] = add <4 x i32> [[WIDE_LOAD12]], [[VEC_PHI11]]
34+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
35+
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
36+
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
37+
; CHECK: middle.block:
38+
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP5]], [[TMP4]]
39+
; CHECK-NEXT: [[TMP7:%.*]] = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
40+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
41+
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER13]]
42+
; CHECK: for.body.preheader13:
43+
; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER10]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
44+
; CHECK-NEXT: [[SUM_07_PH:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER10]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
1745
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
1846
; CHECK: for.cond.cleanup:
19-
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[IF_END:%.*]] ]
47+
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
2048
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
2149
; CHECK: for.body:
22-
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END]] ]
23-
; CHECK-NEXT: [[SUM_07:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[ADD]], [[IF_END]] ]
24-
; CHECK-NEXT: br i1 [[DOTNOT_NOT]], label [[IF_END]], label [[IF_THEN:%.*]]
25-
; CHECK: if.then:
26-
; CHECK-NEXT: tail call void @llvm.trap()
27-
; CHECK-NEXT: unreachable
28-
; CHECK: if.end:
50+
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER13]] ]
51+
; CHECK-NEXT: [[SUM_07:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[SUM_07_PH]], [[FOR_BODY_PREHEADER13]] ]
2952
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[INDVARS_IV]]
30-
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
31-
; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP2]], [[SUM_07]]
53+
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
54+
; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP8]], [[SUM_07]]
3255
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
3356
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[TMP0]]
34-
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
57+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
58+
; CHECK: if.then:
59+
; CHECK-NEXT: tail call void @llvm.trap()
60+
; CHECK-NEXT: unreachable
3561
;
3662
entry:
3763
%array.addr = alloca ptr, align 8
@@ -103,24 +129,50 @@ define dso_local noundef i32 @sum_prefix_with_sum(ptr %s.coerce0, i64 %s.coerce1
103129
; CHECK: for.body.preheader:
104130
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
105131
; CHECK-NEXT: [[DOTNOT_NOT:%.*]] = icmp ult i64 [[TMP0]], [[S_COERCE1]]
132+
; CHECK-NEXT: br i1 [[DOTNOT_NOT]], label [[ENTRY:%.*]], label [[COND_FALSE_I:%.*]], !prof [[PROF4:![0-9]+]]
133+
; CHECK: for.body.preheader8:
134+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8
135+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER11:%.*]], label [[VECTOR_PH:%.*]]
136+
; CHECK: vector.ph:
137+
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -8
106138
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
139+
; CHECK: vector.body:
140+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_BODY]] ]
141+
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[FOR_BODY]] ]
142+
; CHECK-NEXT: [[VEC_PHI9:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[FOR_BODY]] ]
143+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[S_COERCE0]], i64 [[INDEX]]
144+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
145+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
146+
; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
147+
; CHECK-NEXT: [[TMP3]] = add <4 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
148+
; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[WIDE_LOAD10]], [[VEC_PHI9]]
149+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
150+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
151+
; CHECK-NEXT: br i1 [[TMP5]], label [[SPAN_CHECKED_ACCESS_EXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
152+
; CHECK: middle.block:
153+
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP4]], [[TMP3]]
154+
; CHECK-NEXT: [[ADD:%.*]] = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
155+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
156+
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER11]]
157+
; CHECK: for.body.preheader11:
158+
; CHECK-NEXT: [[I_07_PH:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[N_VEC]], [[SPAN_CHECKED_ACCESS_EXIT]] ]
159+
; CHECK-NEXT: [[RET_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD]], [[SPAN_CHECKED_ACCESS_EXIT]] ]
160+
; CHECK-NEXT: br label [[FOR_BODY1:%.*]]
107161
; CHECK: for.cond.cleanup:
108-
; CHECK-NEXT: [[RET_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[SPAN_CHECKED_ACCESS_EXIT:%.*]] ]
109-
; CHECK-NEXT: ret i32 [[RET_0_LCSSA]]
162+
; CHECK-NEXT: [[RET_0_LCSSA1:%.*]] = phi i32 [ 0, [[ENTRY1:%.*]] ], [ [[ADD]], [[SPAN_CHECKED_ACCESS_EXIT]] ], [ [[ADD1:%.*]], [[FOR_BODY1]] ]
163+
; CHECK-NEXT: ret i32 [[RET_0_LCSSA1]]
110164
; CHECK: for.body:
111-
; CHECK-NEXT: [[I_07:%.*]] = phi i64 [ [[INC:%.*]], [[SPAN_CHECKED_ACCESS_EXIT]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
112-
; CHECK-NEXT: [[RET_06:%.*]] = phi i32 [ [[ADD]], [[SPAN_CHECKED_ACCESS_EXIT]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
113-
; CHECK-NEXT: br i1 [[DOTNOT_NOT]], label [[SPAN_CHECKED_ACCESS_EXIT]], label [[COND_FALSE_I:%.*]], !prof [[PROF0:![0-9]+]]
114-
; CHECK: cond.false.i:
115-
; CHECK-NEXT: tail call void @llvm.trap()
116-
; CHECK-NEXT: unreachable
117-
; CHECK: span_checked_access.exit:
165+
; CHECK-NEXT: [[I_07:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY1]] ], [ [[I_07_PH]], [[FOR_BODY_PREHEADER11]] ]
166+
; CHECK-NEXT: [[RET_06:%.*]] = phi i32 [ [[ADD1]], [[FOR_BODY1]] ], [ [[RET_0_LCSSA]], [[FOR_BODY_PREHEADER11]] ]
118167
; CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr [[S_COERCE0]], i64 [[I_07]]
119168
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_I]], align 4
120-
; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP7]], [[RET_06]]
169+
; CHECK-NEXT: [[ADD1]] = add nsw i32 [[TMP7]], [[RET_06]]
121170
; CHECK-NEXT: [[INC]] = add nuw i64 [[I_07]], 1
122171
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
123-
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
172+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY1]], !llvm.loop [[LOOP6:![0-9]+]]
173+
; CHECK: cond.false.i:
174+
; CHECK-NEXT: tail call void @llvm.trap()
175+
; CHECK-NEXT: unreachable
124176
;
125177
entry:
126178
%s = alloca %"class.std::__1::span", align 8
@@ -176,7 +228,7 @@ define hidden noundef nonnull align 4 dereferenceable(4) ptr @span_checked_acces
176228
; CHECK-NEXT: [[__SIZE__I:%.*]] = getelementptr inbounds i8, ptr [[THIS]], i64 8
177229
; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[__SIZE__I]], align 8
178230
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP0]], [[__IDX]]
179-
; CHECK-NEXT: br i1 [[CMP]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]], !prof [[PROF0]]
231+
; CHECK-NEXT: br i1 [[CMP]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]], !prof [[PROF4]]
180232
; CHECK: cond.false:
181233
; CHECK-NEXT: tail call void @llvm.trap()
182234
; CHECK-NEXT: unreachable
@@ -237,5 +289,11 @@ declare void @llvm.trap()
237289

238290
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
239291
;.
240-
; CHECK: [[PROF0]] = !{!"branch_weights", i32 2000, i32 1}
292+
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
293+
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
294+
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
295+
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
296+
; CHECK: [[PROF4]] = !{!"branch_weights", i32 2000, i32 1}
297+
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
298+
; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META2]], [[META1]]}
241299
;.

0 commit comments

Comments
 (0)