Skip to content

Commit 6702594

Browse files
[LoopCacheAnalysis] Fix loop cache cost to always round the cost up to the nearest integer number (#88915)
Currently, loop cache analysis uses the following formula to evaluate the cost of a RefGroup for a consecutive memory access: `RefCost=(TripCount*Stride)/CLS`. This cost evaluates to zero when `TripCount*Stride` is smaller than the cache line size. This results in a wrong cost value for the loop and misleads LoopInterchange decisions, as shown in [this case](https://llvm.godbolt.org/z/jTz1vn4hn). This patch fixes the problem by rounding the cost up to 1 when this happens.
1 parent 21ee278 commit 6702594

File tree

12 files changed

+122
-95
lines changed

12 files changed

+122
-95
lines changed

llvm/lib/Analysis/LoopCacheAnalysis.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,12 @@ CacheCostTy IndexedReference::computeRefCost(const Loop &L,
299299
Stride = SE.getNoopOrAnyExtend(Stride, WiderType);
300300
TripCount = SE.getNoopOrZeroExtend(TripCount, WiderType);
301301
const SCEV *Numerator = SE.getMulExpr(Stride, TripCount);
302-
RefCost = SE.getUDivExpr(Numerator, CacheLineSize);
302+
// Round the fractional cost up to the nearest integer number.
303+
// The impact is the most significant when cost is calculated
304+
// to be a number less than one, because it makes more sense
305+
// to say one cache line is used rather than zero cache lines
306+
// are used.
307+
RefCost = SE.getUDivCeilSCEV(Numerator, CacheLineSize);
303308

304309
LLVM_DEBUG(dbgs().indent(4)
305310
<< "Access is consecutive: RefCost=(TripCount*Stride)/CLS="

llvm/test/Analysis/LoopCacheAnalysis/PowerPC/LoopnestFixedSize.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ target triple = "powerpc64le-unknown-linux-gnu"
77
; The IR is copied from llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheckFixedSize.ll
88

99
; CHECK: Loop 'for.body' has cost = 4186116
10-
; CHECK-NEXT: Loop 'for.body4' has cost = 128898
10+
; CHECK-NEXT: Loop 'for.body4' has cost = 130944
1111

1212
;; #define N 1024
1313
;; #define M 2048
@@ -49,7 +49,7 @@ for.end13: ; preds = %for.inc11
4949

5050

5151
; CHECK: Loop 'for.body' has cost = 4186116
52-
; CHECK-NEXT: Loop 'for.body4' has cost = 128898
52+
; CHECK-NEXT: Loop 'for.body4' has cost = 130944
5353

5454
define void @t2(ptr %a) {
5555
entry:
@@ -87,7 +87,7 @@ declare ptr @func_with_returned_arg(ptr returned %arg)
8787
; CHECK-NEXT: Loop 'for.body4' has cost = 16762927104000000
8888
; CHECK-NEXT: Loop 'for.body8' has cost = 130960368000000
8989
; CHECK-NEXT: Loop 'for.body12' has cost = 1047682944000
90-
; CHECK-NEXT: Loop 'for.body16' has cost = 32260032000
90+
; CHECK-NEXT: Loop 'for.body16' has cost = 32772096000
9191

9292
;; #define N 128
9393
;; #define M 2048

llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ for.end: ; preds = %for.cond
3838

3939
; CHECK: Loop 'for.cond' has cost = 100000000
4040
; CHECK: Loop 'for.cond1' has cost = 1000000
41-
; CHECK: Loop 'for.cond5' has cost = 30000
41+
; CHECK: Loop 'for.cond5' has cost = 40000
4242

4343
@data = external dso_local global [2 x [4 x [18 x i32]]], align 1
4444

@@ -118,7 +118,7 @@ for.neg.end: ; preds = %for.neg.cond
118118
; access functions. When this is fixed this testcase should have a cost
119119
; approximately 2x higher.
120120

121-
; CHECK: Loop 'for.cond2' has cost = 2560
121+
; CHECK: Loop 'for.cond2' has cost = 2561
122122
define void @Test2(ptr %B) {
123123
entry:
124124
br label %for.cond2
@@ -148,7 +148,7 @@ for.end: ; preds = %for.cond
148148
; for (i = 40960; i > 0; i--)
149149
; C[i] = C[i];
150150

151-
; CHECK: Loop 'for.cond3' has cost = 2560
151+
; CHECK: Loop 'for.cond3' has cost = 2561
152152
define void @Test3(ptr %C) {
153153
entry:
154154
br label %for.cond3
@@ -177,7 +177,7 @@ for.end: ; preds = %for.cond
177177
; for (i = 0; i < 40960; i++)
178178
; D[i] = D[i];
179179

180-
; CHECK: Loop 'for.cond4' has cost = 2560
180+
; CHECK: Loop 'for.cond4' has cost = 2561
181181
define void @Test4(ptr %D) {
182182
entry:
183183
br label %for.cond4

llvm/test/Analysis/LoopCacheAnalysis/PowerPC/loads-store.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ target triple = "powerpc64le-unknown-linux-gnu"
1111
; }
1212

1313
; CHECK: Loop 'for.i' has cost = 3000000
14-
; CHECK-NEXT: Loop 'for.k' has cost = 2030000
15-
; CHECK-NEXT: Loop 'for.j' has cost = 1060000
14+
; CHECK-NEXT: Loop 'for.k' has cost = 2040000
15+
; CHECK-NEXT: Loop 'for.j' has cost = 1080000
1616

1717
define void @foo(i64 %n, i64 %m, i64 %o, ptr %A, ptr %B, ptr %C) {
1818
entry:

llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matmul.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ target triple = "powerpc64le-unknown-linux-gnu"
1111
; }
1212

1313
; CHECK:Loop 'for.i' has cost = 2010000
14-
; CHECK-NEXT:Loop 'for.k' has cost = 1040000
15-
; CHECK-NEXT:Loop 'for.j' has cost = 70000
14+
; CHECK-NEXT:Loop 'for.k' has cost = 1050000
15+
; CHECK-NEXT:Loop 'for.j' has cost = 90000
1616

1717
define void @matmul(i64 %n, i64 %m, i64 %o, ptr %A, ptr %B, ptr %C) {
1818
entry:

llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matvecmul.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ target triple = "powerpc64le-unknown-linux-gnu"
1717
; CHECK: Loop 'k_loop' has cost = 10200000000000000
1818
; CHECK-NEXT: Loop 'j_loop' has cost = 102000000000000
1919
; CHECK-NEXT: Loop 'i_loop' has cost = 1020000000000
20-
; CHECK-NEXT: Loop 'm_loop' has cost = 10700000000
21-
; CHECK-NEXT: Loop 'l_loop' has cost = 1300000000
20+
; CHECK-NEXT: Loop 'm_loop' has cost = 10800000000
21+
; CHECK-NEXT: Loop 'l_loop' has cost = 1500000000
2222

2323
%_elem_type_of_double = type <{ double }>
2424

llvm/test/Analysis/LoopCacheAnalysis/PowerPC/multi-store.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ target triple = "powerpc64le-unknown-linux-gnu"
55

66
; CHECK: Loop 'for.j' has cost = 201000000
77
; CHECK-NEXT: Loop 'for.i' has cost = 102000000
8-
; CHECK-NEXT: Loop 'for.k' has cost = 90000
8+
; CHECK-NEXT: Loop 'for.k' has cost = 120000
99

1010
;; Test to make sure when we have multiple conflicting access patterns, the
1111
;; chosen loop configuration favours the majority of those accesses.

llvm/test/Analysis/LoopCacheAnalysis/PowerPC/single-store.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ target triple = "powerpc64le-unknown-linux-gnu"
1212

1313
; CHECK: Loop 'for.i' has cost = 100000000
1414
; CHECK-NEXT: Loop 'for.j' has cost = 1000000
15-
; CHECK-NEXT: Loop 'for.k' has cost = 60000
15+
; CHECK-NEXT: Loop 'for.k' has cost = 70000
1616

1717
define void @foo(i64 %n, i64 %m, i64 %o, ptr %A) {
1818
entry:
@@ -90,7 +90,7 @@ for.end: ; preds = %for.end.loopexit, %
9090

9191
; CHECK: Loop 'for.i' has cost = 100000000
9292
; CHECK-NEXT: Loop 'for.j' has cost = 1000000
93-
; CHECK-NEXT: Loop 'for.k' has cost = 60000
93+
; CHECK-NEXT: Loop 'for.k' has cost = 70000
9494

9595
define void @foo2(i64 %n, i64 %m, i64 %o, ptr %A) {
9696
entry:

llvm/test/Analysis/LoopCacheAnalysis/PowerPC/stencil.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ target triple = "powerpc64le-unknown-linux-gnu"
1111
; }
1212
; }
1313

14-
; CHECK: Loop 'for.i' has cost = 20600
15-
; CHECK-NEXT: Loop 'for.j' has cost = 800
14+
; CHECK: Loop 'for.i' has cost = 20800
15+
; CHECK-NEXT: Loop 'for.j' has cost = 1000
1616

1717
define void @foo(i64 %n, i64 %m, ptr %A, ptr %B, ptr %C) {
1818
entry:

llvm/test/Analysis/LoopCacheAnalysis/compute-cost.ll

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
; Check IndexedReference::computeRefCost can handle type differences between
99
; Stride and TripCount
1010

11+
; Round costs up to the nearest whole number, e.g. in 'for.cond5' the cost is calculated as 12.5, and
12+
; it makes more sense to say 13 cache lines are used rather than 12 cache lines.
13+
1114
; SMALLER-CACHELINE: Loop 'for.cond' has cost = 256
1215
; LARGER-CACHELINE: Loop 'for.cond' has cost = 32
1316
%struct._Handleitem = type { ptr }
@@ -40,10 +43,10 @@ for.end: ; preds = %for.cond
4043

4144
; SMALLER-CACHELINE: Loop 'for.cond' has cost = 100000000
4245
; SMALLER-CACHELINE: Loop 'for.cond1' has cost = 1000000
43-
; SMALLER-CACHELINE: Loop 'for.cond5' has cost = 120000
46+
; SMALLER-CACHELINE: Loop 'for.cond5' has cost = 130000
4447
; LARGER-CACHELINE: Loop 'for.cond' has cost = 100000000
4548
; LARGER-CACHELINE: Loop 'for.cond1' has cost = 1000000
46-
; LARGER-CACHELINE: Loop 'for.cond5' has cost = 10000
49+
; LARGER-CACHELINE: Loop 'for.cond5' has cost = 20000
4750
@data = external dso_local global [2 x [4 x [18 x i32]]], align 1
4851

4952
define dso_local void @handle_to_ptr_2(i1 %b0, i1 %b1, i1 %b2) {
@@ -122,8 +125,8 @@ for.neg.end: ; preds = %for.neg.cond
122125
; access functions. When this is fixed this testcase should have a cost
123126
; approximately 2x higher.
124127

125-
; SMALLER-CACHELINE: Loop 'for.cond2' has cost = 10240
126-
; LARGER-CACHELINE: Loop 'for.cond2' has cost = 1280
128+
; SMALLER-CACHELINE: Loop 'for.cond2' has cost = 10241
129+
; LARGER-CACHELINE: Loop 'for.cond2' has cost = 1281
127130
define void @Test2(ptr %B) {
128131
entry:
129132
br label %for.cond2
@@ -153,8 +156,8 @@ for.end: ; preds = %for.cond
153156
; for (i = 40960; i > 0; i--)
154157
; C[i] = C[i];
155158

156-
; SMALLER-CACHELINE: Loop 'for.cond3' has cost = 10240
157-
; LARGER-CACHELINE: Loop 'for.cond3' has cost = 1280
159+
; SMALLER-CACHELINE: Loop 'for.cond3' has cost = 10241
160+
; LARGER-CACHELINE: Loop 'for.cond3' has cost = 1281
158161
define void @Test3(ptr %C) {
159162
entry:
160163
br label %for.cond3
@@ -183,8 +186,8 @@ for.end: ; preds = %for.cond
183186
; for (i = 0; i < 40960; i++)
184187
; D[i] = D[i];
185188

186-
; SMALLER-CACHELINE: Loop 'for.cond4' has cost = 10240
187-
; LARGER-CACHELINE: Loop 'for.cond4' has cost = 1280
189+
; SMALLER-CACHELINE: Loop 'for.cond4' has cost = 10241
190+
; LARGER-CACHELINE: Loop 'for.cond4' has cost = 1281
188191
define void @Test4(ptr %D) {
189192
entry:
190193
br label %for.cond4
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
; RUN: opt < %s -cache-line-size=64 -passes='print<loop-cache-cost>' -disable-output 2>&1 | FileCheck %s
2+
3+
;; This test checks the effect of rounding cache cost to 1 when it is
4+
;; evaluated to 0 because at least 1 cache line is accessed by the loopnest.
5+
;; It does not make sense to output that zero cache lines are used.
6+
;; The costs of the reference groups for B[j], C[j], D[j] and E[j] were
7+
;; calculated as 0 before, but now they are 1, which makes each loop cost more reasonable.
8+
;
9+
; void test(int n, int m, int o, int A[2][3], int B[2], int C[2], int D[2], int E[2]) {
10+
; for (int i = 0; i < 3; i++)
11+
; for (int j = 0; j < 2; j++)
12+
; A[j][i] = 1;
13+
; B[j] = 1;
14+
; C[j] = 1;
15+
; D[j] = 1;
16+
; E[j] = 1;
17+
; }
18+
19+
; CHECK: Loop 'for.j' has cost = 18
20+
; CHECK-NEXT: Loop 'for.i' has cost = 10
21+
22+
define void @test(ptr %A, ptr %B, ptr %C, ptr %D, ptr %E) {
23+
24+
entry:
25+
br label %for.i.preheader.split
26+
27+
for.i.preheader.split: ; preds = %for.i.preheader
28+
br label %for.i
29+
30+
for.i: ; preds = %for.inci, %for.i.preheader.split
31+
%i = phi i64 [ %inci, %for.inci ], [ 0, %for.i.preheader.split ]
32+
br label %for.j
33+
34+
for.j: ; preds = %for.incj, %for.i
35+
%j = phi i64 [ %incj, %for.j ], [ 0, %for.i ]
36+
%mul_j = mul nsw i64 %j, 3
37+
%index_j = add i64 %mul_j, %i
38+
%arrayidxA = getelementptr inbounds [2 x [ 3 x i32]], ptr %A, i64 %j, i64 %i
39+
store i32 1, ptr %arrayidxA, align 4
40+
%arrayidxB = getelementptr inbounds i32, ptr %B, i64 %j
41+
store i32 1, ptr %arrayidxB, align 4
42+
%arrayidxC = getelementptr inbounds i32, ptr %C, i64 %j
43+
store i32 1, ptr %arrayidxC, align 4
44+
%arrayidxD = getelementptr inbounds i32, ptr %D, i64 %j
45+
store i32 1, ptr %arrayidxD, align 4
46+
%arrayidxE = getelementptr inbounds i32, ptr %E, i64 %j
47+
store i32 1, ptr %arrayidxE, align 4
48+
%incj = add nsw i64 %j, 1
49+
%exitcond.us = icmp eq i64 %incj, 2
50+
br i1 %exitcond.us, label %for.inci, label %for.j
51+
52+
for.inci: ; preds = %for.incj
53+
%inci = add nsw i64 %i, 1
54+
%exitcond55.us = icmp eq i64 %inci, 3
55+
br i1 %exitcond55.us, label %for.end.loopexit, label %for.i
56+
57+
for.end.loopexit: ; preds = %for.inci
58+
br label %for.end
59+
60+
for.end: ; preds = %for.end.loopexit, %for.cond1.preheader.lr.ph, %entry
61+
ret void
62+
}

llvm/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll

Lines changed: 25 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,25 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt -passes=loop-interchange -cache-line-size=64 -verify-loop-lcssa -verify-dom-info -S %s | FileCheck %s
2+
; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S
3+
; RUN: FileCheck --input-file=%t %s
34

45
@b = external dso_local global [5 x i32], align 16
56

7+
;; Not profitable to interchange, because the access is invariant to the j loop.
8+
;;
9+
;; for(int i=0;i<4;i++) {
10+
;; for(int j=1;j<4;j++) {
11+
;; b[i] = ....
12+
;; }
13+
;; }
14+
15+
; CHECK: --- !Missed
16+
; CHECK-NEXT: Pass: loop-interchange
17+
; CHECK-NEXT: Name: InterchangeNotProfitable
18+
; CHECK-NEXT: Function: test1
19+
; CHECK-NEXT: Args:
20+
; CHECK-NEXT: - String: Interchanging loops is not considered to improve cache locality nor vectorization.
21+
622
define void @test1() {
7-
; CHECK-LABEL: @test1(
8-
; CHECK-NEXT: entry:
9-
; CHECK-NEXT: br label [[FOR_BODY2_PREHEADER:%.*]]
10-
; CHECK: for.body.preheader:
11-
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
12-
; CHECK: for.body:
13-
; CHECK-NEXT: [[INC41:%.*]] = phi i32 [ [[INC4:%.*]], [[FOR_INC3:%.*]] ], [ undef, [[FOR_BODY_PREHEADER:%.*]] ]
14-
; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[INC41]] to i64
15-
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x i32], ptr @b, i64 0, i64 [[IDXPROM]]
16-
; CHECK-NEXT: br label [[FOR_INC:%.*]]
17-
; CHECK: for.body2.preheader:
18-
; CHECK-NEXT: br label [[FOR_BODY2:%.*]]
19-
; CHECK: for.body2:
20-
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[TMP1:%.*]], [[FOR_INC_SPLIT:%.*]] ], [ 1, [[FOR_BODY2_PREHEADER]] ]
21-
; CHECK-NEXT: br label [[FOR_BODY_PREHEADER]]
22-
; CHECK: for.inc:
23-
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
24-
; CHECK-NEXT: store i32 undef, ptr [[ARRAYIDX]], align 4
25-
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[LSR_IV]], 4
26-
; CHECK-NEXT: [[LSR_IV_NEXT:%.*]] = add nuw nsw i32 [[LSR_IV]], 1
27-
; CHECK-NEXT: br label [[FOR_COND1_FOR_END_CRIT_EDGE:%.*]]
28-
; CHECK: for.inc.split:
29-
; CHECK-NEXT: [[TMP1]] = add nuw nsw i32 [[LSR_IV]], 1
30-
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[LSR_IV]], 4
31-
; CHECK-NEXT: br i1 [[TMP2]], label [[FOR_BODY2]], label [[FOR_COND_FOR_END5_CRIT_EDGE:%.*]]
32-
; CHECK: for.cond1.for.end_crit_edge:
33-
; CHECK-NEXT: br label [[FOR_INC3]]
34-
; CHECK: for.inc3:
35-
; CHECK-NEXT: [[INC4]] = add nsw i32 [[INC41]], 1
36-
; CHECK-NEXT: br i1 false, label [[FOR_BODY]], label [[FOR_INC_SPLIT]]
37-
; CHECK: for.cond.for.end5_crit_edge:
38-
; CHECK-NEXT: ret void
39-
;
4023
entry:
4124
br label %for.body
4225

@@ -68,41 +51,15 @@ for.cond.for.end5_crit_edge: ; preds = %for.inc3
6851
ret void
6952
}
7053

54+
55+
; CHECK: --- !Missed
56+
; CHECK-NEXT: Pass: loop-interchange
57+
; CHECK-NEXT: Name: InterchangeNotProfitable
58+
; CHECK-NEXT: Function: test2
59+
; CHECK-NEXT: Args:
60+
; CHECK-NEXT: - String: Interchanging loops is not considered to improve cache locality nor vectorization.
61+
7162
define void @test2() {
72-
; CHECK-LABEL: @test2(
73-
; CHECK-NEXT: entry:
74-
; CHECK-NEXT: br label [[FOR_BODY2_PREHEADER:%.*]]
75-
; CHECK: for.body.preheader:
76-
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
77-
; CHECK: for.body:
78-
; CHECK-NEXT: [[INC41:%.*]] = phi i32 [ [[INC4:%.*]], [[FOR_INC3:%.*]] ], [ undef, [[FOR_BODY_PREHEADER:%.*]] ]
79-
; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[INC41]] to i64
80-
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x i32], ptr @b, i64 0, i64 [[IDXPROM]]
81-
; CHECK-NEXT: br label [[FOR_INC:%.*]]
82-
; CHECK: for.body2.preheader:
83-
; CHECK-NEXT: br label [[FOR_BODY2:%.*]]
84-
; CHECK: for.body2:
85-
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[TMP1:%.*]], [[FOR_INC_SPLIT:%.*]] ], [ 1, [[FOR_BODY2_PREHEADER]] ]
86-
; CHECK-NEXT: br label [[FOR_BODY_PREHEADER]]
87-
; CHECK: for.inc:
88-
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
89-
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[LSR_IV]], 4
90-
; CHECK-NEXT: [[CMP_ZEXT:%.*]] = zext i1 [[CMP]] to i32
91-
; CHECK-NEXT: store i32 [[CMP_ZEXT]], ptr [[ARRAYIDX]], align 4
92-
; CHECK-NEXT: [[LSR_IV_NEXT:%.*]] = add nuw nsw i32 [[LSR_IV]], 1
93-
; CHECK-NEXT: br label [[FOR_COND1_FOR_END_CRIT_EDGE:%.*]]
94-
; CHECK: for.inc.split:
95-
; CHECK-NEXT: [[TMP1]] = add nuw nsw i32 [[LSR_IV]], 1
96-
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[LSR_IV]], 4
97-
; CHECK-NEXT: br i1 [[TMP2]], label [[FOR_BODY2]], label [[FOR_COND_FOR_END5_CRIT_EDGE:%.*]]
98-
; CHECK: for.cond1.for.end_crit_edge:
99-
; CHECK-NEXT: br label [[FOR_INC3]]
100-
; CHECK: for.inc3:
101-
; CHECK-NEXT: [[INC4]] = add nsw i32 [[INC41]], 1
102-
; CHECK-NEXT: br i1 false, label [[FOR_BODY]], label [[FOR_INC_SPLIT]]
103-
; CHECK: for.cond.for.end5_crit_edge:
104-
; CHECK-NEXT: ret void
105-
;
10663
entry:
10764
br label %for.body
10865

0 commit comments

Comments
 (0)