Skip to content

Commit 73b2593

Browse files
[LoopCacheAnalysis] Fix loop cache cost to always round the cost up to the nearest integer number.
Currently, loop cache analysis uses the following formula to evaluate the cost of a RefGroup for a consecutive memory access: "RefCost=(TripCount*Stride)/CLS". When the cost is fractional, it is always rounded down to the smaller integer, which is problematic, especially when the cost is calculated to be less than one and is therefore rounded to zero. It makes more sense to say one cache line is used in this case rather than zero cache lines. This patch fixes the problem by always rounding the cost up to the nearest larger integer.
1 parent 421a8c5 commit 73b2593

File tree

12 files changed

+122
-95
lines changed

12 files changed

+122
-95
lines changed

llvm/lib/Analysis/LoopCacheAnalysis.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,12 @@ CacheCostTy IndexedReference::computeRefCost(const Loop &L,
299299
Stride = SE.getNoopOrAnyExtend(Stride, WiderType);
300300
TripCount = SE.getNoopOrZeroExtend(TripCount, WiderType);
301301
const SCEV *Numerator = SE.getMulExpr(Stride, TripCount);
302-
RefCost = SE.getUDivExpr(Numerator, CacheLineSize);
302+
// Round the fractional cost up to the nearest integer number.
303+
// The impact is the most significant when cost is calculated
304+
// to be a number less than one, because it makes more sense
305+
// to say one cache line is used rather than zero cache lines
306+
// are used.
307+
RefCost = SE.getUDivCeilSCEV(Numerator, CacheLineSize);
303308

304309
LLVM_DEBUG(dbgs().indent(4)
305310
<< "Access is consecutive: RefCost=(TripCount*Stride)/CLS="

llvm/test/Analysis/LoopCacheAnalysis/PowerPC/LoopnestFixedSize.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ target triple = "powerpc64le-unknown-linux-gnu"
77
; The IR is copied from llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheckFixedSize.ll
88

99
; CHECK: Loop 'for.body' has cost = 4186116
10-
; CHECK-NEXT: Loop 'for.body4' has cost = 128898
10+
; CHECK-NEXT: Loop 'for.body4' has cost = 130944
1111

1212
;; #define N 1024
1313
;; #define M 2048
@@ -49,7 +49,7 @@ for.end13: ; preds = %for.inc11
4949

5050

5151
; CHECK: Loop 'for.body' has cost = 4186116
52-
; CHECK-NEXT: Loop 'for.body4' has cost = 128898
52+
; CHECK-NEXT: Loop 'for.body4' has cost = 130944
5353

5454
define void @t2(ptr %a) {
5555
entry:
@@ -87,7 +87,7 @@ declare ptr @func_with_returned_arg(ptr returned %arg)
8787
; CHECK-NEXT: Loop 'for.body4' has cost = 16762927104000000
8888
; CHECK-NEXT: Loop 'for.body8' has cost = 130960368000000
8989
; CHECK-NEXT: Loop 'for.body12' has cost = 1047682944000
90-
; CHECK-NEXT: Loop 'for.body16' has cost = 32260032000
90+
; CHECK-NEXT: Loop 'for.body16' has cost = 32772096000
9191

9292
;; #define N 128
9393
;; #define M 2048

llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ for.end: ; preds = %for.cond
3838

3939
; CHECK: Loop 'for.cond' has cost = 100000000
4040
; CHECK: Loop 'for.cond1' has cost = 1000000
41-
; CHECK: Loop 'for.cond5' has cost = 30000
41+
; CHECK: Loop 'for.cond5' has cost = 40000
4242

4343
@data = external dso_local global [2 x [4 x [18 x i32]]], align 1
4444

@@ -118,7 +118,7 @@ for.neg.end: ; preds = %for.neg.cond
118118
; access functions. When this is fixed this testcase should have a cost
119119
; approximately 2x higher.
120120

121-
; CHECK: Loop 'for.cond2' has cost = 2560
121+
; CHECK: Loop 'for.cond2' has cost = 2561
122122
define void @Test2(ptr %B) {
123123
entry:
124124
br label %for.cond2
@@ -148,7 +148,7 @@ for.end: ; preds = %for.cond
148148
; for (i = 40960; i > 0; i--)
149149
; C[i] = C[i];
150150

151-
; CHECK: Loop 'for.cond3' has cost = 2560
151+
; CHECK: Loop 'for.cond3' has cost = 2561
152152
define void @Test3(ptr %C) {
153153
entry:
154154
br label %for.cond3
@@ -177,7 +177,7 @@ for.end: ; preds = %for.cond
177177
; for (i = 0; i < 40960; i++)
178178
; D[i] = D[i];
179179

180-
; CHECK: Loop 'for.cond4' has cost = 2560
180+
; CHECK: Loop 'for.cond4' has cost = 2561
181181
define void @Test4(ptr %D) {
182182
entry:
183183
br label %for.cond4

llvm/test/Analysis/LoopCacheAnalysis/PowerPC/loads-store.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ target triple = "powerpc64le-unknown-linux-gnu"
1111
; }
1212

1313
; CHECK: Loop 'for.i' has cost = 3000000
14-
; CHECK-NEXT: Loop 'for.k' has cost = 2030000
15-
; CHECK-NEXT: Loop 'for.j' has cost = 1060000
14+
; CHECK-NEXT: Loop 'for.k' has cost = 2040000
15+
; CHECK-NEXT: Loop 'for.j' has cost = 1080000
1616

1717
define void @foo(i64 %n, i64 %m, i64 %o, ptr %A, ptr %B, ptr %C) {
1818
entry:

llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matmul.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ target triple = "powerpc64le-unknown-linux-gnu"
1111
; }
1212

1313
; CHECK:Loop 'for.i' has cost = 2010000
14-
; CHECK-NEXT:Loop 'for.k' has cost = 1040000
15-
; CHECK-NEXT:Loop 'for.j' has cost = 70000
14+
; CHECK-NEXT:Loop 'for.k' has cost = 1050000
15+
; CHECK-NEXT:Loop 'for.j' has cost = 90000
1616

1717
define void @matmul(i64 %n, i64 %m, i64 %o, ptr %A, ptr %B, ptr %C) {
1818
entry:

llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matvecmul.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ target triple = "powerpc64le-unknown-linux-gnu"
1717
; CHECK: Loop 'k_loop' has cost = 10200000000000000
1818
; CHECK-NEXT: Loop 'j_loop' has cost = 102000000000000
1919
; CHECK-NEXT: Loop 'i_loop' has cost = 1020000000000
20-
; CHECK-NEXT: Loop 'm_loop' has cost = 10700000000
21-
; CHECK-NEXT: Loop 'l_loop' has cost = 1300000000
20+
; CHECK-NEXT: Loop 'm_loop' has cost = 10800000000
21+
; CHECK-NEXT: Loop 'l_loop' has cost = 1500000000
2222

2323
%_elem_type_of_double = type <{ double }>
2424

llvm/test/Analysis/LoopCacheAnalysis/PowerPC/multi-store.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ target triple = "powerpc64le-unknown-linux-gnu"
55

66
; CHECK: Loop 'for.j' has cost = 201000000
77
; CHECK-NEXT: Loop 'for.i' has cost = 102000000
8-
; CHECK-NEXT: Loop 'for.k' has cost = 90000
8+
; CHECK-NEXT: Loop 'for.k' has cost = 120000
99

1010
;; Test to make sure when we have multiple conflicting access patterns, the
1111
;; chosen loop configuration favours the majority of those accesses.

llvm/test/Analysis/LoopCacheAnalysis/PowerPC/single-store.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ target triple = "powerpc64le-unknown-linux-gnu"
1212

1313
; CHECK: Loop 'for.i' has cost = 100000000
1414
; CHECK-NEXT: Loop 'for.j' has cost = 1000000
15-
; CHECK-NEXT: Loop 'for.k' has cost = 60000
15+
; CHECK-NEXT: Loop 'for.k' has cost = 70000
1616

1717
define void @foo(i64 %n, i64 %m, i64 %o, ptr %A) {
1818
entry:
@@ -90,7 +90,7 @@ for.end: ; preds = %for.end.loopexit, %
9090

9191
; CHECK: Loop 'for.i' has cost = 100000000
9292
; CHECK-NEXT: Loop 'for.j' has cost = 1000000
93-
; CHECK-NEXT: Loop 'for.k' has cost = 60000
93+
; CHECK-NEXT: Loop 'for.k' has cost = 70000
9494

9595
define void @foo2(i64 %n, i64 %m, i64 %o, ptr %A) {
9696
entry:

llvm/test/Analysis/LoopCacheAnalysis/PowerPC/stencil.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ target triple = "powerpc64le-unknown-linux-gnu"
1111
; }
1212
; }
1313

14-
; CHECK: Loop 'for.i' has cost = 20600
15-
; CHECK-NEXT: Loop 'for.j' has cost = 800
14+
; CHECK: Loop 'for.i' has cost = 20800
15+
; CHECK-NEXT: Loop 'for.j' has cost = 1000
1616

1717
define void @foo(i64 %n, i64 %m, ptr %A, ptr %B, ptr %C) {
1818
entry:

llvm/test/Analysis/LoopCacheAnalysis/compute-cost.ll

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
; Check IndexedReference::computeRefCost can handle type differences between
99
; Stride and TripCount
1010

11+
; Round costs up to the nearest whole number, e.g. in 'for.cond5' the cost is calculated as 12.5 and
12+
; it makes more sense to say 13 cache lines are used rather than 12 cache lines.
13+
1114
; SMALLER-CACHELINE: Loop 'for.cond' has cost = 256
1215
; LARGER-CACHELINE: Loop 'for.cond' has cost = 32
1316
%struct._Handleitem = type { ptr }
@@ -40,10 +43,10 @@ for.end: ; preds = %for.cond
4043

4144
; SMALLER-CACHELINE: Loop 'for.cond' has cost = 100000000
4245
; SMALLER-CACHELINE: Loop 'for.cond1' has cost = 1000000
43-
; SMALLER-CACHELINE: Loop 'for.cond5' has cost = 120000
46+
; SMALLER-CACHELINE: Loop 'for.cond5' has cost = 130000
4447
; LARGER-CACHELINE: Loop 'for.cond' has cost = 100000000
4548
; LARGER-CACHELINE: Loop 'for.cond1' has cost = 1000000
46-
; LARGER-CACHELINE: Loop 'for.cond5' has cost = 10000
49+
; LARGER-CACHELINE: Loop 'for.cond5' has cost = 20000
4750
@data = external dso_local global [2 x [4 x [18 x i32]]], align 1
4851

4952
define dso_local void @handle_to_ptr_2(i1 %b0, i1 %b1, i1 %b2) {
@@ -122,8 +125,8 @@ for.neg.end: ; preds = %for.neg.cond
122125
; access functions. When this is fixed this testcase should have a cost
123126
; approximately 2x higher.
124127

125-
; SMALLER-CACHELINE: Loop 'for.cond2' has cost = 10240
126-
; LARGER-CACHELINE: Loop 'for.cond2' has cost = 1280
128+
; SMALLER-CACHELINE: Loop 'for.cond2' has cost = 10241
129+
; LARGER-CACHELINE: Loop 'for.cond2' has cost = 1281
127130
define void @Test2(ptr %B) {
128131
entry:
129132
br label %for.cond2
@@ -153,8 +156,8 @@ for.end: ; preds = %for.cond
153156
; for (i = 40960; i > 0; i--)
154157
; C[i] = C[i];
155158

156-
; SMALLER-CACHELINE: Loop 'for.cond3' has cost = 10240
157-
; LARGER-CACHELINE: Loop 'for.cond3' has cost = 1280
159+
; SMALLER-CACHELINE: Loop 'for.cond3' has cost = 10241
160+
; LARGER-CACHELINE: Loop 'for.cond3' has cost = 1281
158161
define void @Test3(ptr %C) {
159162
entry:
160163
br label %for.cond3
@@ -183,8 +186,8 @@ for.end: ; preds = %for.cond
183186
; for (i = 0; i < 40960; i++)
184187
; D[i] = D[i];
185188

186-
; SMALLER-CACHELINE: Loop 'for.cond4' has cost = 10240
187-
; LARGER-CACHELINE: Loop 'for.cond4' has cost = 1280
189+
; SMALLER-CACHELINE: Loop 'for.cond4' has cost = 10241
190+
; LARGER-CACHELINE: Loop 'for.cond4' has cost = 1281
188191
define void @Test4(ptr %D) {
189192
entry:
190193
br label %for.cond4
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
; RUN: opt < %s -cache-line-size=64 -passes='print<loop-cache-cost>' -disable-output 2>&1 | FileCheck %s
2+
3+
;; This test checks the effect of rounding cache cost to 1 when it is
4+
;; evaluated to 0 because at least 1 cache line is accessed by the loopnest.
5+
;; It does not make sense to output that zero cache lines are used.
6+
;; The cost of reference group for B[j], C[j], D[j] and E[j] were
7+
;; calculated as 0 before but now they are 1, which makes each loop cost more reasonable.
8+
;
9+
; void test(int n, int m, int o, int A[2][3], int B[2], int C[2], int D[2], int E[2]) {
10+
; for (int i = 0; i < 3; i++)
11+
; for (int j = 0; j < 2; j++)
12+
; A[j][i] = 1;
13+
; B[j] = 1;
14+
; C[j] = 1;
15+
; D[j] = 1;
16+
; E[j] = 1;
17+
; }
18+
19+
; CHECK: Loop 'for.j' has cost = 18
20+
; CHECK-NEXT: Loop 'for.i' has cost = 10
21+
22+
define void @test(ptr %A, ptr %B, ptr %C, ptr %D, ptr %E) {
23+
24+
entry:
25+
br label %for.i.preheader.split
26+
27+
for.i.preheader.split: ; preds = %for.i.preheader
28+
br label %for.i
29+
30+
for.i: ; preds = %for.inci, %for.i.preheader.split
31+
%i = phi i64 [ %inci, %for.inci ], [ 0, %for.i.preheader.split ]
32+
br label %for.j
33+
34+
for.j: ; preds = %for.incj, %for.i
35+
%j = phi i64 [ %incj, %for.j ], [ 0, %for.i ]
36+
%mul_j = mul nsw i64 %j, 3
37+
%index_j = add i64 %mul_j, %i
38+
%arrayidxA = getelementptr inbounds [2 x [ 3 x i32]], ptr %A, i64 %j, i64 %i
39+
store i32 1, ptr %arrayidxA, align 4
40+
%arrayidxB = getelementptr inbounds i32, ptr %B, i64 %j
41+
store i32 1, ptr %arrayidxB, align 4
42+
%arrayidxC = getelementptr inbounds i32, ptr %C, i64 %j
43+
store i32 1, ptr %arrayidxC, align 4
44+
%arrayidxD = getelementptr inbounds i32, ptr %D, i64 %j
45+
store i32 1, ptr %arrayidxD, align 4
46+
%arrayidxE = getelementptr inbounds i32, ptr %E, i64 %j
47+
store i32 1, ptr %arrayidxE, align 4
48+
%incj = add nsw i64 %j, 1
49+
%exitcond.us = icmp eq i64 %incj, 2
50+
br i1 %exitcond.us, label %for.inci, label %for.j
51+
52+
for.inci: ; preds = %for.incj
53+
%inci = add nsw i64 %i, 1
54+
%exitcond55.us = icmp eq i64 %inci, 3
55+
br i1 %exitcond55.us, label %for.end.loopexit, label %for.i
56+
57+
for.end.loopexit: ; preds = %for.inci
58+
br label %for.end
59+
60+
for.end: ; preds = %for.end.loopexit, %for.cond1.preheader.lr.ph, %entry
61+
ret void
62+
}

llvm/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll

Lines changed: 25 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,25 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt -passes=loop-interchange -cache-line-size=64 -verify-loop-lcssa -verify-dom-info -S %s | FileCheck %s
2+
; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S
3+
; RUN: FileCheck --input-file=%t %s
34

45
@b = external dso_local global [5 x i32], align 16
56

7+
;; Not profitable to interchange, because the access is invariant to j loop.
8+
;;
9+
;; for(int i=0;i<4;i++) {
10+
;; for(int j=1;j<4;j++) {
11+
;; b[i] = ....
12+
;; }
13+
;; }
14+
15+
; CHECK: --- !Missed
16+
; CHECK-NEXT: Pass: loop-interchange
17+
; CHECK-NEXT: Name: InterchangeNotProfitable
18+
; CHECK-NEXT: Function: test1
19+
; CHECK-NEXT: Args:
20+
; CHECK-NEXT: - String: Interchanging loops is not considered to improve cache locality nor vectorization.
21+
622
define void @test1() {
7-
; CHECK-LABEL: @test1(
8-
; CHECK-NEXT: entry:
9-
; CHECK-NEXT: br label [[FOR_BODY2_PREHEADER:%.*]]
10-
; CHECK: for.body.preheader:
11-
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
12-
; CHECK: for.body:
13-
; CHECK-NEXT: [[INC41:%.*]] = phi i32 [ [[INC4:%.*]], [[FOR_INC3:%.*]] ], [ undef, [[FOR_BODY_PREHEADER:%.*]] ]
14-
; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[INC41]] to i64
15-
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x i32], ptr @b, i64 0, i64 [[IDXPROM]]
16-
; CHECK-NEXT: br label [[FOR_INC:%.*]]
17-
; CHECK: for.body2.preheader:
18-
; CHECK-NEXT: br label [[FOR_BODY2:%.*]]
19-
; CHECK: for.body2:
20-
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[TMP1:%.*]], [[FOR_INC_SPLIT:%.*]] ], [ 1, [[FOR_BODY2_PREHEADER]] ]
21-
; CHECK-NEXT: br label [[FOR_BODY_PREHEADER]]
22-
; CHECK: for.inc:
23-
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
24-
; CHECK-NEXT: store i32 undef, ptr [[ARRAYIDX]], align 4
25-
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[LSR_IV]], 4
26-
; CHECK-NEXT: [[LSR_IV_NEXT:%.*]] = add nuw nsw i32 [[LSR_IV]], 1
27-
; CHECK-NEXT: br label [[FOR_COND1_FOR_END_CRIT_EDGE:%.*]]
28-
; CHECK: for.inc.split:
29-
; CHECK-NEXT: [[TMP1]] = add nuw nsw i32 [[LSR_IV]], 1
30-
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[LSR_IV]], 4
31-
; CHECK-NEXT: br i1 [[TMP2]], label [[FOR_BODY2]], label [[FOR_COND_FOR_END5_CRIT_EDGE:%.*]]
32-
; CHECK: for.cond1.for.end_crit_edge:
33-
; CHECK-NEXT: br label [[FOR_INC3]]
34-
; CHECK: for.inc3:
35-
; CHECK-NEXT: [[INC4]] = add nsw i32 [[INC41]], 1
36-
; CHECK-NEXT: br i1 false, label [[FOR_BODY]], label [[FOR_INC_SPLIT]]
37-
; CHECK: for.cond.for.end5_crit_edge:
38-
; CHECK-NEXT: ret void
39-
;
4023
entry:
4124
br label %for.body
4225

@@ -68,41 +51,15 @@ for.cond.for.end5_crit_edge: ; preds = %for.inc3
6851
ret void
6952
}
7053

54+
55+
; CHECK: --- !Missed
56+
; CHECK-NEXT: Pass: loop-interchange
57+
; CHECK-NEXT: Name: InterchangeNotProfitable
58+
; CHECK-NEXT: Function: test2
59+
; CHECK-NEXT: Args:
60+
; CHECK-NEXT: - String: Interchanging loops is not considered to improve cache locality nor vectorization.
61+
7162
define void @test2() {
72-
; CHECK-LABEL: @test2(
73-
; CHECK-NEXT: entry:
74-
; CHECK-NEXT: br label [[FOR_BODY2_PREHEADER:%.*]]
75-
; CHECK: for.body.preheader:
76-
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
77-
; CHECK: for.body:
78-
; CHECK-NEXT: [[INC41:%.*]] = phi i32 [ [[INC4:%.*]], [[FOR_INC3:%.*]] ], [ undef, [[FOR_BODY_PREHEADER:%.*]] ]
79-
; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[INC41]] to i64
80-
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x i32], ptr @b, i64 0, i64 [[IDXPROM]]
81-
; CHECK-NEXT: br label [[FOR_INC:%.*]]
82-
; CHECK: for.body2.preheader:
83-
; CHECK-NEXT: br label [[FOR_BODY2:%.*]]
84-
; CHECK: for.body2:
85-
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[TMP1:%.*]], [[FOR_INC_SPLIT:%.*]] ], [ 1, [[FOR_BODY2_PREHEADER]] ]
86-
; CHECK-NEXT: br label [[FOR_BODY_PREHEADER]]
87-
; CHECK: for.inc:
88-
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
89-
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[LSR_IV]], 4
90-
; CHECK-NEXT: [[CMP_ZEXT:%.*]] = zext i1 [[CMP]] to i32
91-
; CHECK-NEXT: store i32 [[CMP_ZEXT]], ptr [[ARRAYIDX]], align 4
92-
; CHECK-NEXT: [[LSR_IV_NEXT:%.*]] = add nuw nsw i32 [[LSR_IV]], 1
93-
; CHECK-NEXT: br label [[FOR_COND1_FOR_END_CRIT_EDGE:%.*]]
94-
; CHECK: for.inc.split:
95-
; CHECK-NEXT: [[TMP1]] = add nuw nsw i32 [[LSR_IV]], 1
96-
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[LSR_IV]], 4
97-
; CHECK-NEXT: br i1 [[TMP2]], label [[FOR_BODY2]], label [[FOR_COND_FOR_END5_CRIT_EDGE:%.*]]
98-
; CHECK: for.cond1.for.end_crit_edge:
99-
; CHECK-NEXT: br label [[FOR_INC3]]
100-
; CHECK: for.inc3:
101-
; CHECK-NEXT: [[INC4]] = add nsw i32 [[INC41]], 1
102-
; CHECK-NEXT: br i1 false, label [[FOR_BODY]], label [[FOR_INC_SPLIT]]
103-
; CHECK: for.cond.for.end5_crit_edge:
104-
; CHECK-NEXT: ret void
105-
;
10663
entry:
10764
br label %for.body
10865

0 commit comments

Comments
 (0)