Skip to content

Commit bb3eb0c

Browse files
[ARM] Test unroll behaviour on machines with low overhead branching (#118692)
Add a test for the existing loop-unroll behaviour. Currently, the single loop containing the fmul is runtime-unrolled by a count of 4, with the loop remainder unrolled into the three for.body9.us.prol sections. This produces quite a lot of compares and branches, negating the benefits of the low-overhead loop mechanism.
1 parent a43b2e1 commit bb3eb0c

File tree

1 file changed

+63
-0
lines changed

1 file changed

+63
-0
lines changed
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
; RUN: opt -mcpu=cortex-m55 -mtriple=thumbv8.1m.main -passes=loop-unroll -S %s -o - | FileCheck %s --check-prefix=LOB

; Checks the behaviour of the loop unroller on a processor with low-overhead
; branching available (Cortex-M55, per the RUN line above).
;
; The innermost loop of @test is the only place an fmul occurs. The checks
; below expect that loop to be emitted as three prologue blocks holding one
; fmul each, followed by a main body holding four, for exactly 3 + 4 = 7 fmul
; instructions in the output.
;
; The block-name patterns are ordered plain matches, not label directives:
; the last pattern is a catch-all that also matches the prologue block names,
; so it cannot be used to partition the output into labelled regions.
; NOTE(review): this assumes the prologue blocks are printed before the main
; loop body - confirm against the actual opt output.

; Prologue: three copies of the loop body, one fmul apiece.
; LOB:         for.body{{.*}}.prol
; LOB-COUNT-1: fmul fast float
; LOB:         for.body{{.*}}.prol.1
; LOB-COUNT-1: fmul fast float
; LOB:         for.body{{.*}}.prol.2
; LOB-COUNT-1: fmul fast float

; Main loop body: four fmuls, and no further fmul anywhere after them.
; LOB:         for.body{{.*}}
; LOB-COUNT-4: fmul fast float
; LOB-NOT:     fmul fast float
; @test(n, pA): input for the loop-unroll pass. A three-deep loop nest over
; the float array pA, equivalent to:
;
;   for (k = 0; k < n; ++k)
;     for (w = k + 1; w < n; ++w)
;       for (x = k + 1; x < n; ++x)
;         pA[w*n + x] = pA[x*n + k] * pA[w*n + k];
;
; Only the innermost loop (for.body9.us) contains an fmul, so every fmul in
; the optimised output is a copy of %mul14.us.
define dso_local void @test(i32 noundef %n, ptr nocapture noundef %pA) local_unnamed_addr {
entry:
  ; Skip the whole nest when n <= 0.
  %cmp46 = icmp sgt i32 %n, 0
  br i1 %cmp46, label %for.body, label %for.cond.cleanup

for.cond.loopexit:                                ; preds = %for.cond6.for.cond.cleanup8_crit_edge.us, %for.body
  ; Outer-loop latch: stop once k + 1 == n.
  %exitcond49.not = icmp eq i32 %add, %n
  br i1 %exitcond49.not, label %for.cond.cleanup, label %for.body

for.cond.cleanup:                                 ; preds = %for.cond.loopexit, %entry
  ret void

for.body:                                         ; preds = %entry, %for.cond.loopexit
  ; Outer loop over k, starting at 0. %add = k + 1 is both the next k and the
  ; start value of the two inner induction variables.
  %k.047 = phi i32 [ %add, %for.cond.loopexit ], [ 0, %entry ]
  %add = add nuw nsw i32 %k.047, 1
  ; The inner loops run only when k + 1 < n.
  %cmp244 = icmp slt i32 %add, %n
  br i1 %cmp244, label %for.cond6.preheader.lr.ph, label %for.cond.loopexit

for.cond6.preheader.lr.ph:                        ; preds = %for.body
  ; &pA[k]: invariant across both inner loops.
  %invariant.gep = getelementptr float, ptr %pA, i32 %k.047
  br label %for.cond6.preheader.us

for.cond6.preheader.us:                           ; preds = %for.cond6.for.cond.cleanup8_crit_edge.us, %for.cond6.preheader.lr.ph
  ; Middle loop over w, starting at k + 1.
  %w.045.us = phi i32 [ %add, %for.cond6.preheader.lr.ph ], [ %inc19.us, %for.cond6.for.cond.cleanup8_crit_edge.us ]
  %mul.us = mul nuw nsw i32 %w.045.us, %n
  ; %0 = &pA[w*n], %arrayidx.us = &pA[w*n + k].
  %0 = getelementptr float, ptr %pA, i32 %mul.us
  %arrayidx.us = getelementptr float, ptr %0, i32 %k.047
  br label %for.body9.us

for.body9.us:                                     ; preds = %for.cond6.preheader.us, %for.body9.us
  ; Innermost loop over x, starting at k + 1. This is the loop whose
  ; unrolling the FileCheck directives inspect.
  %x.043.us = phi i32 [ %add, %for.cond6.preheader.us ], [ %inc.us, %for.body9.us ]
  ; %1 = pA[w*n + k], reloaded on every iteration.
  %1 = load float, ptr %arrayidx.us, align 4
  %mul11.us = mul nuw nsw i32 %x.043.us, %n
  ; %2 = pA[x*n + k].
  %gep.us = getelementptr float, ptr %invariant.gep, i32 %mul11.us
  %2 = load float, ptr %gep.us, align 4
  %mul14.us = fmul fast float %2, %1
  ; pA[w*n + x] = %2 * %1.
  %arrayidx17.us = getelementptr float, ptr %0, i32 %x.043.us
  store float %mul14.us, ptr %arrayidx17.us, align 4
  %inc.us = add nuw nsw i32 %x.043.us, 1
  %exitcond.not = icmp eq i32 %inc.us, %n
  br i1 %exitcond.not, label %for.cond6.for.cond.cleanup8_crit_edge.us, label %for.body9.us

for.cond6.for.cond.cleanup8_crit_edge.us:         ; preds = %for.body9.us
  ; Middle-loop latch: advance w and stop once it reaches n.
  %inc19.us = add nuw nsw i32 %w.045.us, 1
  %exitcond48.not = icmp eq i32 %inc19.us, %n
  br i1 %exitcond48.not, label %for.cond.loopexit, label %for.cond6.preheader.us
}

0 commit comments

Comments
 (0)