Skip to content

Commit da98177

Browse files
committed
[ARM] Allow v6m runtime loop unrolling
This removes the restriction that only Thumb2 targets enable runtime loop unrolling, allowing it for Thumb1-only cores as well. The existing T2 heuristics are used (for the time being) to control when and how unrolling is performed.

Differential Revision: https://reviews.llvm.org/D99588
1 parent 0f7e3a5 commit da98177

File tree

2 files changed

+83
-107
lines changed

2 files changed

+83
-107
lines changed

llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2140,10 +2140,6 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
21402140
if (L->getHeader()->getParent()->hasOptSize())
21412141
return;
21422142

2143-
// Only enable on Thumb-2 targets.
2144-
if (!ST->isThumb2())
2145-
return;
2146-
21472143
SmallVector<BasicBlock*, 4> ExitingBlocks;
21482144
L->getExitingBlocks(ExitingBlocks);
21492145
LLVM_DEBUG(dbgs() << "Loop has:\n"

llvm/test/Transforms/LoopUnroll/ARM/loop-unrolling.ll

Lines changed: 83 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
; RUN: opt -mtriple=armv7 -mcpu=cortex-a57 -loop-unroll -S %s -o - | FileCheck %s --check-prefix=CHECK-UNROLL-A
2-
; RUN: opt -mtriple=thumbv7 -mcpu=cortex-a57 -loop-unroll -S %s -o - | FileCheck %s --check-prefix=CHECK-UNROLL-A
3-
; RUN: opt -mtriple=thumbv7 -mcpu=cortex-a72 -loop-unroll -S %s -o - | FileCheck %s --check-prefix=CHECK-UNROLL-A
4-
; RUN: opt -mtriple=thumbv8m -mcpu=cortex-m23 -loop-unroll -S %s -o - | FileCheck %s --check-prefix=CHECK-UNROLL-T1
5-
; RUN: opt -mtriple=thumbv8m.main -mcpu=cortex-m33 -loop-unroll -S %s -o - | FileCheck %s --check-prefix=CHECK-UNROLL-T2
6-
; RUN: opt -mtriple=thumbv7em -mcpu=cortex-m7 -loop-unroll -S %s -o - | FileCheck %s --check-prefix=CHECK-UNROLL-T2
1+
; RUN: opt -mtriple=armv7 -mcpu=cortex-a57 -loop-unroll -S %s -o - | FileCheck %s --check-prefix=CHECK-NOUNROLL
2+
; RUN: opt -mtriple=thumbv7 -mcpu=cortex-a57 -loop-unroll -S %s -o - | FileCheck %s --check-prefix=CHECK-NOUNROLL
3+
; RUN: opt -mtriple=thumbv7 -mcpu=cortex-a72 -loop-unroll -S %s -o - | FileCheck %s --check-prefix=CHECK-NOUNROLL
4+
; RUN: opt -mtriple=thumbv8m -mcpu=cortex-m23 -loop-unroll -S %s -o - | FileCheck %s --check-prefix=CHECK-UNROLL
5+
; RUN: opt -mtriple=thumbv8m.main -mcpu=cortex-m33 -loop-unroll -S %s -o - | FileCheck %s --check-prefix=CHECK-UNROLL
6+
; RUN: opt -mtriple=thumbv7em -mcpu=cortex-m7 -loop-unroll -S %s -o - | FileCheck %s --check-prefix=CHECK-UNROLL
77

88
; CHECK-LABEL: partial
99
define arm_aapcs_vfpcc void @partial(i32* nocapture %C, i32* nocapture readonly %A, i32* nocapture readonly %B) local_unnamed_addr #0 {
@@ -13,36 +13,31 @@ entry:
1313
; CHECK-LABEL: for.body
1414
for.body:
1515

16-
; CHECK-UNROLL-A: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, %entry ], [ [[IV2:%[a-z.0-9]+]], %for.body ]
17-
; CHECK-UNROLL-A: [[IV1:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 1
18-
; CHECK-UNROLL-A: [[IV2]] = add nuw nsw i32 [[IV1]], 1
19-
; CHECK-UNROLL-A: [[CMP:%[a-z.0-9]+]] = icmp eq i32 [[IV2]], 1024
20-
; CHECK-UNROLL-A: br i1 [[CMP]], label [[END:%[a-z.]+]], label %for.body
21-
22-
; CHECK-UNROLL-T1: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, %entry ], [ [[IV1:%[a-z.0-9]+]], %for.body ]
23-
; CHECK-UNROLL-T1: [[IV1]] = add nuw nsw i32 [[IV0]], 1
24-
; CHECK-UNROLL-T1: [[CMP:%[a-z.0-9]+]] = icmp eq i32 [[IV1]], 1024
25-
; CHECK-UNROLL-T1: br i1 [[CMP]], label [[END:%[a-z.]+]], label %for.body
26-
27-
; CHECK-UNROLL-T2: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, %entry ], [ [[IV16:%[a-z.0-9]+]], %for.body ]
28-
; CHECK-UNROLL-T2: [[IV1:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 1
29-
; CHECK-UNROLL-T2: [[IV2:%[a-z.0-9]+]] = add nuw nsw i32 [[IV1]], 1
30-
; CHECK-UNROLL-T2: [[IV3:%[a-z.0-9]+]] = add nuw nsw i32 [[IV2]], 1
31-
; CHECK-UNROLL-T2: [[IV4:%[a-z.0-9]+]] = add nuw nsw i32 [[IV3]], 1
32-
; CHECK-UNROLL-T2: [[IV5:%[a-z.0-9]+]] = add nuw nsw i32 [[IV4]], 1
33-
; CHECK-UNROLL-T2: [[IV6:%[a-z.0-9]+]] = add nuw nsw i32 [[IV5]], 1
34-
; CHECK-UNROLL-T2: [[IV7:%[a-z.0-9]+]] = add nuw nsw i32 [[IV6]], 1
35-
; CHECK-UNROLL-T2: [[IV8:%[a-z.0-9]+]] = add nuw nsw i32 [[IV7]], 1
36-
; CHECK-UNROLL-T2: [[IV9:%[a-z.0-9]+]] = add nuw nsw i32 [[IV8]], 1
37-
; CHECK-UNROLL-T2: [[IV10:%[a-z.0-9]+]] = add nuw nsw i32 [[IV9]], 1
38-
; CHECK-UNROLL-T2: [[IV11:%[a-z.0-9]+]] = add nuw nsw i32 [[IV10]], 1
39-
; CHECK-UNROLL-T2: [[IV12:%[a-z.0-9]+]] = add nuw nsw i32 [[IV11]], 1
40-
; CHECK-UNROLL-T2: [[IV13:%[a-z.0-9]+]] = add nuw nsw i32 [[IV12]], 1
41-
; CHECK-UNROLL-T2: [[IV14:%[a-z.0-9]+]] = add nuw nsw i32 [[IV13]], 1
42-
; CHECK-UNROLL-T2: [[IV15:%[a-z.0-9]+]] = add nuw nsw i32 [[IV14]], 1
43-
; CHECK-UNROLL-T2: [[IV16]] = add nuw nsw i32 [[IV15]], 1
44-
; CHECK-UNROLL-T2: [[CMP:%[a-z.0-9]+]] = icmp eq i32 [[IV16]], 1024
45-
; CHECK-UNROLL-T2: br i1 [[CMP]], label [[END:%[a-z.]+]], label %for.body
16+
; CHECK-NOUNROLL: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, %entry ], [ [[IV2:%[a-z.0-9]+]], %for.body ]
17+
; CHECK-NOUNROLL: [[IV1:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 1
18+
; CHECK-NOUNROLL: [[IV2]] = add nuw nsw i32 [[IV1]], 1
19+
; CHECK-NOUNROLL: [[CMP:%[a-z.0-9]+]] = icmp eq i32 [[IV2]], 1024
20+
; CHECK-NOUNROLL: br i1 [[CMP]], label [[END:%[a-z.]+]], label %for.body
21+
22+
; CHECK-UNROLL: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, %entry ], [ [[IV16:%[a-z.0-9]+]], %for.body ]
23+
; CHECK-UNROLL: [[IV1:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 1
24+
; CHECK-UNROLL: [[IV2:%[a-z.0-9]+]] = add nuw nsw i32 [[IV1]], 1
25+
; CHECK-UNROLL: [[IV3:%[a-z.0-9]+]] = add nuw nsw i32 [[IV2]], 1
26+
; CHECK-UNROLL: [[IV4:%[a-z.0-9]+]] = add nuw nsw i32 [[IV3]], 1
27+
; CHECK-UNROLL: [[IV5:%[a-z.0-9]+]] = add nuw nsw i32 [[IV4]], 1
28+
; CHECK-UNROLL: [[IV6:%[a-z.0-9]+]] = add nuw nsw i32 [[IV5]], 1
29+
; CHECK-UNROLL: [[IV7:%[a-z.0-9]+]] = add nuw nsw i32 [[IV6]], 1
30+
; CHECK-UNROLL: [[IV8:%[a-z.0-9]+]] = add nuw nsw i32 [[IV7]], 1
31+
; CHECK-UNROLL: [[IV9:%[a-z.0-9]+]] = add nuw nsw i32 [[IV8]], 1
32+
; CHECK-UNROLL: [[IV10:%[a-z.0-9]+]] = add nuw nsw i32 [[IV9]], 1
33+
; CHECK-UNROLL: [[IV11:%[a-z.0-9]+]] = add nuw nsw i32 [[IV10]], 1
34+
; CHECK-UNROLL: [[IV12:%[a-z.0-9]+]] = add nuw nsw i32 [[IV11]], 1
35+
; CHECK-UNROLL: [[IV13:%[a-z.0-9]+]] = add nuw nsw i32 [[IV12]], 1
36+
; CHECK-UNROLL: [[IV14:%[a-z.0-9]+]] = add nuw nsw i32 [[IV13]], 1
37+
; CHECK-UNROLL: [[IV15:%[a-z.0-9]+]] = add nuw nsw i32 [[IV14]], 1
38+
; CHECK-UNROLL: [[IV16]] = add nuw nsw i32 [[IV15]], 1
39+
; CHECK-UNROLL: [[CMP:%[a-z.0-9]+]] = icmp eq i32 [[IV16]], 1024
40+
; CHECK-UNROLL: br i1 [[CMP]], label [[END:%[a-z.]+]], label %for.body
4641

4742
%i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
4843
%arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.08
@@ -68,26 +63,21 @@ entry:
6863

6964
; CHECK-LABEL: for.body
7065
for.body:
71-
; CHECK-UNROLL-A: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, [[PRE:%[a-z.0-9]+]] ], [ [[IV2:%[a-z.0-9]+]], %for.body ]
72-
; CHECK-UNROLL-A: [[IV1:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 1
73-
; CHECK-UNROLL-A: [[IV2]] = add nuw i32 [[IV1]], 1
74-
; CHECK-UNROLL-A: br
75-
76-
; CHECK-UNROLL-T1: %i.09 = phi i32 [ %inc, %for.body ], [ 0
77-
; CHECK-UNROLL-T1: %inc = add nuw i32 %i.09, 1
78-
; CHECK-UNROLL-T1: %exitcond = icmp eq i32 %inc, %N
79-
; CHECK-UNROLL-T1: br
80-
81-
; CHECK-UNROLL-T2: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, [[PRE:%[a-z.0-9]+]] ], [ [[IV4:%[a-z.0-9]+]], %for.body ]
82-
; CHECK-UNROLL-T2: [[IV1:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 1
83-
; CHECK-UNROLL-T2: [[IV2:%[a-z.0-9]+]] = add nuw nsw i32 [[IV1]], 1
84-
; CHECK-UNROLL-T2: [[IV3:%[a-z.0-9]+]] = add nuw nsw i32 [[IV2]], 1
85-
; CHECK-UNROLL-T2: [[IV4]] = add nuw i32 [[IV3]], 1
86-
; CHECK-UNROLL-T2: br
87-
88-
; CHECK-UNROLL-T2: for.body.epil:
89-
; CHECK-UNROLL-T2: for.body.epil.1:
90-
; CHECK-UNROLL-T2: for.body.epil.2:
66+
; CHECK-NOUNROLL: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, [[PRE:%[a-z.0-9]+]] ], [ [[IV2:%[a-z.0-9]+]], %for.body ]
67+
; CHECK-NOUNROLL: [[IV1:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 1
68+
; CHECK-NOUNROLL: [[IV2]] = add nuw i32 [[IV1]], 1
69+
; CHECK-NOUNROLL: br
70+
71+
; CHECK-UNROLL: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, [[PRE:%[a-z.0-9]+]] ], [ [[IV4:%[a-z.0-9]+]], %for.body ]
72+
; CHECK-UNROLL: [[IV1:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 1
73+
; CHECK-UNROLL: [[IV2:%[a-z.0-9]+]] = add nuw nsw i32 [[IV1]], 1
74+
; CHECK-UNROLL: [[IV3:%[a-z.0-9]+]] = add nuw nsw i32 [[IV2]], 1
75+
; CHECK-UNROLL: [[IV4]] = add nuw i32 [[IV3]], 1
76+
; CHECK-UNROLL: br
77+
78+
; CHECK-UNROLL: for.body.epil:
79+
; CHECK-UNROLL: for.body.epil.1:
80+
; CHECK-UNROLL: for.body.epil.2:
9181

9282
%i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
9383
%arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.09
@@ -126,19 +116,19 @@ for.cond.cleanup3:
126116

127117
; CHECK-LABEL: for.body4
128118
for.body4:
129-
; CHECK-UNROLL-T1: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, [[PRE:%[a-z0-9.]+]] ], [ [[IV1:%[a-z.0-9]+]], %for.body4 ]
130-
; CHECK-UNROLL-T1: [[IV1]] = add nuw i32 [[IV0]], 1
131-
; CHECK-UNROLL-T1: br
132-
133-
; CHECK-UNROLL-T2: for.body4.epil:
134-
; CHECK-UNROLL-T2: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, [[PRE:%[a-z0-9.]+]] ], [ [[IV4:%[a-z.0-9]+]], %for.body4 ]
135-
; CHECK-UNROLL-T2: [[IV1:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 1
136-
; CHECK-UNROLL-T2: [[IV2:%[a-z.0-9]+]] = add nuw nsw i32 [[IV1]], 1
137-
; CHECK-UNROLL-T2: [[IV3:%[a-z.0-9]+]] = add nuw nsw i32 [[IV2]], 1
138-
; CHECK-UNROLL-T2: [[IV4]] = add nuw i32 [[IV3]], 1
139-
; CHECK-UNROLL-T2: br
140-
; CHECK-UNROLL-T2: for.body4.epil.1:
141-
; CHECK-UNROLL-T2: for.body4.epil.2:
119+
; CHECK-NOUNROLL: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, [[PRE:%[a-z0-9.]+]] ], [ [[IV1:%[a-z.0-9]+]], %for.body4 ]
120+
; CHECK-NOUNROLL: [[IV1]] = add nuw i32 [[IV0]], 1
121+
; CHECK-NOUNROLL: br
122+
123+
; CHECK-UNROLL: for.body4.epil:
124+
; CHECK-UNROLL: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, [[PRE:%[a-z0-9.]+]] ], [ [[IV4:%[a-z.0-9]+]], %for.body4 ]
125+
; CHECK-UNROLL: [[IV1:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 1
126+
; CHECK-UNROLL: [[IV2:%[a-z.0-9]+]] = add nuw nsw i32 [[IV1]], 1
127+
; CHECK-UNROLL: [[IV3:%[a-z.0-9]+]] = add nuw nsw i32 [[IV2]], 1
128+
; CHECK-UNROLL: [[IV4]] = add nuw i32 [[IV3]], 1
129+
; CHECK-UNROLL: br
130+
; CHECK-UNROLL: for.body4.epil.1:
131+
; CHECK-UNROLL: for.body4.epil.2:
142132

143133
%w.024 = phi i32 [ 0, %for.body4.lr.ph ], [ %inc, %for.body4 ]
144134
%add = add i32 %w.024, %mul
@@ -168,20 +158,15 @@ for.cond.cleanup:
168158

169159
; CHECK-LABEL: for.body
170160
for.body:
171-
; CHECK-UNROLL-A: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, %entry ], [ [[IV1:%[a-z.0-9]+]], %for.body ]
172-
; CHECK-UNROLL-A: [[IV1]] = add nuw nsw i32 [[IV0]], 1
173-
; CHECK-UNROLL-A: icmp eq i32 [[IV1]], 1024
174-
; CHECK-UNROLL-A: br
161+
; CHECK-NOUNROLL: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, %entry ], [ [[IV1:%[a-z.0-9]+]], %for.body ]
162+
; CHECK-NOUNROLL: [[IV1]] = add nuw nsw i32 [[IV0]], 1
163+
; CHECK-NOUNROLL: icmp eq i32 [[IV1]], 1024
164+
; CHECK-NOUNROLL: br
175165

176-
; CHECK-UNROLL-T1: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, %entry ], [ [[IV1:%[a-z.0-9]+]], %for.body ]
177-
; CHECK-UNROLL-T1: [[IV1]] = add nuw nsw i32 [[IV0]], 1
178-
; CHECK-UNROLL-T1: icmp eq i32 [[IV1]], 1024
179-
; CHECK-UNROLL-T1: br
180-
181-
; CHECK-UNROLL-T2: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, %entry ], [ [[IV1:%[a-z.0-9]+]], %for.body ]
182-
; CHECK-UNROLL-T2: [[IV1]] = add nuw nsw i32 [[IV0]], 1
183-
; CHECK-UNROLL-T2: icmp eq i32 [[IV1]], 1024
184-
; CHECK-UNROLL-T2: br
166+
; CHECK-UNROLL: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, %entry ], [ [[IV1:%[a-z.0-9]+]], %for.body ]
167+
; CHECK-UNROLL: [[IV1]] = add nuw nsw i32 [[IV0]], 1
168+
; CHECK-UNROLL: icmp eq i32 [[IV1]], 1024
169+
; CHECK-UNROLL: br
185170

186171
%i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
187172
%arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.08
@@ -197,27 +182,22 @@ for.body:
197182
}
198183

199184
; CHECK-LABEL: iterate_inc
200-
; CHECK-UNROLL-A: %n.addr.04 = phi %struct.Node* [ %1, %while.body ], [ %n, %while.body.preheader ]
201-
; CHECK-UNROLL-A: %tobool = icmp eq %struct.Node* %1, null
202-
; CHECK-UNROLL-A: br i1 %tobool
203-
; CHECK-UNROLL-A-NOT: load
204-
205-
; CHECK-UNROLL-T1: %n.addr.04 = phi %struct.Node* [ %1, %while.body ], [ %n, %while.body.preheader ]
206-
; CHECK-UNROLL-T1: %tobool = icmp eq %struct.Node* %1, null
207-
; CHECK-UNROLL-T1: br i1 %tobool
208-
; CHECK-UNROLL-T1-NOT: load
209-
210-
; CHECK-UNROLL-T2: [[CMP0:%[a-z.0-9]+]] = icmp eq %struct.Node* [[VAR0:%[a-z.0-9]+]], null
211-
; CHECK-UNROLL-T2: br i1 [[CMP0]], label [[END:%[a-z.0-9]+]]
212-
; CHECK-UNROLL-T2: [[CMP1:%[a-z.0-9]+]] = icmp eq %struct.Node* [[VAR1:%[a-z.0-9]+]], null
213-
; CHECK-UNROLL-T2: br i1 [[CMP1]], label [[END]]
214-
; CHECK-UNROLL-T2: [[CMP2:%[a-z.0-9]+]] = icmp eq %struct.Node* [[VAR2:%[a-z.0-9]+]], null
215-
; CHECK-UNROLL-T2: br i1 [[CMP2]], label [[END]]
216-
; CHECK-UNROLL-T2: [[CMP3:%[a-z.0-9]+]] = icmp eq %struct.Node* [[VAR3:%[a-z.0-9]+]], null
217-
; CHECK-UNROLL-T2: br i1 [[CMP3]], label [[END]]
218-
; CHECK-UNROLL-T2: [[CMP4:%[a-z.0-9]+]] = icmp eq %struct.Node* [[VAR4:%[a-z.0-9]+]], null
219-
; CHECK-UNROLL-T2: br i1 [[CMP4]], label [[END]]
220-
; CHECK-UNROLL-T2-NOT: load
185+
; CHECK-NOUNROLL: %n.addr.04 = phi %struct.Node* [ %1, %while.body ], [ %n, %while.body.preheader ]
186+
; CHECK-NOUNROLL: %tobool = icmp eq %struct.Node* %1, null
187+
; CHECK-NOUNROLL: br i1 %tobool
188+
; CHECK-NOUNROLL-NOT: load
189+
190+
; CHECK-UNROLL: [[CMP0:%[a-z.0-9]+]] = icmp eq %struct.Node* [[VAR0:%[a-z.0-9]+]], null
191+
; CHECK-UNROLL: br i1 [[CMP0]], label [[END:%[a-z.0-9]+]]
192+
; CHECK-UNROLL: [[CMP1:%[a-z.0-9]+]] = icmp eq %struct.Node* [[VAR1:%[a-z.0-9]+]], null
193+
; CHECK-UNROLL: br i1 [[CMP1]], label [[END]]
194+
; CHECK-UNROLL: [[CMP2:%[a-z.0-9]+]] = icmp eq %struct.Node* [[VAR2:%[a-z.0-9]+]], null
195+
; CHECK-UNROLL: br i1 [[CMP2]], label [[END]]
196+
; CHECK-UNROLL: [[CMP3:%[a-z.0-9]+]] = icmp eq %struct.Node* [[VAR3:%[a-z.0-9]+]], null
197+
; CHECK-UNROLL: br i1 [[CMP3]], label [[END]]
198+
; CHECK-UNROLL: [[CMP4:%[a-z.0-9]+]] = icmp eq %struct.Node* [[VAR4:%[a-z.0-9]+]], null
199+
; CHECK-UNROLL: br i1 [[CMP4]], label [[END]]
200+
; CHECK-UNROLL-NOT: load
221201

222202
%struct.Node = type { %struct.Node*, i32 }
223203

0 commit comments

Comments
 (0)