
Commit 8487981

[RISCV] Add target specific loop unrolling and peeling preferences

This change adds initial support for both of these target preference helper functions. The loop unrolling preferences are given initial settings, with some tuning, that control the thresholds, size, and attributes of the loops to unroll. The peeling preferences may also need tuning; the initial support closely mirrors what other architectures use.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D113798

1 parent 6de698b commit 8487981
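For orientation, here is a source-level sketch (not part of the commit) of the kind of loop these preferences target: a small, call-free, vector-free body with a fixed trip count, like the saxpy kernel checked by the new IR test further down. Whether a given build fully unrolls it depends on the rest of the pass pipeline and cost model, so treat this only as an illustration.

// Illustration only: C++ counterpart of the saxpy IR test added below.
// A constant trip count of 64 and a straight-line, call-free body make it
// the kind of loop the new getUnrollingPreferences() settings aim at.
void saxpy(float a, float *x, float *y) {
  for (int i = 0; i < 64; ++i) // small, known trip count
    y[i] = a * x[i] + y[i];    // one fmul and one fadd per iteration
}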

File tree: 3 files changed, +248 −0 lines

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 79 additions & 0 deletions

@@ -162,3 +162,82 @@ InstructionCost RISCVTTIImpl::getGatherScatterOpCost(
      getMemoryOpCost(Opcode, VTy->getElementType(), Alignment, 0, CostKind, I);
  return NumLoads * MemOpCost;
}

void RISCVTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                           TTI::UnrollingPreferences &UP,
                                           OptimizationRemarkEmitter *ORE) {
  // TODO: More tuning on benchmarks and metrics with changes as needed
  // would apply to all settings below to enable performance.

  // Enable upper-bound unrolling universally, not dependent upon the
  // conditions below.
  UP.UpperBound = true;

  // Disable loop unrolling for Oz and Os.
  UP.OptSizeThreshold = 0;
  UP.PartialOptSizeThreshold = 0;
  if (L->getHeader()->getParent()->hasOptSize())
    return;

  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  LLVM_DEBUG(dbgs() << "Loop has:\n"
                    << "Blocks: " << L->getNumBlocks() << "\n"
                    << "Exit blocks: " << ExitingBlocks.size() << "\n");

  // Only allow one exit other than the latch. This acts as an early exit
  // as it mirrors the profitability calculation of the runtime unroller.
  if (ExitingBlocks.size() > 2)
    return;

  // Limit the CFG of the loop body for targets with a branch predictor.
  // Allowing 4 blocks permits if-then-else diamonds in the body.
  if (L->getNumBlocks() > 4)
    return;

  // Don't unroll vectorized loops, including the remainder loop.
  if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
    return;

  // Scan the loop: don't unroll loops with calls as this could prevent
  // inlining.
  InstructionCost Cost = 0;
  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {
      // Initial setting - don't unroll loops containing vectorized
      // instructions.
      if (I.getType()->isVectorTy())
        return;

      if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
        if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
          if (!isLoweredToCall(F))
            continue;
        }
        return;
      }

      SmallVector<const Value *> Operands(I.operand_values());
      Cost +=
          getUserCost(&I, Operands, TargetTransformInfo::TCK_SizeAndLatency);
    }
  }

  LLVM_DEBUG(dbgs() << "Cost of loop: " << Cost << "\n");

  UP.Partial = true;
  UP.Runtime = true;
  UP.UnrollRemainder = true;
  UP.UnrollAndJam = true;
  UP.UnrollAndJamInnerLoopThreshold = 60;

  // Forcing the unrolling of small loops can be very useful because of the
  // branch-taken cost of the backedge.
  if (Cost < 12)
    UP.Force = true;
}

void RISCVTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                                         TTI::PeelingPreferences &PP) {
  BaseT::getPeelingPreferences(L, SE, PP);
}
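By contrast, the call scan above bails out as soon as the body contains anything that is lowered to a real call, so a loop like the following hypothetical example (not from the commit) would be left alone to stay friendly to later inlining:

// Hypothetical example: the callee is external, so isLoweredToCall() is true
// and getUnrollingPreferences() returns before enabling any unrolling here.
extern float expensive(float); // assumed helper, not part of the commit

void scale_all(float *x) {
  for (int i = 0; i < 64; ++i)
    x[i] = expensive(x[i]); // a call in the body blocks unrolling
}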

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h

Lines changed: 7 additions & 0 deletions

@@ -73,6 +73,13 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
    llvm_unreachable("Unsupported register kind");
  }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  unsigned getMinVectorRegisterBitWidth() const {
    return ST->hasVInstructions() ? ST->getMinRVVVectorSizeInBits() : 0;
  }
Lines changed: 162 additions & 0 deletions

@@ -0,0 +1,162 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt %s -S -mtriple=riscv64 -loop-unroll | FileCheck %s

define dso_local void @saxpy(float %a, float* %x, float* %y) {
; CHECK-LABEL: @saxpy(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT_15:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[MUL:%.*]] = fmul fast float [[TMP0]], [[A:%.*]]
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[Y:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[ARRAYIDX2]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[MUL]], [[TMP1]]
; CHECK-NEXT:    store float [[ADD]], float* [[ARRAYIDX2]], align 4
; CHECK-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT:    [[TMP2:%.*]] = load float, float* [[ARRAYIDX_1]], align 4
; CHECK-NEXT:    [[MUL_1:%.*]] = fmul fast float [[TMP2]], [[A]]
; CHECK-NEXT:    [[ARRAYIDX2_1:%.*]] = getelementptr inbounds float, float* [[Y]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT:    [[TMP3:%.*]] = load float, float* [[ARRAYIDX2_1]], align 4
; CHECK-NEXT:    [[ADD_1:%.*]] = fadd fast float [[MUL_1]], [[TMP3]]
; CHECK-NEXT:    store float [[ADD_1]], float* [[ARRAYIDX2_1]], align 4
; CHECK-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1
; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 [[INDVARS_IV_NEXT_1]]
; CHECK-NEXT:    [[TMP4:%.*]] = load float, float* [[ARRAYIDX_2]], align 4
; CHECK-NEXT:    [[MUL_2:%.*]] = fmul fast float [[TMP4]], [[A]]
; CHECK-NEXT:    [[ARRAYIDX2_2:%.*]] = getelementptr inbounds float, float* [[Y]], i64 [[INDVARS_IV_NEXT_1]]
; CHECK-NEXT:    [[TMP5:%.*]] = load float, float* [[ARRAYIDX2_2]], align 4
; CHECK-NEXT:    [[ADD_2:%.*]] = fadd fast float [[MUL_2]], [[TMP5]]
; CHECK-NEXT:    store float [[ADD_2]], float* [[ARRAYIDX2_2]], align 4
; CHECK-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_1]], 1
; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 [[INDVARS_IV_NEXT_2]]
; CHECK-NEXT:    [[TMP6:%.*]] = load float, float* [[ARRAYIDX_3]], align 4
; CHECK-NEXT:    [[MUL_3:%.*]] = fmul fast float [[TMP6]], [[A]]
; CHECK-NEXT:    [[ARRAYIDX2_3:%.*]] = getelementptr inbounds float, float* [[Y]], i64 [[INDVARS_IV_NEXT_2]]
; CHECK-NEXT:    [[TMP7:%.*]] = load float, float* [[ARRAYIDX2_3]], align 4
; CHECK-NEXT:    [[ADD_3:%.*]] = fadd fast float [[MUL_3]], [[TMP7]]
; CHECK-NEXT:    store float [[ADD_3]], float* [[ARRAYIDX2_3]], align 4
; CHECK-NEXT:    [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1
; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 [[INDVARS_IV_NEXT_3]]
; CHECK-NEXT:    [[TMP8:%.*]] = load float, float* [[ARRAYIDX_4]], align 4
; CHECK-NEXT:    [[MUL_4:%.*]] = fmul fast float [[TMP8]], [[A]]
; CHECK-NEXT:    [[ARRAYIDX2_4:%.*]] = getelementptr inbounds float, float* [[Y]], i64 [[INDVARS_IV_NEXT_3]]
; CHECK-NEXT:    [[TMP9:%.*]] = load float, float* [[ARRAYIDX2_4]], align 4
; CHECK-NEXT:    [[ADD_4:%.*]] = fadd fast float [[MUL_4]], [[TMP9]]
; CHECK-NEXT:    store float [[ADD_4]], float* [[ARRAYIDX2_4]], align 4
; CHECK-NEXT:    [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_3]], 1
; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 [[INDVARS_IV_NEXT_4]]
; CHECK-NEXT:    [[TMP10:%.*]] = load float, float* [[ARRAYIDX_5]], align 4
; CHECK-NEXT:    [[MUL_5:%.*]] = fmul fast float [[TMP10]], [[A]]
; CHECK-NEXT:    [[ARRAYIDX2_5:%.*]] = getelementptr inbounds float, float* [[Y]], i64 [[INDVARS_IV_NEXT_4]]
; CHECK-NEXT:    [[TMP11:%.*]] = load float, float* [[ARRAYIDX2_5]], align 4
; CHECK-NEXT:    [[ADD_5:%.*]] = fadd fast float [[MUL_5]], [[TMP11]]
; CHECK-NEXT:    store float [[ADD_5]], float* [[ARRAYIDX2_5]], align 4
; CHECK-NEXT:    [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_4]], 1
; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 [[INDVARS_IV_NEXT_5]]
; CHECK-NEXT:    [[TMP12:%.*]] = load float, float* [[ARRAYIDX_6]], align 4
; CHECK-NEXT:    [[MUL_6:%.*]] = fmul fast float [[TMP12]], [[A]]
; CHECK-NEXT:    [[ARRAYIDX2_6:%.*]] = getelementptr inbounds float, float* [[Y]], i64 [[INDVARS_IV_NEXT_5]]
; CHECK-NEXT:    [[TMP13:%.*]] = load float, float* [[ARRAYIDX2_6]], align 4
; CHECK-NEXT:    [[ADD_6:%.*]] = fadd fast float [[MUL_6]], [[TMP13]]
; CHECK-NEXT:    store float [[ADD_6]], float* [[ARRAYIDX2_6]], align 4
; CHECK-NEXT:    [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_5]], 1
; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 [[INDVARS_IV_NEXT_6]]
; CHECK-NEXT:    [[TMP14:%.*]] = load float, float* [[ARRAYIDX_7]], align 4
; CHECK-NEXT:    [[MUL_7:%.*]] = fmul fast float [[TMP14]], [[A]]
; CHECK-NEXT:    [[ARRAYIDX2_7:%.*]] = getelementptr inbounds float, float* [[Y]], i64 [[INDVARS_IV_NEXT_6]]
; CHECK-NEXT:    [[TMP15:%.*]] = load float, float* [[ARRAYIDX2_7]], align 4
; CHECK-NEXT:    [[ADD_7:%.*]] = fadd fast float [[MUL_7]], [[TMP15]]
; CHECK-NEXT:    store float [[ADD_7]], float* [[ARRAYIDX2_7]], align 4
; CHECK-NEXT:    [[INDVARS_IV_NEXT_7:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_6]], 1
; CHECK-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 [[INDVARS_IV_NEXT_7]]
; CHECK-NEXT:    [[TMP16:%.*]] = load float, float* [[ARRAYIDX_8]], align 4
; CHECK-NEXT:    [[MUL_8:%.*]] = fmul fast float [[TMP16]], [[A]]
; CHECK-NEXT:    [[ARRAYIDX2_8:%.*]] = getelementptr inbounds float, float* [[Y]], i64 [[INDVARS_IV_NEXT_7]]
; CHECK-NEXT:    [[TMP17:%.*]] = load float, float* [[ARRAYIDX2_8]], align 4
; CHECK-NEXT:    [[ADD_8:%.*]] = fadd fast float [[MUL_8]], [[TMP17]]
; CHECK-NEXT:    store float [[ADD_8]], float* [[ARRAYIDX2_8]], align 4
; CHECK-NEXT:    [[INDVARS_IV_NEXT_8:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_7]], 1
; CHECK-NEXT:    [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 [[INDVARS_IV_NEXT_8]]
; CHECK-NEXT:    [[TMP18:%.*]] = load float, float* [[ARRAYIDX_9]], align 4
; CHECK-NEXT:    [[MUL_9:%.*]] = fmul fast float [[TMP18]], [[A]]
; CHECK-NEXT:    [[ARRAYIDX2_9:%.*]] = getelementptr inbounds float, float* [[Y]], i64 [[INDVARS_IV_NEXT_8]]
; CHECK-NEXT:    [[TMP19:%.*]] = load float, float* [[ARRAYIDX2_9]], align 4
; CHECK-NEXT:    [[ADD_9:%.*]] = fadd fast float [[MUL_9]], [[TMP19]]
; CHECK-NEXT:    store float [[ADD_9]], float* [[ARRAYIDX2_9]], align 4
; CHECK-NEXT:    [[INDVARS_IV_NEXT_9:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_8]], 1
; CHECK-NEXT:    [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 [[INDVARS_IV_NEXT_9]]
; CHECK-NEXT:    [[TMP20:%.*]] = load float, float* [[ARRAYIDX_10]], align 4
; CHECK-NEXT:    [[MUL_10:%.*]] = fmul fast float [[TMP20]], [[A]]
; CHECK-NEXT:    [[ARRAYIDX2_10:%.*]] = getelementptr inbounds float, float* [[Y]], i64 [[INDVARS_IV_NEXT_9]]
; CHECK-NEXT:    [[TMP21:%.*]] = load float, float* [[ARRAYIDX2_10]], align 4
; CHECK-NEXT:    [[ADD_10:%.*]] = fadd fast float [[MUL_10]], [[TMP21]]
; CHECK-NEXT:    store float [[ADD_10]], float* [[ARRAYIDX2_10]], align 4
; CHECK-NEXT:    [[INDVARS_IV_NEXT_10:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_9]], 1
; CHECK-NEXT:    [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 [[INDVARS_IV_NEXT_10]]
; CHECK-NEXT:    [[TMP22:%.*]] = load float, float* [[ARRAYIDX_11]], align 4
; CHECK-NEXT:    [[MUL_11:%.*]] = fmul fast float [[TMP22]], [[A]]
; CHECK-NEXT:    [[ARRAYIDX2_11:%.*]] = getelementptr inbounds float, float* [[Y]], i64 [[INDVARS_IV_NEXT_10]]
; CHECK-NEXT:    [[TMP23:%.*]] = load float, float* [[ARRAYIDX2_11]], align 4
; CHECK-NEXT:    [[ADD_11:%.*]] = fadd fast float [[MUL_11]], [[TMP23]]
; CHECK-NEXT:    store float [[ADD_11]], float* [[ARRAYIDX2_11]], align 4
; CHECK-NEXT:    [[INDVARS_IV_NEXT_11:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_10]], 1
; CHECK-NEXT:    [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 [[INDVARS_IV_NEXT_11]]
; CHECK-NEXT:    [[TMP24:%.*]] = load float, float* [[ARRAYIDX_12]], align 4
; CHECK-NEXT:    [[MUL_12:%.*]] = fmul fast float [[TMP24]], [[A]]
; CHECK-NEXT:    [[ARRAYIDX2_12:%.*]] = getelementptr inbounds float, float* [[Y]], i64 [[INDVARS_IV_NEXT_11]]
; CHECK-NEXT:    [[TMP25:%.*]] = load float, float* [[ARRAYIDX2_12]], align 4
; CHECK-NEXT:    [[ADD_12:%.*]] = fadd fast float [[MUL_12]], [[TMP25]]
; CHECK-NEXT:    store float [[ADD_12]], float* [[ARRAYIDX2_12]], align 4
; CHECK-NEXT:    [[INDVARS_IV_NEXT_12:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_11]], 1
; CHECK-NEXT:    [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 [[INDVARS_IV_NEXT_12]]
; CHECK-NEXT:    [[TMP26:%.*]] = load float, float* [[ARRAYIDX_13]], align 4
; CHECK-NEXT:    [[MUL_13:%.*]] = fmul fast float [[TMP26]], [[A]]
; CHECK-NEXT:    [[ARRAYIDX2_13:%.*]] = getelementptr inbounds float, float* [[Y]], i64 [[INDVARS_IV_NEXT_12]]
; CHECK-NEXT:    [[TMP27:%.*]] = load float, float* [[ARRAYIDX2_13]], align 4
; CHECK-NEXT:    [[ADD_13:%.*]] = fadd fast float [[MUL_13]], [[TMP27]]
; CHECK-NEXT:    store float [[ADD_13]], float* [[ARRAYIDX2_13]], align 4
; CHECK-NEXT:    [[INDVARS_IV_NEXT_13:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_12]], 1
; CHECK-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 [[INDVARS_IV_NEXT_13]]
; CHECK-NEXT:    [[TMP28:%.*]] = load float, float* [[ARRAYIDX_14]], align 4
; CHECK-NEXT:    [[MUL_14:%.*]] = fmul fast float [[TMP28]], [[A]]
; CHECK-NEXT:    [[ARRAYIDX2_14:%.*]] = getelementptr inbounds float, float* [[Y]], i64 [[INDVARS_IV_NEXT_13]]
; CHECK-NEXT:    [[TMP29:%.*]] = load float, float* [[ARRAYIDX2_14]], align 4
; CHECK-NEXT:    [[ADD_14:%.*]] = fadd fast float [[MUL_14]], [[TMP29]]
; CHECK-NEXT:    store float [[ADD_14]], float* [[ARRAYIDX2_14]], align 4
; CHECK-NEXT:    [[INDVARS_IV_NEXT_14:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_13]], 1
; CHECK-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 [[INDVARS_IV_NEXT_14]]
; CHECK-NEXT:    [[TMP30:%.*]] = load float, float* [[ARRAYIDX_15]], align 4
; CHECK-NEXT:    [[MUL_15:%.*]] = fmul fast float [[TMP30]], [[A]]
; CHECK-NEXT:    [[ARRAYIDX2_15:%.*]] = getelementptr inbounds float, float* [[Y]], i64 [[INDVARS_IV_NEXT_14]]
; CHECK-NEXT:    [[TMP31:%.*]] = load float, float* [[ARRAYIDX2_15]], align 4
; CHECK-NEXT:    [[ADD_15:%.*]] = fadd fast float [[MUL_15]], [[TMP31]]
; CHECK-NEXT:    store float [[ADD_15]], float* [[ARRAYIDX2_15]], align 4
; CHECK-NEXT:    [[INDVARS_IV_NEXT_15]] = add nuw nsw i64 [[INDVARS_IV_NEXT_14]], 1
; CHECK-NEXT:    [[EXITCOND_NOT_15:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_15]], 64
; CHECK-NEXT:    br i1 [[EXITCOND_NOT_15]], label [[EXIT_LOOP:%.*]], label [[FOR_BODY]]
; CHECK:       exit_loop:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds float, float* %x, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %mul = fmul fast float %0, %a
  %arrayidx2 = getelementptr inbounds float, float* %y, i64 %indvars.iv
  %1 = load float, float* %arrayidx2, align 4
  %add = fadd fast float %mul, %1
  store float %add, float* %arrayidx2, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, 64
  br i1 %exitcond.not, label %exit_loop, label %for.body

exit_loop:
  ret void
}