Skip to content

Commit 9cfa550

Browse files
committed
Address review comment
1 parent 64c9960 commit 9cfa550

File tree

2 files changed

+242
-47
lines changed

2 files changed

+242
-47
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 48 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -4258,20 +4258,29 @@ static bool shouldUnrollLoopWithInstruction(Instruction &I,
42584258
return true;
42594259
}
42604260

4261-
static bool shouldUnrollSmallMultiExitLoop(Loop *L, ScalarEvolution &SE,
4262-
AArch64TTIImpl &TTI) {
4263-
// Small search loops with multiple exits can be highly beneficial to unroll.
4264-
// We only care about loops with exactly two exiting blocks, although each
4265-
// block could jump to the same exit block.
4266-
SmallVector<BasicBlock *> Blocks(L->getBlocks());
4267-
if (Blocks.size() != 2 || L->getExitingBlock())
4268-
return false;
4261+
static InstructionCost getSizeOfLoop(Loop *L, AArch64TTIImpl &TTI) {
4262+
// Estimate the size of the loop.
4263+
InstructionCost Size = 0;
4264+
for (auto *BB : L->getBlocks()) {
4265+
for (auto &I : *BB) {
4266+
if (!shouldUnrollLoopWithInstruction(I, TTI))
4267+
return InstructionCost::getInvalid();
42694268

4270-
if (any_of(Blocks, [](BasicBlock *BB) {
4271-
return !isa<BranchInst>(BB->getTerminator());
4272-
}))
4273-
return false;
4269+
SmallVector<const Value *, 4> Operands(I.operand_values());
4270+
InstructionCost Cost =
4271+
TTI.getInstructionCost(&I, Operands, TTI::TCK_CodeSize);
4272+
// This can happen with intrinsics that don't currently have a cost model
4273+
// or for some operations that require SVE.
4274+
if (!Cost.isValid())
4275+
return InstructionCost::getInvalid();
4276+
Size += *Cost.getValue();
4277+
}
4278+
}
4279+
return Size;
4280+
}
42744281

4282+
static bool shouldUnrollMultiExitLoop(Loop *L, ScalarEvolution &SE,
4283+
AArch64TTIImpl &TTI) {
42754284
// Only consider loops with unknown trip counts for which we can determine
42764285
// a symbolic expression. Multi-exit loops with small known trip counts will
42774286
// likely be unrolled anyway.
@@ -4285,25 +4294,27 @@ static bool shouldUnrollSmallMultiExitLoop(Loop *L, ScalarEvolution &SE,
42854294
if (MaxTC > 0 && MaxTC <= 32)
42864295
return false;
42874296

4297+
if (findStringMetadataForLoop(L, "llvm.loop.isvectorized"))
4298+
return false;
4299+
42884300
// Estimate the size of the loop.
4289-
int64_t Size = 0;
4290-
for (auto *BB : L->getBlocks()) {
4291-
for (auto &I : *BB) {
4292-
if (!shouldUnrollLoopWithInstruction(I, TTI))
4293-
return false;
4301+
InstructionCost Size = getSizeOfLoop(L, TTI);
4302+
if (!Size.isValid())
4303+
return false;
42944304

4295-
SmallVector<const Value *, 4> Operands(I.operand_values());
4296-
InstructionCost Cost =
4297-
TTI.getInstructionCost(&I, Operands, TTI::TCK_CodeSize);
4298-
// This can happen with intrinsics that don't currently have a cost model
4299-
// or for some operations that require SVE.
4300-
if (!Cost.isValid())
4301-
return false;
4302-
Size += *Cost.getValue();
4303-
}
4304-
}
4305+
// Small search loops with multiple exits can be highly beneficial to unroll.
4306+
// We only care about loops with exactly two exiting blocks, although each
4307+
// block could jump to the same exit block.
4308+
SmallVector<BasicBlock *> Blocks(L->getBlocks());
4309+
if (Blocks.size() != 2)
4310+
return false;
4311+
4312+
if (any_of(Blocks, [](BasicBlock *BB) {
4313+
return !isa<BranchInst>(BB->getTerminator());
4314+
}))
4315+
return false;
43054316

4306-
return Size < 6;
4317+
return *Size.getValue() < 6;
43074318
}
43084319

43094320
/// For Apple CPUs, we want to runtime-unroll loops to make better use of the
@@ -4339,24 +4350,9 @@ getAppleRuntimeUnrollPreferences(Loop *L, ScalarEvolution &SE,
43394350
}
43404351
}
43414352

4342-
// Small search loops with multiple exits can be highly beneficial to unroll.
4343-
if (!L->getExitBlock()) {
4344-
if (L->getNumBlocks() == 2 && Size < 6 &&
4345-
all_of(
4346-
L->getBlocks(),
4347-
[](BasicBlock *BB) {
4348-
return isa<BranchInst>(BB->getTerminator());
4349-
})) {
4350-
UP.RuntimeUnrollMultiExit = true;
4351-
UP.Runtime = true;
4352-
// Limit unroll count.
4353-
UP.DefaultUnrollRuntimeCount = 4;
4354-
// Allow slightly more costly trip-count expansion to catch search loops
4355-
// with pointer inductions.
4356-
UP.SCEVExpansionBudget = 5;
4357-
}
4353+
// Loops without a unique exit block are handled by the common multi-exit
// code in getUnrollingPreferences.
4354+
if (!L->getExitBlock())
43584355
return;
4359-
}
43604356

43614357
if (SE.getSymbolicMaxBackedgeTakenCount(L) != SE.getBackedgeTakenCount(L))
43624358
return;
@@ -4466,12 +4462,15 @@ void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
44664462
UP.PartialOptSizeThreshold = 0;
44674463

44684464
// Apply subtarget-specific unrolling preferences.
4465+
unsigned SmallMultiExitLoopUnrollFactor = SmallMultiExitLoopUF;
44694466
switch (ST->getProcFamily()) {
44704467
case AArch64Subtarget::AppleA14:
44714468
case AArch64Subtarget::AppleA15:
44724469
case AArch64Subtarget::AppleA16:
44734470
case AArch64Subtarget::AppleM4:
44744471
getAppleRuntimeUnrollPreferences(L, SE, UP, *this);
4472+
if (!SmallMultiExitLoopUF.getNumOccurrences())
4473+
SmallMultiExitLoopUnrollFactor = 4;
44754474
break;
44764475
case AArch64Subtarget::Falkor:
44774476
if (EnableFalkorHWPFUnrollFix)
@@ -4481,14 +4480,16 @@ void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
44814480
break;
44824481
}
44834482

4484-
if (SmallMultiExitLoopUF && shouldUnrollSmallMultiExitLoop(L, SE, *this)) {
4483+
if (!L->getExitBlock() && SmallMultiExitLoopUnrollFactor &&
4484+
shouldUnrollMultiExitLoop(L, SE, *this)) {
44854485
UP.RuntimeUnrollMultiExit = true;
44864486
UP.Runtime = true;
44874487
// Limit unroll count.
4488-
UP.DefaultUnrollRuntimeCount = SmallMultiExitLoopUF;
4488+
UP.DefaultUnrollRuntimeCount = SmallMultiExitLoopUnrollFactor;
44894489
// Allow slightly more costly trip-count expansion to catch search loops
44904490
// with pointer inductions.
44914491
UP.SCEVExpansionBudget = 5;
4492+
return;
44924493
}
44934494

44944495
// Scan the loop: don't unroll loops with calls as this could prevent

llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling-multi-exit.ll

Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
; RUN: opt -p loop-unroll -mcpu=apple-m2 -S %s | FileCheck --check-prefix=APPLE %s
44
; RUN: opt -p loop-unroll -mcpu=apple-m3 -S %s | FileCheck --check-prefix=APPLE %s
55
; RUN: opt -p loop-unroll -mcpu=apple-m4 -S %s | FileCheck --check-prefix=APPLE %s
6+
; RUN: opt -p loop-unroll -mcpu=apple-m4 -small-multi-exit-loop-unroll-factor=2 -S %s | FileCheck --check-prefix=UNROLL2 %s
67
; RUN: opt -p loop-unroll -mcpu=cortex-a57 -S %s | FileCheck --check-prefix=OTHER %s
78

89
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
@@ -86,6 +87,61 @@ define i1 @multi_2_exit_find_i8_loop(ptr %vec, i8 %tgt) {
8687
; APPLE-NEXT: [[C_5:%.*]] = icmp eq ptr [[RES1]], [[END]]
8788
; APPLE-NEXT: ret i1 [[C_5]]
8889
;
90+
; UNROLL2-LABEL: define i1 @multi_2_exit_find_i8_loop(
91+
; UNROLL2-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0:[0-9]+]] {
92+
; UNROLL2-NEXT: [[ENTRY:.*]]:
93+
; UNROLL2-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
94+
; UNROLL2-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
95+
; UNROLL2-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
96+
; UNROLL2-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
97+
; UNROLL2-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
98+
; UNROLL2-NEXT: [[TMP0:%.*]] = sub i64 [[END1]], [[START2]]
99+
; UNROLL2-NEXT: [[TMP1:%.*]] = freeze i64 [[TMP0]]
100+
; UNROLL2-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], -1
101+
; UNROLL2-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP1]], 1
102+
; UNROLL2-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
103+
; UNROLL2-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_HEADER_PROL_PREHEADER:.*]], label %[[LOOP_HEADER_PROL_LOOPEXIT:.*]]
104+
; UNROLL2: [[LOOP_HEADER_PROL_PREHEADER]]:
105+
; UNROLL2-NEXT: br label %[[LOOP_HEADER_PROL:.*]]
106+
; UNROLL2: [[LOOP_HEADER_PROL]]:
107+
; UNROLL2-NEXT: [[L_PROL:%.*]] = load i8, ptr [[START]], align 8
108+
; UNROLL2-NEXT: [[C_1_PROL:%.*]] = icmp eq i8 [[L_PROL]], [[TGT]]
109+
; UNROLL2-NEXT: br i1 [[C_1_PROL]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP_LATCH_PROL:.*]]
110+
; UNROLL2: [[LOOP_LATCH_PROL]]:
111+
; UNROLL2-NEXT: [[PTR_IV_NEXT_PROL:%.*]] = getelementptr inbounds nuw i8, ptr [[START]], i64 1
112+
; UNROLL2-NEXT: br label %[[LOOP_HEADER_PROL_LOOPEXIT]]
113+
; UNROLL2: [[LOOP_HEADER_PROL_LOOPEXIT]]:
114+
; UNROLL2-NEXT: [[RES_UNR:%.*]] = phi ptr [ poison, %[[ENTRY]] ], [ [[END]], %[[LOOP_LATCH_PROL]] ]
115+
; UNROLL2-NEXT: [[PTR_IV_UNR:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT_PROL]], %[[LOOP_LATCH_PROL]] ]
116+
; UNROLL2-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], 1
117+
; UNROLL2-NEXT: br i1 [[TMP3]], label %[[EXIT:.*]], label %[[ENTRY_NEW:.*]]
118+
; UNROLL2: [[ENTRY_NEW]]:
119+
; UNROLL2-NEXT: br label %[[LOOP_HEADER:.*]]
120+
; UNROLL2: [[LOOP_HEADER]]:
121+
; UNROLL2-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_UNR]], %[[ENTRY_NEW]] ], [ [[PTR_IV_NEXT_1:%.*]], %[[LOOP_LATCH_1:.*]] ]
122+
; UNROLL2-NEXT: [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
123+
; UNROLL2-NEXT: [[C_1:%.*]] = icmp eq i8 [[L]], [[TGT]]
124+
; UNROLL2-NEXT: br i1 [[C_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP_LATCH:.*]]
125+
; UNROLL2: [[LOOP_LATCH]]:
126+
; UNROLL2-NEXT: [[PTR_IV_NEXT:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
127+
; UNROLL2-NEXT: [[L_1:%.*]] = load i8, ptr [[PTR_IV_NEXT]], align 8
128+
; UNROLL2-NEXT: [[C_1_1:%.*]] = icmp eq i8 [[L_1]], [[TGT]]
129+
; UNROLL2-NEXT: br i1 [[C_1_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_LATCH_1]]
130+
; UNROLL2: [[LOOP_LATCH_1]]:
131+
; UNROLL2-NEXT: [[PTR_IV_NEXT_1]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT]], i64 1
132+
; UNROLL2-NEXT: [[C_2_1:%.*]] = icmp eq ptr [[PTR_IV_NEXT_1]], [[END]]
133+
; UNROLL2-NEXT: br i1 [[C_2_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_HEADER]]
134+
; UNROLL2: [[EXIT_UNR_LCSSA_LOOPEXIT]]:
135+
; UNROLL2-NEXT: [[RES_PH_PH:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[PTR_IV_NEXT]], %[[LOOP_LATCH]] ], [ [[END]], %[[LOOP_LATCH_1]] ]
136+
; UNROLL2-NEXT: br label %[[EXIT_UNR_LCSSA]]
137+
; UNROLL2: [[EXIT_UNR_LCSSA]]:
138+
; UNROLL2-NEXT: [[RES_PH:%.*]] = phi ptr [ [[START]], %[[LOOP_HEADER_PROL]] ], [ [[RES_PH_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
139+
; UNROLL2-NEXT: br label %[[EXIT]]
140+
; UNROLL2: [[EXIT]]:
141+
; UNROLL2-NEXT: [[RES:%.*]] = phi ptr [ [[RES_UNR]], %[[LOOP_HEADER_PROL_LOOPEXIT]] ], [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ]
142+
; UNROLL2-NEXT: [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
143+
; UNROLL2-NEXT: ret i1 [[C_3]]
144+
;
89145
; OTHER-LABEL: define i1 @multi_2_exit_find_i8_loop(
90146
; OTHER-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0:[0-9]+]] {
91147
; OTHER-NEXT: [[ENTRY:.*]]:
@@ -215,6 +271,67 @@ define i1 @multi_2_exit_find_ptr_loop(ptr %vec, ptr %tgt) {
215271
; APPLE-NEXT: [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
216272
; APPLE-NEXT: ret i1 [[C_3]]
217273
;
274+
; UNROLL2-LABEL: define i1 @multi_2_exit_find_ptr_loop(
275+
; UNROLL2-SAME: ptr [[VEC:%.*]], ptr [[TGT:%.*]]) #[[ATTR0]] {
276+
; UNROLL2-NEXT: [[ENTRY:.*]]:
277+
; UNROLL2-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
278+
; UNROLL2-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
279+
; UNROLL2-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[START]], i64 8) ]
280+
; UNROLL2-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 8
281+
; UNROLL2-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
282+
; UNROLL2-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
283+
; UNROLL2-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
284+
; UNROLL2-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
285+
; UNROLL2-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
286+
; UNROLL2-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
287+
; UNROLL2-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
288+
; UNROLL2-NEXT: [[TMP4:%.*]] = freeze i64 [[TMP3]]
289+
; UNROLL2-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], -1
290+
; UNROLL2-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP4]], 1
291+
; UNROLL2-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
292+
; UNROLL2-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_HEADER_PROL_PREHEADER:.*]], label %[[LOOP_HEADER_PROL_LOOPEXIT:.*]]
293+
; UNROLL2: [[LOOP_HEADER_PROL_PREHEADER]]:
294+
; UNROLL2-NEXT: br label %[[LOOP_HEADER_PROL:.*]]
295+
; UNROLL2: [[LOOP_HEADER_PROL]]:
296+
; UNROLL2-NEXT: [[L_PROL:%.*]] = load ptr, ptr [[START]], align 8
297+
; UNROLL2-NEXT: [[C_1_PROL:%.*]] = icmp eq ptr [[L_PROL]], [[TGT]]
298+
; UNROLL2-NEXT: br i1 [[C_1_PROL]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP_LATCH_PROL:.*]]
299+
; UNROLL2: [[LOOP_LATCH_PROL]]:
300+
; UNROLL2-NEXT: [[PTR_IV_NEXT_PROL:%.*]] = getelementptr inbounds nuw i8, ptr [[START]], i64 8
301+
; UNROLL2-NEXT: br label %[[LOOP_HEADER_PROL_LOOPEXIT]]
302+
; UNROLL2: [[LOOP_HEADER_PROL_LOOPEXIT]]:
303+
; UNROLL2-NEXT: [[RES_UNR:%.*]] = phi ptr [ poison, %[[ENTRY]] ], [ [[END]], %[[LOOP_LATCH_PROL]] ]
304+
; UNROLL2-NEXT: [[PTR_IV_UNR:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT_PROL]], %[[LOOP_LATCH_PROL]] ]
305+
; UNROLL2-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP5]], 1
306+
; UNROLL2-NEXT: br i1 [[TMP6]], label %[[EXIT:.*]], label %[[ENTRY_NEW:.*]]
307+
; UNROLL2: [[ENTRY_NEW]]:
308+
; UNROLL2-NEXT: br label %[[LOOP_HEADER:.*]]
309+
; UNROLL2: [[LOOP_HEADER]]:
310+
; UNROLL2-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_UNR]], %[[ENTRY_NEW]] ], [ [[PTR_IV_NEXT_1:%.*]], %[[LOOP_LATCH_1:.*]] ]
311+
; UNROLL2-NEXT: [[L:%.*]] = load ptr, ptr [[PTR_IV]], align 8
312+
; UNROLL2-NEXT: [[C_1:%.*]] = icmp eq ptr [[L]], [[TGT]]
313+
; UNROLL2-NEXT: br i1 [[C_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP_LATCH:.*]]
314+
; UNROLL2: [[LOOP_LATCH]]:
315+
; UNROLL2-NEXT: [[PTR_IV_NEXT:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 8
316+
; UNROLL2-NEXT: [[L_1:%.*]] = load ptr, ptr [[PTR_IV_NEXT]], align 8
317+
; UNROLL2-NEXT: [[C_1_1:%.*]] = icmp eq ptr [[L_1]], [[TGT]]
318+
; UNROLL2-NEXT: br i1 [[C_1_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_LATCH_1]]
319+
; UNROLL2: [[LOOP_LATCH_1]]:
320+
; UNROLL2-NEXT: [[PTR_IV_NEXT_1]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT]], i64 8
321+
; UNROLL2-NEXT: [[C_2_1:%.*]] = icmp eq ptr [[PTR_IV_NEXT_1]], [[END]]
322+
; UNROLL2-NEXT: br i1 [[C_2_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_HEADER]]
323+
; UNROLL2: [[EXIT_UNR_LCSSA_LOOPEXIT]]:
324+
; UNROLL2-NEXT: [[RES_PH_PH:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[PTR_IV_NEXT]], %[[LOOP_LATCH]] ], [ [[END]], %[[LOOP_LATCH_1]] ]
325+
; UNROLL2-NEXT: br label %[[EXIT_UNR_LCSSA]]
326+
; UNROLL2: [[EXIT_UNR_LCSSA]]:
327+
; UNROLL2-NEXT: [[RES_PH:%.*]] = phi ptr [ [[START]], %[[LOOP_HEADER_PROL]] ], [ [[RES_PH_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
328+
; UNROLL2-NEXT: br label %[[EXIT]]
329+
; UNROLL2: [[EXIT]]:
330+
; UNROLL2-NEXT: [[RES:%.*]] = phi ptr [ [[RES_UNR]], %[[LOOP_HEADER_PROL_LOOPEXIT]] ], [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ]
331+
; UNROLL2-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
332+
; UNROLL2-NEXT: [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
333+
; UNROLL2-NEXT: ret i1 [[C_3]]
334+
;
218335
; OTHER-LABEL: define i1 @multi_2_exit_find_ptr_loop(
219336
; OTHER-SAME: ptr [[VEC:%.*]], ptr [[TGT:%.*]]) #[[ATTR0]] {
220337
; OTHER-NEXT: [[ENTRY:.*]]:
@@ -289,6 +406,29 @@ define i1 @multi_2_exit_find_i8_loop_too_large(ptr %vec, i8 %tgt) {
289406
; APPLE-NEXT: [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
290407
; APPLE-NEXT: ret i1 [[C_3]]
291408
;
409+
; UNROLL2-LABEL: define i1 @multi_2_exit_find_i8_loop_too_large(
410+
; UNROLL2-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0]] {
411+
; UNROLL2-NEXT: [[ENTRY:.*]]:
412+
; UNROLL2-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
413+
; UNROLL2-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
414+
; UNROLL2-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
415+
; UNROLL2-NEXT: br label %[[LOOP_HEADER:.*]]
416+
; UNROLL2: [[LOOP_HEADER]]:
417+
; UNROLL2-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
418+
; UNROLL2-NEXT: [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
419+
; UNROLL2-NEXT: [[UDIV:%.*]] = udiv i8 [[L]], [[TGT]]
420+
; UNROLL2-NEXT: [[UDIV_2:%.*]] = udiv i8 [[UDIV]], 10
421+
; UNROLL2-NEXT: [[C_1:%.*]] = icmp eq i8 [[UDIV_2]], 2
422+
; UNROLL2-NEXT: br i1 [[C_1]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
423+
; UNROLL2: [[LOOP_LATCH]]:
424+
; UNROLL2-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
425+
; UNROLL2-NEXT: [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
426+
; UNROLL2-NEXT: br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
427+
; UNROLL2: [[EXIT]]:
428+
; UNROLL2-NEXT: [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
429+
; UNROLL2-NEXT: [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
430+
; UNROLL2-NEXT: ret i1 [[C_3]]
431+
;
292432
; OTHER-LABEL: define i1 @multi_2_exit_find_i8_loop_too_large(
293433
; OTHER-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0]] {
294434
; OTHER-NEXT: [[ENTRY:.*]]:
@@ -363,6 +503,32 @@ define i1 @multi_3_exit_find_ptr_loop(ptr %vec, ptr %tgt, ptr %tgt2) {
363503
; APPLE-NEXT: [[C_4:%.*]] = icmp eq ptr [[RES]], [[END]]
364504
; APPLE-NEXT: ret i1 [[C_4]]
365505
;
506+
; UNROLL2-LABEL: define i1 @multi_3_exit_find_ptr_loop(
507+
; UNROLL2-SAME: ptr [[VEC:%.*]], ptr [[TGT:%.*]], ptr [[TGT2:%.*]]) #[[ATTR0]] {
508+
; UNROLL2-NEXT: [[ENTRY:.*]]:
509+
; UNROLL2-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
510+
; UNROLL2-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[START]], i64 8) ]
511+
; UNROLL2-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 8
512+
; UNROLL2-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
513+
; UNROLL2-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
514+
; UNROLL2-NEXT: br label %[[LOOP_HEADER:.*]]
515+
; UNROLL2: [[LOOP_HEADER]]:
516+
; UNROLL2-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
517+
; UNROLL2-NEXT: [[L:%.*]] = load ptr, ptr [[PTR_IV]], align 8
518+
; UNROLL2-NEXT: [[C_1:%.*]] = icmp eq ptr [[L]], [[TGT]]
519+
; UNROLL2-NEXT: [[C_2:%.*]] = icmp eq ptr [[L]], [[TGT2]]
520+
; UNROLL2-NEXT: [[OR_COND:%.*]] = select i1 [[C_1]], i1 true, i1 [[C_2]]
521+
; UNROLL2-NEXT: br i1 [[OR_COND]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
522+
; UNROLL2: [[LOOP_LATCH]]:
523+
; UNROLL2-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 8
524+
; UNROLL2-NEXT: [[C_3:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
525+
; UNROLL2-NEXT: br i1 [[C_3]], label %[[EXIT]], label %[[LOOP_HEADER]]
526+
; UNROLL2: [[EXIT]]:
527+
; UNROLL2-NEXT: [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
528+
; UNROLL2-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
529+
; UNROLL2-NEXT: [[C_4:%.*]] = icmp eq ptr [[RES]], [[END]]
530+
; UNROLL2-NEXT: ret i1 [[C_4]]
531+
;
366532
; OTHER-LABEL: define i1 @multi_3_exit_find_ptr_loop(
367533
; OTHER-SAME: ptr [[VEC:%.*]], ptr [[TGT:%.*]], ptr [[TGT2:%.*]]) #[[ATTR0]] {
368534
; OTHER-NEXT: [[ENTRY:.*]]:
@@ -448,6 +614,34 @@ define i1 @multi_3_exit_find_i8_loop_switch(ptr %vec, i8 %tgt) {
448614
; APPLE: [[EXIT_2]]:
449615
; APPLE-NEXT: ret i1 true
450616
;
617+
; UNROLL2-LABEL: define i1 @multi_3_exit_find_i8_loop_switch(
618+
; UNROLL2-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0]] {
619+
; UNROLL2-NEXT: [[ENTRY:.*]]:
620+
; UNROLL2-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
621+
; UNROLL2-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
622+
; UNROLL2-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
623+
; UNROLL2-NEXT: br label %[[LOOP_HEADER:.*]]
624+
; UNROLL2: [[LOOP_HEADER]]:
625+
; UNROLL2-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
626+
; UNROLL2-NEXT: [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
627+
; UNROLL2-NEXT: switch i8 [[L]], label %[[LOOP_LATCH]] [
628+
; UNROLL2-NEXT: i8 0, label %[[EXIT_1:.*]]
629+
; UNROLL2-NEXT: i8 1, label %[[EXIT_2:.*]]
630+
; UNROLL2-NEXT: i8 2, label %[[EXIT:.*]]
631+
; UNROLL2-NEXT: ]
632+
; UNROLL2: [[LOOP_LATCH]]:
633+
; UNROLL2-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
634+
; UNROLL2-NEXT: [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
635+
; UNROLL2-NEXT: br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
636+
; UNROLL2: [[EXIT]]:
637+
; UNROLL2-NEXT: [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
638+
; UNROLL2-NEXT: [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
639+
; UNROLL2-NEXT: ret i1 [[C_3]]
640+
; UNROLL2: [[EXIT_1]]:
641+
; UNROLL2-NEXT: ret i1 false
642+
; UNROLL2: [[EXIT_2]]:
643+
; UNROLL2-NEXT: ret i1 true
644+
;
451645
; OTHER-LABEL: define i1 @multi_3_exit_find_i8_loop_switch(
452646
; OTHER-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0]] {
453647
; OTHER-NEXT: [[ENTRY:.*]]:

0 commit comments

Comments
 (0)