Skip to content

Commit 93c8fb7

Browse files
committed
Address review comments
And check if the trip count matches the canonical IV.
1 parent 6066837 commit 93c8fb7

File tree

2 files changed

+113
-5
lines changed

2 files changed

+113
-5
lines changed

llvm/lib/CodeGen/EVLIndVarSimplify.cpp

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,9 @@ static uint32_t getVFFromIndVar(const SCEV *Step, const Function &F) {
9595
if (const APInt *Fixed = CR.getSingleElement()) {
9696
V = V.zextOrTrunc(Fixed->getBitWidth());
9797
uint64_t VF = V.udiv(*Fixed).getLimitedValue();
98-
if (VF && llvm::isUInt<32>(VF))
98+
if (VF && llvm::isUInt<32>(VF) &&
99+
// Make sure step is divisible by vscale.
100+
V.urem(*Fixed).isZero())
99101
return static_cast<uint32_t>(VF);
100102
}
101103
}
@@ -113,14 +115,14 @@ static void tryCleanupOriginalIndVar(PHINode *OrigIndVar,
113115
if (InitValue != IVD.getStartValue())
114116
std::swap(InitValue, RecValue);
115117

116-
// If the only user of OrigIndVar is the one produces RecValue, then we can
117-
// safely remove it.
118+
// If the only user of OrigIndVar is the one that produces RecValue, then we
119+
// can safely remove it.
118120
if (!OrigIndVar->hasOneUse() || OrigIndVar->user_back() != RecValue)
119121
return;
120122

121123
LLVM_DEBUG(dbgs() << "Removed the original IndVar " << *OrigIndVar << "\n");
122-
// Remove OrigIndVar by replacing all its uses by the initial value of this
123-
// loop. Then DCE will take care of the rest.
124+
// Turn OrigIndVar into dead code by replacing all its uses by the initial
125+
// value of this loop.
124126
OrigIndVar->replaceAllUsesWith(InitValue);
125127
OrigIndVar->eraseFromParent();
126128
}
@@ -153,6 +155,8 @@ bool EVLIndVarSimplifyImpl::run(Loop &L) {
153155
}
154156
Value *CanonicalIVInit = &Bounds->getInitialIVValue();
155157
Value *CanonicalIVFinal = &Bounds->getFinalIVValue();
158+
const SCEV *CanonicalIVInitV = SE.getSCEV(CanonicalIVInit);
159+
const SCEV *CanonicalIVFinalV = SE.getSCEV(CanonicalIVFinal);
156160

157161
const SCEV *StepV = IVD.getStep();
158162
uint32_t VF = getVFFromIndVar(StepV, *L.getHeader()->getParent());
@@ -222,6 +226,29 @@ bool EVLIndVarSimplifyImpl::run(Loop &L) {
222226
if (!EVLIndVar || !TC)
223227
return false;
224228

229+
// Make sure TC is related to the original trip count of the canonical IV.
230+
// Specifically, check whether the canonical trip count is derived from TC.
231+
const SCEV *TCV = SE.getSCEV(TC);
232+
bool MatchTC = false;
233+
if (const auto *ConstTCV = dyn_cast<SCEVConstant>(TCV)) {
234+
// If TC is a constant and vscale is also a constant, then the canonical
235+
// trip count will be constant. Canonical trip count * Step then equals the
236+
// round up of TC.
237+
if (const auto *ConstStep = dyn_cast<SCEVConstant>(StepV))
238+
if (unsigned CanonicalTC = SE.getSmallConstantTripCount(&L)) {
239+
APInt Step = ConstStep->getAPInt().abs().zextOrTrunc(64);
240+
APInt CanonicalTripCount(64, CanonicalTC);
241+
APInt TripCount = ConstTCV->getAPInt().zextOrTrunc(64);
242+
MatchTC = (CanonicalTripCount * Step - TripCount).ult(Step);
243+
}
244+
}
245+
// Otherwise, we simply check if the upper or lower bound expression of the
246+
// canonical IV contains TC.
247+
auto equalsTC = [&](const SCEV *S) -> bool { return S == TCV; };
248+
if (!MatchTC && !llvm::SCEVExprContains(CanonicalIVFinalV, equalsTC) &&
249+
!llvm::SCEVExprContains(CanonicalIVInitV, equalsTC))
250+
return false;
251+
225252
LLVM_DEBUG(dbgs() << "Using " << *EVLIndVar << " for EVL-based IndVar\n");
226253

227254
// Create an EVL-based comparison and replace the branch to use it as
@@ -259,6 +286,9 @@ INITIALIZE_PASS_END(EVLIndVarSimplify, DEBUG_TYPE,
259286
"EVL-based Induction Variables Simplify", false, false)
260287

261288
bool EVLIndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
289+
if (skipLoop(L))
290+
return false;
291+
262292
auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
263293
return EVLIndVarSimplifyImpl(SE).run(*L);
264294
}

llvm/test/CodeGen/RISCV/evl-iv-simplify.ll

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,84 @@ for.end:
237237
ret void
238238
}
239239

240+
; Fixed IV step and trip count
241+
define void @fixed_iv_step_tc(ptr %arg0, ptr %arg1) #0 {
242+
; CHECK-LABEL: define void @fixed_iv_step_tc(
243+
; CHECK-SAME: ptr [[ARG0:%.*]], ptr [[ARG1:%.*]]) #[[ATTR1]] {
244+
; CHECK-NEXT: entry:
245+
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
246+
; CHECK: vector.ph:
247+
; CHECK-NEXT: [[N_RND_UP:%.*]] = add nsw i64 87, 15
248+
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N_RND_UP]], -16
249+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[ARG0]], i64 0
250+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
251+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
252+
; CHECK: vector.body:
253+
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
254+
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 87, [[EVL_BASED_IV]]
255+
; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP0]], i32 2, i1 true)
256+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr ptr, ptr [[ARG1]], i64 [[EVL_BASED_IV]]
257+
; CHECK-NEXT: tail call void @llvm.vp.store.nxv2p0.p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT]], ptr align 8 [[GEP]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP1]])
258+
; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
259+
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[EVL_BASED_IV]], [[TMP2]]
260+
; CHECK-NEXT: [[LSR_IV_NEXT33:%.*]] = add i64 [[N_VEC]], -16
261+
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 87
262+
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[LSR_IV_NEXT33]], 0
263+
; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_END_LOOPEXIT5:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3]]
264+
; CHECK: for.end.loopexit5:
265+
; CHECK-NEXT: br label [[FOR_END:%.*]]
266+
; CHECK: for.end:
267+
; CHECK-NEXT: ret void
268+
;
269+
; LOOP-DEL-LABEL: define void @fixed_iv_step_tc(
270+
; LOOP-DEL-SAME: ptr [[ARG0:%.*]], ptr [[ARG1:%.*]]) #[[ATTR1]] {
271+
; LOOP-DEL-NEXT: entry:
272+
; LOOP-DEL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[ARG0]], i64 0
273+
; LOOP-DEL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
274+
; LOOP-DEL-NEXT: br label [[VECTOR_BODY:%.*]]
275+
; LOOP-DEL: vector.body:
276+
; LOOP-DEL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
277+
; LOOP-DEL-NEXT: [[TMP0:%.*]] = sub i64 87, [[EVL_BASED_IV]]
278+
; LOOP-DEL-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP0]], i32 2, i1 true)
279+
; LOOP-DEL-NEXT: [[GEP:%.*]] = getelementptr ptr, ptr [[ARG1]], i64 [[EVL_BASED_IV]]
280+
; LOOP-DEL-NEXT: tail call void @llvm.vp.store.nxv2p0.p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT]], ptr align 8 [[GEP]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP1]])
281+
; LOOP-DEL-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
282+
; LOOP-DEL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[EVL_BASED_IV]], [[TMP2]]
283+
; LOOP-DEL-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 87
284+
; LOOP-DEL-NEXT: br i1 [[TMP3]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3]]
285+
; LOOP-DEL: for.end:
286+
; LOOP-DEL-NEXT: ret void
287+
;
288+
entry:
289+
br label %vector.ph
290+
291+
vector.ph:
292+
%n.rnd.up = add nsw i64 87, 15
293+
%n.vec = and i64 %n.rnd.up, -16
294+
%broadcast.splatinsert = insertelement <vscale x 2 x ptr> poison, ptr %arg0, i64 0
295+
%broadcast.splat = shufflevector <vscale x 2 x ptr> %broadcast.splatinsert, <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
296+
br label %vector.body
297+
298+
vector.body:
299+
%lsr.iv32 = phi i64 [ %lsr.iv.next33, %vector.body ], [ %n.vec, %vector.ph ]
300+
%evl.based.iv = phi i64 [ 0, %vector.ph ], [ %index.evl.next, %vector.body ]
301+
%41 = sub i64 87, %evl.based.iv
302+
%42 = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %41, i32 2, i1 true)
303+
%gep = getelementptr ptr, ptr %arg1, i64 %evl.based.iv
304+
tail call void @llvm.vp.store.nxv2p0.p0(<vscale x 2 x ptr> %broadcast.splat, ptr align 8 %gep, <vscale x 2 x i1> splat (i1 true), i32 %42)
305+
%43 = zext i32 %42 to i64
306+
%index.evl.next = add i64 %evl.based.iv, %43
307+
%lsr.iv.next33 = add i64 %lsr.iv32, -16
308+
%44 = icmp eq i64 %lsr.iv.next33, 0
309+
br i1 %44, label %for.end.loopexit5, label %vector.body, !llvm.loop !3
310+
311+
for.end.loopexit5:
312+
br label %for.end
313+
314+
for.end:
315+
ret void
316+
}
317+
240318
declare i64 @llvm.vscale.i64()
241319

242320
declare i32 @llvm.experimental.get.vector.length.i64(i64, i32 immarg, i1 immarg)

0 commit comments

Comments
 (0)