Skip to content

Commit 15d9d0f

Browse files
authored
[VPlan] Also print final VPlan directly before codegen/execute. (llvm#82269)
Some optimizations are applied after UF and VF have been chosen. This patch adds an extra print of the final VPlan just before codegen/execution. In the future, there will be additional transforms that are applied later (interleaving for example). PR: llvm#82269
1 parent 8a5d51b commit 15d9d0f

File tree

3 files changed

+97
-5
lines changed

3 files changed

+97
-5
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7450,12 +7450,14 @@ LoopVectorizationPlanner::executePlan(
74507450
(IsEpilogueVectorization || !ExpandedSCEVs) &&
74517451
"expanded SCEVs to reuse can only be used during epilogue vectorization");
74527452

7453-
LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << BestVF << ", UF=" << BestUF
7454-
<< '\n');
7455-
74567453
if (!IsEpilogueVectorization)
74577454
VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
74587455

7456+
LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << BestVF
7457+
<< ", UF=" << BestUF << '\n');
7458+
BestVPlan.setName("Final VPlan");
7459+
LLVM_DEBUG(BestVPlan.dump());
7460+
74597461
// Perform the actual loop transformation.
74607462
VPTransformState State(BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan,
74617463
OrigLoop->getHeader()->getContext());

llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
120120
; CHECK-NEXT: LV: Found a vectorizable loop (vscale x 4) in <stdin>
121121
; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop
122122
; CHECK-NEXT: Executing best plan with VF=vscale x 4, UF=1
123-
; CHECK-NEXT: LV: Interleaving disabled by the pass manager
123+
; CHECK: LV: Interleaving disabled by the pass manager
124124
; CHECK-NEXT: LV: Vectorizing: innermost loop.
125125
;
126126
entry:
@@ -260,7 +260,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
260260
; CHECK-NEXT: LV: Found a vectorizable loop (vscale x 4) in <stdin>
261261
; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop
262262
; CHECK-NEXT: Executing best plan with VF=vscale x 4, UF=1
263-
; CHECK-NEXT: LV: Interleaving disabled by the pass manager
263+
; CHECK: LV: Interleaving disabled by the pass manager
264264
; CHECK-NEXT: LV: Vectorizing: innermost loop.
265265
;
266266
entry:
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
; RUN: opt -passes=loop-vectorize -force-vector-width=8 -force-vector-interleave=2 -disable-output -debug -S %s 2>&1 | FileCheck --check-prefixes=CHECK %s
2+
3+
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
4+
5+
; REQUIRES: asserts
6+
7+
; Check if the vector loop condition can be simplified to true for a given
8+
; VF/IC combination.
9+
define void @test_tc_less_than_16(ptr %A, i64 %N) {
10+
; CHECK: LV: Scalarizing: %cmp =
11+
; CHECK-NEXT: VPlan 'Initial VPlan for VF={8},UF>=1' {
12+
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
13+
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
14+
; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
15+
; CHECK-EMPTY:
16+
; CHECK-NEXT: ph:
17+
; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i4 (trunc i64 %N to i4) to i64)
18+
; CHECK-NEXT: No successors
19+
; CHECK-EMPTY:
20+
; CHECK-NEXT: vector.ph:
21+
; CHECK-NEXT: Successor(s): vector loop
22+
; CHECK-EMPTY:
23+
; CHECK-NEXT: <x1> vector loop: {
24+
; CHECK-NEXT: vector.body:
25+
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
26+
; CHECK-NEXT: EMIT ir<%p.src> = WIDEN-POINTER-INDUCTION ir<%A>, 1
27+
; CHECK-NEXT: vp<[[VPTR:%.]]> = vector-pointer ir<%p.src>
28+
; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VPTR]]>
29+
; CHECK-NEXT: WIDEN ir<%add> = add nsw ir<%l>, ir<10>
30+
; CHECK-NEXT: vp<[[VPTR2:%.+]]> = vector-pointer ir<%p.src>
31+
; CHECK-NEXT: WIDEN store vp<[[VPTR2]]>, ir<%add>
32+
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV:%.+]]>, vp<[[VFxUF]]>
33+
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
34+
; CHECK-NEXT: No successors
35+
; CHECK-NEXT: }
36+
; CHECK-NEXT: Successor(s): middle.block
37+
; CHECK-EMPTY:
38+
; CHECK-NEXT: middle.block:
39+
; CHECK-NEXT: No successors
40+
; CHECK-NEXT: }
41+
;
42+
; CHECK: Executing best plan with VF=8, UF=2
43+
; CHECK-NEXT: VPlan 'Final VPlan for VF={8},UF={2}' {
44+
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
45+
; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
46+
; CHECK-EMPTY:
47+
; CHECK-NEXT: ph:
48+
; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i4 (trunc i64 %N to i4) to i64)
49+
; CHECK-NEXT: No successors
50+
; CHECK-EMPTY:
51+
; CHECK-NEXT: vector.ph:
52+
; CHECK-NEXT: Successor(s): vector loop
53+
; CHECK-EMPTY:
54+
; CHECK-NEXT: <x1> vector loop: {
55+
; CHECK-NEXT: vector.body:
56+
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
57+
; CHECK-NEXT: EMIT ir<%p.src> = WIDEN-POINTER-INDUCTION ir<%A>, 1
58+
; CHECK-NEXT: vp<[[VPTR:%.]]> = vector-pointer ir<%p.src>
59+
; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VPTR]]>
60+
; CHECK-NEXT: WIDEN ir<%add> = add nsw ir<%l>, ir<10>
61+
; CHECK-NEXT: vp<[[VPTR2:%.+]]> = vector-pointer ir<%p.src>
62+
; CHECK-NEXT: WIDEN store vp<[[VPTR2]]>, ir<%add>
63+
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV:%.+]]>, vp<[[VFxUF]]>
64+
; CHECK-NEXT: EMIT branch-on-cond ir<true>
65+
; CHECK-NEXT: No successors
66+
; CHECK-NEXT: }
67+
; CHECK-NEXT: Successor(s): middle.block
68+
; CHECK-EMPTY:
69+
; CHECK-NEXT: middle.block:
70+
; CHECK-NEXT: No successors
71+
; CHECK-NEXT: }
72+
;
73+
entry:
74+
%and = and i64 %N, 15
75+
br label %loop
76+
77+
loop:
78+
%iv = phi i64 [ %and, %entry ], [ %iv.next, %loop ]
79+
%p.src = phi ptr [ %A, %entry ], [ %p.src.next, %loop ]
80+
%p.src.next = getelementptr inbounds i8, ptr %p.src, i64 1
81+
%l = load i8, ptr %p.src, align 1
82+
%add = add nsw i8 %l, 10
83+
store i8 %add, ptr %p.src
84+
%iv.next = add nsw i64 %iv, -1
85+
%cmp = icmp eq i64 %iv.next, 0
86+
br i1 %cmp, label %exit, label %loop
87+
88+
exit:
89+
ret void
90+
}

0 commit comments

Comments
 (0)