Skip to content

Commit af51c9d

Browse files
authored
[LV][NFC] Add branch weight test showing incorrect behaviour (#144682)
This patch adds a test that shows incorrect branch weights being set in the function EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck.
1 parent 97c1a24 commit af51c9d

File tree

2 files changed

+129
-55
lines changed

2 files changed

+129
-55
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7683,6 +7683,8 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
76837683
BranchInst &BI =
76847684
*BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters);
76857685
if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) {
7686+
// FIXME: See test Transforms/LoopVectorize/branch-weights.ll. MainLoopStep
7687+
// does not appear to be computed correctly here.
76867688
unsigned MainLoopStep = UF * VF.getKnownMinValue();
76877689
unsigned EpilogueLoopStep =
76887690
EPI.EpilogueUF * EPI.EpilogueVF.getKnownMinValue();
Lines changed: 127 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,103 @@
1-
; RUN: opt < %s -S -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-epilogue-vectorization -epilogue-vectorization-force-VF=4 | FileCheck %s
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "br " --filter "^.*:" --filter "icmp" --version 5
2+
; RUN: opt < %s -S -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-epilogue-vectorization \
3+
; RUN: -epilogue-vectorization-force-VF=4 | FileCheck %s --check-prefix=MAINVF4IC1_EPI4
4+
; RUN: opt < %s -S -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -enable-epilogue-vectorization \
5+
; RUN: -epilogue-vectorization-force-VF=4 | FileCheck %s --check-prefix=MAINVF4IC2_EPI4
26

3-
; CHECK-LABEL: @f0(
4-
;
5-
; CHECK: entry:
6-
; CHECK: br i1 %cmp.entry, label %iter.check, label %exit, !prof [[PROF_F0_ENTRY:![0-9]+]]
7-
;
8-
; CHECK: iter.check:
9-
; CHECK: br i1 %min.iters.check, label %vec.epilog.scalar.ph, label %vector.scevcheck, !prof [[PROF_F0_UNLIKELY:![0-9]+]]
10-
;
11-
; CHECK: vector.scevcheck:
12-
; CHECK: br i1 %4, label %vec.epilog.scalar.ph, label %vector.main.loop.iter.check, !prof [[PROF_F0_UNLIKELY]]
13-
;
14-
; CHECK: vector.main.loop.iter.check:
15-
; CHECK: br i1 %min.iters.check1, label %vec.epilog.ph, label %vector.ph, !prof [[PROF_F0_UNLIKELY]]
16-
;
17-
; CHECK: vector.ph:
18-
; CHECK: br label %vector.body
19-
;
20-
; CHECK: vector.body:
21-
; CHECK: br i1 {{.+}}, label %middle.block, label %vector.body, !prof [[PROF_F0_VECTOR_BODY:![0-9]+]]
22-
;
23-
; CHECK: middle.block:
24-
; CHECK: br i1 %cmp.n, label %exit.loopexit, label %vec.epilog.iter.check, !prof [[PROF_F0_MIDDLE_BLOCKS:![0-9]+]]
25-
;
26-
; CHECK: vec.epilog.iter.check:
27-
; CHECK: br i1 %min.epilog.iters.check, label %vec.epilog.scalar.ph, label %vec.epilog.ph, !prof [[PROF_F0_VEC_EPILOGUE_SKIP:![0-9]+]]
28-
;
29-
; CHECK: vec.epilog.ph:
30-
; CHECK: br label %vec.epilog.vector.body
31-
;
32-
; CHECK: vec.epilog.vector.body:
33-
; CHECK: br i1 {{.+}}, label %vec.epilog.middle.block, label %vec.epilog.vector.body, !prof [[PROF_F0_VEC_EPILOG_VECTOR_BODY:![0-9]+]]
34-
;
35-
; CHECK: vec.epilog.middle.block:
36-
; CHECK: br i1 %cmp.n{{.+}}, label %exit.loopexit, label %vec.epilog.scalar.ph, !prof [[PROF_F0_MIDDLE_BLOCKS:![0-9]+]]
37-
;
38-
; CHECK: vec.epilog.scalar.ph:
39-
; CHECK: br label %loop
40-
;
41-
; CHECK: loop:
42-
; CHECK: br i1 %cmp.loop, label %loop, label %exit.loopexit, !prof [[PROF_F0_LOOP:![0-9]+]]
7+
; FIXME: For MAINVF4IC2_EPI4 the branch weights on the terminator of
8+
; the VEC_EPILOG_ITER_CHECK block should be [4,4]: the main vector loop
9+
; processes 8 scalar iterations per vector iteration (VF=4, IC=2), so the
10+
; remaining count lies in the range [0,7]. That gives a 4:4 chance of
11+
; skipping the vector epilogue (VF=4). I believe the problem lies in
12+
; EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck
13+
; where the main loop VF is set to the same value as the epilogue VF.
14+
define void @f0(i8 %n, i32 %len, ptr %p) !prof !0 {
15+
; MAINVF4IC1_EPI4-LABEL: define void @f0(
16+
; MAINVF4IC1_EPI4-SAME: i8 [[N:%.*]], i32 [[LEN:%.*]], ptr [[P:%.*]]) !prof [[PROF0:![0-9]+]] {
17+
; MAINVF4IC1_EPI4: [[ENTRY:.*:]]
18+
; MAINVF4IC1_EPI4: [[CMP_ENTRY:%.*]] = icmp sgt i32 [[LEN]], 0
19+
; MAINVF4IC1_EPI4: br i1 [[CMP_ENTRY]], label %[[ITER_CHECK:.*]], label %[[EXIT:.*]], !prof [[PROF1:![0-9]+]]
20+
; MAINVF4IC1_EPI4: [[ITER_CHECK]]:
21+
; MAINVF4IC1_EPI4: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0:%.*]], 4
22+
; MAINVF4IC1_EPI4: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]], !prof [[PROF2:![0-9]+]]
23+
; MAINVF4IC1_EPI4: [[VECTOR_SCEVCHECK]]:
24+
; MAINVF4IC1_EPI4: [[TMP2:%.*]] = icmp slt i8 [[TMP1:%.*]], 0
25+
; MAINVF4IC1_EPI4: [[TMP3:%.*]] = icmp ugt i32 [[LEN]], 255
26+
; MAINVF4IC1_EPI4: br i1 [[TMP4:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF2]]
27+
; MAINVF4IC1_EPI4: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
28+
; MAINVF4IC1_EPI4: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 4
29+
; MAINVF4IC1_EPI4: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF2]]
30+
; MAINVF4IC1_EPI4: [[VECTOR_PH]]:
31+
; MAINVF4IC1_EPI4: br label %[[VECTOR_BODY:.*]]
32+
; MAINVF4IC1_EPI4: [[VECTOR_BODY]]:
33+
; MAINVF4IC1_EPI4: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT:%.*]], [[N_VEC:%.*]]
34+
; MAINVF4IC1_EPI4: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]]
35+
; MAINVF4IC1_EPI4: [[MIDDLE_BLOCK]]:
36+
; MAINVF4IC1_EPI4: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
37+
; MAINVF4IC1_EPI4: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF7:![0-9]+]]
38+
; MAINVF4IC1_EPI4: [[VEC_EPILOG_ITER_CHECK]]:
39+
; MAINVF4IC1_EPI4: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i32 [[N_VEC_REMAINING:%.*]], 4
40+
; MAINVF4IC1_EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF8:![0-9]+]]
41+
; MAINVF4IC1_EPI4: [[VEC_EPILOG_PH]]:
42+
; MAINVF4IC1_EPI4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
43+
; MAINVF4IC1_EPI4: [[VEC_EPILOG_VECTOR_BODY]]:
44+
; MAINVF4IC1_EPI4: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT6:%.*]], [[N_VEC3:%.*]]
45+
; MAINVF4IC1_EPI4: br i1 [[TMP12]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]]
46+
; MAINVF4IC1_EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]:
47+
; MAINVF4IC1_EPI4: [[CMP_N8:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC3]]
48+
; MAINVF4IC1_EPI4: br i1 [[CMP_N8]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF7]]
49+
; MAINVF4IC1_EPI4: [[VEC_EPILOG_SCALAR_PH]]:
50+
; MAINVF4IC1_EPI4: br label %[[LOOP:.*]]
51+
; MAINVF4IC1_EPI4: [[LOOP]]:
52+
; MAINVF4IC1_EPI4: [[CMP_LOOP:%.*]] = icmp ult i32 [[I32:%.*]], [[LEN]]
53+
; MAINVF4IC1_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF11:![0-9]+]], !llvm.loop [[LOOP12:![0-9]+]]
54+
; MAINVF4IC1_EPI4: [[EXIT_LOOPEXIT]]:
55+
; MAINVF4IC1_EPI4: br label %[[EXIT]]
56+
; MAINVF4IC1_EPI4: [[EXIT]]:
4357
;
44-
; CHECK: exit.loopexit:
45-
; CHECK: br label %exit
58+
; MAINVF4IC2_EPI4-LABEL: define void @f0(
59+
; MAINVF4IC2_EPI4-SAME: i8 [[N:%.*]], i32 [[LEN:%.*]], ptr [[P:%.*]]) !prof [[PROF0:![0-9]+]] {
60+
; MAINVF4IC2_EPI4: [[ENTRY:.*:]]
61+
; MAINVF4IC2_EPI4: [[CMP_ENTRY:%.*]] = icmp sgt i32 [[LEN]], 0
62+
; MAINVF4IC2_EPI4: br i1 [[CMP_ENTRY]], label %[[ITER_CHECK:.*]], label %[[EXIT:.*]], !prof [[PROF1:![0-9]+]]
63+
; MAINVF4IC2_EPI4: [[ITER_CHECK]]:
64+
; MAINVF4IC2_EPI4: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0:%.*]], 4
65+
; MAINVF4IC2_EPI4: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]], !prof [[PROF2:![0-9]+]]
66+
; MAINVF4IC2_EPI4: [[VECTOR_SCEVCHECK]]:
67+
; MAINVF4IC2_EPI4: [[TMP2:%.*]] = icmp slt i8 [[TMP1:%.*]], 0
68+
; MAINVF4IC2_EPI4: [[TMP3:%.*]] = icmp ugt i32 [[LEN]], 255
69+
; MAINVF4IC2_EPI4: br i1 [[TMP4:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF2]]
70+
; MAINVF4IC2_EPI4: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
71+
; MAINVF4IC2_EPI4: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 8
72+
; MAINVF4IC2_EPI4: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF2]]
73+
; MAINVF4IC2_EPI4: [[VECTOR_PH]]:
74+
; MAINVF4IC2_EPI4: br label %[[VECTOR_BODY:.*]]
75+
; MAINVF4IC2_EPI4: [[VECTOR_BODY]]:
76+
; MAINVF4IC2_EPI4: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT:%.*]], [[N_VEC:%.*]]
77+
; MAINVF4IC2_EPI4: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]]
78+
; MAINVF4IC2_EPI4: [[MIDDLE_BLOCK]]:
79+
; MAINVF4IC2_EPI4: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
80+
; MAINVF4IC2_EPI4: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF7:![0-9]+]]
81+
; MAINVF4IC2_EPI4: [[VEC_EPILOG_ITER_CHECK]]:
82+
; MAINVF4IC2_EPI4: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i32 [[N_VEC_REMAINING:%.*]], 4
83+
; MAINVF4IC2_EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF8:![0-9]+]]
84+
; MAINVF4IC2_EPI4: [[VEC_EPILOG_PH]]:
85+
; MAINVF4IC2_EPI4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
86+
; MAINVF4IC2_EPI4: [[VEC_EPILOG_VECTOR_BODY]]:
87+
; MAINVF4IC2_EPI4: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT6:%.*]], [[N_VEC3:%.*]]
88+
; MAINVF4IC2_EPI4: br i1 [[TMP13]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]]
89+
; MAINVF4IC2_EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]:
90+
; MAINVF4IC2_EPI4: [[CMP_N8:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC3]]
91+
; MAINVF4IC2_EPI4: br i1 [[CMP_N8]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF11:![0-9]+]]
92+
; MAINVF4IC2_EPI4: [[VEC_EPILOG_SCALAR_PH]]:
93+
; MAINVF4IC2_EPI4: br label %[[LOOP:.*]]
94+
; MAINVF4IC2_EPI4: [[LOOP]]:
95+
; MAINVF4IC2_EPI4: [[CMP_LOOP:%.*]] = icmp ult i32 [[I32:%.*]], [[LEN]]
96+
; MAINVF4IC2_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF12:![0-9]+]], !llvm.loop [[LOOP13:![0-9]+]]
97+
; MAINVF4IC2_EPI4: [[EXIT_LOOPEXIT]]:
98+
; MAINVF4IC2_EPI4: br label %[[EXIT]]
99+
; MAINVF4IC2_EPI4: [[EXIT]]:
46100
;
47-
; CHECK: exit:
48-
; CHECK: ret void
49-
50-
define void @f0(i8 %n, i32 %len, ptr %p) !prof !0 {
51101
entry:
52102
%cmp.entry = icmp sgt i32 %len, 0
53103
br i1 %cmp.entry, label %loop, label %exit, !prof !1
@@ -72,11 +122,33 @@ exit:
72122
!0 = !{!"function_entry_count", i64 13}
73123
!1 = !{!"branch_weights", i32 12, i32 1}
74124
!2 = !{!"branch_weights", i32 1234, i32 1}
75-
76-
; CHECK: [[PROF_F0_ENTRY]] = !{!"branch_weights", i32 12, i32 1}
77-
; CHECK: [[PROF_F0_UNLIKELY]] = !{!"branch_weights", i32 1, i32 127}
78-
; CHECK: [[PROF_F0_VECTOR_BODY]] = !{!"branch_weights", i32 1, i32 307}
79-
; CHECK: [[PROF_F0_MIDDLE_BLOCKS]] = !{!"branch_weights", i32 1, i32 3}
80-
; CHECK: [[PROF_F0_VEC_EPILOGUE_SKIP]] = !{!"branch_weights", i32 4, i32 0}
81-
; CHECK: [[PROF_F0_VEC_EPILOG_VECTOR_BODY]] = !{!"branch_weights", i32 0, i32 0}
82-
; CHECK: [[PROF_F0_LOOP]] = !{!"branch_weights", i32 2, i32 1}
125+
;.
126+
; MAINVF4IC1_EPI4: [[PROF0]] = !{!"function_entry_count", i64 13}
127+
; MAINVF4IC1_EPI4: [[PROF1]] = !{!"branch_weights", i32 12, i32 1}
128+
; MAINVF4IC1_EPI4: [[PROF2]] = !{!"branch_weights", i32 1, i32 127}
129+
; MAINVF4IC1_EPI4: [[PROF3]] = !{!"branch_weights", i32 1, i32 307}
130+
; MAINVF4IC1_EPI4: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]]}
131+
; MAINVF4IC1_EPI4: [[META5]] = !{!"llvm.loop.isvectorized", i32 1}
132+
; MAINVF4IC1_EPI4: [[META6]] = !{!"llvm.loop.unroll.runtime.disable"}
133+
; MAINVF4IC1_EPI4: [[PROF7]] = !{!"branch_weights", i32 1, i32 3}
134+
; MAINVF4IC1_EPI4: [[PROF8]] = !{!"branch_weights", i32 4, i32 0}
135+
; MAINVF4IC1_EPI4: [[PROF9]] = !{!"branch_weights", i32 0, i32 0}
136+
; MAINVF4IC1_EPI4: [[LOOP10]] = distinct !{[[LOOP10]], [[META5]], [[META6]]}
137+
; MAINVF4IC1_EPI4: [[PROF11]] = !{!"branch_weights", i32 2, i32 1}
138+
; MAINVF4IC1_EPI4: [[LOOP12]] = distinct !{[[LOOP12]], [[META5]]}
139+
;.
140+
; MAINVF4IC2_EPI4: [[PROF0]] = !{!"function_entry_count", i64 13}
141+
; MAINVF4IC2_EPI4: [[PROF1]] = !{!"branch_weights", i32 12, i32 1}
142+
; MAINVF4IC2_EPI4: [[PROF2]] = !{!"branch_weights", i32 1, i32 127}
143+
; MAINVF4IC2_EPI4: [[PROF3]] = !{!"branch_weights", i32 1, i32 153}
144+
; MAINVF4IC2_EPI4: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]]}
145+
; MAINVF4IC2_EPI4: [[META5]] = !{!"llvm.loop.isvectorized", i32 1}
146+
; MAINVF4IC2_EPI4: [[META6]] = !{!"llvm.loop.unroll.runtime.disable"}
147+
; MAINVF4IC2_EPI4: [[PROF7]] = !{!"branch_weights", i32 1, i32 7}
148+
; MAINVF4IC2_EPI4: [[PROF8]] = !{!"branch_weights", i32 4, i32 0}
149+
; MAINVF4IC2_EPI4: [[PROF9]] = !{!"branch_weights", i32 0, i32 0}
150+
; MAINVF4IC2_EPI4: [[LOOP10]] = distinct !{[[LOOP10]], [[META5]], [[META6]]}
151+
; MAINVF4IC2_EPI4: [[PROF11]] = !{!"branch_weights", i32 1, i32 3}
152+
; MAINVF4IC2_EPI4: [[PROF12]] = !{!"branch_weights", i32 2, i32 1}
153+
; MAINVF4IC2_EPI4: [[LOOP13]] = distinct !{[[LOOP13]], [[META5]]}
154+
;.

0 commit comments

Comments
 (0)