Skip to content

Commit 937b00a

Browse files
committed
[Polly][SchedOpt] Account for prevectorization of multiple statements.
A prevectorized loop may contain multiple statements, in which case isl_schedule_node_band_sink will sink the vector band to multiple leaves. Instead of statically assuming a specific tree structure after sinking, add a SIMD marker to all inner bands. Fixes llvm.org/PR52637
1 parent 3042091 commit 937b00a

File tree

5 files changed

+110
-43
lines changed

5 files changed

+110
-43
lines changed

polly/include/polly/ScheduleTreeTransform.h

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,39 @@ struct RecursiveScheduleTreeVisitor
154154
}
155155
};
156156

157+
/// Recursively visit all nodes of a schedule tree while allowing changes.
158+
///
159+
/// The visit methods return an isl::schedule_node that is used to continue
160+
/// visiting the tree. Structural changes such as returning a different node
161+
/// will confuse the visitor.
162+
template <typename Derived, typename... Args>
163+
struct ScheduleNodeRewriter
164+
: public RecursiveScheduleTreeVisitor<Derived, isl::schedule_node,
165+
Args...> {
166+
Derived &getDerived() { return *static_cast<Derived *>(this); }
167+
const Derived &getDerived() const {
168+
return *static_cast<const Derived *>(this);
169+
}
170+
171+
isl::schedule_node visitNode(isl::schedule_node Node, Args... args) {
172+
return getDerived().visitChildren(Node);
173+
}
174+
175+
isl::schedule_node visitChildren(isl::schedule_node Node, Args... args) {
176+
if (!Node.has_children())
177+
return Node;
178+
179+
isl::schedule_node It = Node.first_child();
180+
while (true) {
181+
It = getDerived().visit(It, std::forward<Args>(args)...);
182+
if (!It.has_next_sibling())
183+
break;
184+
It = It.next_sibling();
185+
}
186+
return It.parent();
187+
}
188+
};
189+
157190
/// Is this node the marker for its parent band?
158191
bool isBandMark(const isl::schedule_node &Node);
159192

polly/lib/Transform/ScheduleOptimizer.cpp

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,19 @@ ScheduleTreeOptimizer::isolateFullPartialTiles(isl::schedule_node Node,
384384
return Result;
385385
}
386386

387+
/// Rewriter that puts a "SIMD" mark above every innermost band.
///
/// A band counts as innermost when, after its children have been visited, its
/// first child is a leaf. All other bands are returned as visited.
struct InsertSimdMarkers : public ScheduleNodeRewriter<InsertSimdMarkers> {
  isl::schedule_node visitBand(isl::schedule_node_band Band) {
    // Descend first so that bands nested below this one are handled.
    isl::schedule_node Visited = visitChildren(Band);

    const bool IsInnermost =
        Visited.first_child().isa<isl::schedule_node_leaf>();
    if (IsInnermost)
      return Band.insert_mark(isl::id::alloc(Band.ctx(), "SIMD", nullptr));

    return Visited;
  }
};
399+
387400
isl::schedule_node ScheduleTreeOptimizer::prevectSchedBand(
388401
isl::schedule_node Node, unsigned DimToVectorize, int VectorWidth) {
389402
assert(isl_schedule_node_get_type(Node.get()) == isl_schedule_node_band);
@@ -408,16 +421,19 @@ isl::schedule_node ScheduleTreeOptimizer::prevectSchedBand(
408421
Node = Node.child(0);
409422
// Make sure the "trivially vectorizable loop" is not unrolled. Otherwise,
410423
// we will have trouble matching it in the backend.
411-
isl::schedule_node_band NodeBand =
412-
Node.as<isl::schedule_node_band>().set_ast_build_options(
413-
isl::union_set(Node.ctx(), "{ unroll[x]: 1 = 0 }"));
414-
Node = isl::manage(isl_schedule_node_band_sink(NodeBand.release()));
415-
Node = Node.child(0);
416-
if (isl_schedule_node_get_type(Node.get()) == isl_schedule_node_leaf)
417-
Node = Node.parent();
418-
auto LoopMarker = isl::id::alloc(Node.ctx(), "SIMD", nullptr);
424+
Node = Node.as<isl::schedule_node_band>().set_ast_build_options(
425+
isl::union_set(Node.ctx(), "{ unroll[x]: 1 = 0 }"));
426+
427+
// Sink the inner loop into the smallest possible statements to make them
428+
// represent a single vector instruction if possible.
429+
Node = isl::manage(isl_schedule_node_band_sink(Node.release()));
430+
431+
// Add SIMD markers to those vector statements.
432+
InsertSimdMarkers SimdMarkerInserter;
433+
Node = SimdMarkerInserter.visit(Node);
434+
419435
PrevectOpts++;
420-
return Node.insert_mark(LoopMarker);
436+
return Node.parent();
421437
}
422438

423439
static bool isSimpleInnermostBand(const isl::schedule_node &Node) {

polly/lib/Transform/ScheduleTreeTransform.cpp

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -118,35 +118,6 @@ static isl::schedule rebuildBand(isl::schedule_node_band OldBand,
118118
return NewBand.get_schedule();
119119
}
120120

121-
/// Recursively visit all nodes of a schedule tree while allowing changes.
122-
///
123-
/// The visit methods return an isl::schedule_node that is used to continue
124-
/// visiting the tree. Structural changes such as returning a different node
125-
/// will confuse the visitor.
126-
template <typename Derived, typename... Args>
127-
struct ScheduleNodeRewriter
128-
: public RecursiveScheduleTreeVisitor<Derived, isl::schedule_node,
129-
Args...> {
130-
Derived &getDerived() { return *static_cast<Derived *>(this); }
131-
const Derived &getDerived() const {
132-
return *static_cast<const Derived *>(this);
133-
}
134-
135-
isl::schedule_node visitNode(const isl::schedule_node &Node, Args... args) {
136-
if (!Node.has_children())
137-
return Node;
138-
139-
isl::schedule_node It = Node.first_child();
140-
while (true) {
141-
It = getDerived().visit(It, std::forward<Args>(args)...);
142-
if (!It.has_next_sibling())
143-
break;
144-
It = It.next_sibling();
145-
}
146-
return It.parent();
147-
}
148-
};
149-
150121
/// Rewrite a schedule tree by reconstructing it bottom-up.
151122
///
152123
/// By default, the original schedule tree is reconstructed. To build a

polly/test/ScheduleOptimizer/focaltech_test_detail_threshold-7bc17e.ll

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -80,15 +80,17 @@ cleanup: ; preds = %for.cond, %entry
8080
; CHECK: schedule: "[call15] -> [{ Stmt_for_body30[i0, i1] -> [((i0) mod 32)]; Stmt_for_body23[i0, i1] -> [((i0) mod 32)] }]"
8181
; CHECK: permutable: 1
8282
; CHECK: child:
83-
; CHECK: mark: "SIMD"
84-
; CHECK: child:
85-
; CHECK: sequence:
86-
; CHECK: - filter: "[call15] -> { Stmt_for_body23[i0, i1] }"
83+
; CHECK: sequence:
84+
; CHECK: - filter: "[call15] -> { Stmt_for_body23[i0, i1] }"
85+
; CHECK: child:
86+
; CHECK: mark: "SIMD"
8787
; CHECK: child:
8888
; CHECK: schedule: "[call15] -> [{ Stmt_for_body30[i0, i1] -> [((i1) mod 4)]; Stmt_for_body23[i0, i1] -> [((i1) mod 4)] }]"
8989
; CHECK: permutable: 1
9090
; CHECK: coincident: [ 1 ]
91-
; CHECK: - filter: "[call15] -> { Stmt_for_body30[i0, i1] }"
91+
; CHECK: - filter: "[call15] -> { Stmt_for_body30[i0, i1] }"
92+
; CHECK: child:
93+
; CHECK: mark: "SIMD"
9294
; CHECK: child:
9395
; CHECK: schedule: "[call15] -> [{ Stmt_for_body30[i0, i1] -> [((i1) mod 4)]; Stmt_for_body23[i0, i1] -> [((i1) mod 4)] }]"
9496
; CHECK: permutable: 1
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
; RUN: opt %loadPolly -polly-vectorizer=stripmine -polly-isl-arg=--no-schedule-serialize-sccs -polly-tiling=0 -polly-opt-isl -analyze - < %s | FileCheck %s
2+
3+
; isl_schedule_node_band_sink may sink into multiple children.
4+
; https://llvm.org/PR52637
5+
6+
%struct.v4l2_sliced_vbi_data = type { [48 x i8] }
7+
8+
define void @vivid_vbi_gen_sliced() {
9+
entry:
10+
br label %for.body
11+
12+
for.body: ; preds = %vivid_vbi_gen_teletext.exit, %entry
13+
%i.015 = phi i32 [ 0, %entry ], [ %inc, %vivid_vbi_gen_teletext.exit ]
14+
%data0.014 = phi %struct.v4l2_sliced_vbi_data* [ null, %entry ], [ %incdec.ptr, %vivid_vbi_gen_teletext.exit ]
15+
%arraydecay = getelementptr inbounds %struct.v4l2_sliced_vbi_data, %struct.v4l2_sliced_vbi_data* %data0.014, i32 0, i32 0, i32 0
16+
%arrayidx.i = getelementptr inbounds %struct.v4l2_sliced_vbi_data, %struct.v4l2_sliced_vbi_data* %data0.014, i32 0, i32 0, i32 6
17+
%0 = load i8, i8* %arrayidx.i, align 1
18+
store i8 %0, i8* %arraydecay, align 1
19+
br label %for.body.for.body_crit_edge.i
20+
21+
for.body.for.body_crit_edge.i: ; preds = %for.body.for.body_crit_edge.i, %for.body
22+
%inc10.i13 = phi i32 [ 1, %for.body ], [ %inc10.i, %for.body.for.body_crit_edge.i ]
23+
%arrayidx2.phi.trans.insert.i = getelementptr inbounds %struct.v4l2_sliced_vbi_data, %struct.v4l2_sliced_vbi_data* %data0.014, i32 0, i32 0, i32 %inc10.i13
24+
store i8 0, i8* %arrayidx2.phi.trans.insert.i, align 1
25+
%inc10.i = add nuw nsw i32 %inc10.i13, 1
26+
%exitcond.not.i = icmp eq i32 %inc10.i13, 42
27+
br i1 %exitcond.not.i, label %vivid_vbi_gen_teletext.exit, label %for.body.for.body_crit_edge.i
28+
29+
vivid_vbi_gen_teletext.exit: ; preds = %for.body.for.body_crit_edge.i
30+
%incdec.ptr = getelementptr inbounds %struct.v4l2_sliced_vbi_data, %struct.v4l2_sliced_vbi_data* %data0.014, i32 1
31+
%inc = add nuw nsw i32 %i.015, 1
32+
%exitcond.not = icmp eq i32 %i.015, 1
33+
br i1 %exitcond.not, label %for.end, label %for.body
34+
35+
for.end: ; preds = %vivid_vbi_gen_teletext.exit
36+
ret void
37+
}
38+
39+
40+
; CHECK: schedule:
41+
; CHECK: schedule:
42+
; CHECK: mark: "SIMD"
43+
; CHECK: schedule:
44+
; CHECK: mark: "SIMD"
45+
; CHECK: schedule:

0 commit comments

Comments
 (0)