Skip to content

Commit e36abf6

Browse files
committed
isl: Detect openmp parallelism
Based on code written by Riyadh Baghdadi. llvm-svn: 170102
1 parent a167087 commit e36abf6

7 files changed

+467
-6
lines changed

polly/lib/CodeGen/IslAst.cpp

Lines changed: 205 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "polly/CodeGen/IslAst.h"
2323

2424
#include "polly/LinkAllPasses.h"
25+
#include "polly/Dependences.h"
2526
#include "polly/ScopInfo.h"
2627

2728
#define DEBUG_TYPE "polly-ast"
@@ -43,10 +44,14 @@ static cl::opt<bool>
4344
UseContext("polly-ast-use-context", cl::desc("Use context"), cl::Hidden,
4445
cl::init(false), cl::ZeroOrMore);
4546

47+
static cl::opt<bool>
48+
DetectParallel("polly-ast-detect-parallel", cl::desc("Detect parallelism"),
49+
cl::Hidden, cl::init(false), cl::ZeroOrMore);
50+
4651
namespace polly {
4752
class IslAst {
4853
public:
49-
IslAst(Scop *Scop);
54+
IslAst(Scop *Scop, Dependences &D);
5055

5156
~IslAst();
5257

@@ -72,8 +77,183 @@ static void IslAstUserFree(void *User)
7277
free(UserStruct);
7378
}
7479

75-
static __isl_give isl_ast_node *AtEachDomain(__isl_keep isl_ast_node *Node,
76-
__isl_keep isl_ast_build *Context, void *User)
80+
// Information about an ast node.
81+
struct AstNodeUserInfo {
82+
// The node is the outermost parallel loop.
83+
int IsOutermostParallel;
84+
};
85+
86+
// Temporary information used when building the ast.
87+
struct AstBuildUserInfo {
88+
// The dependence information.
89+
Dependences *Deps;
90+
91+
// We are inside a parallel for node.
92+
int InParallelFor;
93+
};
94+
95+
// Print a loop annotated with OpenMP pragmas.
96+
static __isl_give isl_printer *
97+
printParallelFor(__isl_keep isl_ast_node *Node, __isl_take isl_printer *Printer,
98+
__isl_take isl_ast_print_options *PrintOptions,
99+
AstNodeUserInfo *Info) {
100+
if (Info && Info->IsOutermostParallel) {
101+
Printer = isl_printer_start_line(Printer);
102+
if (Info->IsOutermostParallel)
103+
Printer = isl_printer_print_str(Printer, "#pragma omp parallel for");
104+
Printer = isl_printer_end_line(Printer);
105+
}
106+
return isl_ast_node_for_print(Node, Printer, PrintOptions);
107+
}
108+
109+
// Print an isl_ast_for.
110+
static __isl_give isl_printer *
111+
printFor(__isl_take isl_printer *Printer,
112+
__isl_take isl_ast_print_options *PrintOptions,
113+
__isl_keep isl_ast_node *Node, void *User) {
114+
isl_id *Id = isl_ast_node_get_annotation(Node);
115+
if (!Id)
116+
return isl_ast_node_for_print(Node, Printer, PrintOptions);
117+
118+
struct AstNodeUserInfo *Info = (struct AstNodeUserInfo *) isl_id_get_user(Id);
119+
Printer = printParallelFor(Node, Printer, PrintOptions, Info);
120+
isl_id_free(Id);
121+
return Printer;
122+
}
123+
124+
// Allocate an AstNodeInfo structure and initialize it with default values.
125+
static struct AstNodeUserInfo *allocateAstNodeUserInfo() {
126+
struct AstNodeUserInfo *NodeInfo;
127+
NodeInfo = (struct AstNodeUserInfo *) malloc(sizeof(struct AstNodeUserInfo));
128+
NodeInfo->IsOutermostParallel = 0;
129+
return NodeInfo;
130+
}
131+
132+
// Free the AstNodeInfo structure.
133+
static void freeAstNodeUserInfo(void *Ptr) {
134+
struct AstNodeUserInfo *Info;
135+
Info = (struct AstNodeUserInfo *) Ptr;
136+
free(Info);
137+
}
138+
139+
// Check if the current scheduling dimension is parallel.
140+
//
141+
// We check for parallelism by verifying that the loop does not carry any
142+
// dependences.
143+
//
144+
// Parallelism test: if the distance is zero in all outer dimensions, then it
145+
// has to be zero in the current dimension as well.
146+
//
147+
// Implementation: first, translate dependences into time space, then force
148+
// outer dimensions to be equal. If the distance is zero in the current
149+
// dimension, then the loop is parallel. The distance is zero in the current
150+
// dimension if it is a subset of a map with equal values for the current
151+
// dimension.
152+
static bool astScheduleDimIsParallel(__isl_keep isl_ast_build *Build,
153+
Dependences *D) {
154+
isl_union_map *Schedule, *Deps;
155+
isl_map *ScheduleDeps, *Test;
156+
isl_space *ScheduleSpace;
157+
unsigned Dimension, IsParallel;
158+
159+
Schedule = isl_ast_build_get_schedule(Build);
160+
ScheduleSpace = isl_ast_build_get_schedule_space(Build);
161+
162+
Dimension = isl_space_dim(ScheduleSpace, isl_dim_out) - 1;
163+
164+
Deps = D->getDependences(Dependences::TYPE_ALL);
165+
Deps = isl_union_map_apply_range(Deps, isl_union_map_copy(Schedule));
166+
Deps = isl_union_map_apply_domain(Deps, Schedule);
167+
168+
if (isl_union_map_is_empty(Deps)) {
169+
isl_union_map_free(Deps);
170+
isl_space_free(ScheduleSpace);
171+
return 1;
172+
}
173+
174+
ScheduleDeps = isl_map_from_union_map(Deps);
175+
176+
for (unsigned i = 0; i < Dimension; i++)
177+
ScheduleDeps = isl_map_equate(ScheduleDeps, isl_dim_out, i, isl_dim_in, i);
178+
179+
Test = isl_map_universe(isl_map_get_space(ScheduleDeps));
180+
Test = isl_map_equate(Test, isl_dim_out, Dimension, isl_dim_in, Dimension);
181+
IsParallel = isl_map_is_subset(ScheduleDeps, Test);
182+
183+
isl_space_free(ScheduleSpace);
184+
isl_map_free(Test);
185+
isl_map_free(ScheduleDeps);
186+
187+
return IsParallel;
188+
}
189+
190+
// Mark a for node openmp parallel, if it is the outermost parallel for node.
191+
static void markOpenmpParallel(__isl_keep isl_ast_build *Build,
192+
struct AstBuildUserInfo *BuildInfo,
193+
struct AstNodeUserInfo *NodeInfo) {
194+
if (BuildInfo->InParallelFor)
195+
return;
196+
197+
if (astScheduleDimIsParallel(Build, BuildInfo->Deps)) {
198+
BuildInfo->InParallelFor = 1;
199+
NodeInfo->IsOutermostParallel = 1;
200+
}
201+
}
202+
203+
// This method is executed before the construction of a for node. It creates
204+
// an isl_id that is used to annotate the subsequently generated ast for nodes.
205+
//
206+
// In this function we also run the following analyses:
207+
//
208+
// - Detection of openmp parallel loops
209+
//
210+
static __isl_give isl_id *astBuildBeforeFor(__isl_keep isl_ast_build *Build,
211+
void *User) {
212+
isl_id *Id;
213+
struct AstBuildUserInfo *BuildInfo;
214+
struct AstNodeUserInfo *NodeInfo;
215+
216+
BuildInfo = (struct AstBuildUserInfo *) User;
217+
NodeInfo = allocateAstNodeUserInfo();
218+
Id = isl_id_alloc(isl_ast_build_get_ctx(Build), "", NodeInfo);
219+
Id = isl_id_set_free_user(Id, freeAstNodeUserInfo);
220+
221+
markOpenmpParallel(Build, BuildInfo, NodeInfo);
222+
223+
return Id;
224+
}
225+
226+
// This method is executed after the construction of a for node.
227+
//
228+
// It performs the following actions:
229+
//
230+
// - Reset the 'InParallelFor' flag, as soon as we leave a for node,
231+
// that is marked as openmp parallel.
232+
//
233+
static __isl_give isl_ast_node *
234+
astBuildAfterFor(__isl_take isl_ast_node *Node,
235+
__isl_keep isl_ast_build *Build, void *User) {
236+
isl_id *Id;
237+
struct AstBuildUserInfo *BuildInfo;
238+
struct AstNodeUserInfo *Info;
239+
240+
Id = isl_ast_node_get_annotation(Node);
241+
if (!Id)
242+
return Node;
243+
Info = (struct AstNodeUserInfo *) isl_id_get_user(Id);
244+
if (Info && Info->IsOutermostParallel) {
245+
BuildInfo = (struct AstBuildUserInfo *) User;
246+
BuildInfo->InParallelFor = 0;
247+
}
248+
249+
isl_id_free(Id);
250+
251+
return Node;
252+
}
253+
254+
static __isl_give isl_ast_node *
255+
AtEachDomain(__isl_keep isl_ast_node *Node,
256+
__isl_keep isl_ast_build *Context, void *User)
77257
{
78258
isl_map *Map;
79259
struct IslAstUser *UserStruct;
@@ -90,10 +270,11 @@ static __isl_give isl_ast_node *AtEachDomain(__isl_keep isl_ast_node *Node,
90270
return isl_ast_node_set_annotation(Node, Annotation);
91271
}
92272

93-
IslAst::IslAst(Scop *Scop) : S(Scop) {
273+
IslAst::IslAst(Scop *Scop, Dependences &D) : S(Scop) {
94274
isl_ctx *Ctx = S->getIslCtx();
95275
isl_options_set_ast_build_atomic_upper_bound(Ctx, true);
96276
isl_ast_build *Context;
277+
struct AstBuildUserInfo BuildInfo;
97278

98279
if (UseContext)
99280
Context = isl_ast_build_from_context(S->getContext());
@@ -112,6 +293,16 @@ IslAst::IslAst(Scop *Scop) : S(Scop) {
112293
isl_union_map_dump(Schedule);
113294
);
114295

296+
if (DetectParallel) {
297+
BuildInfo.Deps = &D;
298+
BuildInfo.InParallelFor = 0;
299+
300+
Context = isl_ast_build_set_before_each_for(Context, &astBuildBeforeFor,
301+
&BuildInfo);
302+
Context = isl_ast_build_set_after_each_for(Context, &astBuildAfterFor,
303+
&BuildInfo);
304+
}
305+
115306
Root = isl_ast_build_ast_from_schedule(Context, Schedule);
116307

117308
isl_ast_build_free(Context);
@@ -141,7 +332,11 @@ IslAst::~IslAst() {
141332
/// Print a C like representation of the program.
142333
void IslAst::pprint(llvm::raw_ostream &OS) {
143334
isl_ast_node *Root;
144-
isl_ast_print_options *Options = isl_ast_print_options_alloc(S->getIslCtx());
335+
isl_ast_print_options *Options;
336+
337+
Options = isl_ast_print_options_alloc(S->getIslCtx());
338+
Options = isl_ast_print_options_set_print_for(Options, &printFor, NULL);
339+
145340
isl_printer *P = isl_printer_to_str(S->getIslCtx());
146341
P = isl_printer_set_output_format(P, ISL_FORMAT_C);
147342
Root = getAst();
@@ -174,7 +369,9 @@ bool IslAstInfo::runOnScop(Scop &Scop) {
174369

175370
S = &Scop;
176371

177-
Ast = new IslAst(&Scop);
372+
Dependences &D = getAnalysis<Dependences>();
373+
374+
Ast = new IslAst(&Scop, D);
178375

179376
return false;
180377
}
@@ -195,12 +392,14 @@ void IslAstInfo::getAnalysisUsage(AnalysisUsage &AU) const {
195392
// Get the Common analysis usage of ScopPasses.
196393
ScopPass::getAnalysisUsage(AU);
197394
AU.addRequired<ScopInfo>();
395+
AU.addRequired<Dependences>();
198396
}
199397
char IslAstInfo::ID = 0;
200398

201399
INITIALIZE_PASS_BEGIN(IslAstInfo, "polly-ast",
202400
"Generate an AST of the SCoP (isl)", false, false)
203401
INITIALIZE_PASS_DEPENDENCY(ScopInfo)
402+
INITIALIZE_PASS_DEPENDENCY(Dependences)
204403
INITIALIZE_PASS_END(IslAstInfo, "polly-ast",
205404
"Generate an AST from the SCoP (isl)", false, false)
206405

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
2+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
3+
target triple = "x86_64-pc-linux-gnu"
4+
5+
; for (i = 0; i < 1024; i++)
6+
; for (j = 0; j < 1024; j++)
7+
; A[i][j] = 1;
8+
9+
@A = common global [1024 x [1024 x i32]] zeroinitializer
10+
define void @bar() {
11+
start:
12+
fence seq_cst
13+
br label %loop.i
14+
15+
loop.i:
16+
%i = phi i64 [ 0, %start ], [ %i.next, %loop.i.backedge ]
17+
%exitcond.i = icmp ne i64 %i, 1024
18+
br i1 %exitcond.i, label %loop.j, label %ret
19+
20+
loop.j:
21+
%j = phi i64 [ 0, %loop.i], [ %j.next, %loop.j.backedge ]
22+
%exitcond.j = icmp ne i64 %j, 1024
23+
br i1 %exitcond.j, label %loop.body, label %loop.i.backedge
24+
25+
loop.body:
26+
%scevgep = getelementptr [1024 x [1024 x i32] ]* @A, i64 0, i64 %j, i64 %i
27+
store i32 1, i32* %scevgep
28+
br label %loop.j.backedge
29+
30+
loop.j.backedge:
31+
%j.next = add nsw i64 %j, 1
32+
br label %loop.j
33+
34+
loop.i.backedge:
35+
%i.next = add nsw i64 %i, 1
36+
br label %loop.i
37+
38+
ret:
39+
fence seq_cst
40+
ret void
41+
}
42+
43+
; CHECK: #pragma omp parallel for
44+
; CHECK: for (int c1 = 0; c1 <= 1023; c1 += 1)
45+
; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1)
46+
; CHECK: Stmt_loop_body(c1, c3);
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
2+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
3+
target triple = "x86_64-pc-linux-gnu"
4+
5+
; for (i = 0; i < n; i++)
6+
; for (j = 0; j < n; j++)
7+
; A[i][j] = 1;
8+
9+
@A = common global [1024 x [1024 x i32]] zeroinitializer
10+
define void @bar(i64 %n) {
11+
start:
12+
fence seq_cst
13+
br label %loop.i
14+
15+
loop.i:
16+
%i = phi i64 [ 0, %start ], [ %i.next, %loop.i.backedge ]
17+
%exitcond.i = icmp ne i64 %i, %n
18+
br i1 %exitcond.i, label %loop.j, label %ret
19+
20+
loop.j:
21+
%j = phi i64 [ 0, %loop.i], [ %j.next, %loop.j.backedge ]
22+
%exitcond.j = icmp ne i64 %j, %n
23+
br i1 %exitcond.j, label %loop.body, label %loop.i.backedge
24+
25+
loop.body:
26+
%scevgep = getelementptr [1024 x [1024 x i32] ]* @A, i64 0, i64 %j, i64 %i
27+
store i32 1, i32* %scevgep
28+
br label %loop.j.backedge
29+
30+
loop.j.backedge:
31+
%j.next = add nsw i64 %j, 1
32+
br label %loop.j
33+
34+
loop.i.backedge:
35+
%i.next = add nsw i64 %i, 1
36+
br label %loop.i
37+
38+
ret:
39+
fence seq_cst
40+
ret void
41+
}
42+
43+
; At the first look both loops seem parallel, however due to the delinearization
44+
; we get the following dependences:
45+
; [n] -> { loop_body[i0, i1] -> loop_body[1024 + i0, -1 + i1]:
46+
; 0 <= i0 < n - 1024 and 1 <= i1 < n}
47+
; They cause the outer loop to be non-parallel. We can only prove their
48+
; absence, if we know that n < 1024. This information is currently not available
49+
; to polly. However, we should be able to obtain it due to the out of bounds
50+
; memory accesses, that would happen if n >= 1024.
51+
;
52+
; CHECK: for (int c1 = 0; c1 < n; c1 += 1)
53+
; CHECK: #pragma omp parallel for
54+
; CHECK: for (int c3 = 0; c3 < n; c3 += 1)
55+
; CHECK: Stmt_loop_body(c1, c3);

0 commit comments

Comments
 (0)