5
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
6
//
7
7
//===----------------------------------------------------------------------===//
8
- // Lower omp workshare construct.
8
+ //
9
+ // This file implements the lowering of omp.workshare to other omp constructs.
10
+ //
11
+ // This pass is tasked with parallelizing the loops nested in
12
+ // workshare_loop_wrapper while both the Fortran to mlir lowering and the hlfir
13
+ // to fir lowering pipelines are responsible for emitting the
14
+ // workshare_loop_wrapper ops where appropriate according to the
15
+ // `shouldUseWorkshareLowering` function.
16
+ //
9
17
//===----------------------------------------------------------------------===//
10
18
11
19
#include <flang/Optimizer/Builder/FIRBuilder.h>
@@ -44,25 +52,52 @@ namespace flangomp {
44
52
using namespace mlir;
45
53
46
54
namespace flangomp {
55
+
56
+ // Checks for nesting pattern below as we need to avoid sharing the work of
57
+ // statements which are nested in some constructs such as omp.critical or
58
+ // another omp.parallel.
59
+ //
60
+ // omp.workshare { // `wsOp`
61
+ // ...
62
+ // omp.T { // `parent`
63
+ // ...
64
+ // `op`
65
+ //
66
+ template <typename T>
67
+ static bool isNestedIn (omp::WorkshareOp wsOp, Operation *op) {
68
+ T parent = op->getParentOfType <T>();
69
+ if (!parent)
70
+ return false ;
71
+ return wsOp->isProperAncestor (parent);
72
+ }
73
+
47
74
bool shouldUseWorkshareLowering (Operation *op) {
48
- // TODO this is insufficient, as we could have
49
- // omp.parallel {
50
- // omp.workshare {
51
- // omp.parallel {
52
- // hlfir.elemental {}
53
- //
54
- // Then this hlfir.elemental shall _not_ use the lowering for workshare
55
- //
56
- // Standard says:
57
- // For a parallel construct, the construct is a unit of work with respect to
58
- // the workshare construct. The statements contained in the parallel
59
- // construct are executed by a new thread team.
60
- //
61
- // TODO similarly for single, critical, etc. Need to think through the
62
- // patterns and implement this function.
63
- //
64
- return op->getParentOfType <omp::WorkshareOp>();
75
+ auto parentWorkshare = op->getParentOfType <omp::WorkshareOp>();
76
+
77
+ if (!parentWorkshare)
78
+ return false ;
79
+
80
+ if (isNestedIn<omp::CriticalOp>(parentWorkshare, op))
81
+ return false ;
82
+
83
+ // 2.8.3 workshare Construct
84
+ // For a parallel construct, the construct is a unit of work with respect to
85
+ // the workshare construct. The statements contained in the parallel construct
86
+ // are executed by a new thread team.
87
+ if (isNestedIn<omp::ParallelOp>(parentWorkshare, op))
88
+ return false ;
89
+
90
+ // 2.8.2 single Construct
91
+ // Binding The binding thread set for a single region is the current team. A
92
+ // single region binds to the innermost enclosing parallel region.
93
+ // Description Only one of the encountering threads will execute the
94
+ // structured block associated with the single construct.
95
+ if (isNestedIn<omp::SingleOp>(parentWorkshare, op))
96
+ return false ;
97
+
98
+ return true ;
65
99
}
100
+
66
101
} // namespace flangomp
67
102
68
103
namespace {
@@ -72,19 +107,27 @@ struct SingleRegion {
72
107
};
73
108
74
109
static bool mustParallelizeOp (Operation *op) {
75
- // TODO as in shouldUseWorkshareLowering we be careful not to pick up
76
- // workshare_loop_wrapper in nested omp.parallel ops
77
- //
78
- // e.g.
79
- //
80
- // omp.parallel {
81
- // omp.workshare {
82
- // omp.parallel {
83
- // omp.workshare {
84
- // omp.workshare_loop_wrapper {}
85
110
return op
86
- ->walk (
87
- [](omp::WorkshareLoopWrapperOp) { return WalkResult::interrupt (); })
111
+ ->walk ([&](Operation *nested) {
112
+ // We need to be careful not to pick up workshare_loop_wrapper in nested
113
+ // omp.parallel{omp.workshare} regions, i.e. make sure that `nested`
114
+ // binds to the workshare region we are currently handling.
115
+ //
116
+ // For example:
117
+ //
118
+ // omp.parallel {
119
+ // omp.workshare { // currently handling this
120
+ // omp.parallel {
121
+ // omp.workshare { // nested workshare
122
+ // omp.workshare_loop_wrapper {}
123
+ //
124
+ // Therefore, we skip if we encounter a nested omp.workshare.
125
+ if (isa<omp::WorkshareOp>(op))
126
+ WalkResult::skip ();
127
+ if (isa<omp::WorkshareLoopWrapperOp>(op))
128
+ WalkResult::interrupt ();
129
+ WalkResult::advance ();
130
+ })
88
131
.wasInterrupted ();
89
132
}
90
133
@@ -340,7 +383,8 @@ static void parallelizeRegion(Region &sourceRegion, Region &targetRegion,
340
383
// /
341
384
// / becomes
342
385
// /
343
- // / omp.single {
386
+ // / %tmp = fir.alloca
387
+ // / omp.single copyprivate(%tmp) {
344
388
// / %a = fir.allocmem
345
389
// / fir.store %a %tmp
346
390
// / }
@@ -352,16 +396,15 @@ static void parallelizeRegion(Region &sourceRegion, Region &targetRegion,
352
396
// / }
353
397
// /
354
398
// / Note that we allocate temporary memory for values in omp.single's which need
355
- // / to be accessed in all threads in the closest omp.parallel
399
+ // / to be accessed by all threads and broadcast them using single's copyprivate
356
400
LogicalResult lowerWorkshare (mlir::omp::WorkshareOp wsOp, DominanceInfo &di) {
357
401
Location loc = wsOp->getLoc ();
358
402
IRMapping rootMapping;
359
403
360
404
OpBuilder rootBuilder (wsOp);
361
405
362
- // TODO We need something like an scf.execute here, but that is not registered
363
- // so using omp.workshare as a placeholder. We need this op as our
364
- // parallelizeRegion works on regions and not blocks.
406
+ // This operation is just a placeholder which will be erased later. We need it
407
+ // because our `parallelizeRegion` function works on regions and not blocks.
365
408
omp::WorkshareOp newOp =
366
409
rootBuilder.create <omp::WorkshareOp>(loc, omp::WorkshareOperands ());
367
410
if (!wsOp.getNowait ())
0 commit comments