Skip to content

Commit e2705fe

Browse files
committed
Implement some missing functionality
1 parent 7395392 commit e2705fe

File tree

2 files changed

+81
-35
lines changed

2 files changed

+81
-35
lines changed

flang/include/flang/Optimizer/OpenMP/Passes.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ namespace flangomp {
2525
#define GEN_PASS_REGISTRATION
2626
#include "flang/Optimizer/OpenMP/Passes.h.inc"
2727

28+
/// Impelements the logic specified in the 2.8.3 workshare Construct section of
29+
/// the OpenMP standard which specifies what statements or constructs shall be
30+
/// divided into units of work.
2831
bool shouldUseWorkshareLowering(mlir::Operation *op);
2932

3033
} // namespace flangomp

flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp

Lines changed: 78 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,15 @@
55
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66
//
77
//===----------------------------------------------------------------------===//
8-
// Lower omp workshare construct.
8+
//
9+
// This file implements the lowering of omp.workshare to other omp constructs.
10+
//
11+
// This pass is tasked with parallelizing the loops nested in
12+
// workshare_loop_wrapper while both the Fortran to mlir lowering and the hlfir
13+
// to fir lowering pipelines are responsible for emitting the
14+
// workshare_loop_wrapper ops where appropriate according to the
15+
// `shouldUseWorkshareLowering` function.
16+
//
917
//===----------------------------------------------------------------------===//
1018

1119
#include <flang/Optimizer/Builder/FIRBuilder.h>
@@ -44,25 +52,52 @@ namespace flangomp {
4452
using namespace mlir;
4553

4654
namespace flangomp {
55+
56+
// Checks for nesting pattern below as we need to avoid sharing the work of
57+
// statements which are nested in some constructs such as omp.critical or
58+
// another omp.parallel.
59+
//
60+
// omp.workshare { // `wsOp`
61+
// ...
62+
// omp.T { // `parent`
63+
// ...
64+
// `op`
65+
//
66+
template <typename T>
67+
static bool isNestedIn(omp::WorkshareOp wsOp, Operation *op) {
68+
T parent = op->getParentOfType<T>();
69+
if (!parent)
70+
return false;
71+
return wsOp->isProperAncestor(parent);
72+
}
73+
4774
bool shouldUseWorkshareLowering(Operation *op) {
48-
// TODO this is insufficient, as we could have
49-
// omp.parallel {
50-
// omp.workshare {
51-
// omp.parallel {
52-
// hlfir.elemental {}
53-
//
54-
// Then this hlfir.elemental shall _not_ use the lowering for workshare
55-
//
56-
// Standard says:
57-
// For a parallel construct, the construct is a unit of work with respect to
58-
// the workshare construct. The statements contained in the parallel
59-
// construct are executed by a new thread team.
60-
//
61-
// TODO similarly for single, critical, etc. Need to think through the
62-
// patterns and implement this function.
63-
//
64-
return op->getParentOfType<omp::WorkshareOp>();
75+
auto parentWorkshare = op->getParentOfType<omp::WorkshareOp>();
76+
77+
if (!parentWorkshare)
78+
return false;
79+
80+
if (isNestedIn<omp::CriticalOp>(parentWorkshare, op))
81+
return false;
82+
83+
// 2.8.3 workshare Construct
84+
// For a parallel construct, the construct is a unit of work with respect to
85+
// the workshare construct. The statements contained in the parallel construct
86+
// are executed by a new thread team.
87+
if (isNestedIn<omp::ParallelOp>(parentWorkshare, op))
88+
return false;
89+
90+
// 2.8.2 single Construct
91+
// Binding The binding thread set for a single region is the current team. A
92+
// single region binds to the innermost enclosing parallel region.
93+
// Description Only one of the encountering threads will execute the
94+
// structured block associated with the single construct.
95+
if (isNestedIn<omp::SingleOp>(parentWorkshare, op))
96+
return false;
97+
98+
return true;
6599
}
100+
66101
} // namespace flangomp
67102

68103
namespace {
@@ -72,19 +107,27 @@ struct SingleRegion {
72107
};
73108

74109
static bool mustParallelizeOp(Operation *op) {
75-
// TODO as in shouldUseWorkshareLowering we be careful not to pick up
76-
// workshare_loop_wrapper in nested omp.parallel ops
77-
//
78-
// e.g.
79-
//
80-
// omp.parallel {
81-
// omp.workshare {
82-
// omp.parallel {
83-
// omp.workshare {
84-
// omp.workshare_loop_wrapper {}
85110
return op
86-
->walk(
87-
[](omp::WorkshareLoopWrapperOp) { return WalkResult::interrupt(); })
111+
->walk([&](Operation *nested) {
112+
// We need to be careful not to pick up workshare_loop_wrapper in nested
113+
// omp.parallel{omp.workshare} regions, i.e. make sure that `nested`
114+
// binds to the workshare region we are currently handling.
115+
//
116+
// For example:
117+
//
118+
// omp.parallel {
119+
// omp.workshare { // currently handling this
120+
// omp.parallel {
121+
// omp.workshare { // nested workshare
122+
// omp.workshare_loop_wrapper {}
123+
//
124+
// Therefore, we skip if we encounter a nested omp.workshare.
125+
if (isa<omp::WorkshareOp>(op))
126+
WalkResult::skip();
127+
if (isa<omp::WorkshareLoopWrapperOp>(op))
128+
WalkResult::interrupt();
129+
WalkResult::advance();
130+
})
88131
.wasInterrupted();
89132
}
90133

@@ -340,7 +383,8 @@ static void parallelizeRegion(Region &sourceRegion, Region &targetRegion,
340383
///
341384
/// becomes
342385
///
343-
/// omp.single {
386+
/// %tmp = fir.alloca
387+
/// omp.single copyprivate(%tmp) {
344388
/// %a = fir.allocmem
345389
/// fir.store %a %tmp
346390
/// }
@@ -352,16 +396,15 @@ static void parallelizeRegion(Region &sourceRegion, Region &targetRegion,
352396
/// }
353397
///
354398
/// Note that we allocate temporary memory for values in omp.single's which need
355-
/// to be accessed in all threads in the closest omp.parallel
399+
/// to be accessed by all threads and broadcast them using single's copyprivate
356400
LogicalResult lowerWorkshare(mlir::omp::WorkshareOp wsOp, DominanceInfo &di) {
357401
Location loc = wsOp->getLoc();
358402
IRMapping rootMapping;
359403

360404
OpBuilder rootBuilder(wsOp);
361405

362-
// TODO We need something like an scf.execute here, but that is not registered
363-
// so using omp.workshare as a placeholder. We need this op as our
364-
// parallelizeRegion works on regions and not blocks.
406+
// This operation is just a placeholder which will be erased later. We need it
407+
// because our `parallelizeRegion` function works on regions and not blocks.
365408
omp::WorkshareOp newOp =
366409
rootBuilder.create<omp::WorkshareOp>(loc, omp::WorkshareOperands());
367410
if (!wsOp.getNowait())

0 commit comments

Comments
 (0)