Skip to content

Commit 23983c7

Browse files
committed
[Flang][OpenMP] Move loop privatization out of dispatch
This patch moves the creation of `DataSharingProcessor` instances for loop constructs out of `genOMPDispatch()` and into their corresponding codegen functions. This is a necessary first step towards properly handling privatization for composite constructs. Some tests are updated because privatization is now processed after the clauses of each loop construct rather than before, which changes the order of the generated operations.
1 parent c2d8afb commit 23983c7

File tree

6 files changed

+127
-101
lines changed

6 files changed

+127
-101
lines changed

flang/lib/Lower/OpenMP/OpenMP.cpp

Lines changed: 84 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -1044,7 +1044,6 @@ static void genDistributeClauses(lower::AbstractConverter &converter,
10441044
cp.processAllocate(clauseOps);
10451045
cp.processDistSchedule(stmtCtx, clauseOps);
10461046
cp.processOrder(clauseOps);
1047-
// TODO Support delayed privatization.
10481047
}
10491048

10501049
static void genFlushClauses(lower::AbstractConverter &converter,
@@ -1128,7 +1127,6 @@ static void genSimdClauses(lower::AbstractConverter &converter,
11281127
cp.processSafelen(clauseOps);
11291128
cp.processSimdlen(clauseOps);
11301129

1131-
// TODO Support delayed privatization.
11321130
cp.processTODO<clause::Linear, clause::Nontemporal>(
11331131
loc, llvm::omp::Directive::OMPD_simd);
11341132
}
@@ -1299,7 +1297,6 @@ static void genWsloopClauses(
12991297
cp.processOrdered(clauseOps);
13001298
cp.processReduction(loc, clauseOps, &reductionTypes, &reductionSyms);
13011299
cp.processSchedule(stmtCtx, clauseOps);
1302-
// TODO Support delayed privatization.
13031300

13041301
cp.processTODO<clause::Allocate, clause::Linear>(
13051302
loc, llvm::omp::Directive::OMPD_do);
@@ -1918,17 +1915,25 @@ genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
19181915
// also be a leaf of a composite construct
19191916
//===----------------------------------------------------------------------===//
19201917

1921-
static void genStandaloneDistribute(
1922-
lower::AbstractConverter &converter, lower::SymMap &symTable,
1923-
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
1924-
mlir::Location loc, const ConstructQueue &queue,
1925-
ConstructQueue::const_iterator item, DataSharingProcessor &dsp) {
1918+
static void genStandaloneDistribute(lower::AbstractConverter &converter,
1919+
lower::SymMap &symTable,
1920+
semantics::SemanticsContext &semaCtx,
1921+
lower::pft::Evaluation &eval,
1922+
mlir::Location loc,
1923+
const ConstructQueue &queue,
1924+
ConstructQueue::const_iterator item) {
19261925
lower::StatementContext stmtCtx;
19271926

19281927
mlir::omp::DistributeOperands distributeClauseOps;
19291928
genDistributeClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
19301929
distributeClauseOps);
19311930

1931+
// TODO: Support delayed privatization.
1932+
DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
1933+
/*shouldCollectPreDeterminedSymbols=*/true,
1934+
/*useDelayedPrivatization=*/false, &symTable);
1935+
dsp.processStep1();
1936+
19321937
mlir::omp::LoopNestOperands loopNestClauseOps;
19331938
llvm::SmallVector<const semantics::Symbol *> iv;
19341939
genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
@@ -1949,8 +1954,7 @@ static void genStandaloneDo(lower::AbstractConverter &converter,
19491954
semantics::SemanticsContext &semaCtx,
19501955
lower::pft::Evaluation &eval, mlir::Location loc,
19511956
const ConstructQueue &queue,
1952-
ConstructQueue::const_iterator item,
1953-
DataSharingProcessor &dsp) {
1957+
ConstructQueue::const_iterator item) {
19541958
lower::StatementContext stmtCtx;
19551959

19561960
mlir::omp::WsloopOperands wsloopClauseOps;
@@ -1959,6 +1963,12 @@ static void genStandaloneDo(lower::AbstractConverter &converter,
19591963
genWsloopClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
19601964
wsloopClauseOps, reductionTypes, reductionSyms);
19611965

1966+
// TODO: Support delayed privatization.
1967+
DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
1968+
/*shouldCollectPreDeterminedSymbols=*/true,
1969+
/*useDelayedPrivatization=*/false, &symTable);
1970+
dsp.processStep1();
1971+
19621972
mlir::omp::LoopNestOperands loopNestClauseOps;
19631973
llvm::SmallVector<const semantics::Symbol *> iv;
19641974
genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
@@ -1998,11 +2008,16 @@ static void genStandaloneSimd(lower::AbstractConverter &converter,
19982008
semantics::SemanticsContext &semaCtx,
19992009
lower::pft::Evaluation &eval, mlir::Location loc,
20002010
const ConstructQueue &queue,
2001-
ConstructQueue::const_iterator item,
2002-
DataSharingProcessor &dsp) {
2011+
ConstructQueue::const_iterator item) {
20032012
mlir::omp::SimdOperands simdClauseOps;
20042013
genSimdClauses(converter, semaCtx, item->clauses, loc, simdClauseOps);
20052014

2015+
// TODO: Support delayed privatization.
2016+
DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
2017+
/*shouldCollectPreDeterminedSymbols=*/true,
2018+
/*useDelayedPrivatization=*/false, &symTable);
2019+
dsp.processStep1();
2020+
20062021
mlir::omp::LoopNestOperands loopNestClauseOps;
20072022
llvm::SmallVector<const semantics::Symbol *> iv;
20082023
genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
@@ -2018,11 +2033,13 @@ static void genStandaloneSimd(lower::AbstractConverter &converter,
20182033
llvm::omp::Directive::OMPD_simd, dsp);
20192034
}
20202035

2021-
static void genStandaloneTaskloop(
2022-
lower::AbstractConverter &converter, lower::SymMap &symTable,
2023-
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
2024-
mlir::Location loc, const ConstructQueue &queue,
2025-
ConstructQueue::const_iterator item, DataSharingProcessor &dsp) {
2036+
static void genStandaloneTaskloop(lower::AbstractConverter &converter,
2037+
lower::SymMap &symTable,
2038+
semantics::SemanticsContext &semaCtx,
2039+
lower::pft::Evaluation &eval,
2040+
mlir::Location loc,
2041+
const ConstructQueue &queue,
2042+
ConstructQueue::const_iterator item) {
20262043
TODO(loc, "Taskloop construct");
20272044
}
20282045

@@ -2034,7 +2051,7 @@ static void genCompositeDistributeParallelDo(
20342051
lower::AbstractConverter &converter, lower::SymMap &symTable,
20352052
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
20362053
mlir::Location loc, const ConstructQueue &queue,
2037-
ConstructQueue::const_iterator item, DataSharingProcessor &dsp) {
2054+
ConstructQueue::const_iterator item) {
20382055
assert(std::distance(item, queue.end()) == 3 && "Invalid leaf constructs");
20392056
TODO(loc, "Composite DISTRIBUTE PARALLEL DO");
20402057
}
@@ -2043,16 +2060,18 @@ static void genCompositeDistributeParallelDoSimd(
20432060
lower::AbstractConverter &converter, lower::SymMap &symTable,
20442061
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
20452062
mlir::Location loc, const ConstructQueue &queue,
2046-
ConstructQueue::const_iterator item, DataSharingProcessor &dsp) {
2063+
ConstructQueue::const_iterator item) {
20472064
assert(std::distance(item, queue.end()) == 4 && "Invalid leaf constructs");
20482065
TODO(loc, "Composite DISTRIBUTE PARALLEL DO SIMD");
20492066
}
20502067

2051-
static void genCompositeDistributeSimd(
2052-
lower::AbstractConverter &converter, lower::SymMap &symTable,
2053-
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
2054-
mlir::Location loc, const ConstructQueue &queue,
2055-
ConstructQueue::const_iterator item, DataSharingProcessor &dsp) {
2068+
static void genCompositeDistributeSimd(lower::AbstractConverter &converter,
2069+
lower::SymMap &symTable,
2070+
semantics::SemanticsContext &semaCtx,
2071+
lower::pft::Evaluation &eval,
2072+
mlir::Location loc,
2073+
const ConstructQueue &queue,
2074+
ConstructQueue::const_iterator item) {
20562075
lower::StatementContext stmtCtx;
20572076

20582077
assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs");
@@ -2067,6 +2086,12 @@ static void genCompositeDistributeSimd(
20672086
mlir::omp::SimdOperands simdClauseOps;
20682087
genSimdClauses(converter, semaCtx, simdItem->clauses, loc, simdClauseOps);
20692088

2089+
// TODO: Support delayed privatization.
2090+
DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval,
2091+
/*shouldCollectPreDeterminedSymbols=*/true,
2092+
/*useDelayedPrivatization=*/false, &symTable);
2093+
dsp.processStep1();
2094+
20702095
// Pass the innermost leaf construct's clauses because that's where COLLAPSE
20712096
// is placed by construct decomposition.
20722097
mlir::omp::LoopNestOperands loopNestClauseOps;
@@ -2103,8 +2128,7 @@ static void genCompositeDoSimd(lower::AbstractConverter &converter,
21032128
semantics::SemanticsContext &semaCtx,
21042129
lower::pft::Evaluation &eval, mlir::Location loc,
21052130
const ConstructQueue &queue,
2106-
ConstructQueue::const_iterator item,
2107-
DataSharingProcessor &dsp) {
2131+
ConstructQueue::const_iterator item) {
21082132
lower::StatementContext stmtCtx;
21092133

21102134
assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs");
@@ -2121,6 +2145,12 @@ static void genCompositeDoSimd(lower::AbstractConverter &converter,
21212145
mlir::omp::SimdOperands simdClauseOps;
21222146
genSimdClauses(converter, semaCtx, simdItem->clauses, loc, simdClauseOps);
21232147

2148+
// TODO: Support delayed privatization.
2149+
DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval,
2150+
/*shouldCollectPreDeterminedSymbols=*/true,
2151+
/*useDelayedPrivatization=*/false, &symTable);
2152+
dsp.processStep1();
2153+
21242154
// Pass the innermost leaf construct's clauses because that's where COLLAPSE
21252155
// is placed by construct decomposition.
21262156
mlir::omp::LoopNestOperands loopNestClauseOps;
@@ -2151,11 +2181,13 @@ static void genCompositeDoSimd(lower::AbstractConverter &converter,
21512181
llvm::omp::Directive::OMPD_do_simd, dsp);
21522182
}
21532183

2154-
static void genCompositeTaskloopSimd(
2155-
lower::AbstractConverter &converter, lower::SymMap &symTable,
2156-
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
2157-
mlir::Location loc, const ConstructQueue &queue,
2158-
ConstructQueue::const_iterator item, DataSharingProcessor &dsp) {
2184+
static void genCompositeTaskloopSimd(lower::AbstractConverter &converter,
2185+
lower::SymMap &symTable,
2186+
semantics::SemanticsContext &semaCtx,
2187+
lower::pft::Evaluation &eval,
2188+
mlir::Location loc,
2189+
const ConstructQueue &queue,
2190+
ConstructQueue::const_iterator item) {
21592191
assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs");
21602192
TODO(loc, "Composite TASKLOOP SIMD");
21612193
}
@@ -2164,30 +2196,35 @@ static void genCompositeTaskloopSimd(
21642196
// Dispatch
21652197
//===----------------------------------------------------------------------===//
21662198

2167-
static bool genOMPCompositeDispatch(
2168-
lower::AbstractConverter &converter, lower::SymMap &symTable,
2169-
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
2170-
mlir::Location loc, const ConstructQueue &queue,
2171-
ConstructQueue::const_iterator item, DataSharingProcessor &dsp) {
2199+
static bool genOMPCompositeDispatch(lower::AbstractConverter &converter,
2200+
lower::SymMap &symTable,
2201+
semantics::SemanticsContext &semaCtx,
2202+
lower::pft::Evaluation &eval,
2203+
mlir::Location loc,
2204+
const ConstructQueue &queue,
2205+
ConstructQueue::const_iterator item) {
21722206
using llvm::omp::Directive;
21732207
using lower::omp::matchLeafSequence;
21742208

2209+
// TODO: Privatization for composite constructs is currently only done based
2210+
// on the clauses for their last leaf construct, which may not always be
2211+
// correct. Consider per-leaf privatization of composite constructs once
2212+
// delayed privatization is supported by all participating ops.
21752213
if (matchLeafSequence(item, queue, Directive::OMPD_distribute_parallel_do))
21762214
genCompositeDistributeParallelDo(converter, symTable, semaCtx, eval, loc,
2177-
queue, item, dsp);
2215+
queue, item);
21782216
else if (matchLeafSequence(item, queue,
21792217
Directive::OMPD_distribute_parallel_do_simd))
21802218
genCompositeDistributeParallelDoSimd(converter, symTable, semaCtx, eval,
2181-
loc, queue, item, dsp);
2219+
loc, queue, item);
21822220
else if (matchLeafSequence(item, queue, Directive::OMPD_distribute_simd))
21832221
genCompositeDistributeSimd(converter, symTable, semaCtx, eval, loc, queue,
2184-
item, dsp);
2222+
item);
21852223
else if (matchLeafSequence(item, queue, Directive::OMPD_do_simd))
2186-
genCompositeDoSimd(converter, symTable, semaCtx, eval, loc, queue, item,
2187-
dsp);
2224+
genCompositeDoSimd(converter, symTable, semaCtx, eval, loc, queue, item);
21882225
else if (matchLeafSequence(item, queue, Directive::OMPD_taskloop_simd))
21892226
genCompositeTaskloopSimd(converter, symTable, semaCtx, eval, loc, queue,
2190-
item, dsp);
2227+
item);
21912228
else
21922229
return false;
21932230

@@ -2202,20 +2239,12 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
22022239
ConstructQueue::const_iterator item) {
22032240
assert(item != queue.end());
22042241

2205-
std::optional<DataSharingProcessor> loopDsp;
22062242
bool loopLeaf = llvm::omp::getDirectiveAssociation(item->id) ==
22072243
llvm::omp::Association::Loop;
22082244
if (loopLeaf) {
22092245
symTable.pushScope();
2210-
// TODO: Use one DataSharingProcessor for each leaf of a composite
2211-
// construct.
2212-
loopDsp.emplace(converter, semaCtx, item->clauses, eval,
2213-
/*shouldCollectPreDeterminedSymbols=*/true,
2214-
/*useDelayedPrivatization=*/false, &symTable);
2215-
loopDsp->processStep1();
2216-
22172246
if (genOMPCompositeDispatch(converter, symTable, semaCtx, eval, loc, queue,
2218-
item, *loopDsp)) {
2247+
item)) {
22192248
symTable.popScope();
22202249
return;
22212250
}
@@ -2227,11 +2256,10 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
22272256
break;
22282257
case llvm::omp::Directive::OMPD_distribute:
22292258
genStandaloneDistribute(converter, symTable, semaCtx, eval, loc, queue,
2230-
item, *loopDsp);
2259+
item);
22312260
break;
22322261
case llvm::omp::Directive::OMPD_do:
2233-
genStandaloneDo(converter, symTable, semaCtx, eval, loc, queue, item,
2234-
*loopDsp);
2262+
genStandaloneDo(converter, symTable, semaCtx, eval, loc, queue, item);
22352263
break;
22362264
case llvm::omp::Directive::OMPD_loop:
22372265
TODO(loc, "Unhandled directive " + llvm::omp::getOpenMPDirectiveName(dir));
@@ -2260,8 +2288,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
22602288
// in genBodyOfOp
22612289
break;
22622290
case llvm::omp::Directive::OMPD_simd:
2263-
genStandaloneSimd(converter, symTable, semaCtx, eval, loc, queue, item,
2264-
*loopDsp);
2291+
genStandaloneSimd(converter, symTable, semaCtx, eval, loc, queue, item);
22652292
break;
22662293
case llvm::omp::Directive::OMPD_single:
22672294
genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item);
@@ -2291,8 +2318,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
22912318
genTaskgroupOp(converter, symTable, semaCtx, eval, loc, queue, item);
22922319
break;
22932320
case llvm::omp::Directive::OMPD_taskloop:
2294-
genStandaloneTaskloop(converter, symTable, semaCtx, eval, loc, queue, item,
2295-
*loopDsp);
2321+
genStandaloneTaskloop(converter, symTable, semaCtx, eval, loc, queue, item);
22962322
break;
22972323
case llvm::omp::Directive::OMPD_taskwait:
22982324
genTaskwaitOp(converter, symTable, semaCtx, eval, loc, queue, item);

flang/test/Lower/OpenMP/parallel-reduction3.f90

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -69,19 +69,19 @@
6969
! CHECK: %[[VAL_13:.*]] = arith.constant 0 : i32
7070
! CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_12]]#0 : i32, !fir.box<!fir.array<?xi32>>
7171
! CHECK: omp.parallel {
72-
! CHECK: %[[VAL_14:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
73-
! CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_14]] {uniq_name = "_QFsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
74-
! CHECK: %[[VAL_16:.*]] = fir.alloca !fir.box<!fir.array<?xi32>>
75-
! CHECK: fir.store %[[VAL_12]]#0 to %[[VAL_16]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
72+
! CHECK: %[[VAL_14:.*]] = fir.alloca !fir.box<!fir.array<?xi32>>
73+
! CHECK: fir.store %[[VAL_12]]#0 to %[[VAL_14]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
74+
! CHECK: %[[VAL_15:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
75+
! CHECK: %[[VAL_16:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
7676
! CHECK: %[[VAL_17:.*]] = arith.constant 1 : i32
7777
! CHECK: %[[VAL_18:.*]] = arith.constant 100 : i32
7878
! CHECK: %[[VAL_19:.*]] = arith.constant 1 : i32
79-
! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_Uxi32 %[[VAL_16]] -> %[[VAL_20:.*]] : !fir.ref<!fir.box<!fir.array<?xi32>>>) {
79+
! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_Uxi32 %[[VAL_14]] -> %[[VAL_20:.*]] : !fir.ref<!fir.box<!fir.array<?xi32>>>) {
8080
! CHECK-NEXT: omp.loop_nest (%[[VAL_21:.*]]) : i32 = (%[[VAL_17]]) to (%[[VAL_18]]) inclusive step (%[[VAL_19]]) {
8181
! CHECK: %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFsEc"} : (!fir.ref<!fir.box<!fir.array<?xi32>>>) -> (!fir.ref<!fir.box<!fir.array<?xi32>>>, !fir.ref<!fir.box<!fir.array<?xi32>>>)
82-
! CHECK: fir.store %[[VAL_21]] to %[[VAL_15]]#1 : !fir.ref<i32>
82+
! CHECK: fir.store %[[VAL_21]] to %[[VAL_16]]#1 : !fir.ref<i32>
8383
! CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref<!fir.box<!fir.array<?xi32>>>
84-
! CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
84+
! CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_16]]#0 : !fir.ref<i32>
8585
! CHECK: %[[VAL_25:.*]] = arith.constant 0 : index
8686
! CHECK: %[[VAL_26:.*]]:3 = fir.box_dims %[[VAL_23]], %[[VAL_25]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
8787
! CHECK: %[[VAL_27:.*]] = fir.shape %[[VAL_26]]#1 : (index) -> !fir.shape<1>

0 commit comments

Comments
 (0)