Skip to content

Commit a187281

Browse files
committed
[Flang][OpenMP] Move loop privatization out of dispatch
This patch moves the creation of `DataSharingProcessor` instances for loop constructs out of `genOMPDispatch()` and into their corresponding codegen functions. This is a necessary first step to enable proper handling of privatization on composite constructs. Some tests are updated because privatization now runs after clause processing, which changes the order of the generated operations.
1 parent 2784060 commit a187281

File tree

6 files changed

+127
-101
lines changed

6 files changed

+127
-101
lines changed

flang/lib/Lower/OpenMP/OpenMP.cpp

Lines changed: 84 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -1044,7 +1044,6 @@ static void genDistributeClauses(lower::AbstractConverter &converter,
10441044
cp.processAllocate(clauseOps);
10451045
cp.processDistSchedule(stmtCtx, clauseOps);
10461046
cp.processOrder(clauseOps);
1047-
// TODO Support delayed privatization.
10481047
}
10491048

10501049
static void genFlushClauses(lower::AbstractConverter &converter,
@@ -1128,7 +1127,6 @@ static void genSimdClauses(lower::AbstractConverter &converter,
11281127
cp.processSafelen(clauseOps);
11291128
cp.processSimdlen(clauseOps);
11301129

1131-
// TODO Support delayed privatization.
11321130
cp.processTODO<clause::Linear, clause::Nontemporal>(
11331131
loc, llvm::omp::Directive::OMPD_simd);
11341132
}
@@ -1299,7 +1297,6 @@ static void genWsloopClauses(
12991297
cp.processOrdered(clauseOps);
13001298
cp.processReduction(loc, clauseOps, &reductionTypes, &reductionSyms);
13011299
cp.processSchedule(stmtCtx, clauseOps);
1302-
// TODO Support delayed privatization.
13031300

13041301
cp.processTODO<clause::Allocate, clause::Linear>(
13051302
loc, llvm::omp::Directive::OMPD_do);
@@ -1924,17 +1921,25 @@ genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
19241921
// also be a leaf of a composite construct
19251922
//===----------------------------------------------------------------------===//
19261923

1927-
static void genStandaloneDistribute(
1928-
lower::AbstractConverter &converter, lower::SymMap &symTable,
1929-
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
1930-
mlir::Location loc, const ConstructQueue &queue,
1931-
ConstructQueue::const_iterator item, DataSharingProcessor &dsp) {
1924+
static void genStandaloneDistribute(lower::AbstractConverter &converter,
1925+
lower::SymMap &symTable,
1926+
semantics::SemanticsContext &semaCtx,
1927+
lower::pft::Evaluation &eval,
1928+
mlir::Location loc,
1929+
const ConstructQueue &queue,
1930+
ConstructQueue::const_iterator item) {
19321931
lower::StatementContext stmtCtx;
19331932

19341933
mlir::omp::DistributeOperands distributeClauseOps;
19351934
genDistributeClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
19361935
distributeClauseOps);
19371936

1937+
// TODO: Support delayed privatization.
1938+
DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
1939+
/*shouldCollectPreDeterminedSymbols=*/true,
1940+
/*useDelayedPrivatization=*/false, &symTable);
1941+
dsp.processStep1();
1942+
19381943
mlir::omp::LoopNestOperands loopNestClauseOps;
19391944
llvm::SmallVector<const semantics::Symbol *> iv;
19401945
genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
@@ -1955,8 +1960,7 @@ static void genStandaloneDo(lower::AbstractConverter &converter,
19551960
semantics::SemanticsContext &semaCtx,
19561961
lower::pft::Evaluation &eval, mlir::Location loc,
19571962
const ConstructQueue &queue,
1958-
ConstructQueue::const_iterator item,
1959-
DataSharingProcessor &dsp) {
1963+
ConstructQueue::const_iterator item) {
19601964
lower::StatementContext stmtCtx;
19611965

19621966
mlir::omp::WsloopOperands wsloopClauseOps;
@@ -1965,6 +1969,12 @@ static void genStandaloneDo(lower::AbstractConverter &converter,
19651969
genWsloopClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
19661970
wsloopClauseOps, reductionTypes, reductionSyms);
19671971

1972+
// TODO: Support delayed privatization.
1973+
DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
1974+
/*shouldCollectPreDeterminedSymbols=*/true,
1975+
/*useDelayedPrivatization=*/false, &symTable);
1976+
dsp.processStep1();
1977+
19681978
mlir::omp::LoopNestOperands loopNestClauseOps;
19691979
llvm::SmallVector<const semantics::Symbol *> iv;
19701980
genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
@@ -2004,11 +2014,16 @@ static void genStandaloneSimd(lower::AbstractConverter &converter,
20042014
semantics::SemanticsContext &semaCtx,
20052015
lower::pft::Evaluation &eval, mlir::Location loc,
20062016
const ConstructQueue &queue,
2007-
ConstructQueue::const_iterator item,
2008-
DataSharingProcessor &dsp) {
2017+
ConstructQueue::const_iterator item) {
20092018
mlir::omp::SimdOperands simdClauseOps;
20102019
genSimdClauses(converter, semaCtx, item->clauses, loc, simdClauseOps);
20112020

2021+
// TODO: Support delayed privatization.
2022+
DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
2023+
/*shouldCollectPreDeterminedSymbols=*/true,
2024+
/*useDelayedPrivatization=*/false, &symTable);
2025+
dsp.processStep1();
2026+
20122027
mlir::omp::LoopNestOperands loopNestClauseOps;
20132028
llvm::SmallVector<const semantics::Symbol *> iv;
20142029
genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
@@ -2024,11 +2039,13 @@ static void genStandaloneSimd(lower::AbstractConverter &converter,
20242039
llvm::omp::Directive::OMPD_simd, dsp);
20252040
}
20262041

2027-
static void genStandaloneTaskloop(
2028-
lower::AbstractConverter &converter, lower::SymMap &symTable,
2029-
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
2030-
mlir::Location loc, const ConstructQueue &queue,
2031-
ConstructQueue::const_iterator item, DataSharingProcessor &dsp) {
2042+
static void genStandaloneTaskloop(lower::AbstractConverter &converter,
2043+
lower::SymMap &symTable,
2044+
semantics::SemanticsContext &semaCtx,
2045+
lower::pft::Evaluation &eval,
2046+
mlir::Location loc,
2047+
const ConstructQueue &queue,
2048+
ConstructQueue::const_iterator item) {
20322049
TODO(loc, "Taskloop construct");
20332050
}
20342051

@@ -2040,7 +2057,7 @@ static void genCompositeDistributeParallelDo(
20402057
lower::AbstractConverter &converter, lower::SymMap &symTable,
20412058
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
20422059
mlir::Location loc, const ConstructQueue &queue,
2043-
ConstructQueue::const_iterator item, DataSharingProcessor &dsp) {
2060+
ConstructQueue::const_iterator item) {
20442061
assert(std::distance(item, queue.end()) == 3 && "Invalid leaf constructs");
20452062
TODO(loc, "Composite DISTRIBUTE PARALLEL DO");
20462063
}
@@ -2049,16 +2066,18 @@ static void genCompositeDistributeParallelDoSimd(
20492066
lower::AbstractConverter &converter, lower::SymMap &symTable,
20502067
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
20512068
mlir::Location loc, const ConstructQueue &queue,
2052-
ConstructQueue::const_iterator item, DataSharingProcessor &dsp) {
2069+
ConstructQueue::const_iterator item) {
20532070
assert(std::distance(item, queue.end()) == 4 && "Invalid leaf constructs");
20542071
TODO(loc, "Composite DISTRIBUTE PARALLEL DO SIMD");
20552072
}
20562073

2057-
static void genCompositeDistributeSimd(
2058-
lower::AbstractConverter &converter, lower::SymMap &symTable,
2059-
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
2060-
mlir::Location loc, const ConstructQueue &queue,
2061-
ConstructQueue::const_iterator item, DataSharingProcessor &dsp) {
2074+
static void genCompositeDistributeSimd(lower::AbstractConverter &converter,
2075+
lower::SymMap &symTable,
2076+
semantics::SemanticsContext &semaCtx,
2077+
lower::pft::Evaluation &eval,
2078+
mlir::Location loc,
2079+
const ConstructQueue &queue,
2080+
ConstructQueue::const_iterator item) {
20622081
lower::StatementContext stmtCtx;
20632082

20642083
assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs");
@@ -2073,6 +2092,12 @@ static void genCompositeDistributeSimd(
20732092
mlir::omp::SimdOperands simdClauseOps;
20742093
genSimdClauses(converter, semaCtx, simdItem->clauses, loc, simdClauseOps);
20752094

2095+
// TODO: Support delayed privatization.
2096+
DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval,
2097+
/*shouldCollectPreDeterminedSymbols=*/true,
2098+
/*useDelayedPrivatization=*/false, &symTable);
2099+
dsp.processStep1();
2100+
20762101
// Pass the innermost leaf construct's clauses because that's where COLLAPSE
20772102
// is placed by construct decomposition.
20782103
mlir::omp::LoopNestOperands loopNestClauseOps;
@@ -2109,8 +2134,7 @@ static void genCompositeDoSimd(lower::AbstractConverter &converter,
21092134
semantics::SemanticsContext &semaCtx,
21102135
lower::pft::Evaluation &eval, mlir::Location loc,
21112136
const ConstructQueue &queue,
2112-
ConstructQueue::const_iterator item,
2113-
DataSharingProcessor &dsp) {
2137+
ConstructQueue::const_iterator item) {
21142138
lower::StatementContext stmtCtx;
21152139

21162140
assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs");
@@ -2127,6 +2151,12 @@ static void genCompositeDoSimd(lower::AbstractConverter &converter,
21272151
mlir::omp::SimdOperands simdClauseOps;
21282152
genSimdClauses(converter, semaCtx, simdItem->clauses, loc, simdClauseOps);
21292153

2154+
// TODO: Support delayed privatization.
2155+
DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval,
2156+
/*shouldCollectPreDeterminedSymbols=*/true,
2157+
/*useDelayedPrivatization=*/false, &symTable);
2158+
dsp.processStep1();
2159+
21302160
// Pass the innermost leaf construct's clauses because that's where COLLAPSE
21312161
// is placed by construct decomposition.
21322162
mlir::omp::LoopNestOperands loopNestClauseOps;
@@ -2157,11 +2187,13 @@ static void genCompositeDoSimd(lower::AbstractConverter &converter,
21572187
llvm::omp::Directive::OMPD_do_simd, dsp);
21582188
}
21592189

2160-
static void genCompositeTaskloopSimd(
2161-
lower::AbstractConverter &converter, lower::SymMap &symTable,
2162-
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
2163-
mlir::Location loc, const ConstructQueue &queue,
2164-
ConstructQueue::const_iterator item, DataSharingProcessor &dsp) {
2190+
static void genCompositeTaskloopSimd(lower::AbstractConverter &converter,
2191+
lower::SymMap &symTable,
2192+
semantics::SemanticsContext &semaCtx,
2193+
lower::pft::Evaluation &eval,
2194+
mlir::Location loc,
2195+
const ConstructQueue &queue,
2196+
ConstructQueue::const_iterator item) {
21652197
assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs");
21662198
TODO(loc, "Composite TASKLOOP SIMD");
21672199
}
@@ -2170,30 +2202,35 @@ static void genCompositeTaskloopSimd(
21702202
// Dispatch
21712203
//===----------------------------------------------------------------------===//
21722204

2173-
static bool genOMPCompositeDispatch(
2174-
lower::AbstractConverter &converter, lower::SymMap &symTable,
2175-
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
2176-
mlir::Location loc, const ConstructQueue &queue,
2177-
ConstructQueue::const_iterator item, DataSharingProcessor &dsp) {
2205+
static bool genOMPCompositeDispatch(lower::AbstractConverter &converter,
2206+
lower::SymMap &symTable,
2207+
semantics::SemanticsContext &semaCtx,
2208+
lower::pft::Evaluation &eval,
2209+
mlir::Location loc,
2210+
const ConstructQueue &queue,
2211+
ConstructQueue::const_iterator item) {
21782212
using llvm::omp::Directive;
21792213
using lower::omp::matchLeafSequence;
21802214

2215+
// TODO: Privatization for composite constructs is currently only done based
2216+
// on the clauses for their last leaf construct, which may not always be
2217+
// correct. Consider per-leaf privatization of composite constructs once
2218+
// delayed privatization is supported by all participating ops.
21812219
if (matchLeafSequence(item, queue, Directive::OMPD_distribute_parallel_do))
21822220
genCompositeDistributeParallelDo(converter, symTable, semaCtx, eval, loc,
2183-
queue, item, dsp);
2221+
queue, item);
21842222
else if (matchLeafSequence(item, queue,
21852223
Directive::OMPD_distribute_parallel_do_simd))
21862224
genCompositeDistributeParallelDoSimd(converter, symTable, semaCtx, eval,
2187-
loc, queue, item, dsp);
2225+
loc, queue, item);
21882226
else if (matchLeafSequence(item, queue, Directive::OMPD_distribute_simd))
21892227
genCompositeDistributeSimd(converter, symTable, semaCtx, eval, loc, queue,
2190-
item, dsp);
2228+
item);
21912229
else if (matchLeafSequence(item, queue, Directive::OMPD_do_simd))
2192-
genCompositeDoSimd(converter, symTable, semaCtx, eval, loc, queue, item,
2193-
dsp);
2230+
genCompositeDoSimd(converter, symTable, semaCtx, eval, loc, queue, item);
21942231
else if (matchLeafSequence(item, queue, Directive::OMPD_taskloop_simd))
21952232
genCompositeTaskloopSimd(converter, symTable, semaCtx, eval, loc, queue,
2196-
item, dsp);
2233+
item);
21972234
else
21982235
return false;
21992236

@@ -2208,20 +2245,12 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
22082245
ConstructQueue::const_iterator item) {
22092246
assert(item != queue.end());
22102247

2211-
std::optional<DataSharingProcessor> loopDsp;
22122248
bool loopLeaf = llvm::omp::getDirectiveAssociation(item->id) ==
22132249
llvm::omp::Association::Loop;
22142250
if (loopLeaf) {
22152251
symTable.pushScope();
2216-
// TODO: Use one DataSharingProcessor for each leaf of a composite
2217-
// construct.
2218-
loopDsp.emplace(converter, semaCtx, item->clauses, eval,
2219-
/*shouldCollectPreDeterminedSymbols=*/true,
2220-
/*useDelayedPrivatization=*/false, &symTable);
2221-
loopDsp->processStep1();
2222-
22232252
if (genOMPCompositeDispatch(converter, symTable, semaCtx, eval, loc, queue,
2224-
item, *loopDsp)) {
2253+
item)) {
22252254
symTable.popScope();
22262255
return;
22272256
}
@@ -2233,11 +2262,10 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
22332262
break;
22342263
case llvm::omp::Directive::OMPD_distribute:
22352264
genStandaloneDistribute(converter, symTable, semaCtx, eval, loc, queue,
2236-
item, *loopDsp);
2265+
item);
22372266
break;
22382267
case llvm::omp::Directive::OMPD_do:
2239-
genStandaloneDo(converter, symTable, semaCtx, eval, loc, queue, item,
2240-
*loopDsp);
2268+
genStandaloneDo(converter, symTable, semaCtx, eval, loc, queue, item);
22412269
break;
22422270
case llvm::omp::Directive::OMPD_loop:
22432271
TODO(loc, "Unhandled directive " + llvm::omp::getOpenMPDirectiveName(dir));
@@ -2266,8 +2294,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
22662294
// in genBodyOfOp
22672295
break;
22682296
case llvm::omp::Directive::OMPD_simd:
2269-
genStandaloneSimd(converter, symTable, semaCtx, eval, loc, queue, item,
2270-
*loopDsp);
2297+
genStandaloneSimd(converter, symTable, semaCtx, eval, loc, queue, item);
22712298
break;
22722299
case llvm::omp::Directive::OMPD_single:
22732300
genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item);
@@ -2297,8 +2324,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
22972324
genTaskgroupOp(converter, symTable, semaCtx, eval, loc, queue, item);
22982325
break;
22992326
case llvm::omp::Directive::OMPD_taskloop:
2300-
genStandaloneTaskloop(converter, symTable, semaCtx, eval, loc, queue, item,
2301-
*loopDsp);
2327+
genStandaloneTaskloop(converter, symTable, semaCtx, eval, loc, queue, item);
23022328
break;
23032329
case llvm::omp::Directive::OMPD_taskwait:
23042330
genTaskwaitOp(converter, symTable, semaCtx, eval, loc, queue, item);

flang/test/Lower/OpenMP/parallel-reduction3.f90

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -69,19 +69,19 @@
6969
! CHECK: %[[VAL_13:.*]] = arith.constant 0 : i32
7070
! CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_12]]#0 : i32, !fir.box<!fir.array<?xi32>>
7171
! CHECK: omp.parallel {
72-
! CHECK: %[[VAL_14:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
73-
! CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_14]] {uniq_name = "_QFsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
74-
! CHECK: %[[VAL_16:.*]] = fir.alloca !fir.box<!fir.array<?xi32>>
75-
! CHECK: fir.store %[[VAL_12]]#0 to %[[VAL_16]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
72+
! CHECK: %[[VAL_14:.*]] = fir.alloca !fir.box<!fir.array<?xi32>>
73+
! CHECK: fir.store %[[VAL_12]]#0 to %[[VAL_14]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
74+
! CHECK: %[[VAL_15:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
75+
! CHECK: %[[VAL_16:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
7676
! CHECK: %[[VAL_17:.*]] = arith.constant 1 : i32
7777
! CHECK: %[[VAL_18:.*]] = arith.constant 100 : i32
7878
! CHECK: %[[VAL_19:.*]] = arith.constant 1 : i32
79-
! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_Uxi32 %[[VAL_16]] -> %[[VAL_20:.*]] : !fir.ref<!fir.box<!fir.array<?xi32>>>) {
79+
! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_Uxi32 %[[VAL_14]] -> %[[VAL_20:.*]] : !fir.ref<!fir.box<!fir.array<?xi32>>>) {
8080
! CHECK-NEXT: omp.loop_nest (%[[VAL_21:.*]]) : i32 = (%[[VAL_17]]) to (%[[VAL_18]]) inclusive step (%[[VAL_19]]) {
8181
! CHECK: %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFsEc"} : (!fir.ref<!fir.box<!fir.array<?xi32>>>) -> (!fir.ref<!fir.box<!fir.array<?xi32>>>, !fir.ref<!fir.box<!fir.array<?xi32>>>)
82-
! CHECK: fir.store %[[VAL_21]] to %[[VAL_15]]#1 : !fir.ref<i32>
82+
! CHECK: fir.store %[[VAL_21]] to %[[VAL_16]]#1 : !fir.ref<i32>
8383
! CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref<!fir.box<!fir.array<?xi32>>>
84-
! CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
84+
! CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_16]]#0 : !fir.ref<i32>
8585
! CHECK: %[[VAL_25:.*]] = arith.constant 0 : index
8686
! CHECK: %[[VAL_26:.*]]:3 = fir.box_dims %[[VAL_23]], %[[VAL_25]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
8787
! CHECK: %[[VAL_27:.*]] = fir.shape %[[VAL_26]]#1 : (index) -> !fir.shape<1>

0 commit comments

Comments
 (0)