Skip to content

Commit dd2880a

Browse files
committed
[flang][OpenMP] Delayed privatization MLIR lowering support for distribute
Starts delayed privatization support for standalone `distribute` directives. Other flavours of `distribute`, as well as MLIR to LLVM IR lowering, are still TODO.
1 parent 02711a7 commit dd2880a

File tree

3 files changed

+105
-36
lines changed

3 files changed

+105
-36
lines changed

flang/lib/Lower/OpenMP/OpenMP.cpp

Lines changed: 59 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -485,6 +485,33 @@ mergePrivateVarsInfo(OMPOp op, llvm::ArrayRef<InfoTy> currentList,
485485
infoAccessor);
486486
}
487487

488+
static void
489+
bindSymbolsToRegionArgs(lower::AbstractConverter &converter, mlir::Location loc,
490+
llvm::ArrayRef<const semantics::Symbol *> symbols,
491+
mlir::Region &region, unsigned regionBeginArgIdx) {
492+
assert(regionBeginArgIdx + symbols.size() <= region.getNumArguments());
493+
for (const semantics::Symbol *arg : symbols) {
494+
auto bind = [&](const semantics::Symbol *sym) {
495+
mlir::BlockArgument blockArg = region.getArgument(regionBeginArgIdx);
496+
++regionBeginArgIdx;
497+
converter.bindSymbol(
498+
*sym,
499+
hlfir::translateToExtendedValue(
500+
loc, converter.getFirOpBuilder(), hlfir::Entity{blockArg},
501+
/*contiguousHint=*/
502+
evaluate::IsSimplyContiguous(*sym, converter.getFoldingContext()))
503+
.first);
504+
};
505+
506+
if (const auto *commonDet =
507+
arg->detailsIf<semantics::CommonBlockDetails>()) {
508+
for (const auto &mem : commonDet->objects())
509+
bind(&*mem);
510+
} else
511+
bind(arg);
512+
}
513+
}
514+
488515
//===----------------------------------------------------------------------===//
489516
// Op body generation helper structures and functions
490517
//===----------------------------------------------------------------------===//
@@ -1493,28 +1520,7 @@ genParallelOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
14931520
llvm::SmallVector<const semantics::Symbol *> allSymbols(reductionSyms);
14941521
allSymbols.append(dsp->getDelayedPrivSymbols().begin(),
14951522
dsp->getDelayedPrivSymbols().end());
1496-
1497-
unsigned argIdx = 0;
1498-
for (const semantics::Symbol *arg : allSymbols) {
1499-
auto bind = [&](const semantics::Symbol *sym) {
1500-
mlir::BlockArgument blockArg = region.getArgument(argIdx);
1501-
++argIdx;
1502-
converter.bindSymbol(*sym,
1503-
hlfir::translateToExtendedValue(
1504-
loc, firOpBuilder, hlfir::Entity{blockArg},
1505-
/*contiguousHint=*/
1506-
evaluate::IsSimplyContiguous(
1507-
*sym, converter.getFoldingContext()))
1508-
.first);
1509-
};
1510-
1511-
if (const auto *commonDet =
1512-
arg->detailsIf<semantics::CommonBlockDetails>()) {
1513-
for (const auto &mem : commonDet->objects())
1514-
bind(&*mem);
1515-
} else
1516-
bind(arg);
1517-
}
1523+
bindSymbolsToRegionArgs(converter, loc, allSymbols, region, 0);
15181524

15191525
return allSymbols;
15201526
};
@@ -1681,7 +1687,6 @@ genTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
16811687
mapTypes, deviceAddrSyms, deviceAddrLocs, deviceAddrTypes,
16821688
devicePtrSyms, devicePtrLocs, devicePtrTypes);
16831689

1684-
llvm::SmallVector<const semantics::Symbol *> privateSyms;
16851690
DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
16861691
/*shouldCollectPreDeterminedSymbols=*/
16871692
lower::omp::isLastItemInQueue(item, queue),
@@ -1932,22 +1937,49 @@ static void genStandaloneDistribute(lower::AbstractConverter &converter,
19321937
ConstructQueue::const_iterator item) {
19331938
lower::StatementContext stmtCtx;
19341939

1940+
auto teamsOp = mlir::cast<mlir::omp::TeamsOp>(
1941+
converter.getFirOpBuilder().getInsertionBlock()->getParentOp());
19351942
mlir::omp::DistributeOperands distributeClauseOps;
19361943
genDistributeClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
19371944
distributeClauseOps);
19381945

1939-
// TODO: Support delayed privatization.
1946+
// Privatization for a `distribute` directive is done in the `teams` region to
1947+
// which the directive binds. Therefore, all privatization logic (delayed as
1948+
// well as early) happens **before** the `distribute` op is generated (i.e.
1949+
// inside the parent `teams` op).
19401950
DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
19411951
/*shouldCollectPreDeterminedSymbols=*/true,
1942-
/*useDelayedPrivatization=*/false, &symTable);
1943-
dsp.processStep1();
1952+
enableDelayedPrivatizationStaging, &symTable);
1953+
mlir::omp::PrivateClauseOps privateClauseOps;
1954+
dsp.processStep1(&privateClauseOps);
1955+
1956+
if (enableDelayedPrivatizationStaging) {
1957+
mlir::Region &teamsRegion = teamsOp.getRegion();
1958+
unsigned privateVarsArgIdx = teamsRegion.getNumArguments();
1959+
llvm::SmallVector<mlir::Type> privateVarTypes;
1960+
llvm::SmallVector<mlir::Location> privateVarLocs;
1961+
1962+
for (mlir::Value privateVar : privateClauseOps.privateVars) {
1963+
privateVarTypes.push_back(privateVar.getType());
1964+
privateVarLocs.push_back(privateVar.getLoc());
1965+
teamsOp.getPrivateVarsMutable().append(privateVar);
1966+
}
1967+
1968+
teamsOp.setPrivateSymsAttr(
1969+
converter.getFirOpBuilder().getArrayAttr(privateClauseOps.privateSyms));
1970+
teamsRegion.addArguments(privateVarTypes, privateVarLocs);
1971+
1972+
llvm::ArrayRef<const semantics::Symbol *> delayedPrivSyms =
1973+
dsp.getDelayedPrivSymbols();
1974+
bindSymbolsToRegionArgs(converter, loc, delayedPrivSyms, teamsRegion,
1975+
privateVarsArgIdx);
1976+
}
19441977

19451978
mlir::omp::LoopNestOperands loopNestClauseOps;
19461979
llvm::SmallVector<const semantics::Symbol *> iv;
19471980
genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
19481981
loopNestClauseOps, iv);
19491982

1950-
// TODO: Populate entry block arguments with private variables.
19511983
auto distributeOp = genWrapperOp<mlir::omp::DistributeOp>(
19521984
converter, loc, distributeClauseOps, /*blockArgTypes=*/{});
19531985

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization-staging \
2+
! RUN: -o - %s 2>&1 | FileCheck %s
3+
! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization-staging -o - %s 2>&1 \
4+
! RUN: | FileCheck %s
5+
6+
subroutine standalone_distribute
7+
implicit none
8+
integer :: simple_var, i
9+
10+
!$omp teams
11+
!$omp distribute private(simple_var)
12+
do i = 1, 10
13+
simple_var = simple_var + i
14+
end do
15+
!$omp end distribute
16+
!$omp end teams
17+
end subroutine standalone_distribute
18+
19+
! CHECK: omp.private {type = private} @[[I_PRIVATIZER_SYM:.*]] : !fir.ref<i32>
20+
! CHECK: omp.private {type = private} @[[VAR_PRIVATIZER_SYM:.*]] : !fir.ref<i32>
21+
22+
23+
! CHECK-LABEL: func.func @_QPstandalone_distribute() {
24+
! CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFstandalone_distributeEi"}
25+
! CHECK: %[[VAR_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFstandalone_distributeEsimple_var"}
26+
! CHECK: omp.teams
27+
! CHECK-SAME: private(@[[VAR_PRIVATIZER_SYM]] %[[VAR_DECL]]#0 -> %[[VAR_ARG:.*]] : !fir.ref<i32>,
28+
! CHECK-SAME: @[[I_PRIVATIZER_SYM]] %[[I_DECL]]#0 -> %[[I_ARG:.*]] : !fir.ref<i32>) {
29+
! CHECK: %[[VAR_PRIV_DECL:.*]]:2 = hlfir.declare %[[VAR_ARG]]
30+
! CHECK: %[[I_PRIV_DECL:.*]]:2 = hlfir.declare %[[I_ARG]]
31+
! CHECK: omp.distribute {
32+
! CHECK: omp.loop_nest {{.*}} {
33+
! CHECK: fir.store %{{.*}} to %[[I_PRIV_DECL]]#1 : !fir.ref<i32>
34+
! CHECK: %{{.*}} = fir.load %[[VAR_PRIV_DECL]]#0 : !fir.ref<i32>
35+
! CHECK: %{{.*}} = fir.load %[[I_PRIV_DECL]]#0 : !fir.ref<i32>
36+
! CHECK: arith.addi %{{.*}}, %{{.*}} : i32
37+
! CHECK: hlfir.assign %{{.*}} to %[[VAR_PRIV_DECL]]#0 : i32, !fir.ref<i32>
38+
! CHECK: }
39+
! CHECK: }
40+
! CHECK: }
41+
! CHECK: }

mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1225,17 +1225,13 @@ parsePrivateList(OpAsmParser &parser,
12251225
}
12261226

12271227
static void printPrivateList(OpAsmPrinter &p, Operation *op,
1228-
ValueRange privateVars, TypeRange privateTypes,
1229-
ArrayAttr privateSyms) {
1230-
// TODO: Remove target-specific logic from this function.
1231-
auto targetOp = mlir::dyn_cast<mlir::omp::TargetOp>(op);
1232-
assert(targetOp);
1233-
1228+
Operation::operand_range privateVars,
1229+
TypeRange privateTypes, ArrayAttr privateSyms) {
12341230
auto &region = op->getRegion(0);
12351231
auto *argsBegin = region.front().getArguments().begin();
1236-
MutableArrayRef argsSubrange(argsBegin + targetOp.getMapVars().size(),
1237-
argsBegin + targetOp.getMapVars().size() +
1238-
privateTypes.size());
1232+
MutableArrayRef argsSubrange(argsBegin + privateVars.getBeginOperandIndex(),
1233+
argsBegin + privateVars.getBeginOperandIndex() +
1234+
privateVars.size());
12391235
mlir::SmallVector<bool> isByRefVec;
12401236
isByRefVec.resize(privateTypes.size(), false);
12411237
DenseBoolArrayAttr isByRef =

0 commit comments

Comments
 (0)