Skip to content

Commit 6f86d2c

Browse files
authored
[flang][OpenMP] Add support for target ... private
Adds support for the `private` clause in the `target` directive. In order to support that, the `DataSharingProcessor` was also restructured in order to separate the collection of prviate symbols from their actual privatization code-gen. The commit adds both a code-gen and an offloading tests.
2 parents cdc03d8 + aeed526 commit 6f86d2c

File tree

5 files changed

+141
-30
lines changed

5 files changed

+141
-30
lines changed

flang/lib/Lower/OpenMP/DataSharingProcessor.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,20 @@ namespace omp {
2626
void DataSharingProcessor::processStep1() {
2727
collectSymbolsForPrivatization();
2828
collectDefaultSymbols();
29+
}
30+
31+
void DataSharingProcessor::processStep2() {
32+
if (privatizationDone)
33+
return;
34+
2935
privatize();
3036
defaultPrivatize();
3137
insertBarrier();
38+
39+
privatizationDone = true;
3240
}
3341

34-
void DataSharingProcessor::processStep2(mlir::Operation *op, bool isLoop) {
42+
void DataSharingProcessor::processStep3(mlir::Operation *op, bool isLoop) {
3543
insPt = firOpBuilder.saveInsertionPoint();
3644
copyLastPrivatize(op);
3745
firOpBuilder.restoreInsertionPoint(insPt);

flang/lib/Lower/OpenMP/DataSharingProcessor.h

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ class DataSharingProcessor {
5454
fir::FirOpBuilder &firOpBuilder;
5555
const Fortran::parser::OmpClauseList &opClauseList;
5656
Fortran::lower::pft::Evaluation &eval;
57+
bool privatizationDone = false;
58+
5759
bool useDelayedPrivatization;
5860
Fortran::lower::SymMap *symTable;
5961
DelayedPrivatizationInfo delayedPrivatizationInfo;
@@ -90,25 +92,44 @@ class DataSharingProcessor {
9092
eval(eval), useDelayedPrivatization(useDelayedPrivatization),
9193
symTable(symTable) {}
9294

93-
// Privatisation is split into two steps.
94-
// Step1 performs cloning of all privatisation clauses and copying for
95-
// firstprivates. Step1 is performed at the place where process/processStep1
95+
// Privatisation is split into 3 steps:
96+
//
97+
// * Step1: collects all symbols that should be privatized.
98+
//
99+
// * Step2: performs cloning of all privatisation clauses and copying for
100+
// firstprivates. Step2 is performed at the place where process/processStep2
96101
// is called. This is usually inside the Operation corresponding to the OpenMP
97-
// construct, for looping constructs this is just before the Operation. The
98-
// split into two steps was performed basically to be able to call
99-
// privatisation for looping constructs before the operation is created since
100-
// the bounds of the MLIR OpenMP operation can be privatised.
101-
// Step2 performs the copying for lastprivates and requires knowledge of the
102-
// MLIR operation to insert the last private update. Step2 adds
102+
// construct, for looping constructs this is just before the Operation.
103+
//
104+
// * Step3: performs the copying for lastprivates and requires knowledge of
105+
// the MLIR operation to insert the last private update. Step3 adds
103106
// dealocation code as well.
107+
//
108+
// The split was performed for the following reasons:
109+
//
110+
// 1. Step1 was split so that the `target` op knows which symbols should not
111+
// be mapped into the target region due to being `private`. The implicit
112+
// mapping happens before the op body is generated so we need to to collect
113+
// the private symbols first and then later in the body actually privatize
114+
// them.
115+
//
116+
// 2. Step2 was split in order to call privatisation for looping constructs
117+
// before the operation is created since the bounds of the MLIR OpenMP
118+
// operation can be privatised.
104119
void processStep1();
105-
void processStep2(mlir::Operation *op, bool isLoop);
120+
void processStep2();
121+
void processStep3(mlir::Operation *op, bool isLoop);
106122

107123
void setLoopIV(mlir::Value iv) {
108124
assert(!loopIV && "Loop iteration variable already set");
109125
loopIV = iv;
110126
}
111127

128+
const llvm::SetVector<const Fortran::semantics::Symbol *> &
129+
getPrivatizedSymbols() const {
130+
return privatizedSymbols;
131+
}
132+
112133
const DelayedPrivatizationInfo &getDelayedPrivatizationInfo() const {
113134
return delayedPrivatizationInfo;
114135
}

flang/lib/Lower/OpenMP/OpenMP.cpp

Lines changed: 42 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -520,6 +520,7 @@ static void createBodyOfOp(Op &op, OpWithBodyGenInfo &info) {
520520
if (!info.dsp) {
521521
tempDsp.emplace(info.converter, *info.clauses, info.eval);
522522
tempDsp->processStep1();
523+
tempDsp->processStep2();
523524
}
524525
}
525526

@@ -593,11 +594,11 @@ static void createBodyOfOp(Op &op, OpWithBodyGenInfo &info) {
593594
if (privatize) {
594595
if (!info.dsp) {
595596
assert(tempDsp.has_value());
596-
tempDsp->processStep2(op, isLoop);
597+
tempDsp->processStep3(op, isLoop);
597598
} else {
598599
if (isLoop && regionArgs.size() > 0)
599600
info.dsp->setLoopIV(info.converter.getSymbolAddress(*regionArgs[0]));
600-
info.dsp->processStep2(op, isLoop);
601+
info.dsp->processStep3(op, isLoop);
601602
}
602603
}
603604
}
@@ -816,8 +817,11 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
816817
DataSharingProcessor dsp(converter, clauseList, eval,
817818
/*useDelayedPrivatization=*/true, &symTable);
818819

819-
if (privatize)
820+
if (privatize) {
820821
dsp.processStep1();
822+
dsp.processStep2();
823+
}
824+
821825

822826
const auto &delayedPrivatizationInfo = dsp.getDelayedPrivatizationInfo();
823827

@@ -1093,7 +1097,9 @@ static void genBodyOfTargetOp(
10931097
const llvm::SmallVector<mlir::Type> &mapSymTypes,
10941098
const llvm::SmallVector<mlir::Location> &mapSymLocs,
10951099
const llvm::SmallVector<const Fortran::semantics::Symbol *> &mapSymbols,
1096-
const mlir::Location &currentLocation) {
1100+
const mlir::Location &currentLocation,
1101+
const Fortran::parser::OmpClauseList &clauseList,
1102+
DataSharingProcessor &dsp) {
10971103
assert(mapSymTypes.size() == mapSymLocs.size());
10981104

10991105
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
@@ -1102,6 +1108,8 @@ static void genBodyOfTargetOp(
11021108
auto *regionBlock =
11031109
firOpBuilder.createBlock(&region, {}, mapSymTypes, mapSymLocs);
11041110

1111+
dsp.processStep2();
1112+
11051113
// Clones the `bounds` placing them inside the target region and returns them.
11061114
auto cloneBound = [&](mlir::Value bound) {
11071115
if (mlir::isMemoryEffectFree(bound.getDefiningOp())) {
@@ -1243,7 +1251,8 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
12431251
Fortran::lower::pft::Evaluation &eval, bool genNested,
12441252
mlir::Location currentLocation,
12451253
const Fortran::parser::OmpClauseList &clauseList,
1246-
llvm::omp::Directive directive, bool outerCombined = false) {
1254+
llvm::omp::Directive directive, bool outerCombined = false,
1255+
DataSharingProcessor *dsp = nullptr) {
12471256
Fortran::lower::StatementContext stmtCtx;
12481257
mlir::Value ifClauseOperand, deviceOperand, threadLimitOperand;
12491258
mlir::UnitAttr nowaitAttr;
@@ -1262,9 +1271,7 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
12621271
cp.processDepend(dependTypeOperands, dependOperands);
12631272
cp.processMap(currentLocation, directive, stmtCtx, mapOperands, &mapSymTypes,
12641273
&mapSymLocs, &mapSymbols);
1265-
1266-
cp.processTODO<Fortran::parser::OmpClause::Private,
1267-
Fortran::parser::OmpClause::Firstprivate,
1274+
cp.processTODO<Fortran::parser::OmpClause::Firstprivate,
12681275
Fortran::parser::OmpClause::IsDevicePtr,
12691276
Fortran::parser::OmpClause::HasDeviceAddr,
12701277
Fortran::parser::OmpClause::InReduction,
@@ -1273,6 +1280,10 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
12731280
Fortran::parser::OmpClause::Defaultmap>(
12741281
currentLocation, llvm::omp::Directive::OMPD_target);
12751282

1283+
DataSharingProcessor localDSP(converter, clauseList, eval);
1284+
DataSharingProcessor &actualDSP = dsp ? *dsp : localDSP;
1285+
actualDSP.processStep1();
1286+
12761287
// Process host-only clauses.
12771288
if (!llvm::cast<mlir::omp::OffloadModuleInterface>(*converter.getModuleOp())
12781289
.getIsTargetDevice())
@@ -1283,9 +1294,14 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
12831294

12841295
// 5.8.1 Implicit Data-Mapping Attribute Rules
12851296
// The following code follows the implicit data-mapping rules to map all the
1286-
// symbols used inside the region that have not been explicitly mapped using
1287-
// the map clause.
1297+
// symbols used inside the region that do not have explicit data-environment
1298+
// attribute clauses (neither data-sharing; e.g. `private`, nor `map`
1299+
// clauses).
12881300
auto captureImplicitMap = [&](const Fortran::semantics::Symbol &sym) {
1301+
if (actualDSP.getPrivatizedSymbols().contains(&sym)) {
1302+
return;
1303+
}
1304+
12891305
if (llvm::find(mapSymbols, &sym) == mapSymbols.end()) {
12901306
mlir::Value baseOp = converter.getSymbolAddress(sym);
12911307
if (!baseOp)
@@ -1383,7 +1399,8 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
13831399
/*teams_thread_limit=*/nullptr, /*num_threads=*/nullptr);
13841400

13851401
genBodyOfTargetOp(converter, semaCtx, eval, genNested, targetOp, mapSymTypes,
1386-
mapSymLocs, mapSymbols, currentLocation);
1402+
mapSymLocs, mapSymbols, currentLocation, clauseList,
1403+
actualDSP);
13871404

13881405
return targetOp;
13891406
}
@@ -1459,7 +1476,8 @@ genDistributeOp(Fortran::lower::AbstractConverter &converter,
14591476
Fortran::lower::pft::Evaluation &eval, bool genNested,
14601477
mlir::Location currentLocation,
14611478
const Fortran::parser::OmpClauseList &clauseList,
1462-
bool outerCombined = false) {
1479+
bool outerCombined = false,
1480+
DataSharingProcessor *dsp = nullptr) {
14631481
// TODO Process clauses
14641482
// ClauseProcessor cp(converter, clauseList);
14651483
// cp.processAllocate(allocatorOperands, allocateOperands);
@@ -1469,7 +1487,8 @@ genDistributeOp(Fortran::lower::AbstractConverter &converter,
14691487
OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval)
14701488
.setGenNested(genNested)
14711489
.setOuterCombined(outerCombined)
1472-
.setClauses(&clauseList),
1490+
.setClauses(&clauseList)
1491+
.setDataSharingProcessor(dsp),
14731492
/*dist_schedule_static=*/nullptr,
14741493
/*chunk_size=*/nullptr,
14751494
/*allocate_vars=*/mlir::ValueRange(),
@@ -1780,6 +1799,7 @@ createSimdLoop(Fortran::lower::AbstractConverter &converter,
17801799
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
17811800
DataSharingProcessor dsp(converter, loopOpClauseList, eval);
17821801
dsp.processStep1();
1802+
dsp.processStep2();
17831803

17841804
Fortran::lower::StatementContext stmtCtx;
17851805
mlir::Value scheduleChunkClauseOperand, ifClauseOperand;
@@ -1836,10 +1856,10 @@ static void createWsLoop(Fortran::lower::AbstractConverter &converter,
18361856
llvm::omp::Directive ompDirective,
18371857
const Fortran::parser::OmpClauseList &beginClauseList,
18381858
const Fortran::parser::OmpClauseList *endClauseList,
1839-
mlir::Location loc) {
1859+
mlir::Location loc, DataSharingProcessor &dsp) {
18401860
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
1841-
DataSharingProcessor dsp(converter, beginClauseList, eval);
18421861
dsp.processStep1();
1862+
dsp.processStep2();
18431863

18441864
Fortran::lower::StatementContext stmtCtx;
18451865
mlir::Value scheduleChunkClauseOperand;
@@ -1962,8 +1982,9 @@ static void createSimdWsLoop(
19621982
// When support for vectorization is enabled, then we need to add handling of
19631983
// if clause. Currently if clause can be skipped because we always assume
19641984
// SIMD length = 1.
1985+
DataSharingProcessor dsp(converter, beginClauseList, eval);
19651986
createWsLoop(converter, semaCtx, eval, ompDirective, beginClauseList,
1966-
endClauseList, loc);
1987+
endClauseList, loc, dsp);
19671988
}
19681989

19691990
static void genOMP(Fortran::lower::AbstractConverter &converter,
@@ -1992,6 +2013,8 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
19922013
}();
19932014

19942015
bool validDirective = false;
2016+
DataSharingProcessor dsp(converter, loopOpClauseList, eval);
2017+
19952018
if (llvm::omp::topTaskloopSet.test(ompDirective)) {
19962019
validDirective = true;
19972020
TODO(currentLocation, "Taskloop construct");
@@ -2002,7 +2025,7 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
20022025
validDirective = true;
20032026
genTargetOp(converter, semaCtx, eval, /*genNested=*/false,
20042027
currentLocation, loopOpClauseList, ompDirective,
2005-
/*outerCombined=*/true);
2028+
/*outerCombined=*/true, &dsp);
20062029
}
20072030
if ((llvm::omp::allTeamsSet & llvm::omp::loopConstructSet)
20082031
.test(ompDirective)) {
@@ -2014,7 +2037,7 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
20142037
validDirective = true;
20152038
bool outerCombined = llvm::omp::topDistributeSet.test(ompDirective);
20162039
genDistributeOp(converter, semaCtx, eval, /*genNested=*/false,
2017-
currentLocation, loopOpClauseList, outerCombined);
2040+
currentLocation, loopOpClauseList, outerCombined, &dsp);
20182041
}
20192042
if ((llvm::omp::allParallelSet & llvm::omp::loopConstructSet)
20202043
.test(ompDirective)) {
@@ -2045,7 +2068,7 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
20452068
genOpenMPReduction(converter, semaCtx, loopOpClauseList);
20462069
} else {
20472070
createWsLoop(converter, semaCtx, eval, ompDirective, loopOpClauseList,
2048-
endClauseList, currentLocation);
2071+
endClauseList, currentLocation, dsp);
20492072
}
20502073
}
20512074

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
!Test data-sharing attribute clauses for the `target` directive.
2+
3+
!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
4+
5+
!CHECK-LABEL: func.func @_QPomp_target_private()
6+
subroutine omp_target_private
7+
implicit none
8+
integer :: x(1)
9+
10+
!$omp target private(x)
11+
x(1) = 42
12+
!$omp end target
13+
!CHECK: omp.target {
14+
!CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
15+
!CHECK-DAG: %[[PRIV_ALLOC:.*]] = fir.alloca !fir.array<1xi32> {bindc_name = "x",
16+
!CHECK-SAME: pinned, uniq_name = "_QFomp_target_privateEx"}
17+
!CHECK-NEXT: %[[SHAPE:.*]] = fir.shape %[[C1]] : (index) -> !fir.shape<1>
18+
!CHECK-NEXT: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]](%[[SHAPE]])
19+
!CHECK-SAME: {uniq_name = "_QFomp_target_privateEx"} :
20+
!CHECK-SAME: (!fir.ref<!fir.array<1xi32>>, !fir.shape<1>) ->
21+
!CHECK-SAME: (!fir.ref<!fir.array<1xi32>>, !fir.ref<!fir.array<1xi32>>)
22+
!CHECK-DAG: %[[C42:.*]] = arith.constant 42 : i32
23+
!CHECK-DAG: %[[C1_2:.*]] = arith.constant 1 : index
24+
!CHECK-NEXT: %[[PRIV_BINDING:.*]] = hlfir.designate %[[PRIV_DECL]]#0 (%[[C1_2]])
25+
!CHECK-SAME: : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
26+
!CHECK-NEXT: hlfir.assign %[[C42]] to %[[PRIV_BINDING]] : i32, !fir.ref<i32>
27+
!CHECK-NEXT: omp.terminator
28+
!CHECK-NEXT: }
29+
30+
end subroutine omp_target_private
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
! Basic offloading test with a target region
2+
! REQUIRES: flang
3+
! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
4+
! UNSUPPORTED: aarch64-unknown-linux-gnu
5+
! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
6+
! UNSUPPORTED: x86_64-pc-linux-gnu
7+
! UNSUPPORTED: x86_64-pc-linux-gnu-LTO
8+
9+
! RUN: %libomptarget-compile-fortran-generic
10+
! RUN: env LIBOMPTARGET_INFO=16 %libomptarget-run-generic 2>&1 | %fcheck-generic
11+
program target_update
12+
implicit none
13+
integer :: x(1)
14+
integer :: y(1)
15+
16+
x(1) = 42
17+
18+
!$omp target private(x) map(tofrom: y)
19+
x(1) = 84
20+
y(1) = x(1)
21+
!$omp end target
22+
23+
print *, "x =", x(1)
24+
print *, "y =", y(1)
25+
26+
end program target_update
27+
! CHECK: "PluginInterface" device {{[0-9]+}} info: Launching kernel {{.*}}
28+
! CHECK: x = 42
29+
! CHECK: y = 84

0 commit comments

Comments
 (0)