
Commit cb45cb0

[Flang][Lower] Add lowering support of OpenMP distribute to MLIR
This patch adds support for lowering the OpenMP DISTRIBUTE directive from PFT to MLIR. It only supports the standalone DISTRIBUTE construct; support for composite constructs will come in follow-up PRs.
1 parent: 1494d88
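
For orientation before the diff: after this change, a standalone DISTRIBUTE nested in TEAMS lowers to an `omp.distribute` wrapper op holding an `omp.loop_nest`, as exercised by the new test added below. A minimal sketch of the resulting IR shape, with illustrative bounds and value names rather than output copied from the tests:

```mlir
omp.teams {
  omp.distribute {
    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) inclusive step (%step) {
      // ... lowered body of the Fortran do loop ...
      omp.yield
    }
    omp.terminator
  }
  omp.terminator
}
```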

File tree: 6 files changed (+373, -9 lines)


flang/lib/Lower/OpenMP/ClauseProcessor.cpp

Lines changed: 14 additions & 0 deletions
@@ -320,6 +320,20 @@ bool ClauseProcessor::processDeviceType(
   return false;
 }
 
+bool ClauseProcessor::processDistSchedule(
+    Fortran::lower::StatementContext &stmtCtx,
+    mlir::omp::DistScheduleClauseOps &result) const {
+  if (auto *clause = findUniqueClause<omp::clause::DistSchedule>()) {
+    result.distScheduleStaticAttr = converter.getFirOpBuilder().getUnitAttr();
+    const auto &chunkSize = std::get<std::optional<ExprTy>>(clause->t);
+    if (chunkSize)
+      result.distScheduleChunkSizeVar =
+          fir::getBase(converter.genExprValue(*chunkSize, stmtCtx));
+    return true;
+  }
+  return false;
+}
+
 bool ClauseProcessor::processFinal(Fortran::lower::StatementContext &stmtCtx,
                                    mlir::omp::FinalClauseOps &result) const {
   const Fortran::parser::CharBlock *source = nullptr;
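
`processDistSchedule` above always sets the unit attribute for a static schedule (STATIC is the only kind the DIST_SCHEDULE clause accepts) and attaches a chunk-size operand only when the optional chunk expression is present. A hedged sketch of the two printed forms this produces on the wrapper op, with illustrative SSA names:

```mlir
// dist_schedule(static, 5): static attribute plus a chunk-size operand.
%chunk = arith.constant 5 : i32
omp.distribute dist_schedule_static chunk_size(%chunk : i32) {
  // ...
}

// dist_schedule(static): static attribute only, no chunk-size operand.
omp.distribute dist_schedule_static {
  // ...
}
```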

flang/lib/Lower/OpenMP/ClauseProcessor.h

Lines changed: 2 additions & 0 deletions
@@ -61,6 +61,8 @@ class ClauseProcessor {
   bool processDevice(Fortran::lower::StatementContext &stmtCtx,
                      mlir::omp::DeviceClauseOps &result) const;
   bool processDeviceType(mlir::omp::DeviceTypeClauseOps &result) const;
+  bool processDistSchedule(Fortran::lower::StatementContext &stmtCtx,
+                           mlir::omp::DistScheduleClauseOps &result) const;
   bool processFinal(Fortran::lower::StatementContext &stmtCtx,
                     mlir::omp::FinalClauseOps &result) const;
   bool

flang/lib/Lower/OpenMP/OpenMP.cpp

Lines changed: 50 additions & 2 deletions
@@ -1009,6 +1009,18 @@ genCriticalDeclareClauses(Fortran::lower::AbstractConverter &converter,
       mlir::StringAttr::get(converter.getFirOpBuilder().getContext(), name);
 }
 
+static void genDistributeClauses(Fortran::lower::AbstractConverter &converter,
+                                 Fortran::semantics::SemanticsContext &semaCtx,
+                                 Fortran::lower::StatementContext &stmtCtx,
+                                 const List<Clause> &clauses,
+                                 mlir::Location loc,
+                                 mlir::omp::DistributeClauseOps &clauseOps) {
+  ClauseProcessor cp(converter, semaCtx, clauses);
+  cp.processAllocate(clauseOps);
+  cp.processDistSchedule(stmtCtx, clauseOps);
+  // TODO Support delayed privatization.
+}
+
 static void genFlushClauses(Fortran::lower::AbstractConverter &converter,
                             Fortran::semantics::SemanticsContext &semaCtx,
                             const ObjectList &objects,
@@ -1317,8 +1329,44 @@ genDistributeOp(Fortran::lower::AbstractConverter &converter,
                 Fortran::semantics::SemanticsContext &semaCtx,
                 Fortran::lower::pft::Evaluation &eval, bool genNested,
                 mlir::Location loc, const List<Clause> &clauses) {
-  TODO(loc, "Distribute construct");
-  return nullptr;
+  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+  DataSharingProcessor dsp(converter, semaCtx, clauses, eval);
+  dsp.processStep1();
+
+  Fortran::lower::StatementContext stmtCtx;
+  mlir::omp::LoopNestClauseOps loopClauseOps;
+  mlir::omp::DistributeClauseOps distributeClauseOps;
+  llvm::SmallVector<const Fortran::semantics::Symbol *> iv;
+  genLoopNestClauses(converter, semaCtx, eval, clauses, loc, loopClauseOps, iv);
+  genDistributeClauses(converter, semaCtx, stmtCtx, clauses, loc,
+                       distributeClauseOps);
+
+  // Create omp.distribute wrapper.
+  auto distributeOp =
+      firOpBuilder.create<mlir::omp::DistributeOp>(loc, distributeClauseOps);
+
+  firOpBuilder.createBlock(&distributeOp.getRegion());
+  firOpBuilder.setInsertionPoint(
+      Fortran::lower::genOpenMPTerminator(firOpBuilder, distributeOp, loc));
+
+  // Create nested omp.loop_nest and fill body with loop contents.
+  auto loopOp = firOpBuilder.create<mlir::omp::LoopNestOp>(loc, loopClauseOps);
+
+  auto *nestedEval = getCollapsedLoopEval(eval, getCollapseValue(clauses));
+
+  auto ivCallback = [&](mlir::Operation *op) {
+    genLoopVars(op, converter, loc, iv);
+    return iv;
+  };
+
+  createBodyOfOp(*loopOp,
+                 OpWithBodyGenInfo(converter, symTable, semaCtx, loc,
+                                   *nestedEval, llvm::omp::Directive::OMPD_simd)
+                     .setClauses(&clauses)
+                     .setDataSharingProcessor(&dsp)
+                     .setGenRegionEntryCb(ivCallback));
+
+  return distributeOp;
 }
 
 static mlir::omp::FlushOp
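
Note the construction order in `genDistributeOp`: the wrapper's block and terminator are created first, then the insertion point is moved in front of that terminator so the `omp.loop_nest` built next lands inside the wrapper region. A sketch mapping the builder calls to the emitted IR (names illustrative):

```mlir
omp.distribute {       // firOpBuilder.create<mlir::omp::DistributeOp>
  omp.loop_nest (%iv) : i32 = (%lb) to (%ub) inclusive step (%step) {
    // body filled in by createBodyOfOp
    omp.yield          // loop-nest terminator
  }
  omp.terminator       // from genOpenMPTerminator; the loop_nest is
}                      // inserted just before it
```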
Lines changed: 119 additions & 0 deletions
@@ -0,0 +1,119 @@
+! REQUIRES: openmp_runtime
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
+
+! CHECK-LABEL: func @_QPdistribute_simple
+subroutine distribute_simple()
+  ! CHECK: omp.teams
+  !$omp teams
+
+  ! CHECK: omp.distribute {
+  !$omp distribute
+
+  ! CHECK-NEXT: omp.loop_nest
+  do i = 1, 10
+    call foo()
+    ! CHECK: omp.yield
+  end do
+
+  ! CHECK: omp.terminator
+  !$omp end distribute
+
+  ! CHECK: omp.terminator
+  !$omp end teams
+end subroutine distribute_simple
+
+!===============================================================================
+! `dist_schedule` clause
+!===============================================================================
+
+! CHECK-LABEL: func @_QPdistribute_dist_schedule
+! CHECK-SAME: %[[X_ARG:.*]]: !fir.ref<i32>
+subroutine distribute_dist_schedule(x)
+  ! CHECK: %[[X_REF:.*]]:2 = hlfir.declare %[[X_ARG]]
+  integer, intent(in) :: x
+
+  ! CHECK: omp.teams
+  !$omp teams
+
+  ! STATIC SCHEDULE, CONSTANT CHUNK SIZE
+
+  ! CHECK: %[[CONST_CHUNK_SIZE:.*]] = arith.constant 5 : i32
+  ! CHECK: omp.distribute
+  ! CHECK-SAME: dist_schedule_static
+  ! CHECK-SAME: chunk_size(%[[CONST_CHUNK_SIZE]] : i32)
+  !$omp distribute dist_schedule(static, 5)
+
+  ! CHECK-NEXT: omp.loop_nest
+  do i = 1, 10
+    call foo()
+    ! CHECK: omp.yield
+  end do
+
+  ! CHECK: omp.terminator
+  !$omp end distribute
+
+  ! STATIC SCHEDULE, VARIABLE CHUNK SIZE
+
+  ! CHECK: %[[X:.*]] = fir.load %[[X_REF]]#0
+  ! CHECK: omp.distribute
+  ! CHECK-SAME: dist_schedule_static
+  ! CHECK-SAME: chunk_size(%[[X]] : i32)
+  !$omp distribute dist_schedule(static, x)
+
+  ! CHECK-NEXT: omp.loop_nest
+  do i = 1, 10
+    call foo()
+    ! CHECK: omp.yield
+  end do
+
+  ! CHECK: omp.terminator
+  !$omp end distribute
+
+  ! STATIC SCHEDULE, NO CHUNK SIZE
+
+  ! CHECK: omp.distribute
+  ! CHECK-SAME: dist_schedule_static
+  ! CHECK-NOT: chunk_size
+  !$omp distribute dist_schedule(static)
+
+  ! CHECK-NEXT: omp.loop_nest
+  do i = 1, 10
+    call foo()
+    ! CHECK: omp.yield
+  end do
+
+  ! CHECK: omp.terminator
+  !$omp end distribute
+
+  ! CHECK: omp.terminator
+  !$omp end teams
end subroutine distribute_dist_schedule
+
+!===============================================================================
+! `allocate` clause
+!===============================================================================
+
+! CHECK-LABEL: func @_QPdistribute_allocate
+subroutine distribute_allocate()
+  use omp_lib
+  integer :: x
+  ! CHECK: omp.teams
+  !$omp teams
+
+  ! CHECK: omp.distribute
+  ! CHECK-SAME: allocate(%{{.+}} : i32 -> %{{.+}} : !fir.ref<i32>)
+  !$omp distribute allocate(omp_high_bw_mem_alloc: x) private(x)
+
+  ! CHECK-NEXT: omp.loop_nest
+  do i = 1, 10
+    x = i
+    ! CHECK: omp.yield
+  end do
+
+  ! CHECK: omp.terminator
+  !$omp end distribute
+
+  ! CHECK: omp.terminator
+  !$omp end teams
+end subroutine distribute_allocate
