Skip to content

Commit c2f0ece

Browse files
committed
[Flang][Lower] Add lowering support of OpenMP distribute to MLIR
This patch adds support for lowering the OpenMP DISTRIBUTE directive from PFT to MLIR. It only supports standalone DISTRIBUTE; support for composite constructs will come in follow-up PRs.
1 parent 1d45235 commit c2f0ece

File tree

6 files changed

+374
-9
lines changed

6 files changed

+374
-9
lines changed

flang/lib/Lower/OpenMP/ClauseProcessor.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,20 @@ bool ClauseProcessor::processDeviceType(
317317
return false;
318318
}
319319

320+
/// Looks for a unique `DistSchedule` clause and, when one is found, records it
/// in \p result.
///
/// \param stmtCtx statement context passed to the expression lowering of the
///        chunk size.
/// \param result  receives `distScheduleStaticAttr` (always set when the
///        clause is found) and `distScheduleChunkSizeVar` (set only when the
///        clause carries a chunk-size expression).
/// \returns true if the clause was found, false otherwise (in which case
///          \p result is left untouched).
bool ClauseProcessor::processDistSchedule(
321+
lower::StatementContext &stmtCtx,
322+
mlir::omp::DistScheduleClauseOps &result) const {
323+
if (auto *clause = findUniqueClause<omp::clause::DistSchedule>()) {
324+
// The static attribute is set unconditionally once the clause is present.
result.distScheduleStaticAttr = converter.getFirOpBuilder().getUnitAttr();
325+
// The chunk size is an optional member of the clause tuple.
const auto &chunkSize = std::get<std::optional<ExprTy>>(clause->t);
326+
if (chunkSize)
327+
// Lower the chunk-size expression to a value and keep its base.
result.distScheduleChunkSizeVar =
328+
fir::getBase(converter.genExprValue(*chunkSize, stmtCtx));
329+
return true;
330+
}
331+
return false;
332+
}
333+
320334
bool ClauseProcessor::processFinal(lower::StatementContext &stmtCtx,
321335
mlir::omp::FinalClauseOps &result) const {
322336
const parser::CharBlock *source = nullptr;

flang/lib/Lower/OpenMP/ClauseProcessor.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ class ClauseProcessor {
6161
bool processDevice(lower::StatementContext &stmtCtx,
6262
mlir::omp::DeviceClauseOps &result) const;
6363
bool processDeviceType(mlir::omp::DeviceTypeClauseOps &result) const;
64+
/// Handles the `DistSchedule` clause: fills \p result when the clause is
/// present and returns whether it was found.
bool processDistSchedule(lower::StatementContext &stmtCtx,
65+
mlir::omp::DistScheduleClauseOps &result) const;
6466
bool processFinal(lower::StatementContext &stmtCtx,
6567
mlir::omp::FinalClauseOps &result) const;
6668
bool processHasDeviceAddr(

flang/lib/Lower/OpenMP/OpenMP.cpp

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -991,6 +991,18 @@ static void genCriticalDeclareClauses(lower::AbstractConverter &converter,
991991
mlir::StringAttr::get(converter.getFirOpBuilder().getContext(), name);
992992
}
993993

994+
/// Populates \p clauseOps with the clause operands gathered from \p clauses
/// for a distribute construct.
///
/// A ClauseProcessor is built over \p clauses and asked to process the
/// allocate and dist_schedule clauses; the boolean results of both calls are
/// ignored. \p stmtCtx is forwarded to the dist_schedule processing only.
/// NOTE(review): \p loc is not referenced in the body.
static void genDistributeClauses(lower::AbstractConverter &converter,
995+
semantics::SemanticsContext &semaCtx,
996+
lower::StatementContext &stmtCtx,
997+
const List<Clause> &clauses,
998+
mlir::Location loc,
999+
mlir::omp::DistributeClauseOps &clauseOps) {
1000+
ClauseProcessor cp(converter, semaCtx, clauses);
1001+
cp.processAllocate(clauseOps);
1002+
cp.processDistSchedule(stmtCtx, clauseOps);
1003+
// TODO Support delayed privatization.
1004+
}
1005+
9941006
static void genFlushClauses(lower::AbstractConverter &converter,
9951007
semantics::SemanticsContext &semaCtx,
9961008
const ObjectList &objects,
@@ -1288,8 +1300,50 @@ genDistributeOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
12881300
semantics::SemanticsContext &semaCtx,
12891301
lower::pft::Evaluation &eval, mlir::Location loc,
12901302
const ConstructQueue &queue, ConstructQueue::iterator item) {
1291-
TODO(loc, "Distribute construct");
1292-
return nullptr;
1303+
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
1304+
symTable.pushScope();
1305+
DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
1306+
lower::omp::isLastItemInQueue(item, queue));
1307+
dsp.processStep1();
1308+
1309+
lower::StatementContext stmtCtx;
1310+
mlir::omp::LoopNestClauseOps loopClauseOps;
1311+
mlir::omp::DistributeClauseOps distributeClauseOps;
1312+
llvm::SmallVector<const semantics::Symbol *> iv;
1313+
genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
1314+
loopClauseOps, iv);
1315+
genDistributeClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
1316+
distributeClauseOps);
1317+
1318+
// Create omp.distribute wrapper.
1319+
auto distributeOp =
1320+
firOpBuilder.create<mlir::omp::DistributeOp>(loc, distributeClauseOps);
1321+
1322+
firOpBuilder.createBlock(&distributeOp.getRegion());
1323+
firOpBuilder.setInsertionPoint(
1324+
lower::genOpenMPTerminator(firOpBuilder, distributeOp, loc));
1325+
1326+
// Create nested omp.loop_nest and fill body with loop contents.
1327+
auto loopOp = firOpBuilder.create<mlir::omp::LoopNestOp>(loc, loopClauseOps);
1328+
1329+
auto *nestedEval =
1330+
getCollapsedLoopEval(eval, getCollapseValue(item->clauses));
1331+
1332+
auto ivCallback = [&](mlir::Operation *op) {
1333+
genLoopVars(op, converter, loc, iv);
1334+
return iv;
1335+
};
1336+
1337+
createBodyOfOp(*loopOp,
1338+
OpWithBodyGenInfo(converter, symTable, semaCtx, loc,
1339+
*nestedEval, llvm::omp::Directive::OMPD_simd)
1340+
.setClauses(&item->clauses)
1341+
.setDataSharingProcessor(&dsp)
1342+
.setGenRegionEntryCb(ivCallback),
1343+
queue, item);
1344+
1345+
symTable.popScope();
1346+
return distributeOp;
12931347
}
12941348

12951349
static mlir::omp::FlushOp
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
! REQUIRES: openmp_runtime
2+
3+
! RUN: %flang_fc1 -emit-hlfir %openmp_flags %s -o - | FileCheck %s
4+
5+
! CHECK-LABEL: func @_QPdistribute_simple
6+
! Test case: a clause-free `distribute` loop nested directly inside `teams`.
! The check lines below expect an omp.teams op containing an omp.distribute
! wrapper whose very next line is omp.loop_nest, followed by omp.yield and
! omp.terminator.
subroutine distribute_simple()
7+
! CHECK: omp.teams
8+
!$omp teams
9+
10+
! CHECK: omp.distribute {
11+
!$omp distribute
12+
13+
! CHECK-NEXT: omp.loop_nest
14+
do i = 1, 10
15+
call foo()
16+
! CHECK: omp.yield
17+
end do
18+
19+
!$omp end distribute
20+
21+
! CHECK: omp.terminator
22+
!$omp end teams
23+
end subroutine distribute_simple
24+
25+
!===============================================================================
26+
! `dist_schedule` clause
27+
!===============================================================================
28+
29+
! CHECK-LABEL: func @_QPdistribute_dist_schedule
30+
! CHECK-SAME: %[[X_ARG:.*]]: !fir.ref<i32>
31+
! Test case: `dist_schedule(static[, chunk])` on `distribute`, three variants
! inside a single `teams` region:
!   1. constant chunk size (5)  -> chunk_size bound to an arith.constant,
!   2. chunk size from dummy x  -> chunk_size bound to a fir.load of x,
!   3. no chunk size            -> no chunk_size expected after
!                                  dist_schedule_static.
! Every variant expects dist_schedule_static on the omp.distribute line and an
! omp.loop_nest on the line right after it.
subroutine distribute_dist_schedule(x)
32+
! CHECK: %[[X_REF:.*]]:2 = hlfir.declare %[[X_ARG]]
33+
integer, intent(in) :: x
34+
35+
! CHECK: omp.teams
36+
!$omp teams
37+
38+
! STATIC SCHEDULE, CONSTANT CHUNK SIZE
39+
40+
! CHECK: %[[CONST_CHUNK_SIZE:.*]] = arith.constant 5 : i32
41+
! CHECK: omp.distribute
42+
! CHECK-SAME: dist_schedule_static
43+
! CHECK-SAME: chunk_size(%[[CONST_CHUNK_SIZE]] : i32)
44+
!$omp distribute dist_schedule(static, 5)
45+
46+
! CHECK-NEXT: omp.loop_nest
47+
do i = 1, 10
48+
call foo()
49+
! CHECK: omp.yield
50+
end do
51+
52+
!$omp end distribute
53+
54+
! STATIC SCHEDULE, VARIABLE CHUNK SIZE
55+
56+
! CHECK: %[[X:.*]] = fir.load %[[X_REF]]#0
57+
! CHECK: omp.distribute
58+
! CHECK-SAME: dist_schedule_static
59+
! CHECK-SAME: chunk_size(%[[X]] : i32)
60+
!$omp distribute dist_schedule(static, x)
61+
62+
! CHECK-NEXT: omp.loop_nest
63+
do i = 1, 10
64+
call foo()
65+
! CHECK: omp.yield
66+
end do
67+
68+
!$omp end distribute
69+
70+
! STATIC SCHEDULE, NO CHUNK SIZE
71+
72+
! CHECK: omp.distribute
73+
! CHECK-SAME: dist_schedule_static
74+
! CHECK-NOT: chunk_size
75+
!$omp distribute dist_schedule(static)
76+
77+
! CHECK-NEXT: omp.loop_nest
78+
do i = 1, 10
79+
call foo()
80+
! CHECK: omp.yield
81+
end do
82+
83+
!$omp end distribute
84+
85+
! CHECK: omp.terminator
86+
!$omp end teams
87+
end subroutine distribute_dist_schedule
88+
89+
!===============================================================================
90+
! `allocate` clause
91+
!===============================================================================
92+
93+
! CHECK-LABEL: func @_QPdistribute_allocate
94+
! Test case: `allocate(omp_high_bw_mem_alloc: x)` combined with `private(x)`
! on `distribute` inside `teams`. The check lines expect the omp.distribute
! line to carry an allocate clause pairing an i64 value with a !fir.ref<i32>
! value, with omp.loop_nest on the following line.
subroutine distribute_allocate()
95+
use omp_lib
96+
integer :: x
97+
! CHECK: omp.teams
98+
!$omp teams
99+
100+
! CHECK: omp.distribute
101+
! CHECK-SAME: allocate(%{{.+}} : i64 -> %{{.+}} : !fir.ref<i32>)
102+
!$omp distribute allocate(omp_high_bw_mem_alloc: x) private(x)
103+
104+
! CHECK-NEXT: omp.loop_nest
105+
do i = 1, 10
106+
x = i
107+
! CHECK: omp.yield
108+
end do
109+
110+
!$omp end distribute
111+
112+
! CHECK: omp.terminator
113+
!$omp end teams
114+
end subroutine distribute_allocate

0 commit comments

Comments
 (0)