Skip to content

Commit fc1c34b

Browse files
authored
[Flang][OpenMP][Lower] Add lowering support of OpenMP distribute to MLIR (#67798)
This patch adds support for lowering the OpenMP DISTRIBUTE directive from PFT to MLIR. It only supports standalone DISTRIBUTE; support for composite constructs will come in follow-up PRs.
1 parent 1d45235 commit fc1c34b

File tree

6 files changed

+374
-9
lines changed

6 files changed

+374
-9
lines changed

flang/lib/Lower/OpenMP/ClauseProcessor.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,20 @@ bool ClauseProcessor::processDeviceType(
317317
return false;
318318
}
319319

320+
/// Processes the `dist_schedule` clause, if findUniqueClause locates one.
///
/// When the clause is found, `result.distScheduleStaticAttr` is set to a unit
/// attribute and, if the clause carries a chunk-size expression, the base of
/// the evaluated expression is stored in `result.distScheduleChunkSizeVar`.
///
/// \param stmtCtx statement context forwarded to the expression evaluation.
/// \param result  clause operand structure that is updated in place.
/// \returns true if the clause was found, false otherwise (in which case
///          `result` is left untouched).
bool ClauseProcessor::processDistSchedule(
321+
lower::StatementContext &stmtCtx,
322+
mlir::omp::DistScheduleClauseOps &result) const {
323+
if (auto *clause = findUniqueClause<omp::clause::DistSchedule>()) {
324+
// The static attribute is set whenever the clause is present, regardless of
// whether a chunk size was given.
result.distScheduleStaticAttr = converter.getFirOpBuilder().getUnitAttr();
325+
// The chunk size is an optional member of the clause tuple.
const auto &chunkSize = std::get<std::optional<ExprTy>>(clause->t);
326+
if (chunkSize)
327+
result.distScheduleChunkSizeVar =
328+
fir::getBase(converter.genExprValue(*chunkSize, stmtCtx));
329+
return true;
330+
}
331+
return false;
332+
}
333+
320334
bool ClauseProcessor::processFinal(lower::StatementContext &stmtCtx,
321335
mlir::omp::FinalClauseOps &result) const {
322336
const parser::CharBlock *source = nullptr;

flang/lib/Lower/OpenMP/ClauseProcessor.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ class ClauseProcessor {
6161
bool processDevice(lower::StatementContext &stmtCtx,
6262
mlir::omp::DeviceClauseOps &result) const;
6363
bool processDeviceType(mlir::omp::DeviceTypeClauseOps &result) const;
64+
bool processDistSchedule(lower::StatementContext &stmtCtx,
65+
mlir::omp::DistScheduleClauseOps &result) const;
6466
bool processFinal(lower::StatementContext &stmtCtx,
6567
mlir::omp::FinalClauseOps &result) const;
6668
bool processHasDeviceAddr(

flang/lib/Lower/OpenMP/OpenMP.cpp

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -991,6 +991,18 @@ static void genCriticalDeclareClauses(lower::AbstractConverter &converter,
991991
mlir::StringAttr::get(converter.getFirOpBuilder().getContext(), name);
992992
}
993993

994+
/// Fills `clauseOps` from the clause list of a DISTRIBUTE construct.
///
/// Builds a ClauseProcessor over `clauses` and runs the `allocate` and
/// `dist_schedule` processors on it; the boolean results of both processors
/// are ignored. `stmtCtx` is forwarded to the `dist_schedule` processing only.
/// `loc` is accepted but not used in this body.
static void genDistributeClauses(lower::AbstractConverter &converter,
995+
semantics::SemanticsContext &semaCtx,
996+
lower::StatementContext &stmtCtx,
997+
const List<Clause> &clauses,
998+
mlir::Location loc,
999+
mlir::omp::DistributeClauseOps &clauseOps) {
1000+
ClauseProcessor cp(converter, semaCtx, clauses);
1001+
cp.processAllocate(clauseOps);
1002+
cp.processDistSchedule(stmtCtx, clauseOps);
1003+
// TODO Support delayed privatization.
1004+
}
1005+
9941006
static void genFlushClauses(lower::AbstractConverter &converter,
9951007
semantics::SemanticsContext &semaCtx,
9961008
const ObjectList &objects,
@@ -1288,8 +1300,50 @@ genDistributeOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
12881300
semantics::SemanticsContext &semaCtx,
12891301
lower::pft::Evaluation &eval, mlir::Location loc,
12901302
const ConstructQueue &queue, ConstructQueue::iterator item) {
1291-
TODO(loc, "Distribute construct");
1292-
return nullptr;
1303+
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
1304+
symTable.pushScope();
1305+
DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
1306+
lower::omp::isLastItemInQueue(item, queue));
1307+
dsp.processStep1();
1308+
1309+
lower::StatementContext stmtCtx;
1310+
mlir::omp::LoopNestClauseOps loopClauseOps;
1311+
mlir::omp::DistributeClauseOps distributeClauseOps;
1312+
llvm::SmallVector<const semantics::Symbol *> iv;
1313+
genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
1314+
loopClauseOps, iv);
1315+
genDistributeClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
1316+
distributeClauseOps);
1317+
1318+
// Create omp.distribute wrapper.
1319+
auto distributeOp =
1320+
firOpBuilder.create<mlir::omp::DistributeOp>(loc, distributeClauseOps);
1321+
1322+
firOpBuilder.createBlock(&distributeOp.getRegion());
1323+
firOpBuilder.setInsertionPoint(
1324+
lower::genOpenMPTerminator(firOpBuilder, distributeOp, loc));
1325+
1326+
// Create nested omp.loop_nest and fill body with loop contents.
1327+
auto loopOp = firOpBuilder.create<mlir::omp::LoopNestOp>(loc, loopClauseOps);
1328+
1329+
auto *nestedEval =
1330+
getCollapsedLoopEval(eval, getCollapseValue(item->clauses));
1331+
1332+
auto ivCallback = [&](mlir::Operation *op) {
1333+
genLoopVars(op, converter, loc, iv);
1334+
return iv;
1335+
};
1336+
1337+
createBodyOfOp(*loopOp,
1338+
OpWithBodyGenInfo(converter, symTable, semaCtx, loc,
1339+
*nestedEval, llvm::omp::Directive::OMPD_simd)
1340+
.setClauses(&item->clauses)
1341+
.setDataSharingProcessor(&dsp)
1342+
.setGenRegionEntryCb(ivCallback),
1343+
queue, item);
1344+
1345+
symTable.popScope();
1346+
return distributeOp;
12931347
}
12941348

12951349
static mlir::omp::FlushOp
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
! REQUIRES: openmp_runtime
2+
3+
! RUN: %flang_fc1 -emit-hlfir %openmp_flags %s -o - | FileCheck %s
4+
5+
! CHECK-LABEL: func @_QPdistribute_simple
6+
subroutine distribute_simple()
7+
! CHECK: omp.teams
8+
!$omp teams
9+
10+
! CHECK: omp.distribute {
11+
!$omp distribute
12+
13+
! CHECK-NEXT: omp.loop_nest
14+
do i = 1, 10
15+
call foo()
16+
! CHECK: omp.yield
17+
end do
18+
19+
!$omp end distribute
20+
21+
! CHECK: omp.terminator
22+
!$omp end teams
23+
end subroutine distribute_simple
24+
25+
!===============================================================================
26+
! `dist_schedule` clause
27+
!===============================================================================
28+
29+
! CHECK-LABEL: func @_QPdistribute_dist_schedule
30+
! CHECK-SAME: %[[X_ARG:.*]]: !fir.ref<i32>
31+
subroutine distribute_dist_schedule(x)
32+
! CHECK: %[[X_REF:.*]]:2 = hlfir.declare %[[X_ARG]]
33+
integer, intent(in) :: x
34+
35+
! CHECK: omp.teams
36+
!$omp teams
37+
38+
! STATIC SCHEDULE, CONSTANT CHUNK SIZE
39+
40+
! CHECK: %[[CONST_CHUNK_SIZE:.*]] = arith.constant 5 : i32
41+
! CHECK: omp.distribute
42+
! CHECK-SAME: dist_schedule_static
43+
! CHECK-SAME: chunk_size(%[[CONST_CHUNK_SIZE]] : i32)
44+
!$omp distribute dist_schedule(static, 5)
45+
46+
! CHECK-NEXT: omp.loop_nest
47+
do i = 1, 10
48+
call foo()
49+
! CHECK: omp.yield
50+
end do
51+
52+
!$omp end distribute
53+
54+
! STATIC SCHEDULE, VARIABLE CHUNK SIZE
55+
56+
! CHECK: %[[X:.*]] = fir.load %[[X_REF]]#0
57+
! CHECK: omp.distribute
58+
! CHECK-SAME: dist_schedule_static
59+
! CHECK-SAME: chunk_size(%[[X]] : i32)
60+
!$omp distribute dist_schedule(static, x)
61+
62+
! CHECK-NEXT: omp.loop_nest
63+
do i = 1, 10
64+
call foo()
65+
! CHECK: omp.yield
66+
end do
67+
68+
!$omp end distribute
69+
70+
! STATIC SCHEDULE, NO CHUNK SIZE
71+
72+
! CHECK: omp.distribute
73+
! CHECK-SAME: dist_schedule_static
74+
! CHECK-NOT: chunk_size
75+
!$omp distribute dist_schedule(static)
76+
77+
! CHECK-NEXT: omp.loop_nest
78+
do i = 1, 10
79+
call foo()
80+
! CHECK: omp.yield
81+
end do
82+
83+
!$omp end distribute
84+
85+
! CHECK: omp.terminator
86+
!$omp end teams
87+
end subroutine distribute_dist_schedule
88+
89+
!===============================================================================
90+
! `allocate` clause
91+
!===============================================================================
92+
93+
! CHECK-LABEL: func @_QPdistribute_allocate
94+
subroutine distribute_allocate()
95+
use omp_lib
96+
integer :: x
97+
! CHECK: omp.teams
98+
!$omp teams
99+
100+
! CHECK: omp.distribute
101+
! CHECK-SAME: allocate(%{{.+}} : i64 -> %{{.+}} : !fir.ref<i32>)
102+
!$omp distribute allocate(omp_high_bw_mem_alloc: x) private(x)
103+
104+
! CHECK-NEXT: omp.loop_nest
105+
do i = 1, 10
106+
x = i
107+
! CHECK: omp.yield
108+
end do
109+
110+
!$omp end distribute
111+
112+
! CHECK: omp.terminator
113+
!$omp end teams
114+
end subroutine distribute_allocate

0 commit comments

Comments
 (0)