Skip to content

Commit 4202d69

Browse files
kiranchandramohan, SouraVX, schweitzpgi, clementval, and NimishMishra
committed
[Flang][OpenMP] Upstream the lowering of the parallel do combined construct
When parallel is used in a combined construct, then use a separate function to create the parallel operation. It handles the parallel specific clauses and leaves the rest for handling at the inner operations.

Reviewed By: peixin, shraiysh

Differential Revision: https://reviews.llvm.org/D125465

Co-authored-by: Sourabh Singh Tomar <[email protected]>
Co-authored-by: Eric Schweitz <[email protected]>
Co-authored-by: Valentin Clement <[email protected]>
Co-authored-by: Nimish Mishra <[email protected]>
1 parent c153c61 commit 4202d69

File tree

4 files changed

+221
-29
lines changed

4 files changed

+221
-29
lines changed

flang/lib/Lower/OpenMP.cpp

Lines changed: 90 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,80 @@ genOMP(Fortran::lower::AbstractConverter &converter,
278278
standaloneConstruct.u);
279279
}
280280

281+
static omp::ClauseProcBindKindAttr genProcBindKindAttr(
282+
fir::FirOpBuilder &firOpBuilder,
283+
const Fortran::parser::OmpClause::ProcBind *procBindClause) {
284+
omp::ClauseProcBindKind pbKind;
285+
switch (procBindClause->v.v) {
286+
case Fortran::parser::OmpProcBindClause::Type::Master:
287+
pbKind = omp::ClauseProcBindKind::Master;
288+
break;
289+
case Fortran::parser::OmpProcBindClause::Type::Close:
290+
pbKind = omp::ClauseProcBindKind::Close;
291+
break;
292+
case Fortran::parser::OmpProcBindClause::Type::Spread:
293+
pbKind = omp::ClauseProcBindKind::Spread;
294+
break;
295+
case Fortran::parser::OmpProcBindClause::Type::Primary:
296+
pbKind = omp::ClauseProcBindKind::Primary;
297+
break;
298+
}
299+
return omp::ClauseProcBindKindAttr::get(firOpBuilder.getContext(), pbKind);
300+
}
301+
302+
/* When parallel is used in a combined construct, then use this function to
303+
* create the parallel operation. It handles the parallel specific clauses
304+
* and leaves the rest for handling at the inner operations.
305+
* TODO: Refactor clause handling
306+
*/
307+
template <typename Directive>
308+
static void
309+
createCombinedParallelOp(Fortran::lower::AbstractConverter &converter,
310+
Fortran::lower::pft::Evaluation &eval,
311+
const Directive &directive) {
312+
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
313+
mlir::Location currentLocation = converter.getCurrentLocation();
314+
Fortran::lower::StatementContext stmtCtx;
315+
llvm::ArrayRef<mlir::Type> argTy;
316+
mlir::Value ifClauseOperand, numThreadsClauseOperand;
317+
SmallVector<Value> allocatorOperands, allocateOperands;
318+
mlir::omp::ClauseProcBindKindAttr procBindKindAttr;
319+
const auto &opClauseList =
320+
std::get<Fortran::parser::OmpClauseList>(directive.t);
321+
// TODO: Handle the following clauses
322+
// 1. default
323+
// 2. copyin
324+
// Note: rest of the clauses are handled when the inner operation is created
325+
for (const Fortran::parser::OmpClause &clause : opClauseList.v) {
326+
if (const auto &ifClause =
327+
std::get_if<Fortran::parser::OmpClause::If>(&clause.u)) {
328+
auto &expr = std::get<Fortran::parser::ScalarLogicalExpr>(ifClause->v.t);
329+
mlir::Value ifVal = fir::getBase(
330+
converter.genExprValue(*Fortran::semantics::GetExpr(expr), stmtCtx));
331+
ifClauseOperand = firOpBuilder.createConvert(
332+
currentLocation, firOpBuilder.getI1Type(), ifVal);
333+
} else if (const auto &numThreadsClause =
334+
std::get_if<Fortran::parser::OmpClause::NumThreads>(
335+
&clause.u)) {
336+
numThreadsClauseOperand = fir::getBase(converter.genExprValue(
337+
*Fortran::semantics::GetExpr(numThreadsClause->v), stmtCtx));
338+
} else if (const auto &procBindClause =
339+
std::get_if<Fortran::parser::OmpClause::ProcBind>(
340+
&clause.u)) {
341+
procBindKindAttr = genProcBindKindAttr(firOpBuilder, procBindClause);
342+
}
343+
}
344+
// Create and insert the operation.
345+
auto parallelOp = firOpBuilder.create<mlir::omp::ParallelOp>(
346+
currentLocation, argTy, ifClauseOperand, numThreadsClauseOperand,
347+
allocateOperands, allocatorOperands, /*reduction_vars=*/ValueRange(),
348+
/*reductions=*/nullptr, procBindKindAttr);
349+
350+
createBodyOfOp<omp::ParallelOp>(parallelOp, converter, currentLocation,
351+
&opClauseList, /*iv=*/{},
352+
/*isCombined=*/true);
353+
}
354+
281355
static void
282356
genOMP(Fortran::lower::AbstractConverter &converter,
283357
Fortran::lower::pft::Evaluation &eval,
@@ -318,23 +392,7 @@ genOMP(Fortran::lower::AbstractConverter &converter,
318392
} else if (const auto &procBindClause =
319393
std::get_if<Fortran::parser::OmpClause::ProcBind>(
320394
&clause.u)) {
321-
omp::ClauseProcBindKind pbKind;
322-
switch (procBindClause->v.v) {
323-
case Fortran::parser::OmpProcBindClause::Type::Master:
324-
pbKind = omp::ClauseProcBindKind::Master;
325-
break;
326-
case Fortran::parser::OmpProcBindClause::Type::Close:
327-
pbKind = omp::ClauseProcBindKind::Close;
328-
break;
329-
case Fortran::parser::OmpProcBindClause::Type::Spread:
330-
pbKind = omp::ClauseProcBindKind::Spread;
331-
break;
332-
case Fortran::parser::OmpProcBindClause::Type::Primary:
333-
pbKind = omp::ClauseProcBindKind::Primary;
334-
break;
335-
}
336-
procBindKindAttr =
337-
omp::ClauseProcBindKindAttr::get(firOpBuilder.getContext(), pbKind);
395+
procBindKindAttr = genProcBindKindAttr(firOpBuilder, procBindClause);
338396
} else if (const auto &allocateClause =
339397
std::get_if<Fortran::parser::OmpClause::Allocate>(
340398
&clause.u)) {
@@ -419,11 +477,17 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
419477
noWaitClauseOperand, orderedClauseOperand, orderClauseOperand;
420478
const auto &wsLoopOpClauseList = std::get<Fortran::parser::OmpClauseList>(
421479
std::get<Fortran::parser::OmpBeginLoopDirective>(loopConstruct.t).t);
422-
if (llvm::omp::OMPD_do !=
480+
481+
const auto ompDirective =
423482
std::get<Fortran::parser::OmpLoopDirective>(
424483
std::get<Fortran::parser::OmpBeginLoopDirective>(loopConstruct.t).t)
425-
.v) {
426-
TODO(converter.getCurrentLocation(), "Combined worksharing loop construct");
484+
.v;
485+
if (llvm::omp::OMPD_parallel_do == ompDirective) {
486+
createCombinedParallelOp<Fortran::parser::OmpBeginLoopDirective>(
487+
converter, eval,
488+
std::get<Fortran::parser::OmpBeginLoopDirective>(loopConstruct.t));
489+
} else if (llvm::omp::OMPD_do != ompDirective) {
490+
TODO(converter.getCurrentLocation(), "Construct enclosing do loop");
427491
}
428492

429493
int64_t collapseValue = Fortran::lower::getCollapseValue(wsLoopOpClauseList);
@@ -648,15 +712,14 @@ genOMP(Fortran::lower::AbstractConverter &converter,
648712

649713
// Parallel Sections Construct
650714
if (dir == llvm::omp::Directive::OMPD_parallel_sections) {
651-
auto parallelOp = firOpBuilder.create<mlir::omp::ParallelOp>(
652-
currentLocation, /*if_expr_var*/ nullptr, /*num_threads_var*/ nullptr,
653-
allocateOperands, allocatorOperands, /*reduction_vars=*/ValueRange(),
654-
/*reductions=*/nullptr, /*proc_bind_val*/ nullptr);
655-
createBodyOfOp(parallelOp, converter, currentLocation);
715+
createCombinedParallelOp<Fortran::parser::OmpBeginSectionsDirective>(
716+
converter, eval,
717+
std::get<Fortran::parser::OmpBeginSectionsDirective>(
718+
sectionsConstruct.t));
656719
auto sectionsOp = firOpBuilder.create<mlir::omp::SectionsOp>(
657720
currentLocation, /*reduction_vars*/ ValueRange(),
658-
/*reductions=*/nullptr, /*allocate_vars*/ ValueRange(),
659-
/*allocators_vars*/ ValueRange(), /*nowait=*/nullptr);
721+
/*reductions=*/nullptr, allocateOperands, allocatorOperands,
722+
/*nowait=*/nullptr);
660723
createBodyOfOp(sectionsOp, converter, currentLocation);
661724

662725
// Sections Construct

flang/test/Fir/convert-to-llvm-openmp-and-fir.fir

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,3 +71,36 @@ func.func @_QPsb2(%arg0: !fir.ref<i32> {fir.bindc_name = "x"}, %arg1: !fir.ref<i
7171
// CHECK: }
7272
// CHECK: llvm.return
7373
// CHECK: }
74+
75+
76+
// -----
77+
78+
func.func @_QPsb(%arr: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "arr"}) {
79+
%0 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsbEi"}
80+
omp.parallel {
81+
%c1 = arith.constant 1 : i32
82+
%c50 = arith.constant 50 : i32
83+
omp.wsloop for (%indx) : i32 = (%c1) to (%c50) inclusive step (%c1) {
84+
%1 = fir.convert %indx : (i32) -> i64
85+
%c1_i64 = arith.constant 1 : i64
86+
%2 = arith.subi %1, %c1_i64 : i64
87+
%3 = fir.coordinate_of %arr, %2 : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
88+
fir.store %indx to %3 : !fir.ref<i32>
89+
omp.yield
90+
}
91+
omp.terminator
92+
}
93+
return
94+
}
95+
96+
// Check only for the structure of the OpenMP portion and the feasibility of the conversion
97+
// CHECK-LABEL: @_QPsb
98+
// CHECK-SAME: %{{.*}}: !llvm.ptr<struct<({{.*}})>> {fir.bindc_name = "arr"}
99+
// CHECK: omp.parallel {
100+
// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i32) : i32
101+
// CHECK: %[[C50:.*]] = llvm.mlir.constant(50 : i32) : i32
102+
// CHECK: omp.wsloop for (%[[INDX:.*]]) : i32 = (%[[C1]]) to (%[[C50]]) inclusive step (%[[C1]]) {
103+
// CHECK: llvm.store %[[INDX]], %{{.*}} : !llvm.ptr<i32>
104+
// CHECK: omp.yield
105+
// CHECK: omp.terminator
106+
// CHECK: llvm.return
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
! This test checks lowering of OpenMP DO Directive (Worksharing).
2+
3+
! RUN: bbc -fopenmp -emit-fir %s -o - | FileCheck %s
4+
5+
! CHECK-LABEL: func @_QPsimple_parallel_do()
6+
subroutine simple_parallel_do
7+
integer :: i
8+
! CHECK: omp.parallel
9+
! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
10+
! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
11+
! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32
12+
! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
13+
!$OMP PARALLEL DO
14+
do i=1, 9
15+
! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref<i8>, i32) -> i1
16+
print*, i
17+
end do
18+
! CHECK: omp.yield
19+
! CHECK: omp.terminator
20+
!$OMP END PARALLEL DO
21+
end subroutine
22+
23+
! CHECK-LABEL: func @_QPparallel_do_with_parallel_clauses
24+
! CHECK-SAME: %[[COND_REF:.*]]: !fir.ref<!fir.logical<4>> {fir.bindc_name = "cond"}, %[[NT_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "nt"}
25+
subroutine parallel_do_with_parallel_clauses(cond, nt)
26+
logical :: cond
27+
integer :: nt
28+
integer :: i
29+
! CHECK: %[[COND:.*]] = fir.load %[[COND_REF]] : !fir.ref<!fir.logical<4>>
30+
! CHECK: %[[COND_CVT:.*]] = fir.convert %[[COND]] : (!fir.logical<4>) -> i1
31+
! CHECK: %[[NT:.*]] = fir.load %[[NT_REF]] : !fir.ref<i32>
32+
! CHECK: omp.parallel if(%[[COND_CVT]] : i1) num_threads(%[[NT]] : i32) proc_bind(close)
33+
! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
34+
! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
35+
! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32
36+
! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
37+
!$OMP PARALLEL DO IF(cond) NUM_THREADS(nt) PROC_BIND(close)
38+
do i=1, 9
39+
! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref<i8>, i32) -> i1
40+
print*, i
41+
end do
42+
! CHECK: omp.yield
43+
! CHECK: omp.terminator
44+
!$OMP END PARALLEL DO
45+
end subroutine
46+
47+
! CHECK-LABEL: func @_QPparallel_do_with_clauses
48+
! CHECK-SAME: %[[NT_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "nt"}
49+
subroutine parallel_do_with_clauses(nt)
50+
integer :: nt
51+
integer :: i
52+
! CHECK: %[[NT:.*]] = fir.load %[[NT_REF]] : !fir.ref<i32>
53+
! CHECK: omp.parallel num_threads(%[[NT]] : i32)
54+
! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
55+
! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
56+
! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32
57+
! CHECK: omp.wsloop schedule(dynamic) for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
58+
!$OMP PARALLEL DO NUM_THREADS(nt) SCHEDULE(dynamic)
59+
do i=1, 9
60+
! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref<i8>, i32) -> i1
61+
print*, i
62+
end do
63+
! CHECK: omp.yield
64+
! CHECK: omp.terminator
65+
!$OMP END PARALLEL DO
66+
end subroutine
67+
68+
! CHECK-LABEL: func @_QPparallel_do_with_privatisation_clauses
69+
! CHECK-SAME: %[[COND_REF:.*]]: !fir.ref<!fir.logical<4>> {fir.bindc_name = "cond"}, %[[NT_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "nt"}
70+
subroutine parallel_do_with_privatisation_clauses(cond,nt)
71+
logical :: cond
72+
integer :: nt
73+
integer :: i
74+
! CHECK: omp.parallel
75+
! CHECK: %[[PRIVATE_COND_REF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "cond", pinned, uniq_name = "_QFparallel_do_with_privatisation_clausesEcond"}
76+
! CHECK: %[[PRIVATE_NT_REF:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_do_with_privatisation_clausesEnt"}
77+
! CHECK: %[[NT_VAL:.*]] = fir.load %[[NT_REF]] : !fir.ref<i32>
78+
! CHECK: fir.store %[[NT_VAL]] to %[[PRIVATE_NT_REF]] : !fir.ref<i32>
79+
! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
80+
! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
81+
! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32
82+
! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
83+
!$OMP PARALLEL DO PRIVATE(cond) FIRSTPRIVATE(nt)
84+
do i=1, 9
85+
! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref<i8>, i32) -> i1
86+
! CHECK: %[[PRIVATE_COND_VAL:.*]] = fir.load %[[PRIVATE_COND_REF]] : !fir.ref<!fir.logical<4>>
87+
! CHECK: %[[PRIVATE_COND_VAL_CVT:.*]] = fir.convert %[[PRIVATE_COND_VAL]] : (!fir.logical<4>) -> i1
88+
! CHECK: fir.call @_FortranAioOutputLogical({{.*}}, %[[PRIVATE_COND_VAL_CVT]]) : (!fir.ref<i8>, i1) -> i1
89+
! CHECK: %[[PRIVATE_NT_VAL:.*]] = fir.load %[[PRIVATE_NT_REF]] : !fir.ref<i32>
90+
! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[PRIVATE_NT_VAL]]) : (!fir.ref<i8>, i32) -> i1
91+
print*, i, cond, nt
92+
end do
93+
! CHECK: omp.yield
94+
! CHECK: omp.terminator
95+
!$OMP END PARALLEL DO
96+
end subroutine

flang/test/Lower/OpenMP/parallel-sections.f90

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ subroutine omp_parallel_sections_allocate(x, y)
4040
integer, intent(inout) :: x, y
4141
!FIRDialect: %[[allocator:.*]] = arith.constant 1 : i32
4242
!LLVMDialect: %[[allocator:.*]] = llvm.mlir.constant(1 : i32) : i32
43-
!OMPDialect: omp.parallel allocate(%[[allocator]] : i32 -> %{{.*}} : !fir.ref<i32>) {
44-
!OMPDialect: omp.sections {
43+
!OMPDialect: omp.parallel {
44+
!OMPDialect: omp.sections allocate(%[[allocator]] : i32 -> %{{.*}} : !fir.ref<i32>) {
4545
!$omp parallel sections allocate(omp_high_bw_mem_alloc: x)
4646
!OMPDialect: omp.section {
4747
!$omp section

0 commit comments

Comments (0)