Skip to content

Commit 4202d69

Browse files
kiranchandramohan, SouraVX, schweitzpgi, clementval, and NimishMishra
committed
[Flang][OpenMP] Upstream the lowering of the parallel do combined construct
When parallel is used in a combined construct, then use a separate function to create the parallel operation. It handles the parallel specific clauses and leaves the rest for handling at the inner operations.

Reviewed By: peixin, shraiysh

Differential Revision: https://reviews.llvm.org/D125465

Co-authored-by: Sourabh Singh Tomar <[email protected]>
Co-authored-by: Eric Schweitz <[email protected]>
Co-authored-by: Valentin Clement <[email protected]>
Co-authored-by: Nimish Mishra <[email protected]>
1 parent c153c61 commit 4202d69

File tree

4 files changed

+221
-29
lines changed

4 files changed

+221
-29
lines changed

flang/lib/Lower/OpenMP.cpp

Lines changed: 90 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,80 @@ genOMP(Fortran::lower::AbstractConverter &converter,
278278
standaloneConstruct.u);
279279
}
280280

281+
static omp::ClauseProcBindKindAttr genProcBindKindAttr(
282+
fir::FirOpBuilder &firOpBuilder,
283+
const Fortran::parser::OmpClause::ProcBind *procBindClause) {
284+
omp::ClauseProcBindKind pbKind;
285+
switch (procBindClause->v.v) {
286+
case Fortran::parser::OmpProcBindClause::Type::Master:
287+
pbKind = omp::ClauseProcBindKind::Master;
288+
break;
289+
case Fortran::parser::OmpProcBindClause::Type::Close:
290+
pbKind = omp::ClauseProcBindKind::Close;
291+
break;
292+
case Fortran::parser::OmpProcBindClause::Type::Spread:
293+
pbKind = omp::ClauseProcBindKind::Spread;
294+
break;
295+
case Fortran::parser::OmpProcBindClause::Type::Primary:
296+
pbKind = omp::ClauseProcBindKind::Primary;
297+
break;
298+
}
299+
return omp::ClauseProcBindKindAttr::get(firOpBuilder.getContext(), pbKind);
300+
}
301+
302+
/* When parallel is used in a combined construct, then use this function to
303+
* create the parallel operation. It handles the parallel specific clauses
304+
* and leaves the rest for handling at the inner operations.
305+
* TODO: Refactor clause handling
306+
*/
307+
template <typename Directive>
308+
static void
309+
createCombinedParallelOp(Fortran::lower::AbstractConverter &converter,
310+
Fortran::lower::pft::Evaluation &eval,
311+
const Directive &directive) {
312+
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
313+
mlir::Location currentLocation = converter.getCurrentLocation();
314+
Fortran::lower::StatementContext stmtCtx;
315+
llvm::ArrayRef<mlir::Type> argTy;
316+
mlir::Value ifClauseOperand, numThreadsClauseOperand;
317+
SmallVector<Value> allocatorOperands, allocateOperands;
318+
mlir::omp::ClauseProcBindKindAttr procBindKindAttr;
319+
const auto &opClauseList =
320+
std::get<Fortran::parser::OmpClauseList>(directive.t);
321+
// TODO: Handle the following clauses
322+
// 1. default
323+
// 2. copyin
324+
// Note: rest of the clauses are handled when the inner operation is created
325+
for (const Fortran::parser::OmpClause &clause : opClauseList.v) {
326+
if (const auto &ifClause =
327+
std::get_if<Fortran::parser::OmpClause::If>(&clause.u)) {
328+
auto &expr = std::get<Fortran::parser::ScalarLogicalExpr>(ifClause->v.t);
329+
mlir::Value ifVal = fir::getBase(
330+
converter.genExprValue(*Fortran::semantics::GetExpr(expr), stmtCtx));
331+
ifClauseOperand = firOpBuilder.createConvert(
332+
currentLocation, firOpBuilder.getI1Type(), ifVal);
333+
} else if (const auto &numThreadsClause =
334+
std::get_if<Fortran::parser::OmpClause::NumThreads>(
335+
&clause.u)) {
336+
numThreadsClauseOperand = fir::getBase(converter.genExprValue(
337+
*Fortran::semantics::GetExpr(numThreadsClause->v), stmtCtx));
338+
} else if (const auto &procBindClause =
339+
std::get_if<Fortran::parser::OmpClause::ProcBind>(
340+
&clause.u)) {
341+
procBindKindAttr = genProcBindKindAttr(firOpBuilder, procBindClause);
342+
}
343+
}
344+
// Create and insert the operation.
345+
auto parallelOp = firOpBuilder.create<mlir::omp::ParallelOp>(
346+
currentLocation, argTy, ifClauseOperand, numThreadsClauseOperand,
347+
allocateOperands, allocatorOperands, /*reduction_vars=*/ValueRange(),
348+
/*reductions=*/nullptr, procBindKindAttr);
349+
350+
createBodyOfOp<omp::ParallelOp>(parallelOp, converter, currentLocation,
351+
&opClauseList, /*iv=*/{},
352+
/*isCombined=*/true);
353+
}
354+
281355
static void
282356
genOMP(Fortran::lower::AbstractConverter &converter,
283357
Fortran::lower::pft::Evaluation &eval,
@@ -318,23 +392,7 @@ genOMP(Fortran::lower::AbstractConverter &converter,
318392
} else if (const auto &procBindClause =
319393
std::get_if<Fortran::parser::OmpClause::ProcBind>(
320394
&clause.u)) {
321-
omp::ClauseProcBindKind pbKind;
322-
switch (procBindClause->v.v) {
323-
case Fortran::parser::OmpProcBindClause::Type::Master:
324-
pbKind = omp::ClauseProcBindKind::Master;
325-
break;
326-
case Fortran::parser::OmpProcBindClause::Type::Close:
327-
pbKind = omp::ClauseProcBindKind::Close;
328-
break;
329-
case Fortran::parser::OmpProcBindClause::Type::Spread:
330-
pbKind = omp::ClauseProcBindKind::Spread;
331-
break;
332-
case Fortran::parser::OmpProcBindClause::Type::Primary:
333-
pbKind = omp::ClauseProcBindKind::Primary;
334-
break;
335-
}
336-
procBindKindAttr =
337-
omp::ClauseProcBindKindAttr::get(firOpBuilder.getContext(), pbKind);
395+
procBindKindAttr = genProcBindKindAttr(firOpBuilder, procBindClause);
338396
} else if (const auto &allocateClause =
339397
std::get_if<Fortran::parser::OmpClause::Allocate>(
340398
&clause.u)) {
@@ -419,11 +477,17 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
419477
noWaitClauseOperand, orderedClauseOperand, orderClauseOperand;
420478
const auto &wsLoopOpClauseList = std::get<Fortran::parser::OmpClauseList>(
421479
std::get<Fortran::parser::OmpBeginLoopDirective>(loopConstruct.t).t);
422-
if (llvm::omp::OMPD_do !=
480+
481+
const auto ompDirective =
423482
std::get<Fortran::parser::OmpLoopDirective>(
424483
std::get<Fortran::parser::OmpBeginLoopDirective>(loopConstruct.t).t)
425-
.v) {
426-
TODO(converter.getCurrentLocation(), "Combined worksharing loop construct");
484+
.v;
485+
if (llvm::omp::OMPD_parallel_do == ompDirective) {
486+
createCombinedParallelOp<Fortran::parser::OmpBeginLoopDirective>(
487+
converter, eval,
488+
std::get<Fortran::parser::OmpBeginLoopDirective>(loopConstruct.t));
489+
} else if (llvm::omp::OMPD_do != ompDirective) {
490+
TODO(converter.getCurrentLocation(), "Construct enclosing do loop");
427491
}
428492

429493
int64_t collapseValue = Fortran::lower::getCollapseValue(wsLoopOpClauseList);
@@ -648,15 +712,14 @@ genOMP(Fortran::lower::AbstractConverter &converter,
648712

649713
// Parallel Sections Construct
650714
if (dir == llvm::omp::Directive::OMPD_parallel_sections) {
651-
auto parallelOp = firOpBuilder.create<mlir::omp::ParallelOp>(
652-
currentLocation, /*if_expr_var*/ nullptr, /*num_threads_var*/ nullptr,
653-
allocateOperands, allocatorOperands, /*reduction_vars=*/ValueRange(),
654-
/*reductions=*/nullptr, /*proc_bind_val*/ nullptr);
655-
createBodyOfOp(parallelOp, converter, currentLocation);
715+
createCombinedParallelOp<Fortran::parser::OmpBeginSectionsDirective>(
716+
converter, eval,
717+
std::get<Fortran::parser::OmpBeginSectionsDirective>(
718+
sectionsConstruct.t));
656719
auto sectionsOp = firOpBuilder.create<mlir::omp::SectionsOp>(
657720
currentLocation, /*reduction_vars*/ ValueRange(),
658-
/*reductions=*/nullptr, /*allocate_vars*/ ValueRange(),
659-
/*allocators_vars*/ ValueRange(), /*nowait=*/nullptr);
721+
/*reductions=*/nullptr, allocateOperands, allocatorOperands,
722+
/*nowait=*/nullptr);
660723
createBodyOfOp(sectionsOp, converter, currentLocation);
661724

662725
// Sections Construct

flang/test/Fir/convert-to-llvm-openmp-and-fir.fir

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,3 +71,36 @@ func.func @_QPsb2(%arg0: !fir.ref<i32> {fir.bindc_name = "x"}, %arg1: !fir.ref<i
7171
// CHECK: }
7272
// CHECK: llvm.return
7373
// CHECK: }
74+
75+
76+
// -----
77+
78+
func.func @_QPsb(%arr: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "arr"}) {
79+
%0 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsbEi"}
80+
omp.parallel {
81+
%c1 = arith.constant 1 : i32
82+
%c50 = arith.constant 50 : i32
83+
omp.wsloop for (%indx) : i32 = (%c1) to (%c50) inclusive step (%c1) {
84+
%1 = fir.convert %indx : (i32) -> i64
85+
%c1_i64 = arith.constant 1 : i64
86+
%2 = arith.subi %1, %c1_i64 : i64
87+
%3 = fir.coordinate_of %arr, %2 : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
88+
fir.store %indx to %3 : !fir.ref<i32>
89+
omp.yield
90+
}
91+
omp.terminator
92+
}
93+
return
94+
}
95+
96+
// Check only for the structure of the OpenMP portion and the feasibility of the conversion
97+
// CHECK-LABEL: @_QPsb
98+
// CHECK-SAME: %{{.*}}: !llvm.ptr<struct<({{.*}})>> {fir.bindc_name = "arr"}
99+
// CHECK: omp.parallel {
100+
// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i32) : i32
101+
// CHECK: %[[C50:.*]] = llvm.mlir.constant(50 : i32) : i32
102+
// CHECK: omp.wsloop for (%[[INDX:.*]]) : i32 = (%[[C1]]) to (%[[C50]]) inclusive step (%[[C1]]) {
103+
// CHECK: llvm.store %[[INDX]], %{{.*}} : !llvm.ptr<i32>
104+
// CHECK: omp.yield
105+
// CHECK: omp.terminator
106+
// CHECK: llvm.return
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
! This test checks lowering of OpenMP DO Directive (Worksharing).
2+
3+
! RUN: bbc -fopenmp -emit-fir %s -o - | FileCheck %s
4+
5+
! CHECK-LABEL: func @_QPsimple_parallel_do()
6+
subroutine simple_parallel_do
7+
integer :: i
8+
! CHECK: omp.parallel
9+
! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
10+
! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
11+
! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32
12+
! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
13+
!$OMP PARALLEL DO
14+
do i=1, 9
15+
! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref<i8>, i32) -> i1
16+
print*, i
17+
end do
18+
! CHECK: omp.yield
19+
! CHECK: omp.terminator
20+
!$OMP END PARALLEL DO
21+
end subroutine
22+
23+
! CHECK-LABEL: func @_QPparallel_do_with_parallel_clauses
24+
! CHECK-SAME: %[[COND_REF:.*]]: !fir.ref<!fir.logical<4>> {fir.bindc_name = "cond"}, %[[NT_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "nt"}
25+
subroutine parallel_do_with_parallel_clauses(cond, nt)
26+
logical :: cond
27+
integer :: nt
28+
integer :: i
29+
! CHECK: %[[COND:.*]] = fir.load %[[COND_REF]] : !fir.ref<!fir.logical<4>>
30+
! CHECK: %[[COND_CVT:.*]] = fir.convert %[[COND]] : (!fir.logical<4>) -> i1
31+
! CHECK: %[[NT:.*]] = fir.load %[[NT_REF]] : !fir.ref<i32>
32+
! CHECK: omp.parallel if(%[[COND_CVT]] : i1) num_threads(%[[NT]] : i32) proc_bind(close)
33+
! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
34+
! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
35+
! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32
36+
! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
37+
!$OMP PARALLEL DO IF(cond) NUM_THREADS(nt) PROC_BIND(close)
38+
do i=1, 9
39+
! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref<i8>, i32) -> i1
40+
print*, i
41+
end do
42+
! CHECK: omp.yield
43+
! CHECK: omp.terminator
44+
!$OMP END PARALLEL DO
45+
end subroutine
46+
47+
! CHECK-LABEL: func @_QPparallel_do_with_clauses
48+
! CHECK-SAME: %[[NT_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "nt"}
49+
subroutine parallel_do_with_clauses(nt)
50+
integer :: nt
51+
integer :: i
52+
! CHECK: %[[NT:.*]] = fir.load %[[NT_REF]] : !fir.ref<i32>
53+
! CHECK: omp.parallel num_threads(%[[NT]] : i32)
54+
! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
55+
! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
56+
! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32
57+
! CHECK: omp.wsloop schedule(dynamic) for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
58+
!$OMP PARALLEL DO NUM_THREADS(nt) SCHEDULE(dynamic)
59+
do i=1, 9
60+
! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref<i8>, i32) -> i1
61+
print*, i
62+
end do
63+
! CHECK: omp.yield
64+
! CHECK: omp.terminator
65+
!$OMP END PARALLEL DO
66+
end subroutine
67+
68+
! CHECK-LABEL: func @_QPparallel_do_with_privatisation_clauses
69+
! CHECK-SAME: %[[COND_REF:.*]]: !fir.ref<!fir.logical<4>> {fir.bindc_name = "cond"}, %[[NT_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "nt"}
70+
subroutine parallel_do_with_privatisation_clauses(cond,nt)
71+
logical :: cond
72+
integer :: nt
73+
integer :: i
74+
! CHECK: omp.parallel
75+
! CHECK: %[[PRIVATE_COND_REF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "cond", pinned, uniq_name = "_QFparallel_do_with_privatisation_clausesEcond"}
76+
! CHECK: %[[PRIVATE_NT_REF:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_do_with_privatisation_clausesEnt"}
77+
! CHECK: %[[NT_VAL:.*]] = fir.load %[[NT_REF]] : !fir.ref<i32>
78+
! CHECK: fir.store %[[NT_VAL]] to %[[PRIVATE_NT_REF]] : !fir.ref<i32>
79+
! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
80+
! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
81+
! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32
82+
! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
83+
!$OMP PARALLEL DO PRIVATE(cond) FIRSTPRIVATE(nt)
84+
do i=1, 9
85+
! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref<i8>, i32) -> i1
86+
! CHECK: %[[PRIVATE_COND_VAL:.*]] = fir.load %[[PRIVATE_COND_REF]] : !fir.ref<!fir.logical<4>>
87+
! CHECK: %[[PRIVATE_COND_VAL_CVT:.*]] = fir.convert %[[PRIVATE_COND_VAL]] : (!fir.logical<4>) -> i1
88+
! CHECK: fir.call @_FortranAioOutputLogical({{.*}}, %[[PRIVATE_COND_VAL_CVT]]) : (!fir.ref<i8>, i1) -> i1
89+
! CHECK: %[[PRIVATE_NT_VAL:.*]] = fir.load %[[PRIVATE_NT_REF]] : !fir.ref<i32>
90+
! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[PRIVATE_NT_VAL]]) : (!fir.ref<i8>, i32) -> i1
91+
print*, i, cond, nt
92+
end do
93+
! CHECK: omp.yield
94+
! CHECK: omp.terminator
95+
!$OMP END PARALLEL DO
96+
end subroutine

flang/test/Lower/OpenMP/parallel-sections.f90

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ subroutine omp_parallel_sections_allocate(x, y)
4040
integer, intent(inout) :: x, y
4141
!FIRDialect: %[[allocator:.*]] = arith.constant 1 : i32
4242
!LLVMDialect: %[[allocator:.*]] = llvm.mlir.constant(1 : i32) : i32
43-
!OMPDialect: omp.parallel allocate(%[[allocator]] : i32 -> %{{.*}} : !fir.ref<i32>) {
44-
!OMPDialect: omp.sections {
43+
!OMPDialect: omp.parallel {
44+
!OMPDialect: omp.sections allocate(%[[allocator]] : i32 -> %{{.*}} : !fir.ref<i32>) {
4545
!$omp parallel sections allocate(omp_high_bw_mem_alloc: x)
4646
!OMPDialect: omp.section {
4747
!$omp section

0 commit comments

Comments (0)