llvm
diff --git a/‎flang/docs/DoConcurrentConversionToOpenMP.md
Lines changed: 47 additions & 0 deletions b/‎flang/docs/DoConcurrentConversionToOpenMP.md
Lines changed: 47 additions & 0 deletions
diff --git a/‎flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp
Lines changed: 201 additions & 10 deletions b/‎flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp
Lines changed: 201 additions & 10 deletions
diff --git a/‎flang/test/Transforms/DoConcurrent/basic_host.f90
Lines changed: 5 additions & 9 deletions b/‎flang/test/Transforms/DoConcurrent/basic_host.f90
Lines changed: 5 additions & 9 deletions
@@ -126,6 +126,53 @@ see the "Data environment" section below.
 See `flang/test/Transforms/DoConcurrent/loop_nest_test.f90` for more examples
 of what is and is not detected as a perfect loop nest.
 
+### Single-range loops
+
+Given the following loop:
+```fortran
+  do concurrent(i=1:n)
+    a(i) = i * i
+  end do
+```
+
+#### Mapping to `host`
+
+Mapping this loop to the `host`, generates MLIR operations of the following
+structure:
+
+```
+%4 = fir.address_of(@_QFEa) ...
+%6:2 = hlfir.declare %4 ...
+
+omp.parallel {
+  // Allocate private copy for `i`.
+  // TODO Use delayed privatization.
+  %19 = fir.alloca i32 {bindc_name = "i"}
+  %20:2 = hlfir.declare %19 {uniq_name = "_QFEi"} ...
+
+  omp.wsloop {
+    omp.loop_nest (%arg0) : index = (%21) to (%22) inclusive step (%c1_2) {
+      %23 = fir.convert %arg0 : (index) -> i32
+      // Use the privatized version of `i`.
+      fir.store %23 to %20#1 : !fir.ref<i32>
+      ...
+
+      // Use "shared" SSA value of `a`.
+      %42 = hlfir.designate %6#0
+      hlfir.assign %35 to %42
+      ...
+      omp.yield
+    }
+    omp.terminator
+  }
+  omp.terminator
+}
+```
+
+#### Mapping to `device`
+
+<!-- TODO -->
+
 <!--
 More details about current status will be added along with relevant parts of the
 implementation in later upstreaming patches.
 
@@ -11,6 +11,7 @@
 #include "flang/Optimizer/OpenMP/Utils.h"
 #include "mlir/Analysis/SliceAnalysis.h"
 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include "mlir/IR/IRMapping.h"
 #include "mlir/Transforms/DialectConversion.h"
 #include "mlir/Transforms/RegionUtils.h"
 
@@ -24,7 +25,82 @@ namespace flangomp {
 
 namespace {
 namespace looputils {
-using LoopNest = llvm::SetVector<fir::DoLoopOp>;
+/// Stores info needed about the induction/iteration variable for each `do
+/// concurrent` in a loop nest.
+struct InductionVariableInfo {
+  /// the operation allocating memory for iteration variable,
+  mlir::Operation *iterVarMemDef;
+};
+
+using LoopNestToIndVarMap =
+    llvm::MapVector<fir::DoLoopOp, InductionVariableInfo>;
+
+/// Given an operation `op`, this returns true if one of `op`'s operands is
+/// "ultimately" the loop's induction variable. This helps in cases where the
+/// induction variable's use is "hidden" behind a convert/cast.
+///
+/// For example, give the following loop:
+/// ```
+///   fir.do_loop %ind_var = %lb to %ub step %s unordered {
+///     %ind_var_conv = fir.convert %ind_var : (index) -> i32
+///     fir.store %ind_var_conv to %i#1 : !fir.ref<i32>
+///     ...
+///   }
+/// ```
+///
+/// If \p op is the `fir.store` operation, then this function will return true
+/// since the IV is the "ultimate" operand to the `fir.store` op through the
+/// `%ind_var_conv` -> `%ind_var` conversion sequence.
+///
+/// For why this is useful, see its use in `findLoopIndVarMemDecl`.
+bool isIndVarUltimateOperand(mlir::Operation *op, fir::DoLoopOp doLoop) {
+  while (op != nullptr && op->getNumOperands() > 0) {
+    auto ivIt = llvm::find_if(op->getOperands(), [&](mlir::Value operand) {
+      return operand == doLoop.getInductionVar();
+    });
+
+    if (ivIt != op->getOperands().end())
+      return true;
+
+    op = op->getOperand(0).getDefiningOp();
+  }
+
+  return false;
+}
+
+/// For the \p doLoop parameter, find the operation that declares its iteration
+/// variable or allocates memory for it.
+///
+/// For example, give the following loop:
+/// ```
+///   ...
+///   %i:2 = hlfir.declare %0 {uniq_name = "_QFEi"} : ...
+///   ...
+///   fir.do_loop %ind_var = %lb to %ub step %s unordered {
+///     %ind_var_conv = fir.convert %ind_var : (index) -> i32
+///     fir.store %ind_var_conv to %i#1 : !fir.ref<i32>
+///     ...
+///   }
+/// ```
+///
+/// This function returns the `hlfir.declare` op for `%i`.
+mlir::Operation *findLoopIterationVarMemDecl(fir::DoLoopOp doLoop) {
+  mlir::Value result = nullptr;
+  mlir::visitUsedValuesDefinedAbove(
+      doLoop.getRegion(), [&](mlir::OpOperand *operand) {
+        if (result)
+          return;
+
+        if (isIndVarUltimateOperand(operand->getOwner(), doLoop)) {
+          assert(result == nullptr &&
+                 "loop can have only one induction variable");
+          result = operand->get();
+        }
+      });
+
+  assert(result != nullptr && result.getDefiningOp() != nullptr);
+  return result.getDefiningOp();
+}
 
 /// Loop \p innerLoop is considered perfectly-nested inside \p outerLoop iff
 /// there are no operations in \p outerloop's body other than:
@@ -116,11 +192,14 @@ bool isPerfectlyNested(fir::DoLoopOp outerLoop, fir::DoLoopOp innerLoop) {
 /// fails to recognize a certain nested loop as part of the nest it just returns
 /// the parent loops it discovered before.
 mlir::LogicalResult collectLoopNest(fir::DoLoopOp currentLoop,
-                                    LoopNest &loopNest) {
+                                    LoopNestToIndVarMap &loopNest) {
   assert(currentLoop.getUnordered());
 
   while (true) {
-    loopNest.insert(currentLoop);
+    loopNest.insert(
+        {currentLoop,
+         InductionVariableInfo{findLoopIterationVarMemDecl(currentLoop)}});
+
     llvm::SmallVector<fir::DoLoopOp> unorderedLoops;
 
     for (auto nestedLoop : currentLoop.getRegion().getOps<fir::DoLoopOp>())
@@ -152,26 +231,136 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
 public:
   using mlir::OpConversionPattern<fir::DoLoopOp>::OpConversionPattern;
 
-  DoConcurrentConversion(mlir::MLIRContext *context, bool mapToDevice)
-      : OpConversionPattern(context), mapToDevice(mapToDevice) {}
+  DoConcurrentConversion(mlir::MLIRContext *context, bool mapToDevice,
+                         llvm::DenseSet<fir::DoLoopOp> &concurrentLoopsToSkip)
+      : OpConversionPattern(context), mapToDevice(mapToDevice),
+        concurrentLoopsToSkip(concurrentLoopsToSkip) {}
 
   mlir::LogicalResult
   matchAndRewrite(fir::DoLoopOp doLoop, OpAdaptor adaptor,
                   mlir::ConversionPatternRewriter &rewriter) const override {
-    looputils::LoopNest loopNest;
+    looputils::LoopNestToIndVarMap loopNest;
     bool hasRemainingNestedLoops =
         failed(looputils::collectLoopNest(doLoop, loopNest));
     if (hasRemainingNestedLoops)
       mlir::emitWarning(doLoop.getLoc(),
                         "Some `do concurent` loops are not perfectly-nested. "
                         "These will be serialized.");
 
-    // TODO This will be filled in with the next PRs that upstreams the rest of
-    // the ROCm implementaion.
+    mlir::IRMapping mapper;
+    genParallelOp(doLoop.getLoc(), rewriter, loopNest, mapper);
+    mlir::omp::LoopNestOperands loopNestClauseOps;
+    genLoopNestClauseOps(doLoop.getLoc(), rewriter, loopNest, mapper,
+                         loopNestClauseOps);
+
+    mlir::omp::LoopNestOp ompLoopNest =
+        genWsLoopOp(rewriter, loopNest.back().first, mapper, loopNestClauseOps,
+                    /*isComposite=*/mapToDevice);
+
+    rewriter.eraseOp(doLoop);
+
+    // Mark `unordered` loops that are not perfectly nested to be skipped from
+    // the legality check of the `ConversionTarget` since we are not interested
+    // in mapping them to OpenMP.
+    ompLoopNest->walk([&](fir::DoLoopOp doLoop) {
+      if (doLoop.getUnordered()) {
+        concurrentLoopsToSkip.insert(doLoop);
+      }
+    });
+
     return mlir::success();
   }
 
+private:
+  mlir::omp::ParallelOp genParallelOp(mlir::Location loc,
+                                      mlir::ConversionPatternRewriter &rewriter,
+                                      looputils::LoopNestToIndVarMap &loopNest,
+                                      mlir::IRMapping &mapper) const {
+    auto parallelOp = rewriter.create<mlir::omp::ParallelOp>(loc);
+    rewriter.createBlock(&parallelOp.getRegion());
+    rewriter.setInsertionPoint(rewriter.create<mlir::omp::TerminatorOp>(loc));
+
+    genLoopNestIndVarAllocs(rewriter, loopNest, mapper);
+    return parallelOp;
+  }
+
+  void genLoopNestIndVarAllocs(mlir::ConversionPatternRewriter &rewriter,
+                               looputils::LoopNestToIndVarMap &loopNest,
+                               mlir::IRMapping &mapper) const {
+
+    for (auto &[_, indVarInfo] : loopNest)
+      genInductionVariableAlloc(rewriter, indVarInfo.iterVarMemDef, mapper);
+  }
+
+  mlir::Operation *
+  genInductionVariableAlloc(mlir::ConversionPatternRewriter &rewriter,
+                            mlir::Operation *indVarMemDef,
+                            mlir::IRMapping &mapper) const {
+    assert(
+        indVarMemDef != nullptr &&
+        "Induction variable memdef is expected to have a defining operation.");
+
+    llvm::SmallSetVector<mlir::Operation *, 2> indVarDeclareAndAlloc;
+    for (auto operand : indVarMemDef->getOperands())
+      indVarDeclareAndAlloc.insert(operand.getDefiningOp());
+    indVarDeclareAndAlloc.insert(indVarMemDef);
+
+    mlir::Operation *result;
+    for (mlir::Operation *opToClone : indVarDeclareAndAlloc)
+      result = rewriter.clone(*opToClone, mapper);
+
+    return result;
+  }
+
+  void genLoopNestClauseOps(
+      mlir::Location loc, mlir::ConversionPatternRewriter &rewriter,
+      looputils::LoopNestToIndVarMap &loopNest, mlir::IRMapping &mapper,
+      mlir::omp::LoopNestOperands &loopNestClauseOps) const {
+    assert(loopNestClauseOps.loopLowerBounds.empty() &&
+           "Loop nest bounds were already emitted!");
+
+    auto populateBounds = [&](mlir::Value var,
+                              llvm::SmallVectorImpl<mlir::Value> &bounds) {
+      bounds.push_back(var.getDefiningOp()->getResult(0));
+    };
+
+    for (auto &[doLoop, _] : loopNest) {
+      populateBounds(doLoop.getLowerBound(), loopNestClauseOps.loopLowerBounds);
+      populateBounds(doLoop.getUpperBound(), loopNestClauseOps.loopUpperBounds);
+      populateBounds(doLoop.getStep(), loopNestClauseOps.loopSteps);
+    }
+
+    loopNestClauseOps.loopInclusive = rewriter.getUnitAttr();
+  }
+
+  mlir::omp::LoopNestOp
+  genWsLoopOp(mlir::ConversionPatternRewriter &rewriter, fir::DoLoopOp doLoop,
+              mlir::IRMapping &mapper,
+              const mlir::omp::LoopNestOperands &clauseOps,
+              bool isComposite) const {
+
+    auto wsloopOp = rewriter.create<mlir::omp::WsloopOp>(doLoop.getLoc());
+    wsloopOp.setComposite(isComposite);
+    rewriter.createBlock(&wsloopOp.getRegion());
+
+    auto loopNestOp =
+        rewriter.create<mlir::omp::LoopNestOp>(doLoop.getLoc(), clauseOps);
+
+    // Clone the loop's body inside the loop nest construct using the
+    // mapped values.
+    rewriter.cloneRegionBefore(doLoop.getRegion(), loopNestOp.getRegion(),
+                               loopNestOp.getRegion().begin(), mapper);
+
+    mlir::Operation *terminator = loopNestOp.getRegion().back().getTerminator();
+    rewriter.setInsertionPointToEnd(&loopNestOp.getRegion().back());
+    rewriter.create<mlir::omp::YieldOp>(terminator->getLoc());
+    rewriter.eraseOp(terminator);
+
+    return loopNestOp;
+  }
+
   bool mapToDevice;
+  llvm::DenseSet<fir::DoLoopOp> &concurrentLoopsToSkip;
 };
 
 class DoConcurrentConversionPass
@@ -200,16 +389,18 @@ class DoConcurrentConversionPass
       return;
     }
 
+    llvm::DenseSet<fir::DoLoopOp> concurrentLoopsToSkip;
     mlir::RewritePatternSet patterns(context);
     patterns.insert<DoConcurrentConversion>(
-        context, mapTo == flangomp::DoConcurrentMappingKind::DCMK_Device);
+        context, mapTo == flangomp::DoConcurrentMappingKind::DCMK_Device,
+        concurrentLoopsToSkip);
     mlir::ConversionTarget target(*context);
     target.addDynamicallyLegalOp<fir::DoLoopOp>([&](fir::DoLoopOp op) {
       // The goal is to handle constructs that eventually get lowered to
       // `fir.do_loop` with the `unordered` attribute (e.g. array expressions).
       // Currently, this is only enabled for the `do concurrent` construct since
       // the pass runs early in the pipeline.
-      return !op.getUnordered();
+      return !op.getUnordered() || concurrentLoopsToSkip.contains(op);
     });
     target.markUnknownOpDynamicallyLegal(
         [](mlir::Operation *) { return true; });
 
@@ -1,7 +1,3 @@
-! Mark as xfail for now until we upstream the relevant part. This is just for
-! demo purposes at this point. Upstreaming this is the next step.
-! XFAIL: *
-
 ! Tests mapping of a basic `do concurrent` loop to `!$omp parallel do`.
 
 ! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=host %s -o - \
@@ -19,17 +15,17 @@ program do_concurrent_basic
 
     ! CHECK-NOT: fir.do_loop
 
-    ! CHECK: omp.parallel {
-
-    ! CHECK-NEXT: %[[ITER_VAR:.*]] = fir.alloca i32 {bindc_name = "i"}
-    ! CHECK-NEXT: %[[BINDING:.*]]:2 = hlfir.declare %[[ITER_VAR]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-
     ! CHECK: %[[C1:.*]] = arith.constant 1 : i32
     ! CHECK: %[[LB:.*]] = fir.convert %[[C1]] : (i32) -> index
     ! CHECK: %[[C10:.*]] = arith.constant 10 : i32
     ! CHECK: %[[UB:.*]] = fir.convert %[[C10]] : (i32) -> index
     ! CHECK: %[[STEP:.*]] = arith.constant 1 : index
 
+    ! CHECK: omp.parallel {
+
+    ! CHECK-NEXT: %[[ITER_VAR:.*]] = fir.alloca i32 {bindc_name = "i"}
+    ! CHECK-NEXT: %[[BINDING:.*]]:2 = hlfir.declare %[[ITER_VAR]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
     ! CHECK: omp.wsloop {
     ! CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]]) : index = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
     ! CHECK-NEXT: %[[IV_IDX:.*]] = fir.convert %[[ARG0]] : (index) -> i32