llvm · tblah · Mar 20, 2024 · Feb 12, 2024 · Mar 18, 2024 · Mar 18, 2024
diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
@@ -434,6 +434,11 @@ std::pair<hlfir::Entity, mlir::Value>
 createTempFromMold(mlir::Location loc, fir::FirOpBuilder &builder,
                    hlfir::Entity mold);
 
+// Create a temporary with the same shape and length parameters as `mold` and return it as an entity; unlike createTempFromMold, no heap-allocation flag is returned. TODO: this does not support polymorphic molds
+hlfir::Entity createStackTempFromMold(mlir::Location loc,
+                                      fir::FirOpBuilder &builder,
+                                      hlfir::Entity mold);
+
 hlfir::EntityWithAttributes convertCharacterKind(mlir::Location loc,
                                                  fir::FirOpBuilder &builder,
                                                  hlfir::Entity scalarChar,

diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -13,6 +13,7 @@
 #include "ReductionProcessor.h"
 
 #include "flang/Lower/AbstractConverter.h"
+#include "flang/Optimizer/Builder/HLFIRTools.h"
 #include "flang/Optimizer/Builder/Todo.h"
 #include "flang/Optimizer/Dialect/FIRType.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
@@ -90,10 +91,42 @@ std::string ReductionProcessor::getReductionName(llvm::StringRef name,
   if (isByRef)
     byrefAddition = "_byref";
 
-  return (llvm::Twine(name) +
-          (ty.isIntOrIndex() ? llvm::Twine("_i_") : llvm::Twine("_f_")) +
-          llvm::Twine(ty.getIntOrFloatBitWidth()) + byrefAddition)
-      .str();
+  if (fir::isa_trivial(ty))
+    return (llvm::Twine(name) +
+            (ty.isIntOrIndex() ? llvm::Twine("_i_") : llvm::Twine("_f_")) +
+            llvm::Twine(ty.getIntOrFloatBitWidth()) + byrefAddition)
+        .str();
+
+  // creates a name like reduction_i_64_box_ux4x3
+  if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(ty)) {
+    // TODO: support for allocatable boxes:
+    // !fir.box<!fir.heap<!fir.array<...>>>
+    fir::SequenceType seqTy = fir::unwrapRefType(boxTy.getEleTy())
+                                  .dyn_cast_or_null<fir::SequenceType>();
+    if (!seqTy)
+      return {};
+
+    std::string prefix = getReductionName(
+        name, fir::unwrapSeqOrBoxedSeqType(ty), /*isByRef=*/false);
+    if (prefix.empty())
+      return {};
+    std::stringstream tyStr;
+    tyStr << prefix << "_box_";
+    bool first = true;
+    for (std::int64_t extent : seqTy.getShape()) {
+      if (first)
+        first = false;
+      else
+        tyStr << "x";
+      if (extent == seqTy.getUnknownExtent())
+        tyStr << 'u'; // I'm not sure that '?' is safe in symbol names
+      else
+        tyStr << extent;
+    }
+    return (tyStr.str() + byrefAddition).str();
+  }
+
+  return {};
 }
 
 std::string ReductionProcessor::getReductionName(
@@ -281,13 +314,158 @@ mlir::Value ReductionProcessor::createScalarCombiner(
   return reductionOp;
 }
 
+/// Create the reduction combiner region for reduction variables which are boxed arrays: `lhs` and `rhs` are references to boxes (loaded below),
+/// each lhs element is combined with the matching rhs element via createScalarCombiner and stored back into lhs, and the lhs box reference is yielded.
+static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
+                           ReductionProcessor::ReductionIdentifier redId,
+                           fir::BaseBoxType boxTy, mlir::Value lhs,
+                           mlir::Value rhs) {
+  fir::SequenceType seqTy =
+      mlir::dyn_cast_or_null<fir::SequenceType>(boxTy.getEleTy());
+  // TODO: support allocatable arrays: !fir.box<!fir.heap<!fir.array<...>>>
+  if (!seqTy || seqTy.hasUnknownShape())
+    TODO(loc, "Unsupported boxed type in OpenMP reduction");
+
+  // load fir.ref<fir.box<...>>; the original lhs reference is kept because it is the value yielded at the end
+  mlir::Value lhsAddr = lhs;
+  lhs = builder.create<fir::LoadOp>(loc, lhs);
+  rhs = builder.create<fir::LoadOp>(loc, rhs);
+
+  const unsigned rank = seqTy.getDimension();
+  llvm::SmallVector<mlir::Value> extents;
+  extents.reserve(rank);
+  llvm::SmallVector<mlir::Value> lbAndExtents;
+  lbAndExtents.reserve(rank * 2);
+
+  // Get box lower bounds and extents (read from the lhs box only; the resulting shapeshift is also used to address rhs):
+  mlir::Type idxTy = builder.getIndexType();
+  for (unsigned i = 0; i < rank; ++i) {
+    // TODO: ideally we want to hoist box reads out of the critical section.
+    // We could do this by having box dimensions in block arguments like
+    // OpenACC does
+    mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i);
+    auto dimInfo =
+        builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, lhs, dim);
+    extents.push_back(dimInfo.getExtent());
+    lbAndExtents.push_back(dimInfo.getLowerBound());
+    lbAndExtents.push_back(dimInfo.getExtent());
+  }
+
+  auto shapeShiftTy = fir::ShapeShiftType::get(builder.getContext(), rank);
+  auto shapeShift =
+      builder.create<fir::ShapeShiftOp>(loc, shapeShiftTy, lbAndExtents);
+
+  // Iterate over array elements, applying the equivalent scalar reduction:
+
+  // A hlfir::elemental here gets inlined with a temporary so create the
+  // loop nest directly.
+  // This function already controls all of the code in this region so we
+  // know this won't miss any opportunities for clever elemental inlining
+  hlfir::LoopNest nest =
+      hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true);
+  builder.setInsertionPointToStart(nest.innerLoop.getBody());
+  mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy());
+  auto lhsEleAddr = builder.create<fir::ArrayCoorOp>(
+      loc, refTy, lhs, shapeShift, /*slice=*/mlir::Value{},
+      nest.oneBasedIndices, /*typeparms=*/mlir::ValueRange{});
+  auto rhsEleAddr = builder.create<fir::ArrayCoorOp>(
+      loc, refTy, rhs, shapeShift, /*slice=*/mlir::Value{},
+      nest.oneBasedIndices, /*typeparms=*/mlir::ValueRange{});
+  auto lhsEle = builder.create<fir::LoadOp>(loc, lhsEleAddr);
+  auto rhsEle = builder.create<fir::LoadOp>(loc, rhsEleAddr);
+  mlir::Value scalarReduction = ReductionProcessor::createScalarCombiner(
+      builder, loc, redId, refTy, lhsEle, rhsEle);
+  builder.create<fir::StoreOp>(loc, scalarReduction, lhsEleAddr);
+
+  builder.setInsertionPointAfter(nest.outerLoop);
+  builder.create<mlir::omp::YieldOp>(loc, lhsAddr);
+}
+
+// Generate the combiner region for a reduction: trivial types are combined as scalars (result stored into lhs and lhs yielded when isByRef, otherwise the result is yielded by value); box types are delegated to genBoxCombiner; any other type hits a TODO.
+static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
+                        ReductionProcessor::ReductionIdentifier redId,
+                        mlir::Type ty, mlir::Value lhs, mlir::Value rhs,
+                        bool isByRef) {
+  ty = fir::unwrapRefType(ty);
+
+  if (fir::isa_trivial(ty)) {
+    mlir::Value lhsLoaded = builder.loadIfRef(loc, lhs);
+    mlir::Value rhsLoaded = builder.loadIfRef(loc, rhs);
+
+    mlir::Value result = ReductionProcessor::createScalarCombiner(
+        builder, loc, redId, ty, lhsLoaded, rhsLoaded);
+    if (isByRef) {
+      builder.create<fir::StoreOp>(loc, result, lhs);
+      builder.create<mlir::omp::YieldOp>(loc, lhs);
+    } else {
+      builder.create<mlir::omp::YieldOp>(loc, result);
+    }
+    return;
+  }
+  // all arrays should have been boxed, so a box type here is treated as an array reduction (isByRef is not consulted on this path)
+  if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(ty)) {
+    genBoxCombiner(builder, loc, redId, boxTy, lhs, rhs);
+    return;
+  }
+
+  TODO(loc, "OpenMP genCombiner for unsupported reduction variable type");
+}
+
+static mlir::Value
+createReductionInitRegion(fir::FirOpBuilder &builder, mlir::Location loc,
+                          const ReductionProcessor::ReductionIdentifier redId,
+                          mlir::Type type, bool isByRef) {
+  mlir::Type ty = fir::unwrapRefType(type);
+  mlir::Value initValue = ReductionProcessor::getReductionInitValue(
+      loc, fir::unwrapSeqOrBoxedSeqType(ty), redId, builder);
+
+  if (fir::isa_trivial(ty)) {
+    if (isByRef) {
+      mlir::Value alloca = builder.create<fir::AllocaOp>(loc, ty);
+      builder.createStoreWithConvert(loc, initValue, alloca);
+      return alloca;
+    }
+    // by val: the init value itself is returned as the region result
+    return initValue;
+  }
+
+  // all arrays are boxed, and boxes are only supported by reference (enforced by the assert below)
+  if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(ty)) {
+    assert(isByRef && "passing arrays by value is unsupported");
+    // TODO: support allocatable arrays: !fir.box<!fir.heap<!fir.array<...>>>
+    mlir::Type innerTy = fir::extractSequenceType(boxTy);
+    if (!mlir::isa<fir::SequenceType>(innerTy))
+      TODO(loc, "Unsupported boxed type for reduction");
+    // Create the private copy from the initial fir.box (argument 0 of the builder's current block is used as the mold):
+    hlfir::Entity source = hlfir::Entity{builder.getBlock()->getArgument(0)};
+
+    // TODO: if the whole reduction is nested inside of a loop, this alloca
+    // could lead to a stack overflow (the memory is only freed at the end of
+    // the stack frame). The reduction declare operation needs a deallocation
+    // region to undo the init region.
+    hlfir::Entity temp = createStackTempFromMold(loc, builder, source);
+
+    // Put the temporary inside of a box, assign the init value to it, then store the box into a fresh alloca and return that reference:
+    hlfir::Entity box = hlfir::genVariableBox(loc, builder, temp);
+    builder.create<hlfir::AssignOp>(loc, initValue, box);
+    mlir::Value boxAlloca = builder.create<fir::AllocaOp>(loc, ty);
+    builder.create<fir::StoreOp>(loc, box, boxAlloca);
+    return boxAlloca;
+  }
+
+  TODO(loc, "createReductionInitRegion for unsupported type");
+}
+
 mlir::omp::ReductionDeclareOp ReductionProcessor::createReductionDecl(
     fir::FirOpBuilder &builder, llvm::StringRef reductionOpName,
     const ReductionIdentifier redId, mlir::Type type, mlir::Location loc,
     bool isByRef) {
   mlir::OpBuilder::InsertionGuard guard(builder);
   mlir::ModuleOp module = builder.getModule();
 
+  if (reductionOpName.empty())
+    TODO(loc, "Reduction of some types is not supported");
+
   auto decl =
       module.lookupSymbol<mlir::omp::ReductionDeclareOp>(reductionOpName);
   if (decl)
@@ -304,14 +482,9 @@ mlir::omp::ReductionDeclareOp ReductionProcessor::createReductionDecl(
                       decl.getInitializerRegion().end(), {type}, {loc});
   builder.setInsertionPointToEnd(&decl.getInitializerRegion().back());
 
-  mlir::Value init = getReductionInitValue(loc, type, redId, builder);
-  if (isByRef) {
-    mlir::Value alloca = builder.create<fir::AllocaOp>(loc, valTy);
-    builder.createStoreWithConvert(loc, init, alloca);
-    builder.create<mlir::omp::YieldOp>(loc, alloca);
-  } else {
-    builder.create<mlir::omp::YieldOp>(loc, init);
-  }
+  mlir::Value init =
+      createReductionInitRegion(builder, loc, redId, type, isByRef);
+  builder.create<mlir::omp::YieldOp>(loc, init);
 
   builder.createBlock(&decl.getReductionRegion(),
                       decl.getReductionRegion().end(), {type, type},
@@ -320,19 +493,7 @@ mlir::omp::ReductionDeclareOp ReductionProcessor::createReductionDecl(
   builder.setInsertionPointToEnd(&decl.getReductionRegion().back());
   mlir::Value op1 = decl.getReductionRegion().front().getArgument(0);
   mlir::Value op2 = decl.getReductionRegion().front().getArgument(1);
-  mlir::Value outAddr = op1;
-
-  op1 = builder.loadIfRef(loc, op1);
-  op2 = builder.loadIfRef(loc, op2);
-
-  mlir::Value reductionOp =
-      createScalarCombiner(builder, loc, redId, type, op1, op2);
-  if (isByRef) {
-    builder.create<fir::StoreOp>(loc, reductionOp, outAddr);
-    builder.create<mlir::omp::YieldOp>(loc, outAddr);
-  } else {
-    builder.create<mlir::omp::YieldOp>(loc, reductionOp);
-  }
+  genCombiner(builder, loc, redId, type, op1, op2, isByRef);
 
   return decl;
 }
@@ -387,13 +548,33 @@ void ReductionProcessor::addReductionDecl(
 
   // initial pass to collect all reduction vars so we can figure out if this
   // should happen byref
+  fir::FirOpBuilder &builder = converter.getFirOpBuilder();
   for (const Object &object : objectList) {
     const Fortran::semantics::Symbol *symbol = object.id();
     if (reductionSymbols)
       reductionSymbols->push_back(symbol);
     mlir::Value symVal = converter.getSymbolAddress(*symbol);
-    if (auto declOp = symVal.getDefiningOp<hlfir::DeclareOp>())
+    auto redType = mlir::cast<fir::ReferenceType>(symVal.getType());
+
+    // all arrays must be boxed so that we have convenient access to all the
+    // information needed to iterate over the array
+    if (mlir::isa<fir::SequenceType>(redType.getEleTy())) {
+      hlfir::Entity entity{symVal};
+      entity = genVariableBox(currentLocation, builder, entity);
+      mlir::Value box = entity.getBase();
+
+      // Always pass the box by reference so that the OpenMP dialect
+      // verifiers don't need to know anything about fir.box
+      auto alloca =
+          builder.create<fir::AllocaOp>(currentLocation, box.getType());
+      builder.create<fir::StoreOp>(currentLocation, box, alloca);
+
+      symVal = alloca;
+      redType = mlir::cast<fir::ReferenceType>(symVal.getType());
+    } else if (auto declOp = symVal.getDefiningOp<hlfir::DeclareOp>()) {
       symVal = declOp.getBase();
+    }
+
     reductionVars.push_back(symVal);
   }
   const bool isByRef = doReductionByRef(reductionVars);
@@ -418,24 +599,17 @@ void ReductionProcessor::addReductionDecl(
       break;
     }
 
-    for (const Object &object : objectList) {
-      const Fortran::semantics::Symbol *symbol = object.id();
-      mlir::Value symVal = converter.getSymbolAddress(*symbol);
-      if (auto declOp = symVal.getDefiningOp<hlfir::DeclareOp>())
-        symVal = declOp.getBase();
-      auto redType = symVal.getType().cast<fir::ReferenceType>();
+    for (mlir::Value symVal : reductionVars) {
+      auto redType = mlir::cast<fir::ReferenceType>(symVal.getType());
       if (redType.getEleTy().isa<fir::LogicalType>())
         decl = createReductionDecl(
             firOpBuilder,
             getReductionName(intrinsicOp, firOpBuilder.getI1Type(), isByRef),
             redId, redType, currentLocation, isByRef);
-      else if (redType.getEleTy().isIntOrIndexOrFloat()) {
+      else
         decl = createReductionDecl(
             firOpBuilder, getReductionName(intrinsicOp, redType, isByRef),
             redId, redType, currentLocation, isByRef);
-      } else {
-        TODO(currentLocation, "Reduction of some types is not supported");
-      }
       reductionDeclSymbols.push_back(mlir::SymbolRefAttr::get(
           firOpBuilder.getContext(), decl.getSymName()));
     }
@@ -452,8 +626,8 @@ void ReductionProcessor::addReductionDecl(
         if (auto declOp = symVal.getDefiningOp<hlfir::DeclareOp>())
           symVal = declOp.getBase();
         auto redType = symVal.getType().cast<fir::ReferenceType>();
-        assert(redType.getEleTy().isIntOrIndexOrFloat() &&
-               "Unsupported reduction type");
+        if (!redType.getEleTy().isIntOrIndexOrFloat())
+          TODO(currentLocation, "User Defined Reduction on non-trivial type");
         decl = createReductionDecl(
             firOpBuilder,
             getReductionName(getRealName(*reductionIntrinsic).ToString(),

diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.h b/flang/lib/Lower/OpenMP/ReductionProcessor.h
@@ -108,7 +108,7 @@ class ReductionProcessor {
   /// Creates an OpenMP reduction declaration and inserts it into the provided
   /// symbol table. The declaration has a constant initializer with the neutral
   /// value `initValue`, and the reduction combiner carried over from `reduce`.
-  /// TODO: Generalize this for non-integer types, add atomic region.
+  /// TODO: add atomic region.
   static mlir::omp::ReductionDeclareOp
   createReductionDecl(fir::FirOpBuilder &builder,
                       llvm::StringRef reductionOpName,

diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -1111,6 +1111,35 @@ hlfir::createTempFromMold(mlir::Location loc, fir::FirOpBuilder &builder,
   return {hlfir::Entity{declareOp.getBase()}, isHeapAlloc};
 }
 
+hlfir::Entity hlfir::createStackTempFromMold(mlir::Location loc,
+                                             fir::FirOpBuilder &builder,
+                                             hlfir::Entity mold) {
+  llvm::SmallVector<mlir::Value> lenParams;
+  hlfir::genLengthParameters(loc, builder, mold, lenParams);
+  llvm::StringRef tmpName{".tmp"};
+  mlir::Value alloc;
+  mlir::Value shape{};
+  fir::FortranVariableFlagsAttr declAttrs;
+
+  if (mold.isPolymorphic()) {
+    // genAllocatableApplyMold does heap allocation, so polymorphic molds are rejected here with a TODO instead
+    TODO(loc, "createStackTempFromMold for polymorphic type");
+  } else if (mold.isArray()) {
+    mlir::Type sequenceType =
+        hlfir::getFortranElementOrSequenceType(mold.getType());
+    shape = hlfir::genShape(loc, builder, mold);
+    auto extents = hlfir::getIndexExtents(loc, builder, shape);
+    alloc =
+        builder.createTemporary(loc, sequenceType, tmpName, extents, lenParams);
+  } else {
+    alloc = builder.createTemporary(loc, mold.getFortranElementType(), tmpName,
+                                    /*shape=*/std::nullopt, lenParams);
+  }
+  auto declareOp = builder.create<hlfir::DeclareOp>(loc, alloc, tmpName, shape,
+                                                    lenParams, declAttrs);
+  return hlfir::Entity{declareOp.getBase()};
+}
+
 hlfir::EntityWithAttributes
 hlfir::convertCharacterKind(mlir::Location loc, fir::FirOpBuilder &builder,
                             hlfir::Entity scalarChar, int toKind) {

diff --git a/flang/test/Lower/OpenMP/Todo/reduction-arrays.f90 b/flang/test/Lower/OpenMP/Todo/reduction-arrays.f90