Skip to content

[Flang][MLIR][OpenMP] - Add support for firstprivate when translating omp.target ops from MLIR to LLVMIR #131213

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions flang/lib/Lower/OpenMP/OpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1694,13 +1694,14 @@ static void genTargetClauses(
cp.processNowait(clauseOps);
cp.processThreadLimit(stmtCtx, clauseOps);

cp.processTODO<clause::Allocate, clause::Defaultmap, clause::Firstprivate,
clause::InReduction, clause::UsesAllocators>(
loc, llvm::omp::Directive::OMPD_target);
cp.processTODO<clause::Allocate, clause::Defaultmap, clause::InReduction,
clause::UsesAllocators>(loc,
llvm::omp::Directive::OMPD_target);

// `target private(..)` is only supported in delayed privatization mode.
if (!enableDelayedPrivatizationStaging)
cp.processTODO<clause::Private>(loc, llvm::omp::Directive::OMPD_target);
cp.processTODO<clause::Firstprivate, clause::Private>(
loc, llvm::omp::Directive::OMPD_target);
}

static void genTargetDataClauses(
Expand Down
70 changes: 64 additions & 6 deletions flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,14 @@
#include <type_traits>

#define DEBUG_TYPE "omp-maps-for-privatized-symbols"

#define PDBGS() (llvm::dbgs() << "[" << DEBUG_TYPE << "]: ")
namespace flangomp {
#define GEN_PASS_DEF_MAPSFORPRIVATIZEDSYMBOLSPASS
#include "flang/Optimizer/OpenMP/Passes.h.inc"
} // namespace flangomp

using namespace mlir;

namespace {
class MapsForPrivatizedSymbolsPass
: public flangomp::impl::MapsForPrivatizedSymbolsPassBase<
Expand All @@ -60,14 +62,14 @@ class MapsForPrivatizedSymbolsPass
// We want the first result of the hlfir.declare op because our goal
// is to map the descriptor (fir.box or fir.boxchar) and the first
// result for hlfir.declare is the descriptor if a the symbol being
// decalred needs a descriptor.
// declared needs a descriptor.
// Some types are boxed immediately before privatization. These have other
// operations in between the privatization and the declaration. It is safe
// to use var directly here because they will be boxed anyway.
if (auto declOp = llvm::dyn_cast_if_present<hlfir::DeclareOp>(definingOp))
varPtr = declOp.getBase();

// If we do not have a reference to descritor, but the descriptor itself
// If we do not have a reference to a descriptor but the descriptor itself,
// then we need to store that on the stack so that we can map the
// address of the descriptor.
if (mlir::isa<fir::BaseBoxType>(varPtr.getType()) ||
Expand All @@ -81,6 +83,15 @@ class MapsForPrivatizedSymbolsPass
builder.create<fir::StoreOp>(loc, varPtr, alloca);
varPtr = alloca;
}
assert(mlir::isa<omp::PointerLikeType>(varPtr.getType()) &&
"Dealing with a varPtr that is not a PointerLikeType");

// Figure out the bounds because knowing the bounds will help the subsequent
// MapInfoFinalizationPass map the underlying data of the descriptor.
llvm::SmallVector<mlir::Value> boundsOps;
if (needsBoundsOps(varPtr))
genBoundsOps(builder, varPtr, boundsOps);

return builder.create<omp::MapInfoOp>(
loc, varPtr.getType(), varPtr,
TypeAttr::get(llvm::cast<omp::PointerLikeType>(varPtr.getType())
Expand All @@ -92,7 +103,7 @@ class MapsForPrivatizedSymbolsPass
/*varPtrPtr=*/Value{},
/*members=*/SmallVector<Value>{},
/*member_index=*/mlir::ArrayAttr{},
/*bounds=*/ValueRange{},
/*bounds=*/boundsOps,
/*mapperId=*/mlir::FlatSymbolRefAttr(), /*name=*/StringAttr(),
builder.getBoolAttr(false));
}
Expand Down Expand Up @@ -143,8 +154,8 @@ class MapsForPrivatizedSymbolsPass
omp::MapInfoOp mapInfoOp = createMapInfo(loc, privVar, builder);
mapInfoOps.push_back(mapInfoOp);

LLVM_DEBUG(llvm::dbgs() << "MapsForPrivatizedSymbolsPass created ->\n");
LLVM_DEBUG(mapInfoOp.dump());
LLVM_DEBUG(PDBGS() << "MapsForPrivatizedSymbolsPass created ->\n"
<< mapInfoOp << "\n");
}
if (!mapInfoOps.empty()) {
mapInfoOpsForTarget.insert({targetOp.getOperation(), mapInfoOps});
Expand All @@ -158,5 +169,52 @@ class MapsForPrivatizedSymbolsPass
}
}
}
// As the name suggests, this function examines var to determine if
// it has dynamic size. If true, this pass'll have to extract these
// bounds from descriptor of var and add the bounds to the resultant
// MapInfoOp.
bool needsBoundsOps(mlir::Value var) {
assert(mlir::isa<omp::PointerLikeType>(var.getType()) &&
"needsBoundsOps can deal only with pointer types");
mlir::Type t = fir::unwrapRefType(var.getType());
// t could be a box, so look inside the box
auto innerType = fir::dyn_cast_ptrOrBoxEleTy(t);
if (innerType)
return fir::hasDynamicSize(innerType);
return fir::hasDynamicSize(t);
}

// TODO: Remove this in favor of fir::factory::genImplicitBoundsOps
// in a subsequent PR.
void genBoundsOps(fir::FirOpBuilder &builder, mlir::Value var,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Apologies for not catching this before, but can we use fir::factory::genImplicitBoundsOps instead of introducing a new util? If so, we can do that in a later PR (we can just add a todo here).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, make sense. This other boxchar problem I am working on has a couple of places where bounds need to be added. I was planning to roll in a consolidation of this functionality with that PR. So, how about I do with then. I'll update this PR with a todo in the morning before merging.

llvm::SmallVector<mlir::Value> &boundsOps) {
if (!fir::isBoxAddress(var.getType()))
return;

unsigned int rank = 0;
rank = fir::getBoxRank(fir::unwrapRefType(var.getType()));
mlir::Location loc = var.getLoc();
mlir::Type idxTy = builder.getIndexType();
mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);
mlir::Value zero = builder.createIntegerConstant(loc, idxTy, 0);
mlir::Type boundTy = builder.getType<omp::MapBoundsType>();
mlir::Value box = builder.create<fir::LoadOp>(loc, var);
for (unsigned int i = 0; i < rank; ++i) {
mlir::Value dimNo = builder.createIntegerConstant(loc, idxTy, i);
auto dimInfo =
builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, box, dimNo);
mlir::Value lb = dimInfo.getLowerBound();
mlir::Value extent = dimInfo.getExtent();
mlir::Value byteStride = dimInfo.getByteStride();
mlir::Value ub = builder.create<mlir::arith::SubIOp>(loc, extent, one);

mlir::Value boundsOp = builder.create<omp::MapBoundsOp>(
loc, boundTy, /*lower_bound=*/zero,
/*upper_bound=*/ub, /*extent=*/extent, /*stride=*/byteStride,
/*stride_in_bytes = */ true, /*start_idx=*/lb);
LLVM_DEBUG(PDBGS() << "Created BoundsOp " << boundsOp << "\n");
boundsOps.push_back(boundsOp);
}
}
};
} // namespace
8 changes: 7 additions & 1 deletion flang/lib/Optimizer/Passes/Pipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -296,8 +296,14 @@ void createOpenMPFIRPassPipeline(mlir::PassManager &pm,
pm.addPass(flangomp::createDoConcurrentConversionPass(
opts.doConcurrentMappingKind == DoConcurrentMappingKind::DCMK_Device));

pm.addPass(flangomp::createMapInfoFinalizationPass());
// The MapsForPrivatizedSymbols pass needs to run before
// MapInfoFinalizationPass because the former creates new
// MapInfoOp instances, typically for descriptors.
// MapInfoFinalizationPass adds MapInfoOp instances for the descriptors
// underlying data which is necessary to access the data on the offload
// target device.
pm.addPass(flangomp::createMapsForPrivatizedSymbolsPass());
pm.addPass(flangomp::createMapInfoFinalizationPass());
pm.addPass(flangomp::createMarkDeclareTargetPass());
pm.addPass(flangomp::createGenericLoopConversionPass());
if (opts.isTargetDevice)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,9 @@ end subroutine target_allocatable
! CHECK: %[[VAR_ALLOC:.*]] = fir.alloca [[DESC_TYPE]]
! CHECK-SAME: {bindc_name = "alloc_var", {{.*}}}
! CHECK: %[[VAR_DECL:.*]]:2 = hlfir.declare %[[VAR_ALLOC]]
! CHECK: %[[BASE_ADDR:.*]] = fir.box_offset %[[VAR_DECL]]#0 base_addr : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> [[MEMBER_TYPE:.*]]
! CHECK: %[[MEMBER:.*]] = omp.map.info var_ptr(%[[VAR_DECL]]#0 : [[TYPE]], i32) map_clauses(to) capture(ByRef) var_ptr_ptr(%[[BASE_ADDR]] : [[MEMBER_TYPE:.*]]) -> {{.*}}
! CHECK: %[[MAP_VAR:.*]] = omp.map.info var_ptr(%[[VAR_DECL]]#0 : [[TYPE]], [[DESC_TYPE]]) map_clauses(to) capture(ByRef) members(%[[MEMBER]] : [0] : !fir.llvm_ptr<!fir.ref<i32>>) -> !fir.ref<!fir.box<!fir.heap<i32>>>

! CHECK: %[[MAP_VAR:.*]] = omp.map.info var_ptr(%[[VAR_DECL]]#0 : [[TYPE]], [[DESC_TYPE]])
! CHECK-SAME: map_clauses(to) capture(ByRef) -> [[TYPE]]
! CHECK: omp.target map_entries(%[[MAP_VAR]] -> %arg0 : [[TYPE]]) private(
! CHECK-SAME: @[[VAR_PRIVATIZER_SYM]] %[[VAR_DECL]]#0 -> %{{.*}} : [[TYPE]]) {
! CHECK: omp.target map_entries(%[[MAP_VAR]] -> %arg0, %[[MEMBER]] -> %arg1 : [[TYPE]], [[MEMBER_TYPE]]) private(
! CHECK-SAME: @[[VAR_PRIVATIZER_SYM]] %[[VAR_DECL]]#0 -> %{{.*}} [map_idx=0] : [[TYPE]]) {
Original file line number Diff line number Diff line change
Expand Up @@ -140,17 +140,19 @@ end subroutine target_allocatable
! CHECK: %[[REAL_ARR_DECL:.*]]:2 = hlfir.declare %[[REAL_ARR_ALLOC]]({{.*}})
! CHECK: fir.store %[[REAL_ARR_DECL]]#0 to %[[REAL_ARR_DESC_ALLOCA]] : !fir.ref<!fir.box<!fir.array<?xf32>>>
! CHECK: %[[MAPPED_MI0:.*]] = omp.map.info var_ptr(%[[MAPPED_DECL]]#1 : !fir.ref<i32>, i32) {{.*}}
! CHECK: %[[ALLOC_VAR_MAP:.*]] = omp.map.info var_ptr(%[[ALLOC_VAR_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>>, !fir.box<!fir.heap<i32>>)
! CHECK: %[[REAL_ARR_DESC_MAP:.*]] = omp.map.info var_ptr(%[[REAL_ARR_DESC_ALLOCA]] : !fir.ref<!fir.box<!fir.array<?xf32>>>, !fir.box<!fir.array<?xf32>>)
! CHECK: %[[ALLOC_VAR_MEMBER:.*]] = omp.map.info var_ptr(%[[ALLOC_VAR_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>>, i32)
! CHECK: %[[ALLOC_VAR_MAP:.*]] = omp.map.info var_ptr(%[[ALLOC_VAR_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>>, !fir.box<!fir.heap<i32>>) {{.*}} members(%[[ALLOC_VAR_MEMBER]] :
! CHECK: %[[REAL_ARR_MEMBER:.*]] = omp.map.info var_ptr(%[[REAL_ARR_DESC_ALLOCA]] : !fir.ref<!fir.box<!fir.array<?xf32>>>, f32)
! CHECK: %[[REAL_ARR_DESC_MAP:.*]] = omp.map.info var_ptr(%[[REAL_ARR_DESC_ALLOCA]] : !fir.ref<!fir.box<!fir.array<?xf32>>>, !fir.box<!fir.array<?xf32>>) {{.*}} members(%[[REAL_ARR_MEMBER]] :
! CHECK: fir.store %[[CHAR_VAR_DECL]]#0 to %[[CHAR_VAR_DESC_ALLOCA]] : !fir.ref<!fir.boxchar<1>>
! CHECK: %[[CHAR_VAR_DESC_MAP:.*]] = omp.map.info var_ptr(%[[CHAR_VAR_DESC_ALLOCA]] : !fir.ref<!fir.boxchar<1>>, !fir.boxchar<1>)
! CHECK: omp.target
! CHECK-SAME: map_entries(
! CHECK-SAME: %[[MAPPED_MI0]] -> %[[MAPPED_ARG0:[^,]+]],
! CHECK-SAME: %[[ALLOC_VAR_MAP]] -> %[[MAPPED_ARG1:[^,]+]]
! CHECK-SAME: %[[REAL_ARR_DESC_MAP]] -> %[[MAPPED_ARG2:[^,]+]]
! CHECK-SAME: %[[CHAR_VAR_DESC_MAP]] -> %[[MAPPED_ARG3:.[^,]+]] :
! CHECK-SAME: !fir.ref<i32>, !fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<!fir.box<!fir.array<?xf32>>>, !fir.ref<!fir.boxchar<1>>)
! CHECK-SAME: %[[CHAR_VAR_DESC_MAP]] -> %[[MAPPED_ARG3:.[^,]+]]
! CHECK-SAME: !fir.ref<i32>, !fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<!fir.box<!fir.array<?xf32>>>, !fir.ref<!fir.boxchar<1>>, !fir.llvm_ptr<!fir.ref<i32>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>
! CHECK-SAME: private(
! CHECK-SAME: @[[ALLOC_PRIVATIZER_SYM]] %{{[^[:space:]]+}}#0 -> %[[ALLOC_ARG:[^,]+]] [map_idx=1],
! CHECK-SAME: @[[REAL_PRIVATIZER_SYM]] %{{[^[:space:]]+}}#0 -> %[[REAL_ARG:[^,]+]],
Expand Down
2 changes: 1 addition & 1 deletion flang/test/Lower/OpenMP/Todo/firstprivate-target.f90
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

integer :: i
! CHECK: not yet implemented: Unhandled clause FIRSTPRIVATE in TARGET construct
!$omp target firstprivate(i)
!$omp target firstprivate(i) nowait
!$omp end target

end program
15 changes: 8 additions & 7 deletions mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def PrivateClauseOp : OpenMP_Op<"private", [IsolatedFromAbove, RecipeInterface]>

There are no restrictions on the body except for:
- The `dealloc` regions has a single argument.
- The `init & `copy` regions have 2 arguments.
- The `init` & `copy` regions have 2 arguments.
- All three regions are terminated by `omp.yield` ops.
The above restrictions and other obvious restrictions (e.g. verifying the
type of yielded values) are verified by the custom op verifier. The actual
Expand All @@ -90,15 +90,15 @@ def PrivateClauseOp : OpenMP_Op<"private", [IsolatedFromAbove, RecipeInterface]>
Instances of this op would then be used by ops that model directives that
accept data-sharing attribute clauses.

The $sym_name attribute provides a symbol by which the privatizer op can be
The `sym_name` attribute provides a symbol by which the privatizer op can be
referenced by other dialect ops.

The $type attribute is the type of the value being privatized. This type
The `type` attribute is the type of the value being privatized. This type
will be implicitly allocated in MLIR->LLVMIR conversion and passed as the
second argument to the init region. Therefore the type of arguments to
the regions should be a type which represents a pointer to $type.
the regions should be a type which represents a pointer to `type`.

The $data_sharing_type attribute specifies whether privatizer corresponds
The `data_sharing_type` attribute specifies whether privatizer corresponds
to a `private` or a `firstprivate` clause.
}];

Expand Down Expand Up @@ -161,9 +161,10 @@ def PrivateClauseOp : OpenMP_Op<"private", [IsolatedFromAbove, RecipeInterface]>
/// needsMap returns true if the value being privatized should additionally
/// be mapped to the target region using a MapInfoOp. This is most common
/// when an allocatable is privatized. In such cases, the descriptor is used
/// in privatization and needs to be mapped on to the device.
/// in privatization and needs to be mapped on to the device. The use of
/// firstprivate also creates the need to map the host variable to the device.
bool needsMap() {
return initReadsFromMold();
return readsFromMold();
}

/// Get the type for arguments to nested regions. This should
Expand Down
38 changes: 18 additions & 20 deletions mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,19 +208,9 @@ static LogicalResult checkImplementationStatus(Operation &op) {
};
auto checkPrivate = [&todo](auto op, LogicalResult &result) {
if constexpr (std::is_same_v<std::decay_t<decltype(op)>, omp::TargetOp>) {
// Privatization clauses are supported, except on some situations, so we
// need to check here whether any of these unsupported cases are being
// translated.
if (std::optional<ArrayAttr> privateSyms = op.getPrivateSyms()) {
for (Attribute privatizerNameAttr : *privateSyms) {
omp::PrivateClauseOp privatizer = findPrivatizer(
op.getOperation(), cast<SymbolRefAttr>(privatizerNameAttr));

if (privatizer.getDataSharingType() ==
omp::DataSharingClauseType::FirstPrivate)
result = todo("firstprivate");
}
}
// Privatization is supported only for included target tasks.
if (!op.getPrivateVars().empty() && op.getNowait())
result = todo("privatization for deferred target tasks");
} else {
if (!op.getPrivateVars().empty() || op.getPrivateSyms())
result = todo("privatization");
Expand Down Expand Up @@ -1465,6 +1455,7 @@ allocatePrivateVars(llvm::IRBuilderBase &builder,
assert(allocaTerminator->getNumSuccessors() == 1 &&
"This is an unconditional branch created by splitBB");

llvm::DataLayout dataLayout = builder.GetInsertBlock()->getDataLayout();
llvm::BasicBlock *afterAllocas = allocaTerminator->getSuccessor(0);

unsigned int allocaAS =
Expand All @@ -1491,12 +1482,12 @@ allocatePrivateVars(llvm::IRBuilderBase &builder,
return afterAllocas;
}

static LogicalResult
copyFirstPrivateVars(llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation,
SmallVectorImpl<mlir::Value> &mlirPrivateVars,
ArrayRef<llvm::Value *> llvmPrivateVars,
SmallVectorImpl<omp::PrivateClauseOp> &privateDecls) {
static LogicalResult copyFirstPrivateVars(
llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation,
SmallVectorImpl<mlir::Value> &mlirPrivateVars,
ArrayRef<llvm::Value *> llvmPrivateVars,
SmallVectorImpl<omp::PrivateClauseOp> &privateDecls,
llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
// Apply copy region for firstprivate.
bool needsFirstprivate =
llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
Expand All @@ -1520,7 +1511,8 @@ copyFirstPrivateVars(llvm::IRBuilderBase &builder,
Region &copyRegion = decl.getCopyRegion();

// map copyRegion rhs arg
llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirVar);
llvm::Value *nonPrivateVar = findAssociatedValue(
mlirVar, builder, moduleTranslation, mappedPrivateVars);
assert(nonPrivateVar);
moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);

Expand Down Expand Up @@ -4860,6 +4852,12 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
.failed())
return llvm::make_error<PreviouslyReportedError>();

if (failed(copyFirstPrivateVars(
builder, moduleTranslation, privateVarsInfo.mlirVars,
privateVarsInfo.llvmVars, privateVarsInfo.privatizers,
&mappedPrivateVars)))
return llvm::make_error<PreviouslyReportedError>();

SmallVector<Region *> privateCleanupRegions;
llvm::transform(privateVarsInfo.privatizers,
std::back_inserter(privateCleanupRegions),
Expand Down
Loading