Skip to content

[mlir][OpenMP][flang] make private variable allocation implicit in omp.private #124019

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Jan 31, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
189 changes: 126 additions & 63 deletions flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,17 @@

#include "DataSharingProcessor.h"

#include "PrivateReductionUtils.h"
#include "Utils.h"
#include "flang/Lower/ConvertVariable.h"
#include "flang/Lower/PFTBuilder.h"
#include "flang/Lower/SymbolMap.h"
#include "flang/Optimizer/Builder/BoxValue.h"
#include "flang/Optimizer/Builder/HLFIRTools.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Semantics/attr.h"
#include "flang/Semantics/tools.h"

namespace Fortran {
Expand Down Expand Up @@ -85,35 +89,65 @@ void DataSharingProcessor::insertDeallocs() {
converter.createHostAssociateVarCloneDealloc(*sym);
continue;
}

lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(*sym);
assert(hsb && "Host symbol box not found");
mlir::Type symType = hsb.getAddr().getType();
mlir::Location symLoc = hsb.getAddr().getLoc();
fir::ExtendedValue symExV = converter.getSymbolExtendedValue(*sym);
mlir::omp::PrivateClauseOp privatizer = symToPrivatizer.at(sym);

lower::SymMapScope scope(symTable);
mlir::OpBuilder::InsertionGuard guard(firOpBuilder);

mlir::Region &deallocRegion = privatizer.getDeallocRegion();
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
mlir::Block *deallocEntryBlock = firOpBuilder.createBlock(
&deallocRegion, /*insertPt=*/{}, symType, symLoc);

firOpBuilder.setInsertionPointToEnd(deallocEntryBlock);
symTable.addSymbol(*sym,
fir::substBase(symExV, deallocRegion.getArgument(0)));

converter.createHostAssociateVarCloneDealloc(*sym);
firOpBuilder.create<mlir::omp::YieldOp>(hsb.getAddr().getLoc());
// For delayed privatization deallocs are created by
// populateByRefInitAndCleanupRegions
}
}

void DataSharingProcessor::cloneSymbol(const semantics::Symbol *sym) {
bool isFirstPrivate = sym->test(semantics::Symbol::Flag::OmpFirstPrivate);
bool success = converter.createHostAssociateVarClone(
*sym, /*skipDefaultInit=*/isFirstPrivate);

// If we are doing eager-privatization on a symbol created using delayed
// privatization there could be incompatible types here e.g.
// fir.ref<fir.box<fir.array<>>>
bool success = [&]() -> bool {
const auto *details =
sym->detailsIf<Fortran::semantics::HostAssocDetails>();
assert(details && "No host-association found");
const Fortran::semantics::Symbol &hsym = details->symbol();
mlir::Value addr = converter.getSymbolAddress(hsym);

if (auto refTy = mlir::dyn_cast<fir::ReferenceType>(addr.getType())) {
if (auto boxTy = mlir::dyn_cast<fir::BoxType>(refTy.getElementType())) {
if (auto arrayTy =
mlir::dyn_cast<fir::SequenceType>(boxTy.getElementType())) {
// FirConverter/fir::ExtendedValue considers all references to boxes
// as mutable boxes. Outside of OpenMP it doesn't make sense to have a
// mutable box of an array. Work around this here by loading the
// reference so it is a normal boxed array.
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
mlir::Location loc = converter.genLocation(hsym.name());
fir::ExtendedValue hexv = converter.getSymbolExtendedValue(hsym);

llvm::SmallVector<mlir::Value> extents =
fir::factory::getExtents(loc, builder, hexv);

// TODO: uniqName, name
mlir::Value allocVal =
builder.allocateLocal(loc, arrayTy, /*uniqName=*/"",
/*name=*/"", extents, /*typeParams=*/{},
sym->GetUltimate().attrs().test(
Fortran::semantics::Attr::TARGET));
mlir::Value shape = builder.genShape(loc, extents);
mlir::Value box = builder.createBox(loc, boxTy, allocVal, shape,
nullptr, {}, nullptr);

// This can't be a CharArrayBoxValue because otherwise
// boxTy.getElementType() would be a character type.
// Assume the array element type isn't polymorphic because we are
// privatizing.
fir::ExtendedValue newExv = fir::ArrayBoxValue{box, extents};

converter.bindSymbol(*sym, newExv);
return true;
}
}
}

// Normal case:
return converter.createHostAssociateVarClone(
*sym, /*skipDefaultInit=*/isFirstPrivate);
}();
(void)success;
assert(success && "Privatization failed due to existing binding");

Expand All @@ -132,7 +166,7 @@ void DataSharingProcessor::cloneSymbol(const semantics::Symbol *sym) {

if (needInitClone()) {
Fortran::lower::initializeCloneAtRuntime(converter, *sym, symTable);
callsInitClone = true;
mightHaveReadHostSym = true;
}
}

Expand Down Expand Up @@ -184,7 +218,8 @@ bool DataSharingProcessor::needBarrier() {
// Emit implicit barrier for linear clause. Maybe on somewhere else.
for (const semantics::Symbol *sym : allPrivatizedSymbols) {
if (sym->test(semantics::Symbol::Flag::OmpLastPrivate) &&
(sym->test(semantics::Symbol::Flag::OmpFirstPrivate) || callsInitClone))
(sym->test(semantics::Symbol::Flag::OmpFirstPrivate) ||
mightHaveReadHostSym))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry @tblah for the late comment to this PR. I am looking into an issue that came up recently, and I believe it's related to this check here and your extensions to initialize that flag also during delayed privatization. So, I wanted to ask about it to try to understand it better, and see what the best solution is.

As far as I can tell, the mightHaveReadHostSym flag is set globally. That is, whenever a symbol is known to either initialize a clone or to have a privatizer with an init mold argument that is used, this will be true. Then, a barrier will be inserted e.g. if it is true and there are any lastprivate symbols. Is that the intended behavior?

My concern is that it doesn't look at whether the lastprivate symbol is the same one that caused the flag to be set. This is something that is triggered in the following example:

subroutine simd()
  integer :: i, j
  integer, dimension ( 3 ) :: x

  !$omp simd collapse(2) private(x)
  do i = 1, 10
    do j = 1, 10
      call foo(x)
    end do
  end do
  !$omp end simd
end subroutine simd

The i variable is implicitly marked as lastprivate (not sure where that's done, but I'm assuming it's simd-specific behavior) and there is a private clause for a non-primitive type, so this results in the insertion of a barrier. However, replacing simd for do no longer causes this issue because i is no longer lastprivate then. I was just wondering if it's on purpose that we're adding a barrier due to different symbols meeting each side of the condition.

I guess that would have been triggered previously as well, it's just that this change made it also happen for delayed privatization, which then impacts composite construct lowering and ends up triggering an MLIR verifier error for target SPMD due to the added barrier.

Maybe @ergawy or @luporl can also chime in if you know whether this is the expected behavior. I have a small patch updating mightHaveReadHostSym so that a barrier would not be inserted in this case, which I can share if this isn't expected behavior.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAIU, a barrier should not be needed in this case, since it is not possible to have concurrent read/write of the same memory region, as i is only written to in the last iteration and, as it seems, x is only read during privatization (this could probably be optimized, since x's shape is known at compile time).

I hadn't noticed it before, but having a single mightHaveReadHostSym variable for all privatized symbols is a problem indeed. A possible solution would be to have a per-symbol mightHaveReadHostSym.

Copy link
Member

@skatrak skatrak Feb 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for the quick response. I just created #127074 to address the second part of the issue. Though it looks like, from what you're saying, we might want to treat induction variables differently as well?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the quick fix.
I only wanted to point out that, in this case, x wouldn't even need to be read at all.
Actually, this optimization is implemented in #125901.

Do you think there is any advantage in treating induction variables differently?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, thank you for the explanation, that makes sense. I would try to have as few edge cases as possible, so if we can get away with treating every variable the same way, I'd prefer it.

DO loop bounds have to be scalars, so it seems to me that the induction variable couldn't trigger this via the mightHaveReadHostSym=true path. Also, semantics triggers an error about passing a loop induction variable into a firstprivate clause, so it seems like we should be fine without adding an edge case for this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry for my slow response @skatrak, I was on holiday. Thanks for fixing it, and thanks @luporl for chiming in.

return true;
}
return false;
Expand Down Expand Up @@ -468,15 +503,47 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym,
lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(*sym);
assert(hsb && "Host symbol box not found");

mlir::Type symType = hsb.getAddr().getType();
mlir::Location symLoc = hsb.getAddr().getLoc();
std::string privatizerName = sym->name().ToString() + ".privatizer";
bool isFirstPrivate = sym->test(semantics::Symbol::Flag::OmpFirstPrivate);

mlir::Value privVal = hsb.getAddr();
mlir::Type allocType = privVal.getType();
if (!mlir::isa<fir::PointerType>(privVal.getType()))
allocType = fir::unwrapRefType(privVal.getType());

if (auto poly = mlir::dyn_cast<fir::ClassType>(allocType)) {
if (!mlir::isa<fir::PointerType>(poly.getEleTy()) && isFirstPrivate)
TODO(symLoc, "create polymorphic host associated copy");
}

// fir.array<> cannot be converted to any single llvm type and fir helpers
// are not available in openmp to llvmir translation so we cannot generate
// an alloca for a fir.array type there. Get around this by boxing all
// arrays.
if (mlir::isa<fir::SequenceType>(allocType)) {
hlfir::Entity entity{hsb.getAddr()};
entity = genVariableBox(symLoc, firOpBuilder, entity);
privVal = entity.getBase();
allocType = privVal.getType();
}

if (mlir::isa<fir::BaseBoxType>(privVal.getType())) {
// Boxes should be passed by reference into nested regions:
auto oldIP = firOpBuilder.saveInsertionPoint();
firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock());
auto alloca = firOpBuilder.create<fir::AllocaOp>(symLoc, privVal.getType());
firOpBuilder.restoreInsertionPoint(oldIP);
firOpBuilder.create<fir::StoreOp>(symLoc, privVal, alloca);
privVal = alloca;
}

mlir::Type argType = privVal.getType();

mlir::omp::PrivateClauseOp privatizerOp = [&]() {
auto moduleOp = firOpBuilder.getModule();
auto uniquePrivatizerName = fir::getTypeAsString(
symType, converter.getKindMap(),
allocType, converter.getKindMap(),
converter.mangleName(*sym) +
(isFirstPrivate ? "_firstprivate" : "_private"));

Expand All @@ -488,44 +555,40 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym,
mlir::OpBuilder::InsertionGuard guard(firOpBuilder);
firOpBuilder.setInsertionPointToStart(moduleOp.getBody());
auto result = firOpBuilder.create<mlir::omp::PrivateClauseOp>(
symLoc, uniquePrivatizerName, symType,
symLoc, uniquePrivatizerName, allocType,
isFirstPrivate ? mlir::omp::DataSharingClauseType::FirstPrivate
: mlir::omp::DataSharingClauseType::Private);
fir::ExtendedValue symExV = converter.getSymbolExtendedValue(*sym);
lower::SymMapScope outerScope(symTable);

// Populate the `alloc` region.
{
mlir::Region &allocRegion = result.getAllocRegion();
mlir::Block *allocEntryBlock = firOpBuilder.createBlock(
&allocRegion, /*insertPt=*/{}, symType, symLoc);

firOpBuilder.setInsertionPointToEnd(allocEntryBlock);

fir::ExtendedValue localExV =
hlfir::translateToExtendedValue(
symLoc, firOpBuilder, hlfir::Entity{allocRegion.getArgument(0)},
/*contiguousHint=*/
evaluate::IsSimplyContiguous(*sym, converter.getFoldingContext()))
.first;

symTable.addSymbol(*sym, localExV);
lower::SymMapScope innerScope(symTable);
cloneSymbol(sym);
mlir::Value cloneAddr = symTable.shallowLookupSymbol(*sym).getAddr();
mlir::Type cloneType = cloneAddr.getType();

// A `convert` op is required for variables that are storage associated
// via `equivalence`. The problem is that these variables are declared as
// `fir.ptr`s while their privatized storage is declared as `fir.ref`,
// therefore we convert to proper symbol type.
mlir::Value yieldedValue =
(symType == cloneType) ? cloneAddr
: firOpBuilder.createConvert(
cloneAddr.getLoc(), symType, cloneAddr);

firOpBuilder.create<mlir::omp::YieldOp>(hsb.getAddr().getLoc(),
yieldedValue);
// Populate the `init` region.
// We need to initialize in the following cases:
// 1. The allocation was for a derived type which requires initialization
// (this can be skipped if it will be initialized anyway by the copy
// region, unless the derived type has allocatable components)
// 2. The allocation was for any kind of box
// 3. The allocation was for a boxed character
const bool needsInitialization =
(Fortran::lower::hasDefaultInitialization(sym->GetUltimate()) &&
(!isFirstPrivate || hlfir::mayHaveAllocatableComponent(allocType))) ||
mlir::isa<fir::BaseBoxType>(allocType) ||
mlir::isa<fir::BoxCharType>(allocType);
if (needsInitialization) {
mlir::Region &initRegion = result.getInitRegion();
mlir::Block *initBlock = firOpBuilder.createBlock(
&initRegion, /*insertPt=*/{}, {argType, argType}, {symLoc, symLoc});

populateByRefInitAndCleanupRegions(
converter, symLoc, argType, /*scalarInitValue=*/nullptr, initBlock,
result.getInitPrivateArg(), result.getInitMoldArg(),
result.getDeallocRegion(),
isFirstPrivate ? DeclOperationKind::FirstPrivate
: DeclOperationKind::Private,
sym);
// TODO: currently there are false positives from dead uses of the mold
// arg
if (!result.getInitMoldArg().getUses().empty())
mightHaveReadHostSym = true;
}

// Populate the `copy` region if this is a `firstprivate`.
Expand All @@ -534,7 +597,7 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym,
// First block argument corresponding to the original/host value while
// second block argument corresponding to the privatized value.
mlir::Block *copyEntryBlock = firOpBuilder.createBlock(
&copyRegion, /*insertPt=*/{}, {symType, symType}, {symLoc, symLoc});
&copyRegion, /*insertPt=*/{}, {argType, argType}, {symLoc, symLoc});
firOpBuilder.setInsertionPointToEnd(copyEntryBlock);

auto addSymbol = [&](unsigned argIdx, bool force = false) {
Expand Down Expand Up @@ -565,7 +628,7 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym,

if (clauseOps) {
clauseOps->privateSyms.push_back(mlir::SymbolRefAttr::get(privatizerOp));
clauseOps->privateVars.push_back(hsb.getAddr());
clauseOps->privateVars.push_back(privVal);
}

symToPrivatizer[sym] = privatizerOp;
Expand Down
2 changes: 1 addition & 1 deletion flang/lib/Lower/OpenMP/DataSharingProcessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ class DataSharingProcessor {
lower::pft::Evaluation &eval;
bool shouldCollectPreDeterminedSymbols;
bool useDelayedPrivatization;
bool callsInitClone = false;
bool mightHaveReadHostSym = false;
lower::SymMap &symTable;
OMPConstructSymbolVisitor visitor;

Expand Down
Loading