Skip to content

[mlir][OpenMP][NFC] delayed privatisation cleanup #115298

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 7, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 22 additions & 17 deletions mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1254,13 +1254,13 @@ static LogicalResult allocAndInitializeReductionVars(
/// Allocate delayed private variables. Returns the basic block which comes
/// after all of these allocations. llvm::Value * for each of these private
/// variables are populated in llvmPrivateVars.
template <class OP>
static llvm::Expected<llvm::BasicBlock *>
allocatePrivateVars(OP opInst, llvm::IRBuilderBase &builder,
allocatePrivateVars(llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation,
MutableArrayRef<BlockArgument> privateBlockArgs,
MutableArrayRef<omp::PrivateClauseOp> privateDecls,
llvm::SmallVector<llvm::Value *> &llvmPrivateVars,
MutableArrayRef<mlir::Value> mlirPrivateVars,
llvm::SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP) {
// Allocate private vars
llvm::BranchInst *allocaTerminator =
Expand All @@ -1285,19 +1285,18 @@ allocatePrivateVars(OP opInst, llvm::IRBuilderBase &builder,
llvm::BasicBlock *privAllocBlock = nullptr;
if (!privateBlockArgs.empty())
privAllocBlock = splitBB(builder, true, "omp.private.latealloc");
for (unsigned i = 0; i < privateBlockArgs.size(); ++i) {
Region &allocRegion = privateDecls[i].getAllocRegion();
for (auto [privDecl, mlirPrivVar, blockArg] :
llvm::zip_equal(privateDecls, mlirPrivateVars, privateBlockArgs)) {
Region &allocRegion = privDecl.getAllocRegion();

// map allocation region block argument
llvm::Value *nonPrivateVar =
moduleTranslation.lookupValue(opInst.getPrivateVars()[i]);
llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirPrivVar);
assert(nonPrivateVar);
moduleTranslation.mapValue(privateDecls[i].getAllocMoldArg(),
nonPrivateVar);
moduleTranslation.mapValue(privDecl.getAllocMoldArg(), nonPrivateVar);

// in-place convert the private allocation region
SmallVector<llvm::Value *, 1> phis;
if (privateDecls[i].getAllocMoldArg().getUses().empty()) {
if (privDecl.getAllocMoldArg().getUses().empty()) {
// TODO this should use
// allocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca() so it goes before
// the code for fetching the thread id. Not doing this for now to avoid
Expand All @@ -1313,7 +1312,7 @@ allocatePrivateVars(OP opInst, llvm::IRBuilderBase &builder,

assert(phis.size() == 1 && "expected one allocation to be yielded");

moduleTranslation.mapValue(privateBlockArgs[i], phis[0]);
moduleTranslation.mapValue(blockArg, phis[0]);
llvmPrivateVars.push_back(phis[0]);

// clear alloc region block argument mapping in case it needs to be
Expand Down Expand Up @@ -1561,11 +1560,14 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
// Collect delayed privatisation declarations
MutableArrayRef<BlockArgument> privateBlockArgs =
cast<omp::BlockArgOpenMPOpInterface>(*taskOp).getPrivateBlockArgs();
SmallVector<mlir::Value> mlirPrivateVars;
SmallVector<llvm::Value *> llvmPrivateVars;
SmallVector<omp::PrivateClauseOp> privateDecls;
mlirPrivateVars.reserve(privateBlockArgs.size());
llvmPrivateVars.reserve(privateBlockArgs.size());
privateDecls.reserve(privateBlockArgs.size());
collectPrivatizationDecls(taskOp, privateDecls);
for (mlir::Value privateVar : taskOp.getPrivateVars())
mlirPrivateVars.push_back(privateVar);

auto bodyCB = [&](InsertPointTy allocaIP,
InsertPointTy codegenIP) -> llvm::Error {
Expand All @@ -1575,8 +1577,8 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
moduleTranslation, allocaIP);

llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
taskOp, builder, moduleTranslation, privateBlockArgs, privateDecls,
llvmPrivateVars, allocaIP);
builder, moduleTranslation, privateBlockArgs, privateDecls,
mlirPrivateVars, llvmPrivateVars, allocaIP);
if (handleError(afterAllocas, *taskOp).failed())
return llvm::make_error<PreviouslyReportedError>();

Expand Down Expand Up @@ -1879,11 +1881,14 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
// Collect delayed privatization declarations
MutableArrayRef<BlockArgument> privateBlockArgs =
cast<omp::BlockArgOpenMPOpInterface>(*opInst).getPrivateBlockArgs();
SmallVector<mlir::Value> mlirPrivateVars;
SmallVector<llvm::Value *> llvmPrivateVars;
SmallVector<omp::PrivateClauseOp> privateDecls;
mlirPrivateVars.reserve(privateBlockArgs.size());
llvmPrivateVars.reserve(privateBlockArgs.size());
privateDecls.reserve(privateBlockArgs.size());
collectPrivatizationDecls(opInst, privateDecls);
for (mlir::Value privateVar : opInst.getPrivateVars())
mlirPrivateVars.push_back(privateVar);

// Collect reduction declarations
SmallVector<omp::DeclareReductionOp> reductionDecls;
Expand All @@ -1895,8 +1900,8 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
auto bodyGenCB = [&](InsertPointTy allocaIP,
InsertPointTy codeGenIP) -> llvm::Error {
llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
opInst, builder, moduleTranslation, privateBlockArgs, privateDecls,
llvmPrivateVars, allocaIP);
builder, moduleTranslation, privateBlockArgs, privateDecls,
mlirPrivateVars, llvmPrivateVars, allocaIP);
if (handleError(afterAllocas, *opInst).failed())
return llvm::make_error<PreviouslyReportedError>();

Expand Down
Loading