-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[mlir][OpenMP][NFC] delayed privatisation cleanup #115298
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Upstreaming some code cleanups ahead of supporting delayed task execution. - Make allocatePrivateVars not need to be a template (it will need to operate separately on firstprivate and private variables for delayed task execution so it can't index into lists of all variables). - Use llvm::SmallVectorImpl for function arguments - collectPrivatizationDecls already reserves size for privateDecls so we don't need to do that in callers - Use llvm::zip_equal instead of C-style array indexing
@llvm/pr-subscribers-mlir-openmp @llvm/pr-subscribers-flang-openmp Author: Tom Eccles (tblah) ChangesUpstreaming some code cleanups ahead of supporting delayed task execution.
Full diff: https://github.com/llvm/llvm-project/pull/115298.diff 1 Files Affected:
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index ce2fc390db2b06..da11ee9960e1f9 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1254,13 +1254,13 @@ static LogicalResult allocAndInitializeReductionVars(
/// Allocate delayed private variables. Returns the basic block which comes
/// after all of these allocations. llvm::Value * for each of these private
/// variables are populated in llvmPrivateVars.
-template <class OP>
static llvm::Expected<llvm::BasicBlock *>
-allocatePrivateVars(OP opInst, llvm::IRBuilderBase &builder,
+allocatePrivateVars(llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation,
MutableArrayRef<BlockArgument> privateBlockArgs,
MutableArrayRef<omp::PrivateClauseOp> privateDecls,
- llvm::SmallVector<llvm::Value *> &llvmPrivateVars,
+ MutableArrayRef<mlir::Value> mlirPrivateVars,
+ llvm::SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP) {
// Allocate private vars
llvm::BranchInst *allocaTerminator =
@@ -1285,19 +1285,18 @@ allocatePrivateVars(OP opInst, llvm::IRBuilderBase &builder,
llvm::BasicBlock *privAllocBlock = nullptr;
if (!privateBlockArgs.empty())
privAllocBlock = splitBB(builder, true, "omp.private.latealloc");
- for (unsigned i = 0; i < privateBlockArgs.size(); ++i) {
- Region &allocRegion = privateDecls[i].getAllocRegion();
+ for (auto [privDecl, mlirPrivVar, blockArg] :
+ llvm::zip_equal(privateDecls, mlirPrivateVars, privateBlockArgs)) {
+ Region &allocRegion = privDecl.getAllocRegion();
// map allocation region block argument
- llvm::Value *nonPrivateVar =
- moduleTranslation.lookupValue(opInst.getPrivateVars()[i]);
+ llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirPrivVar);
assert(nonPrivateVar);
- moduleTranslation.mapValue(privateDecls[i].getAllocMoldArg(),
- nonPrivateVar);
+ moduleTranslation.mapValue(privDecl.getAllocMoldArg(), nonPrivateVar);
// in-place convert the private allocation region
SmallVector<llvm::Value *, 1> phis;
- if (privateDecls[i].getAllocMoldArg().getUses().empty()) {
+ if (privDecl.getAllocMoldArg().getUses().empty()) {
// TODO this should use
// allocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca() so it goes before
// the code for fetching the thread id. Not doing this for now to avoid
@@ -1313,7 +1312,7 @@ allocatePrivateVars(OP opInst, llvm::IRBuilderBase &builder,
assert(phis.size() == 1 && "expected one allocation to be yielded");
- moduleTranslation.mapValue(privateBlockArgs[i], phis[0]);
+ moduleTranslation.mapValue(blockArg, phis[0]);
llvmPrivateVars.push_back(phis[0]);
// clear alloc region block argument mapping in case it needs to be
@@ -1561,11 +1560,14 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
// Collect delayed privatisation declarations
MutableArrayRef<BlockArgument> privateBlockArgs =
cast<omp::BlockArgOpenMPOpInterface>(*taskOp).getPrivateBlockArgs();
+ SmallVector<mlir::Value> mlirPrivateVars;
SmallVector<llvm::Value *> llvmPrivateVars;
SmallVector<omp::PrivateClauseOp> privateDecls;
+ mlirPrivateVars.reserve(privateBlockArgs.size());
llvmPrivateVars.reserve(privateBlockArgs.size());
- privateDecls.reserve(privateBlockArgs.size());
collectPrivatizationDecls(taskOp, privateDecls);
+ for (mlir::Value privateVar : taskOp.getPrivateVars())
+ mlirPrivateVars.push_back(privateVar);
auto bodyCB = [&](InsertPointTy allocaIP,
InsertPointTy codegenIP) -> llvm::Error {
@@ -1575,8 +1577,8 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
moduleTranslation, allocaIP);
llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
- taskOp, builder, moduleTranslation, privateBlockArgs, privateDecls,
- llvmPrivateVars, allocaIP);
+ builder, moduleTranslation, privateBlockArgs, privateDecls,
+ mlirPrivateVars, llvmPrivateVars, allocaIP);
if (handleError(afterAllocas, *taskOp).failed())
return llvm::make_error<PreviouslyReportedError>();
@@ -1879,11 +1881,14 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
// Collect delayed privatization declarations
MutableArrayRef<BlockArgument> privateBlockArgs =
cast<omp::BlockArgOpenMPOpInterface>(*opInst).getPrivateBlockArgs();
+ SmallVector<mlir::Value> mlirPrivateVars;
SmallVector<llvm::Value *> llvmPrivateVars;
SmallVector<omp::PrivateClauseOp> privateDecls;
+ mlirPrivateVars.reserve(privateBlockArgs.size());
llvmPrivateVars.reserve(privateBlockArgs.size());
- privateDecls.reserve(privateBlockArgs.size());
collectPrivatizationDecls(opInst, privateDecls);
+ for (mlir::Value privateVar : opInst.getPrivateVars())
+ mlirPrivateVars.push_back(privateVar);
// Collect reduction declarations
SmallVector<omp::DeclareReductionOp> reductionDecls;
@@ -1895,8 +1900,8 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
auto bodyGenCB = [&](InsertPointTy allocaIP,
InsertPointTy codeGenIP) -> llvm::Error {
llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
- opInst, builder, moduleTranslation, privateBlockArgs, privateDecls,
- llvmPrivateVars, allocaIP);
+ builder, moduleTranslation, privateBlockArgs, privateDecls,
+ mlirPrivateVars, llvmPrivateVars, allocaIP);
if (handleError(afterAllocas, *opInst).failed())
return llvm::make_error<PreviouslyReportedError>();
|
@llvm/pr-subscribers-mlir-llvm Author: Tom Eccles (tblah) ChangesUpstreaming some code cleanups ahead of supporting delayed task execution.
Full diff: https://github.com/llvm/llvm-project/pull/115298.diff 1 Files Affected:
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index ce2fc390db2b06..da11ee9960e1f9 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1254,13 +1254,13 @@ static LogicalResult allocAndInitializeReductionVars(
/// Allocate delayed private variables. Returns the basic block which comes
/// after all of these allocations. llvm::Value * for each of these private
/// variables are populated in llvmPrivateVars.
-template <class OP>
static llvm::Expected<llvm::BasicBlock *>
-allocatePrivateVars(OP opInst, llvm::IRBuilderBase &builder,
+allocatePrivateVars(llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation,
MutableArrayRef<BlockArgument> privateBlockArgs,
MutableArrayRef<omp::PrivateClauseOp> privateDecls,
- llvm::SmallVector<llvm::Value *> &llvmPrivateVars,
+ MutableArrayRef<mlir::Value> mlirPrivateVars,
+ llvm::SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP) {
// Allocate private vars
llvm::BranchInst *allocaTerminator =
@@ -1285,19 +1285,18 @@ allocatePrivateVars(OP opInst, llvm::IRBuilderBase &builder,
llvm::BasicBlock *privAllocBlock = nullptr;
if (!privateBlockArgs.empty())
privAllocBlock = splitBB(builder, true, "omp.private.latealloc");
- for (unsigned i = 0; i < privateBlockArgs.size(); ++i) {
- Region &allocRegion = privateDecls[i].getAllocRegion();
+ for (auto [privDecl, mlirPrivVar, blockArg] :
+ llvm::zip_equal(privateDecls, mlirPrivateVars, privateBlockArgs)) {
+ Region &allocRegion = privDecl.getAllocRegion();
// map allocation region block argument
- llvm::Value *nonPrivateVar =
- moduleTranslation.lookupValue(opInst.getPrivateVars()[i]);
+ llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirPrivVar);
assert(nonPrivateVar);
- moduleTranslation.mapValue(privateDecls[i].getAllocMoldArg(),
- nonPrivateVar);
+ moduleTranslation.mapValue(privDecl.getAllocMoldArg(), nonPrivateVar);
// in-place convert the private allocation region
SmallVector<llvm::Value *, 1> phis;
- if (privateDecls[i].getAllocMoldArg().getUses().empty()) {
+ if (privDecl.getAllocMoldArg().getUses().empty()) {
// TODO this should use
// allocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca() so it goes before
// the code for fetching the thread id. Not doing this for now to avoid
@@ -1313,7 +1312,7 @@ allocatePrivateVars(OP opInst, llvm::IRBuilderBase &builder,
assert(phis.size() == 1 && "expected one allocation to be yielded");
- moduleTranslation.mapValue(privateBlockArgs[i], phis[0]);
+ moduleTranslation.mapValue(blockArg, phis[0]);
llvmPrivateVars.push_back(phis[0]);
// clear alloc region block argument mapping in case it needs to be
@@ -1561,11 +1560,14 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
// Collect delayed privatisation declarations
MutableArrayRef<BlockArgument> privateBlockArgs =
cast<omp::BlockArgOpenMPOpInterface>(*taskOp).getPrivateBlockArgs();
+ SmallVector<mlir::Value> mlirPrivateVars;
SmallVector<llvm::Value *> llvmPrivateVars;
SmallVector<omp::PrivateClauseOp> privateDecls;
+ mlirPrivateVars.reserve(privateBlockArgs.size());
llvmPrivateVars.reserve(privateBlockArgs.size());
- privateDecls.reserve(privateBlockArgs.size());
collectPrivatizationDecls(taskOp, privateDecls);
+ for (mlir::Value privateVar : taskOp.getPrivateVars())
+ mlirPrivateVars.push_back(privateVar);
auto bodyCB = [&](InsertPointTy allocaIP,
InsertPointTy codegenIP) -> llvm::Error {
@@ -1575,8 +1577,8 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
moduleTranslation, allocaIP);
llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
- taskOp, builder, moduleTranslation, privateBlockArgs, privateDecls,
- llvmPrivateVars, allocaIP);
+ builder, moduleTranslation, privateBlockArgs, privateDecls,
+ mlirPrivateVars, llvmPrivateVars, allocaIP);
if (handleError(afterAllocas, *taskOp).failed())
return llvm::make_error<PreviouslyReportedError>();
@@ -1879,11 +1881,14 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
// Collect delayed privatization declarations
MutableArrayRef<BlockArgument> privateBlockArgs =
cast<omp::BlockArgOpenMPOpInterface>(*opInst).getPrivateBlockArgs();
+ SmallVector<mlir::Value> mlirPrivateVars;
SmallVector<llvm::Value *> llvmPrivateVars;
SmallVector<omp::PrivateClauseOp> privateDecls;
+ mlirPrivateVars.reserve(privateBlockArgs.size());
llvmPrivateVars.reserve(privateBlockArgs.size());
- privateDecls.reserve(privateBlockArgs.size());
collectPrivatizationDecls(opInst, privateDecls);
+ for (mlir::Value privateVar : opInst.getPrivateVars())
+ mlirPrivateVars.push_back(privateVar);
// Collect reduction declarations
SmallVector<omp::DeclareReductionOp> reductionDecls;
@@ -1895,8 +1900,8 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
auto bodyGenCB = [&](InsertPointTy allocaIP,
InsertPointTy codeGenIP) -> llvm::Error {
llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
- opInst, builder, moduleTranslation, privateBlockArgs, privateDecls,
- llvmPrivateVars, allocaIP);
+ builder, moduleTranslation, privateBlockArgs, privateDecls,
+ mlirPrivateVars, llvmPrivateVars, allocaIP);
if (handleError(afterAllocas, *opInst).failed())
return llvm::make_error<PreviouslyReportedError>();
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LG
Upstreaming some code cleanups ahead of supporting delayed task execution. - Make allocatePrivateVars not need to be a template (it will need to operate separately on firstprivate and private variables for delayed task execution so it can't index into lists of all variables in the operation). - Use llvm::SmallVectorImpl for function arguments - collectPrivatizationDecls already reserves size for privateDecls so we don't need to do that in callers - Use llvm::zip_equal instead of C-style array indexing
Upstreaming some code cleanups ahead of supporting delayed task execution.