-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[flang][OpenMP] Process omp.atomic.update
while translating scopes for target device
#132165
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-mlir @llvm/pr-subscribers-mlir-openmp Author: Kareem Ergawy (ergawy) ChangesFixes a bug introduced by #130078. For non-BlockArgOpenMPOpInterface ops, we also want to map their entry block arguments to their operands, if any. For the current support in the OpenMP dialect, the table below lists all ops that have arguments (SSA operands and/or attributes) and not target-related. Of all these ops, we need to only process
Full diff: https://github.com/llvm/llvm-project/pull/132165.diff 2 Files Affected:
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 722107c7ec6d7..cfc4e98ad3341 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -5323,6 +5323,43 @@ convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder,
if (auto blockArgsIface =
dyn_cast<omp::BlockArgOpenMPOpInterface>(oper))
forwardArgs(moduleTranslation, blockArgsIface);
+ else {
+ // For non-BlockArgOpenMPOpInterface ops, we also want to map
+ // their entry block arguments to their operands, if any. For the
+ // current support in the OpenMP dialect, the table below lists
+ // all ops that have arguments (SSA operands and/or attributes)
+ // and not target-related. Of all these ops, we need to only
+ // process `omp.atomic.update` since it is the only op that has
+ // SSA operands & an attached region. Therefore, the region's
+ // entry block arguments must be mapped to the op's operands in
+ // case they are referenced inside the region.
+ //
+ // clang-format off
+ // =============================================================================
+ // | op | operands? | region(s)? | parent is func? | processed? |
+ // =============================================================================
+ // | atomic.read | yes | no | yes | no |
+ // | atomic.write | yes | no | yes | no |
+ // | atomic.update | yes | yes | yes | yes |
+ // | critical | no | no | yes | no |
+ // | declare_mapper | no | yes | no | no |
+ // | declare_reduction | no | yes | no | no |
+ // | flush | yes | no | yes | no |
+ // | private | no | yes | yes | no |
+ // | threadprivate | yes | no | yes | no |
+ // | yield | yes | no | yes | no |
+ // =============================================================================
+ // clang-format on
+ if (isa<mlir::omp::AtomicUpdateOp>(oper))
+ for (auto [operand, arg] :
+ llvm::zip_equal(oper->getOperands(),
+ oper->getRegion(0).getArguments())) {
+ moduleTranslation.mapValue(
+ arg, builder.CreateLoad(
+ moduleTranslation.convertType(arg.getType()),
+ moduleTranslation.lookupValue(operand)));
+ }
+ }
if (auto loopNest = dyn_cast<omp::LoopNestOp>(oper)) {
assert(builder.GetInsertBlock() &&
@@ -5340,9 +5377,10 @@ convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder,
// translation of the OpenMP construct being converted (e.g. no
// OpenMP runtime calls will be generated). We just need this to
// prepare the kernel invocation args.
+ SmallVector<llvm::PHINode *> phis;
auto result = convertOmpOpRegions(
region, oper->getName().getStringRef().str() + ".fake.region",
- builder, moduleTranslation);
+ builder, moduleTranslation, &phis);
if (failed(handleError(result, *oper)))
return WalkResult::interrupt();
diff --git a/mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir b/mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir
index 71a4c29eaf0aa..c618b68d52aaf 100644
--- a/mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir
@@ -97,6 +97,20 @@ module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true,
llvm.return
}
+ llvm.func @test_target_and_atomic_update(%x: !llvm.ptr, %expr : i32) {
+ omp.target {
+ omp.terminator
+ }
+
+ omp.atomic.update %x : !llvm.ptr {
+ ^bb0(%xval: i32):
+ %newval = llvm.add %xval, %expr : i32
+ omp.yield(%newval : i32)
+ }
+
+ llvm.return
+ }
+
// CHECK-LABEL: define void @test_nested_target_in_parallel_with_private({{.*}}) {
// CHECK: br label %omp.parallel.fake.region
// CHECK: omp.parallel.fake.region:
@@ -132,4 +146,11 @@ module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true,
// CHECK: call void @__kmpc_target_deinit()
// CHECK: ret void
// CHECK: }
+
+// CHECK-LABEL: define {{.*}} amdgpu_kernel void @__omp_offloading_{{.*}}_test_target_and_atomic_update_{{.*}} {
+// CHECK: call i32 @__kmpc_target_init
+// CHECK: user_code.entry:
+// CHECK: call void @__kmpc_target_deinit()
+// CHECK: ret void
+// CHECK: }
}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you, LGTM. Just a couple of minor nits.
// For non-BlockArgOpenMPOpInterface ops, we also want to map | ||
// their entry block arguments to their operands, if any. For the | ||
// current support in the OpenMP dialect, the table below lists | ||
// all ops that have arguments (SSA operands and/or attributes) | ||
// and not target-related. Of all these ops, we need to only | ||
// process `omp.atomic.update` since it is the only op that has | ||
// SSA operands & an attached region. Therefore, the region's | ||
// entry block arguments must be mapped to the op's operands in | ||
// case they are referenced inside the region. | ||
// | ||
// clang-format off | ||
// ============================================================================= | ||
// | op | operands? | region(s)? | parent is func? | processed? | | ||
// ============================================================================= | ||
// | atomic.read | yes | no | yes | no | | ||
// | atomic.write | yes | no | yes | no | | ||
// | atomic.update | yes | yes | yes | yes | | ||
// | critical | no | no | yes | no | | ||
// | declare_mapper | no | yes | no | no | | ||
// | declare_reduction | no | yes | no | no | | ||
// | flush | yes | no | yes | no | | ||
// | private | no | yes | yes | no | | ||
// | threadprivate | yes | no | yes | no | | ||
// | yield | yes | no | yes | no | | ||
// ============================================================================= | ||
// clang-format on |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think having this detailed description of how things are at this point in time is very good information for the commit message. However, I don't think we want this as a comment in the code because it can very easily become outdated. The comment could be summarized to something along the lines of "Here we map entry block arguments of non-BlockArgOpenMPOpInterface ops if they can be encountered inside of a function and they define any of these arguments.".
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
auto result = convertOmpOpRegions( | ||
region, oper->getName().getStringRef().str() + ".fake.region", | ||
builder, moduleTranslation); | ||
builder, moduleTranslation, &phis); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see phis
isn't used. Does passing the vector on its own cause a behavior change?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, since omp.atomic.update
yields values (which are not used in this context), convertOmpOpRegions
asserts unless we pass a vector to collect the phis.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah I see, and I guess not using these PHI nodes later doesn't cause issues. Thanks for the explanation!
…for target device Fixes a bug introduced by #130078. For non-BlockArgOpenMPOpInterface ops, we also want to map their entry block arguments to their operands, if any. For the current support in the OpenMP dialect, the table below lists all ops that have arguments (SSA operands and/or attributes) and not target-related. Of all these ops, we need to only process `omp.atomic.update` since it is the only op that has SSA operands & an attached region. Therefore, the region's entry block arguments must be mapped to the op's operands in case they are referenced inside the region. ============================================================================= | op | operands? | region(s)? | parent is func? | processed? | ============================================================================= | atomic.read | yes | no | yes | no | | atomic.write | yes | no | yes | no | | atomic.update | yes | yes | yes | yes | | critical | no | no | yes | no | | declare_mapper | no | yes | no | no | | declare_reduction | no | yes | no | no | | flush | yes | no | yes | no | | private | no | yes | yes | no | | threadprivate | yes | no | yes | no | | yield | yes | no | yes | no | =============================================================================
db9c57c
to
533a643
Compare
…for target device (llvm#132165) Fixes a bug introduced by llvm#130078. For non-BlockArgOpenMPOpInterface ops, we also want to map their entry block arguments to their operands, if any. For the current support in the OpenMP dialect, the table below lists all ops that have arguments (SSA operands and/or attributes) and not target-related. Of all these ops, we need to only process `omp.atomic.update` since it is the only op that has SSA operands & an attached region. Therefore, the region's entry block arguments must be mapped to the op's operands in case they are referenced inside the region. | op | operands? | region(s)? | parent is func? | processed? | |--------------|-------------|------------|------------------|-------------| | atomic.read | yes | no | yes | no | | atomic.write | yes | no | yes | no | | atomic.update | yes | yes | yes | yes | | critical | no | no | yes | no | | declare_mapper | no | yes | no | no | | declare_reduction | no | yes | no | no | | flush | yes | no | yes | no | | private | no | yes | yes | no | | threadprivate | yes | no | yes | no | | yield | yes | no | yes | no |
Fixes a bug introduced by #130078.
For non-BlockArgOpenMPOpInterface ops, we also want to map their entry block arguments to their operands, if any. For the current support in the OpenMP dialect, the table below lists all ops that have arguments (SSA operands and/or attributes) and not target-related. Of all these ops, we need to only process
omp.atomic.update
since it is the only op that has SSA operands & an attached region. Therefore, the region's entry block arguments must be mapped to the op's operands in case they are referenced inside the region.