-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[OpenMP][MLIR] Support LLVM translation for distribute
with delayed privatization
#131564
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-mlir @llvm/pr-subscribers-mlir-llvm Author: Kareem Ergawy (ergawy) Changes: Adds support for translating delayed privatization (`private` and `firstprivate`) for `omp.distribute` ops. Full diff: https://github.com/llvm/llvm-project/pull/131564.diff 3 Files Affected:
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 0010ca6630050..9198ee997be07 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -250,7 +250,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
checkAllocate(op, result);
checkDistSchedule(op, result);
checkOrder(op, result);
- checkPrivate(op, result);
})
.Case([&](omp::OrderedRegionOp op) { checkParLevelSimd(op, result); })
.Case([&](omp::SectionsOp op) {
@@ -4189,6 +4188,38 @@ convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
// DistributeOp has only one region associated with it.
builder.restoreIP(codeGenIP);
+ // TODO This is a recurring pattern in almost all ops that need
+ // privatization. Try to abstract it in a shared util/interface.
+ MutableArrayRef<BlockArgument> privateBlockArgs =
+ cast<omp::BlockArgOpenMPOpInterface>(*distributeOp)
+ .getPrivateBlockArgs();
+ SmallVector<mlir::Value> mlirPrivateVars;
+ SmallVector<llvm::Value *> llvmPrivateVars;
+ SmallVector<omp::PrivateClauseOp> privateDecls;
+ mlirPrivateVars.reserve(privateBlockArgs.size());
+ llvmPrivateVars.reserve(privateBlockArgs.size());
+ collectPrivatizationDecls(distributeOp, privateDecls);
+
+ for (mlir::Value privateVar : distributeOp.getPrivateVars())
+ mlirPrivateVars.push_back(privateVar);
+
+ llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
+ builder, moduleTranslation, privateBlockArgs, privateDecls,
+ mlirPrivateVars, llvmPrivateVars, allocaIP);
+ if (handleError(afterAllocas, opInst).failed())
+ return llvm::make_error<PreviouslyReportedError>();
+
+ if (handleError(initPrivateVars(builder, moduleTranslation,
+ privateBlockArgs, privateDecls,
+ mlirPrivateVars, llvmPrivateVars),
+ opInst)
+ .failed())
+ return llvm::make_error<PreviouslyReportedError>();
+
+ if (failed(copyFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
+ llvmPrivateVars, privateDecls)))
+ return llvm::make_error<PreviouslyReportedError>();
+
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
llvm::Expected<llvm::BasicBlock *> regionBlock =
diff --git a/mlir/test/Target/LLVMIR/openmp-distribute-private.mlir b/mlir/test/Target/LLVMIR/openmp-distribute-private.mlir
new file mode 100644
index 0000000000000..de4accc38a419
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-distribute-private.mlir
@@ -0,0 +1,94 @@
+// Test code-gen for `omp.distribute` ops with delayed privatizers (i.e. using
+// `omp.private` ops).
+
+// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
+
+omp.private {type = private} @_QFEi_private_i32 : i32
+omp.private {type = private} @_QFEpriv_val_dist_private_f32 : f32
+
+llvm.func @_QQmain() {
+ %0 = llvm.mlir.constant(1 : i64) : i64
+ %1 = llvm.alloca %0 x f32 {bindc_name = "priv_val_dist"} : (i64) -> !llvm.ptr
+ %3 = llvm.alloca %0 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
+ %4 = llvm.mlir.constant(3.140000e+00 : f32) : f32
+ %5 = llvm.mlir.constant(1000 : i32) : i32
+ %6 = llvm.mlir.constant(1 : i32) : i32
+
+ omp.teams {
+ omp.distribute private(@_QFEpriv_val_dist_private_f32 %1 -> %arg0, @_QFEi_private_i32 %3 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+ omp.loop_nest (%arg2) : i32 = (%6) to (%5) inclusive step (%6) {
+ llvm.store %arg2, %arg1 : i32, !llvm.ptr
+ llvm.store %4, %arg0 : f32, !llvm.ptr
+ omp.yield
+ }
+ }
+ omp.terminator
+ }
+
+ llvm.return
+}
+
+// CHECK-LABEL: define void @_QQmain() {
+// CHECK: call void {{.*}} @__kmpc_fork_teams(ptr @{{.*}}, i32 0, ptr @[[TEAMS_FUNC:.*]])
+// CHECK-NEXT: br label %teams.exit
+// CHECK: }
+
+// CHECK: define internal void @[[TEAMS_FUNC]]({{.*}}) {
+// CHECK: call void @[[DIST_FUNC:.*]]()
+// CHECK-NEXT: br label %distribute.exit
+// CHECK: }
+
+// CHECK: define internal void @[[DIST_FUNC]]() {
+// CHECK: %[[PRIV_VAR_ALLOC:.*]] = alloca float, align 4
+// CHECK: %[[IV_ALLOC:.*]] = alloca i32, align 4
+
+// CHECK: omp.loop_nest.region:
+// CHECK-NEXT: store i32 %{{.*}}, ptr %[[IV_ALLOC]], align 4
+// CHECK-NEXT: store float 0x40091EB860000000, ptr %[[PRIV_VAR_ALLOC]], align 4
+// CHECK: }
+
+// -----
+
+omp.private {type = firstprivate} @_QFEpriv_val_dist_firstprivate_f32 : f32 copy {
+^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+ %0 = llvm.load %arg0 : !llvm.ptr -> f32
+ llvm.store %0, %arg1 : f32, !llvm.ptr
+ omp.yield(%arg1 : !llvm.ptr)
+}
+
+llvm.func @_QQmain() {
+ %0 = llvm.mlir.constant(1 : i64) : i64
+ %1 = llvm.alloca %0 x f32 {bindc_name = "priv_val_dist"} : (i64) -> !llvm.ptr
+ %4 = llvm.mlir.constant(3.140000e+00 : f32) : f32
+ %6 = llvm.mlir.constant(1 : i32) : i32
+ omp.distribute private(@_QFEpriv_val_dist_firstprivate_f32 %1 -> %arg0 : !llvm.ptr) {
+ omp.loop_nest (%arg2) : i32 = (%6) to (%6) inclusive step (%6) {
+ llvm.store %4, %arg0 : f32, !llvm.ptr
+ omp.yield
+ }
+ }
+ llvm.return
+}
+
+// CHECK-LABEL: define void @_QQmain() {
+// CHECK: %[[SHARED_VAR_ALLOC:.*]] = alloca float, i64 1, align 4
+// CHECK: %[[SHARED_VAR_PTR:.*]] = getelementptr { ptr }, ptr %[[DIST_PARAM:.*]], i32 0, i32 0
+// CHECK: store ptr %[[SHARED_VAR_ALLOC]], ptr %[[SHARED_VAR_PTR]], align 8
+// CHECK: call void @[[DIST_FUNC:.*]](ptr %[[DIST_PARAM]])
+// CHECK-NEXT: br label %distribute.exit
+// CHECK: }
+
+// CHECK: define internal void @[[DIST_FUNC]](ptr %[[DIST_ARG:.*]]) {
+// CHECK: %[[SHARED_VAR_GEP:.*]] = getelementptr { ptr }, ptr %[[DIST_ARG]], i32 0, i32 0
+// CHECK: %[[SHARED_VAR_PTR2:.*]] = load ptr, ptr %[[SHARED_VAR_GEP]], align 8
+// CHECK: %[[PRIV_VAR_ALLOC:.*]] = alloca float, align 4
+
+// CHECK: omp.private.copy:
+// CHECK-NEXT: %[[SHARED_VAR_VAL:.*]] = load float, ptr %[[SHARED_VAR_PTR2]], align 4
+// CHECK-NEXT: store float %[[SHARED_VAR_VAL]], ptr %[[PRIV_VAR_ALLOC]], align 4
+
+// CHECK: omp.loop_nest.region:
+// CHECK-NEXT: store float 0x40091EB860000000, ptr %[[PRIV_VAR_ALLOC]], align 4
+// CHECK: }
+
+
diff --git a/mlir/test/Target/LLVMIR/openmp-todo.mlir b/mlir/test/Target/LLVMIR/openmp-todo.mlir
index 7a4d304f2d2d5..af31f8bab73ac 100644
--- a/mlir/test/Target/LLVMIR/openmp-todo.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-todo.mlir
@@ -105,21 +105,6 @@ llvm.func @distribute_order(%lb : i32, %ub : i32, %step : i32) {
// -----
-omp.private {type = private} @x.privatizer : !llvm.ptr
-
-llvm.func @distribute_private(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
- // expected-error@below {{not yet implemented: Unhandled clause privatization in omp.distribute operation}}
- // expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
- omp.distribute private(@x.privatizer %x -> %arg0 : !llvm.ptr) {
- omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
- omp.yield
- }
- }
- llvm.return
-}
-
-// -----
-
llvm.func @ordered_region_par_level_simd() {
// expected-error@below {{not yet implemented: Unhandled clause parallelization-level in omp.ordered.region operation}}
// expected-error@below {{LLVM Translation failed for operation: omp.ordered.region}}
|
2b22a17
to
f75f164
Compare
… privatization Adds support for translating delayed privatization (`private` and `firstprivate`) for `omp.distribute` ops.
f75f164
to
17f14be
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks!
Adds support for translating delayed privatization (`private` and `firstprivate`) for `omp.distribute` ops.