[MLIR][OpenMP] Host lowering of standalone distribute #127817
Conversation
@llvm/pr-subscribers-mlir @llvm/pr-subscribers-flang-openmp

Author: Sergio Afonso (skatrak)

Changes

This patch adds MLIR to LLVM IR translation support for standalone `omp.distribute` operations, as well as `distribute simd` through ignoring SIMD information (similarly to `do/for simd`).

Full diff: https://github.com/llvm/llvm-project/pull/127817.diff

3 Files Affected:

- (modified) mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
- (modified) mlir/test/Target/LLVMIR/openmp-llvm.mlir
- (modified) mlir/test/Target/LLVMIR/openmp-todo.mlir
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index a5ff3eff6439f..c8221a9f9854a 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -164,6 +164,10 @@ static LogicalResult checkImplementationStatus(Operation &op) {
if (op.getDevice())
result = todo("device");
};
+ auto checkDistSchedule = [&todo](auto op, LogicalResult &result) {
+ if (op.getDistScheduleChunkSize())
+ result = todo("dist_schedule with chunk_size");
+ };
auto checkHasDeviceAddr = [&todo](auto op, LogicalResult &result) {
if (!op.getHasDeviceAddrVars().empty())
result = todo("has_device_addr");
@@ -255,6 +259,16 @@ static LogicalResult checkImplementationStatus(Operation &op) {
LogicalResult result = success();
llvm::TypeSwitch<Operation &>(op)
+ .Case([&](omp::DistributeOp op) {
+ if (op.isComposite() &&
+ isa_and_present<omp::WsloopOp>(op.getNestedWrapper()))
+ result = op.emitError() << "not yet implemented: "
+ "composite omp.distribute + omp.wsloop";
+ checkAllocate(op, result);
+ checkDistSchedule(op, result);
+ checkOrder(op, result);
+ checkPrivate(op, result);
+ })
.Case([&](omp::OrderedRegionOp op) { checkParLevelSimd(op, result); })
.Case([&](omp::SectionsOp op) {
checkAllocate(op, result);
@@ -3755,6 +3769,67 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
return success();
}
+static LogicalResult
+convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation) {
+ llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+ auto distributeOp = cast<omp::DistributeOp>(opInst);
+ if (failed(checkImplementationStatus(opInst)))
+ return failure();
+
+ using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+ auto bodyGenCB = [&](InsertPointTy allocaIP,
+ InsertPointTy codeGenIP) -> llvm::Error {
+ // DistributeOp has only one region associated with it.
+ builder.restoreIP(codeGenIP);
+
+ llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+ llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+ llvm::Expected<llvm::BasicBlock *> regionBlock =
+ convertOmpOpRegions(distributeOp.getRegion(), "omp.distribute.region",
+ builder, moduleTranslation);
+ if (!regionBlock)
+ return regionBlock.takeError();
+ builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
+
+ // TODO: Add support for clauses which are valid for DISTRIBUTE constructs.
+ // Static schedule is the default.
+ auto schedule = omp::ClauseScheduleKind::Static;
+ bool isOrdered = false;
+ std::optional<omp::ScheduleModifier> scheduleMod;
+ bool isSimd = false;
+ llvm::omp::WorksharingLoopType workshareLoopType =
+ llvm::omp::WorksharingLoopType::DistributeStaticLoop;
+ bool loopNeedsBarrier = false;
+ llvm::Value *chunk = nullptr;
+
+ llvm::CanonicalLoopInfo *loopInfo = *findCurrentLoopInfo(moduleTranslation);
+ llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
+ ompBuilder->applyWorkshareLoop(
+ ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
+ convertToScheduleKind(schedule), chunk, isSimd,
+ scheduleMod == omp::ScheduleModifier::monotonic,
+ scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
+ workshareLoopType);
+
+ if (!wsloopIP)
+ return wsloopIP.takeError();
+ return llvm::Error::success();
+ };
+
+ llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
+ findAllocaInsertPoint(builder, moduleTranslation);
+ llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+ llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
+ ompBuilder->createDistribute(ompLoc, allocaIP, bodyGenCB);
+
+ if (failed(handleError(afterIP, opInst)))
+ return failure();
+
+ builder.restoreIP(*afterIP);
+ return success();
+}
+
/// Lowers the FlagsAttr which is applied to the module on the device
/// pass when offloading, this attribute contains OpenMP RTL globals that can
/// be passed as flags to the frontend, otherwise they are set to default
@@ -4685,6 +4760,9 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
.Case([&](omp::TargetOp) {
return convertOmpTarget(*op, builder, moduleTranslation);
})
+ .Case([&](omp::DistributeOp) {
+ return convertOmpDistribute(*op, builder, moduleTranslation);
+ })
.Case([&](omp::LoopNestOp) {
return convertOmpLoopNest(*op, builder, moduleTranslation);
})
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index cf18c07dd605b..a5a490e527d79 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -3270,3 +3270,40 @@ llvm.func @omp_task_if(%boolexpr: i1) {
// -----
module attributes {omp.requires = #omp<clause_requires reverse_offload|unified_shared_memory>} {}
+
+// -----
+
+llvm.func @distribute() {
+ %0 = llvm.mlir.constant(42 : index) : i64
+ %1 = llvm.mlir.constant(10 : index) : i64
+ %2 = llvm.mlir.constant(1 : index) : i64
+ omp.distribute {
+ omp.loop_nest (%arg1) : i64 = (%1) to (%0) step (%2) {
+ omp.yield
+ }
+ }
+ llvm.return
+}
+
+// CHECK-LABEL: define void @distribute
+// CHECK: call void @[[OUTLINED:.*]]({{.*}})
+// CHECK-NEXT: br label %[[EXIT:.*]]
+// CHECK: [[EXIT]]:
+// CHECK: ret void
+
+// CHECK: define internal void @[[OUTLINED]]({{.*}})
+// CHECK: %[[LASTITER:.*]] = alloca i32
+// CHECK: %[[LB:.*]] = alloca i64
+// CHECK: %[[UB:.*]] = alloca i64
+// CHECK: %[[STRIDE:.*]] = alloca i64
+// CHECK: br label %[[BODY:.*]]
+// CHECK: [[BODY]]:
+// CHECK-NEXT: br label %[[REGION:.*]]
+// CHECK: [[REGION]]:
+// CHECK-NEXT: br label %[[PREHEADER:.*]]
+// CHECK: [[PREHEADER]]:
+// CHECK: store i64 0, ptr %[[LB]]
+// CHECK: store i64 31, ptr %[[UB]]
+// CHECK: store i64 1, ptr %[[STRIDE]]
+// CHECK: %[[TID:.*]] = call i32 @__kmpc_global_thread_num({{.*}})
+// CHECK: call void @__kmpc_for_static_init_{{.*}}(ptr @{{.*}}, i32 %[[TID]], i32 92, ptr %[[LASTITER]], ptr %[[LB]], ptr %[[UB]], ptr %[[STRIDE]], i64 1, i64 0)
diff --git a/mlir/test/Target/LLVMIR/openmp-todo.mlir b/mlir/test/Target/LLVMIR/openmp-todo.mlir
index e97b5e54e6415..71dbc061c3104 100644
--- a/mlir/test/Target/LLVMIR/openmp-todo.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-todo.mlir
@@ -66,10 +66,70 @@ llvm.func @do_simd(%lb : i32, %ub : i32, %step : i32) {
// -----
-llvm.func @distribute(%lb : i32, %ub : i32, %step : i32) {
- // expected-error@below {{not yet implemented: omp.distribute}}
+llvm.func @distribute_wsloop(%lb : i32, %ub : i32, %step : i32) {
+ // expected-error@below {{LLVM Translation failed for operation: omp.parallel}}
+ omp.parallel {
+ // expected-error@below {{not yet implemented: composite omp.distribute + omp.wsloop}}
+ // expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
+ omp.distribute {
+ omp.wsloop {
+ omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+ omp.yield
+ }
+ } {omp.composite}
+ } {omp.composite}
+ omp.terminator
+ } {omp.composite}
+ llvm.return
+}
+
+// -----
+
+llvm.func @distribute_allocate(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
+ // expected-error@below {{not yet implemented: Unhandled clause allocate in omp.distribute operation}}
+ // expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
+ omp.distribute allocate(%x : !llvm.ptr -> %x : !llvm.ptr) {
+ omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+ omp.yield
+ }
+ }
+ llvm.return
+}
+
+// -----
+
+llvm.func @distribute_dist_schedule(%lb : i32, %ub : i32, %step : i32, %x : i32) {
+ // expected-error@below {{not yet implemented: Unhandled clause dist_schedule with chunk_size in omp.distribute operation}}
+ // expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
+ omp.distribute dist_schedule_static dist_schedule_chunk_size(%x : i32) {
+ omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+ omp.yield
+ }
+ }
+ llvm.return
+}
+
+// -----
+
+llvm.func @distribute_order(%lb : i32, %ub : i32, %step : i32) {
+ // expected-error@below {{not yet implemented: Unhandled clause order in omp.distribute operation}}
+ // expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
+ omp.distribute order(concurrent) {
+ omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+ omp.yield
+ }
+ }
+ llvm.return
+}
+
+// -----
+
+omp.private {type = private} @x.privatizer : !llvm.ptr
+
+llvm.func @distribute_private(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
+ // expected-error@below {{not yet implemented: Unhandled clause privatization in omp.distribute operation}}
// expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
- omp.distribute {
+ omp.distribute private(@x.privatizer %x -> %arg0 : !llvm.ptr) {
omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
omp.yield
}
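The standalone test added to openmp-llvm.mlir above can also be exercised directly. A minimal sketch, assuming a build where `mlir-translate` has the OpenMP-to-LLVM-IR translation registered (the file and function names here are illustrative):

```mlir
// Translate to LLVM IR with:
//   mlir-translate -mlir-to-llvmir distribute.mlir

llvm.func @standalone_distribute() {
  %lb = llvm.mlir.constant(0 : i64) : i64
  %ub = llvm.mlir.constant(16 : i64) : i64
  %step = llvm.mlir.constant(1 : i64) : i64
  // A standalone distribute wraps the loop nest directly; no nested
  // omp.wsloop or omp.simd wrapper is involved.
  omp.distribute {
    omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
      omp.yield
    }
  }
  llvm.return
}
```

As the CHECK lines above show, the loop body is outlined into a separate function and the bounds are handed to `__kmpc_for_static_init_*` with schedule value 92, which corresponds to the static distribute schedule in the OpenMP runtime.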
LGTM, thanks
This patch adds MLIR to LLVM IR translation support for standalone `omp.distribute` operations, as well as `distribute simd` through ignoring SIMD information (similarly to `do/for simd`).

Co-authored-by: Dominik Adamski <[email protected]>
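For the `distribute simd` case mentioned here, no dedicated test appears in the diff; the composite form accepted by this lowering would look roughly as follows. This is an illustrative sketch only: per the patch description, the host translation simply ignores the SIMD information, mirroring the existing `do/for simd` handling.

```mlir
llvm.func @distribute_simd(%lb : i32, %ub : i32, %step : i32) {
  omp.distribute {
    // The simd wrapper is dropped by the host lowering, so this emits
    // the same distribute worksharing loop as the standalone case.
    omp.simd {
      omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
        omp.yield
      }
    } {omp.composite}
  } {omp.composite}
  llvm.return
}
```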