
Commit 654c63e

[MLIR][OpenMP] Host lowering of standalone distribute
This patch adds MLIR to LLVM IR translation support for standalone `omp.distribute` operations, as well as for `distribute simd` by ignoring SIMD information (similarly to `do/for simd`).

Co-authored-by: Dominik Adamski <[email protected]>
1 parent 26638a2
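For illustration, a standalone composite `distribute simd` nest of the kind this patch now lowers on the host could look like the sketch below. This is a sketch only: the function name is hypothetical, the composite-wrapper syntax mirrors the `omp.distribute` + `omp.wsloop` case in the todo test further down, and, per the commit message, the `omp.simd` wrapper's SIMD information is simply ignored during translation.

  llvm.func @distribute_simd(%lb : i32, %ub : i32, %step : i32) {
    omp.distribute {
      omp.simd {
        omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
          omp.yield
        }
      } {omp.composite}
    } {omp.composite}
    llvm.return
  }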

File tree

3 files changed: +183 -3 lines


mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 83 additions & 0 deletions
@@ -161,6 +161,10 @@ static LogicalResult checkImplementationStatus(Operation &op) {
     if (op.getDevice())
       result = todo("device");
   };
+  auto checkDistSchedule = [&todo](auto op, LogicalResult &result) {
+    if (op.getDistScheduleChunkSize())
+      result = todo("dist_schedule with chunk_size");
+  };
   auto checkHasDeviceAddr = [&todo](auto op, LogicalResult &result) {
     if (!op.getHasDeviceAddrVars().empty())
       result = todo("has_device_addr");
@@ -252,6 +256,16 @@ static LogicalResult checkImplementationStatus(Operation &op) {
 
   LogicalResult result = success();
   llvm::TypeSwitch<Operation &>(op)
+      .Case([&](omp::DistributeOp op) {
+        if (op.isComposite() &&
+            isa_and_present<omp::WsloopOp>(op.getNestedWrapper()))
+          result = op.emitError() << "not yet implemented: "
+                                     "composite omp.distribute + omp.wsloop";
+        checkAllocate(op, result);
+        checkDistSchedule(op, result);
+        checkOrder(op, result);
+        checkPrivate(op, result);
+      })
       .Case([&](omp::OrderedRegionOp op) { checkParLevelSimd(op, result); })
       .Case([&](omp::SectionsOp op) {
         checkAllocate(op, result);
@@ -3854,6 +3868,72 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
   return success();
 }
 
+static LogicalResult
+convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
+                     LLVM::ModuleTranslation &moduleTranslation) {
+  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+  auto distributeOp = cast<omp::DistributeOp>(opInst);
+  if (failed(checkImplementationStatus(opInst)))
+    return failure();
+
+  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+  auto bodyGenCB = [&](InsertPointTy allocaIP,
+                       InsertPointTy codeGenIP) -> llvm::Error {
+    // Save the alloca insertion point on ModuleTranslation stack for use in
+    // nested regions.
+    LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
+        moduleTranslation, allocaIP);
+
+    // DistributeOp has only one region associated with it.
+    builder.restoreIP(codeGenIP);
+
+    llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+    llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+    llvm::Expected<llvm::BasicBlock *> regionBlock =
+        convertOmpOpRegions(distributeOp.getRegion(), "omp.distribute.region",
+                            builder, moduleTranslation);
+    if (!regionBlock)
+      return regionBlock.takeError();
+    builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
+
+    // TODO: Add support for clauses which are valid for DISTRIBUTE constructs.
+    // Static schedule is the default.
+    auto schedule = omp::ClauseScheduleKind::Static;
+    bool isOrdered = false;
+    std::optional<omp::ScheduleModifier> scheduleMod;
+    bool isSimd = false;
+    llvm::omp::WorksharingLoopType workshareLoopType =
+        llvm::omp::WorksharingLoopType::DistributeStaticLoop;
+    bool loopNeedsBarrier = false;
+    llvm::Value *chunk = nullptr;
+
+    llvm::CanonicalLoopInfo *loopInfo = findCurrentLoopInfo(moduleTranslation);
+    llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
+        ompBuilder->applyWorkshareLoop(
+            ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
+            convertToScheduleKind(schedule), chunk, isSimd,
+            scheduleMod == omp::ScheduleModifier::monotonic,
+            scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
+            workshareLoopType);
+
+    if (!wsloopIP)
+      return wsloopIP.takeError();
+    return llvm::Error::success();
+  };
+
+  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
+      findAllocaInsertPoint(builder, moduleTranslation);
+  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
+      ompBuilder->createDistribute(ompLoc, allocaIP, bodyGenCB);
+
+  if (failed(handleError(afterIP, opInst)))
+    return failure();
+
+  builder.restoreIP(*afterIP);
+  return success();
+}
+
 /// Lowers the FlagsAttr which is applied to the module on the device
 /// pass when offloading, this attribute contains OpenMP RTL globals that can
 /// be passed as flags to the frontend, otherwise they are set to default
@@ -4813,6 +4893,9 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
       .Case([&](omp::TargetOp) {
         return convertOmpTarget(*op, builder, moduleTranslation);
       })
+      .Case([&](omp::DistributeOp) {
+        return convertOmpDistribute(*op, builder, moduleTranslation);
+      })
       .Case([&](omp::LoopNestOp) {
        return convertOmpLoopNest(*op, builder, moduleTranslation);
       })

mlir/test/Target/LLVMIR/openmp-llvm.mlir

Lines changed: 37 additions & 0 deletions
@@ -3270,3 +3270,40 @@ llvm.func @omp_task_if(%boolexpr: i1) {
 // -----
 
 module attributes {omp.requires = #omp<clause_requires reverse_offload|unified_shared_memory>} {}
+
+// -----
+
+llvm.func @distribute() {
+  %0 = llvm.mlir.constant(42 : index) : i64
+  %1 = llvm.mlir.constant(10 : index) : i64
+  %2 = llvm.mlir.constant(1 : index) : i64
+  omp.distribute {
+    omp.loop_nest (%arg1) : i64 = (%1) to (%0) step (%2) {
+      omp.yield
+    }
+  }
+  llvm.return
+}
+
+// CHECK-LABEL: define void @distribute
+// CHECK: call void @[[OUTLINED:.*]]({{.*}})
+// CHECK-NEXT: br label %[[EXIT:.*]]
+// CHECK: [[EXIT]]:
+// CHECK: ret void
+
+// CHECK: define internal void @[[OUTLINED]]({{.*}})
+// CHECK: %[[LASTITER:.*]] = alloca i32
+// CHECK: %[[LB:.*]] = alloca i64
+// CHECK: %[[UB:.*]] = alloca i64
+// CHECK: %[[STRIDE:.*]] = alloca i64
+// CHECK: br label %[[BODY:.*]]
+// CHECK: [[BODY]]:
+// CHECK-NEXT: br label %[[REGION:.*]]
+// CHECK: [[REGION]]:
+// CHECK-NEXT: br label %[[PREHEADER:.*]]
+// CHECK: [[PREHEADER]]:
+// CHECK: store i64 0, ptr %[[LB]]
+// CHECK: store i64 31, ptr %[[UB]]
+// CHECK: store i64 1, ptr %[[STRIDE]]
+// CHECK: %[[TID:.*]] = call i32 @__kmpc_global_thread_num({{.*}})
+// CHECK: call void @__kmpc_for_static_init_{{.*}}(ptr @{{.*}}, i32 %[[TID]], i32 92, ptr %[[LASTITER]], ptr %[[LB]], ptr %[[UB]], ptr %[[STRIDE]], i64 1, i64 0)
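Usage note: like the other tests under mlir/test/Target/LLVMIR, this file is exercised by translating it to LLVM IR with mlir-translate and piping the output into FileCheck (the exact RUN flags, e.g. -mlir-to-llvmir -split-input-file, are an assumption here and not part of this diff). The i32 92 schedule argument in the final check corresponds to the runtime's unchunked static distribute schedule, which is what the DistributeStaticLoop worksharing-loop type selected in convertOmpDistribute requests when no dist_schedule chunk size is given.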

mlir/test/Target/LLVMIR/openmp-todo.mlir

Lines changed: 63 additions & 3 deletions
@@ -66,10 +66,70 @@ llvm.func @do_simd(%lb : i32, %ub : i32, %step : i32) {
 
 // -----
 
-llvm.func @distribute(%lb : i32, %ub : i32, %step : i32) {
-  // expected-error@below {{not yet implemented: omp.distribute}}
+llvm.func @distribute_wsloop(%lb : i32, %ub : i32, %step : i32) {
+  // expected-error@below {{LLVM Translation failed for operation: omp.parallel}}
+  omp.parallel {
+    // expected-error@below {{not yet implemented: composite omp.distribute + omp.wsloop}}
+    // expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
+    omp.distribute {
+      omp.wsloop {
+        omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+          omp.yield
+        }
+      } {omp.composite}
+    } {omp.composite}
+    omp.terminator
+  } {omp.composite}
+  llvm.return
+}
+
+// -----
+
+llvm.func @distribute_allocate(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
+  // expected-error@below {{not yet implemented: Unhandled clause allocate in omp.distribute operation}}
+  // expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
+  omp.distribute allocate(%x : !llvm.ptr -> %x : !llvm.ptr) {
+    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+  }
+  llvm.return
+}
+
+// -----
+
+llvm.func @distribute_dist_schedule(%lb : i32, %ub : i32, %step : i32, %x : i32) {
+  // expected-error@below {{not yet implemented: Unhandled clause dist_schedule with chunk_size in omp.distribute operation}}
+  // expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
+  omp.distribute dist_schedule_static dist_schedule_chunk_size(%x : i32) {
+    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+  }
+  llvm.return
+}
+
+// -----
+
+llvm.func @distribute_order(%lb : i32, %ub : i32, %step : i32) {
+  // expected-error@below {{not yet implemented: Unhandled clause order in omp.distribute operation}}
+  // expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
+  omp.distribute order(concurrent) {
+    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+  }
+  llvm.return
+}
+
+// -----
+
+omp.private {type = private} @x.privatizer : !llvm.ptr
+
+llvm.func @distribute_private(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
+  // expected-error@below {{not yet implemented: Unhandled clause privatization in omp.distribute operation}}
   // expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
-  omp.distribute {
+  omp.distribute private(@x.privatizer %x -> %arg0 : !llvm.ptr) {
     omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
       omp.yield
     }
