Commit 55089ba

[MLIR][OpenMP] Host lowering of standalone distribute
This patch adds MLIR to LLVM IR translation support for standalone `omp.distribute` operations, as well as for `distribute simd`, whose SIMD information is ignored (as is already done for `do/for simd`).

Co-authored-by: Dominik Adamski <[email protected]>

1 parent 40d140e commit 55089ba
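
The `distribute simd` case mentioned in the message corresponds to a composite `omp.distribute` wrapping an `omp.simd`. The sketch below is illustrative only: it is not part of this commit, and the operand syntax is assumed to follow the composite pattern used in the tests further down. With this patch, the SIMD information of the inner wrapper is simply ignored, so such a loop is lowered like a standalone `omp.distribute`:

// Hypothetical input, not taken from this commit: a composite `distribute simd`.
llvm.func @distribute_simd(%lb : i32, %ub : i32, %step : i32) {
  omp.distribute {
    omp.simd {
      omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
        omp.yield
      }
    } {omp.composite}
  } {omp.composite}
  llvm.return
}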

File tree: 3 files changed (+183, -3 lines)

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 83 additions & 0 deletions
@@ -161,6 +161,10 @@ static LogicalResult checkImplementationStatus(Operation &op) {
     if (op.getDevice())
       result = todo("device");
   };
+  auto checkDistSchedule = [&todo](auto op, LogicalResult &result) {
+    if (op.getDistScheduleChunkSize())
+      result = todo("dist_schedule with chunk_size");
+  };
   auto checkHasDeviceAddr = [&todo](auto op, LogicalResult &result) {
     if (!op.getHasDeviceAddrVars().empty())
       result = todo("has_device_addr");
@@ -252,6 +256,16 @@ static LogicalResult checkImplementationStatus(Operation &op) {
 
   LogicalResult result = success();
   llvm::TypeSwitch<Operation &>(op)
+      .Case([&](omp::DistributeOp op) {
+        if (op.isComposite() &&
+            isa_and_present<omp::WsloopOp>(op.getNestedWrapper()))
+          result = op.emitError() << "not yet implemented: "
+                                     "composite omp.distribute + omp.wsloop";
+        checkAllocate(op, result);
+        checkDistSchedule(op, result);
+        checkOrder(op, result);
+        checkPrivate(op, result);
+      })
       .Case([&](omp::OrderedRegionOp op) { checkParLevelSimd(op, result); })
       .Case([&](omp::SectionsOp op) {
         checkAllocate(op, result);
@@ -3754,6 +3768,72 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
   return success();
 }
 
+static LogicalResult
+convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
+                     LLVM::ModuleTranslation &moduleTranslation) {
+  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+  auto distributeOp = cast<omp::DistributeOp>(opInst);
+  if (failed(checkImplementationStatus(opInst)))
+    return failure();
+
+  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+  auto bodyGenCB = [&](InsertPointTy allocaIP,
+                       InsertPointTy codeGenIP) -> llvm::Error {
+    // Save the alloca insertion point on ModuleTranslation stack for use in
+    // nested regions.
+    LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
+        moduleTranslation, allocaIP);
+
+    // DistributeOp has only one region associated with it.
+    builder.restoreIP(codeGenIP);
+
+    llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+    llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+    llvm::Expected<llvm::BasicBlock *> regionBlock =
+        convertOmpOpRegions(distributeOp.getRegion(), "omp.distribute.region",
+                            builder, moduleTranslation);
+    if (!regionBlock)
+      return regionBlock.takeError();
+    builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
+
+    // TODO: Add support for clauses which are valid for DISTRIBUTE constructs.
+    // Static schedule is the default.
+    auto schedule = omp::ClauseScheduleKind::Static;
+    bool isOrdered = false;
+    std::optional<omp::ScheduleModifier> scheduleMod;
+    bool isSimd = false;
+    llvm::omp::WorksharingLoopType workshareLoopType =
+        llvm::omp::WorksharingLoopType::DistributeStaticLoop;
+    bool loopNeedsBarrier = false;
+    llvm::Value *chunk = nullptr;
+
+    llvm::CanonicalLoopInfo *loopInfo = findCurrentLoopInfo(moduleTranslation);
+    llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
+        ompBuilder->applyWorkshareLoop(
+            ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
+            convertToScheduleKind(schedule), chunk, isSimd,
+            scheduleMod == omp::ScheduleModifier::monotonic,
+            scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
+            workshareLoopType);
+
+    if (!wsloopIP)
+      return wsloopIP.takeError();
+    return llvm::Error::success();
+  };
+
+  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
+      findAllocaInsertPoint(builder, moduleTranslation);
+  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
+      ompBuilder->createDistribute(ompLoc, allocaIP, bodyGenCB);
+
+  if (failed(handleError(afterIP, opInst)))
+    return failure();
+
+  builder.restoreIP(*afterIP);
+  return success();
+}
+
 /// Lowers the FlagsAttr which is applied to the module on the device
 /// pass when offloading, this attribute contains OpenMP RTL globals that can
 /// be passed as flags to the frontend, otherwise they are set to default
@@ -4697,6 +4777,9 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
       .Case([&](omp::TargetOp) {
         return convertOmpTarget(*op, builder, moduleTranslation);
       })
+      .Case([&](omp::DistributeOp) {
+        return convertOmpDistribute(*op, builder, moduleTranslation);
+      })
       .Case([&](omp::LoopNestOp) {
         return convertOmpLoopNest(*op, builder, moduleTranslation);
       })

mlir/test/Target/LLVMIR/openmp-llvm.mlir

Lines changed: 37 additions & 0 deletions
@@ -3270,3 +3270,40 @@ llvm.func @omp_task_if(%boolexpr: i1) {
 // -----
 
 module attributes {omp.requires = #omp<clause_requires reverse_offload|unified_shared_memory>} {}
+
+// -----
+
+llvm.func @distribute() {
+  %0 = llvm.mlir.constant(42 : index) : i64
+  %1 = llvm.mlir.constant(10 : index) : i64
+  %2 = llvm.mlir.constant(1 : index) : i64
+  omp.distribute {
+    omp.loop_nest (%arg1) : i64 = (%1) to (%0) step (%2) {
+      omp.yield
+    }
+  }
+  llvm.return
+}
+
+// CHECK-LABEL: define void @distribute
+// CHECK: call void @[[OUTLINED:.*]]({{.*}})
+// CHECK-NEXT: br label %[[EXIT:.*]]
+// CHECK: [[EXIT]]:
+// CHECK: ret void
+
+// CHECK: define internal void @[[OUTLINED]]({{.*}})
+// CHECK: %[[LASTITER:.*]] = alloca i32
+// CHECK: %[[LB:.*]] = alloca i64
+// CHECK: %[[UB:.*]] = alloca i64
+// CHECK: %[[STRIDE:.*]] = alloca i64
+// CHECK: br label %[[BODY:.*]]
+// CHECK: [[BODY]]:
+// CHECK-NEXT: br label %[[REGION:.*]]
+// CHECK: [[REGION]]:
+// CHECK-NEXT: br label %[[PREHEADER:.*]]
+// CHECK: [[PREHEADER]]:
+// CHECK: store i64 0, ptr %[[LB]]
+// CHECK: store i64 31, ptr %[[UB]]
+// CHECK: store i64 1, ptr %[[STRIDE]]
+// CHECK: %[[TID:.*]] = call i32 @__kmpc_global_thread_num({{.*}})
+// CHECK: call void @__kmpc_for_static_init_{{.*}}(ptr @{{.*}}, i32 %[[TID]], i32 92, ptr %[[LASTITER]], ptr %[[LB]], ptr %[[UB]], ptr %[[STRIDE]], i64 1, i64 0)

mlir/test/Target/LLVMIR/openmp-todo.mlir

Lines changed: 63 additions & 3 deletions
@@ -66,10 +66,70 @@ llvm.func @do_simd(%lb : i32, %ub : i32, %step : i32) {
 
 // -----
 
-llvm.func @distribute(%lb : i32, %ub : i32, %step : i32) {
-  // expected-error@below {{not yet implemented: omp.distribute}}
+llvm.func @distribute_wsloop(%lb : i32, %ub : i32, %step : i32) {
+  // expected-error@below {{LLVM Translation failed for operation: omp.parallel}}
+  omp.parallel {
+    // expected-error@below {{not yet implemented: composite omp.distribute + omp.wsloop}}
+    // expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
+    omp.distribute {
+      omp.wsloop {
+        omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+          omp.yield
+        }
+      } {omp.composite}
+    } {omp.composite}
+    omp.terminator
+  } {omp.composite}
+  llvm.return
+}
+
+// -----
+
+llvm.func @distribute_allocate(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
+  // expected-error@below {{not yet implemented: Unhandled clause allocate in omp.distribute operation}}
+  // expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
+  omp.distribute allocate(%x : !llvm.ptr -> %x : !llvm.ptr) {
+    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+  }
+  llvm.return
+}
+
+// -----
+
+llvm.func @distribute_dist_schedule(%lb : i32, %ub : i32, %step : i32, %x : i32) {
+  // expected-error@below {{not yet implemented: Unhandled clause dist_schedule with chunk_size in omp.distribute operation}}
+  // expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
+  omp.distribute dist_schedule_static dist_schedule_chunk_size(%x : i32) {
+    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+  }
+  llvm.return
+}
+
+// -----
+
+llvm.func @distribute_order(%lb : i32, %ub : i32, %step : i32) {
+  // expected-error@below {{not yet implemented: Unhandled clause order in omp.distribute operation}}
+  // expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
+  omp.distribute order(concurrent) {
+    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+  }
+  llvm.return
+}
+
+// -----
+
+omp.private {type = private} @x.privatizer : !llvm.ptr
+
+llvm.func @distribute_private(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
+  // expected-error@below {{not yet implemented: Unhandled clause privatization in omp.distribute operation}}
   // expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
-  omp.distribute {
+  omp.distribute private(@x.privatizer %x -> %arg0 : !llvm.ptr) {
     omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
       omp.yield
     }