Skip to content

Commit 8ecbf35

Browse files
[MLIR][OpenMP] Host lowering of standalone distribute
This patch adds MLIR to LLVM IR translation support for standalone `omp.distribute` operations, as well as for `distribute simd`, by ignoring SIMD information (similarly to `do/for simd`). Co-authored-by: Dominik Adamski <[email protected]>
1 parent a79b7a2 commit 8ecbf35

File tree

3 files changed

+178
-3
lines changed

3 files changed

+178
-3
lines changed

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,10 @@ static LogicalResult checkImplementationStatus(Operation &op) {
164164
if (op.getDevice())
165165
result = todo("device");
166166
};
167+
// Clause check for `dist_schedule`: when `op.getDistScheduleChunkSize()` is
// truthy, `result` is overwritten with whatever
// `todo("dist_schedule with chunk_size")` returns; otherwise `result` is left
// untouched. `op` is taken as `auto`, so any op type exposing that accessor
// can be passed.
auto checkDistSchedule = [&todo](auto op, LogicalResult &result) {
168+
if (op.getDistScheduleChunkSize())
169+
result = todo("dist_schedule with chunk_size");
170+
};
167171
auto checkHasDeviceAddr = [&todo](auto op, LogicalResult &result) {
168172
if (!op.getHasDeviceAddrVars().empty())
169173
result = todo("has_device_addr");
@@ -255,6 +259,16 @@ static LogicalResult checkImplementationStatus(Operation &op) {
255259

256260
LogicalResult result = success();
257261
llvm::TypeSwitch<Operation &>(op)
262+
.Case([&](omp::DistributeOp op) {
263+
if (op.isComposite() &&
264+
isa_and_present<omp::WsloopOp>(op.getNestedWrapper()))
265+
result = op.emitError() << "not yet implemented: "
266+
"composite omp.distribute + omp.wsloop";
267+
checkAllocate(op, result);
268+
checkDistSchedule(op, result);
269+
checkOrder(op, result);
270+
checkPrivate(op, result);
271+
})
258272
.Case([&](omp::OrderedRegionOp op) { checkParLevelSimd(op, result); })
259273
.Case([&](omp::SectionsOp op) {
260274
checkAllocate(op, result);
@@ -3755,6 +3769,67 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
37553769
return success();
37563770
}
37573771

3772+
/// Translates an `omp.distribute` operation (`opInst`, cast to
/// `omp::DistributeOp`) by handing a body-generation callback to
/// `OpenMPIRBuilder::createDistribute`.
///
/// Returns failure when `checkImplementationStatus` rejects the operation or
/// when `handleError` reports a failure for the insertion point returned by
/// `createDistribute`. Otherwise `builder` is restored to that insertion point
/// and success is returned.
static LogicalResult
3773+
convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
3774+
LLVM::ModuleTranslation &moduleTranslation) {
3775+
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3776+
auto distributeOp = cast<omp::DistributeOp>(opInst);
3777+
// Give up before emitting anything if the op is reported as not implemented.
if (failed(checkImplementationStatus(opInst)))
3778+
return failure();
3779+
3780+
using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
3781+
// Callback passed to createDistribute below: it translates the op's region
// starting at `codeGenIP`, then applies the workshare-loop lowering to the
// current loop info. Any llvm::Error produced on the way is returned.
auto bodyGenCB = [&](InsertPointTy allocaIP,
3782+
InsertPointTy codeGenIP) -> llvm::Error {
3783+
// DistributeOp has only one region associated with it.
3784+
builder.restoreIP(codeGenIP);
3785+
3786+
// NOTE(review): this shadows the outer `ompBuilder`, which is initialized
// from the same call and already captured by reference — looks redundant.
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3787+
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3788+
llvm::Expected<llvm::BasicBlock *> regionBlock =
3789+
convertOmpOpRegions(distributeOp.getRegion(), "omp.distribute.region",
3790+
builder, moduleTranslation);
3791+
if (!regionBlock)
3792+
return regionBlock.takeError();
3793+
// Continue emitting at the beginning of the block returned above.
builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
3794+
3795+
// TODO: Add support for clauses which are valid for DISTRIBUTE constructs.
3796+
// Static schedule is the default.
3797+
// The values below are fixed; no clause of `distributeOp` is read here.
auto schedule = omp::ClauseScheduleKind::Static;
3798+
bool isOrdered = false;
3799+
// Left empty, so both modifier comparisons passed to applyWorkshareLoop
// below evaluate to false.
std::optional<omp::ScheduleModifier> scheduleMod;
3800+
bool isSimd = false;
3801+
llvm::omp::WorksharingLoopType workshareLoopType =
3802+
llvm::omp::WorksharingLoopType::DistributeStaticLoop;
3803+
bool loopNeedsBarrier = false;
3804+
llvm::Value *chunk = nullptr;
3805+
3806+
// NOTE(review): the result of findCurrentLoopInfo is dereferenced without a
// visible validity check — confirm it cannot be null/empty at this point.
llvm::CanonicalLoopInfo *loopInfo = *findCurrentLoopInfo(moduleTranslation);
3807+
llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
3808+
ompBuilder->applyWorkshareLoop(
3809+
ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
3810+
convertToScheduleKind(schedule), chunk, isSimd,
3811+
scheduleMod == omp::ScheduleModifier::monotonic,
3812+
scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
3813+
workshareLoopType);
3814+
3815+
// Only the error state of `wsloopIP` is consumed; the insertion point it
// carries on success is not used.
if (!wsloopIP)
3816+
return wsloopIP.takeError();
3817+
return llvm::Error::success();
3818+
};
3819+
3820+
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
3821+
findAllocaInsertPoint(builder, moduleTranslation);
3822+
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3823+
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
3824+
ompBuilder->createDistribute(ompLoc, allocaIP, bodyGenCB);
3825+
3826+
// handleError is given both the result and the op; a failure aborts the
// translation of this operation.
if (failed(handleError(afterIP, opInst)))
3827+
return failure();
3828+
3829+
// Resume emission at the insertion point returned by createDistribute.
builder.restoreIP(*afterIP);
3830+
return success();
3831+
}
3832+
37583833
/// Lowers the FlagsAttr which is applied to the module on the device
37593834
/// pass when offloading, this attribute contains OpenMP RTL globals that can
37603835
/// be passed as flags to the frontend, otherwise they are set to default
@@ -4685,6 +4760,9 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
46854760
.Case([&](omp::TargetOp) {
46864761
return convertOmpTarget(*op, builder, moduleTranslation);
46874762
})
4763+
.Case([&](omp::DistributeOp) {
4764+
return convertOmpDistribute(*op, builder, moduleTranslation);
4765+
})
46884766
.Case([&](omp::LoopNestOp) {
46894767
return convertOmpLoopNest(*op, builder, moduleTranslation);
46904768
})

mlir/test/Target/LLVMIR/openmp-llvm.mlir

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3270,3 +3270,40 @@ llvm.func @omp_task_if(%boolexpr: i1) {
32703270
// -----
32713271

32723272
module attributes {omp.requires = #omp<clause_requires reverse_offload|unified_shared_memory>} {}
3273+
3274+
// -----
3275+
3276+
llvm.func @distribute() {
3277+
%0 = llvm.mlir.constant(42 : index) : i64
3278+
%1 = llvm.mlir.constant(10 : index) : i64
3279+
%2 = llvm.mlir.constant(1 : index) : i64
3280+
omp.distribute {
3281+
omp.loop_nest (%arg1) : i64 = (%1) to (%0) step (%2) {
3282+
omp.yield
3283+
}
3284+
}
3285+
llvm.return
3286+
}
3287+
3288+
// CHECK-LABEL: define void @distribute
3289+
// CHECK: call void @[[OUTLINED:.*]]({{.*}})
3290+
// CHECK-NEXT: br label %[[EXIT:.*]]
3291+
// CHECK: [[EXIT]]:
3292+
// CHECK: ret void
3293+
3294+
// CHECK: define internal void @[[OUTLINED]]({{.*}})
3295+
// CHECK: %[[LASTITER:.*]] = alloca i32
3296+
// CHECK: %[[LB:.*]] = alloca i64
3297+
// CHECK: %[[UB:.*]] = alloca i64
3298+
// CHECK: %[[STRIDE:.*]] = alloca i64
3299+
// CHECK: br label %[[BODY:.*]]
3300+
// CHECK: [[BODY]]:
3301+
// CHECK-NEXT: br label %[[REGION:.*]]
3302+
// CHECK: [[REGION]]:
3303+
// CHECK-NEXT: br label %[[PREHEADER:.*]]
3304+
// CHECK: [[PREHEADER]]:
3305+
// CHECK: store i64 0, ptr %[[LB]]
3306+
// CHECK: store i64 31, ptr %[[UB]]
3307+
// CHECK: store i64 1, ptr %[[STRIDE]]
3308+
// CHECK: %[[TID:.*]] = call i32 @__kmpc_global_thread_num({{.*}})
3309+
// CHECK: call void @__kmpc_for_static_init_{{.*}}(ptr @{{.*}}, i32 %[[TID]], i32 92, ptr %[[LASTITER]], ptr %[[LB]], ptr %[[UB]], ptr %[[STRIDE]], i64 1, i64 0)

mlir/test/Target/LLVMIR/openmp-todo.mlir

Lines changed: 63 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,10 +66,70 @@ llvm.func @do_simd(%lb : i32, %ub : i32, %step : i32) {
6666

6767
// -----
6868

69-
llvm.func @distribute(%lb : i32, %ub : i32, %step : i32) {
70-
// expected-error@below {{not yet implemented: omp.distribute}}
69+
llvm.func @distribute_wsloop(%lb : i32, %ub : i32, %step : i32) {
70+
// expected-error@below {{LLVM Translation failed for operation: omp.parallel}}
71+
omp.parallel {
72+
// expected-error@below {{not yet implemented: composite omp.distribute + omp.wsloop}}
73+
// expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
74+
omp.distribute {
75+
omp.wsloop {
76+
omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
77+
omp.yield
78+
}
79+
} {omp.composite}
80+
} {omp.composite}
81+
omp.terminator
82+
} {omp.composite}
83+
llvm.return
84+
}
85+
86+
// -----
87+
88+
llvm.func @distribute_allocate(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
89+
// expected-error@below {{not yet implemented: Unhandled clause allocate in omp.distribute operation}}
90+
// expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
91+
omp.distribute allocate(%x : !llvm.ptr -> %x : !llvm.ptr) {
92+
omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
93+
omp.yield
94+
}
95+
}
96+
llvm.return
97+
}
98+
99+
// -----
100+
101+
llvm.func @distribute_dist_schedule(%lb : i32, %ub : i32, %step : i32, %x : i32) {
102+
// expected-error@below {{not yet implemented: Unhandled clause dist_schedule with chunk_size in omp.distribute operation}}
103+
// expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
104+
omp.distribute dist_schedule_static dist_schedule_chunk_size(%x : i32) {
105+
omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
106+
omp.yield
107+
}
108+
}
109+
llvm.return
110+
}
111+
112+
// -----
113+
114+
llvm.func @distribute_order(%lb : i32, %ub : i32, %step : i32) {
115+
// expected-error@below {{not yet implemented: Unhandled clause order in omp.distribute operation}}
116+
// expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
117+
omp.distribute order(concurrent) {
118+
omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
119+
omp.yield
120+
}
121+
}
122+
llvm.return
123+
}
124+
125+
// -----
126+
127+
omp.private {type = private} @x.privatizer : !llvm.ptr
128+
129+
llvm.func @distribute_private(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
130+
// expected-error@below {{not yet implemented: Unhandled clause privatization in omp.distribute operation}}
71131
// expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
72-
omp.distribute {
132+
omp.distribute private(@x.privatizer %x -> %arg0 : !llvm.ptr) {
73133
omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
74134
omp.yield
75135
}

0 commit comments

Comments
 (0)