Skip to content

Commit 49b8d84

Browse files
authored
[OpenMP][MLIR] Support LLVM translation for distribute with delayed privatization (#131564)
Adds support for translating delayed privatization (`private` and `firstprivate`) for `omp.distribute` ops.
1 parent 44e4b27 commit 49b8d84

File tree

3 files changed

+168
-40
lines changed

3 files changed

+168
-40
lines changed

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 62 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
250250
checkAllocate(op, result);
251251
checkDistSchedule(op, result);
252252
checkOrder(op, result);
253-
checkPrivate(op, result);
254253
})
255254
.Case([&](omp::OrderedRegionOp op) { checkParLevelSimd(op, result); })
256255
.Case([&](omp::SectionsOp op) {
@@ -4188,6 +4187,38 @@ convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
41884187
// DistributeOp has only one region associated with it.
41894188
builder.restoreIP(codeGenIP);
41904189

4190+
// TODO This is a recurring pattern in almost all ops that need
4191+
// privatization. Try to abstract it in a shared util/interface.
4192+
MutableArrayRef<BlockArgument> privateBlockArgs =
4193+
cast<omp::BlockArgOpenMPOpInterface>(*distributeOp)
4194+
.getPrivateBlockArgs();
4195+
SmallVector<mlir::Value> mlirPrivateVars;
4196+
SmallVector<llvm::Value *> llvmPrivateVars;
4197+
SmallVector<omp::PrivateClauseOp> privateDecls;
4198+
mlirPrivateVars.reserve(privateBlockArgs.size());
4199+
llvmPrivateVars.reserve(privateBlockArgs.size());
4200+
collectPrivatizationDecls(distributeOp, privateDecls);
4201+
4202+
for (mlir::Value privateVar : distributeOp.getPrivateVars())
4203+
mlirPrivateVars.push_back(privateVar);
4204+
4205+
llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
4206+
builder, moduleTranslation, privateBlockArgs, privateDecls,
4207+
mlirPrivateVars, llvmPrivateVars, allocaIP);
4208+
if (handleError(afterAllocas, opInst).failed())
4209+
return llvm::make_error<PreviouslyReportedError>();
4210+
4211+
if (handleError(initPrivateVars(builder, moduleTranslation,
4212+
privateBlockArgs, privateDecls,
4213+
mlirPrivateVars, llvmPrivateVars),
4214+
opInst)
4215+
.failed())
4216+
return llvm::make_error<PreviouslyReportedError>();
4217+
4218+
if (failed(copyFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
4219+
llvmPrivateVars, privateDecls)))
4220+
return llvm::make_error<PreviouslyReportedError>();
4221+
41914222
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
41924223
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
41934224
llvm::Expected<llvm::BasicBlock *> regionBlock =
@@ -4200,31 +4231,37 @@ convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
42004231
// Skip applying a workshare loop below when translating 'distribute
42014232
// parallel do' (it's been already handled by this point while translating
42024233
// the nested omp.wsloop).
4203-
if (isa_and_present<omp::WsloopOp>(distributeOp.getNestedWrapper()))
4204-
return llvm::Error::success();
4234+
if (!isa_and_present<omp::WsloopOp>(distributeOp.getNestedWrapper())) {
4235+
// TODO: Add support for clauses which are valid for DISTRIBUTE
4236+
// constructs. Static schedule is the default.
4237+
auto schedule = omp::ClauseScheduleKind::Static;
4238+
bool isOrdered = false;
4239+
std::optional<omp::ScheduleModifier> scheduleMod;
4240+
bool isSimd = false;
4241+
llvm::omp::WorksharingLoopType workshareLoopType =
4242+
llvm::omp::WorksharingLoopType::DistributeStaticLoop;
4243+
bool loopNeedsBarrier = false;
4244+
llvm::Value *chunk = nullptr;
4245+
4246+
llvm::CanonicalLoopInfo *loopInfo =
4247+
findCurrentLoopInfo(moduleTranslation);
4248+
llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
4249+
ompBuilder->applyWorkshareLoop(
4250+
ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
4251+
convertToScheduleKind(schedule), chunk, isSimd,
4252+
scheduleMod == omp::ScheduleModifier::monotonic,
4253+
scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
4254+
workshareLoopType);
4255+
4256+
if (!wsloopIP)
4257+
return wsloopIP.takeError();
4258+
}
4259+
4260+
if (failed(cleanupPrivateVars(builder, moduleTranslation,
4261+
distributeOp.getLoc(), llvmPrivateVars,
4262+
privateDecls)))
4263+
return llvm::make_error<PreviouslyReportedError>();
42054264

4206-
// TODO: Add support for clauses which are valid for DISTRIBUTE constructs.
4207-
// Static schedule is the default.
4208-
auto schedule = omp::ClauseScheduleKind::Static;
4209-
bool isOrdered = false;
4210-
std::optional<omp::ScheduleModifier> scheduleMod;
4211-
bool isSimd = false;
4212-
llvm::omp::WorksharingLoopType workshareLoopType =
4213-
llvm::omp::WorksharingLoopType::DistributeStaticLoop;
4214-
bool loopNeedsBarrier = false;
4215-
llvm::Value *chunk = nullptr;
4216-
4217-
llvm::CanonicalLoopInfo *loopInfo = findCurrentLoopInfo(moduleTranslation);
4218-
llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
4219-
ompBuilder->applyWorkshareLoop(
4220-
ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
4221-
convertToScheduleKind(schedule), chunk, isSimd,
4222-
scheduleMod == omp::ScheduleModifier::monotonic,
4223-
scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
4224-
workshareLoopType);
4225-
4226-
if (!wsloopIP)
4227-
return wsloopIP.takeError();
42284265
return llvm::Error::success();
42294266
};
42304267

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
// Test code-gen for `omp.distribute` ops with delayed privatizers (i.e. using
2+
// `omp.private` ops).
3+
4+
// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
5+
6+
omp.private {type = private} @_QFEi_private_i32 : i32
7+
omp.private {type = private} @_QFEpriv_val_dist_private_f32 : f32
8+
9+
llvm.func @_QQmain() {
10+
%0 = llvm.mlir.constant(1 : i64) : i64
11+
%1 = llvm.alloca %0 x f32 {bindc_name = "priv_val_dist"} : (i64) -> !llvm.ptr
12+
%3 = llvm.alloca %0 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
13+
%4 = llvm.mlir.constant(3.140000e+00 : f32) : f32
14+
%5 = llvm.mlir.constant(1000 : i32) : i32
15+
%6 = llvm.mlir.constant(1 : i32) : i32
16+
17+
omp.teams {
18+
omp.distribute private(@_QFEpriv_val_dist_private_f32 %1 -> %arg0, @_QFEi_private_i32 %3 -> %arg1 : !llvm.ptr, !llvm.ptr) {
19+
omp.loop_nest (%arg2) : i32 = (%6) to (%5) inclusive step (%6) {
20+
llvm.store %arg2, %arg1 : i32, !llvm.ptr
21+
llvm.store %4, %arg0 : f32, !llvm.ptr
22+
omp.yield
23+
}
24+
}
25+
omp.terminator
26+
}
27+
28+
llvm.return
29+
}
30+
31+
// CHECK-LABEL: define void @_QQmain() {
32+
// CHECK: call void {{.*}} @__kmpc_fork_teams(ptr @{{.*}}, i32 0, ptr @[[TEAMS_FUNC:.*]])
33+
// CHECK-NEXT: br label %teams.exit
34+
// CHECK: }
35+
36+
// CHECK: define internal void @[[TEAMS_FUNC]]({{.*}}) {
37+
// CHECK: call void @[[DIST_FUNC:.*]]()
38+
// CHECK-NEXT: br label %distribute.exit
39+
// CHECK: }
40+
41+
// CHECK: define internal void @[[DIST_FUNC]]() {
42+
// CHECK: %[[PRIV_VAR_ALLOC:.*]] = alloca float, align 4
43+
// CHECK: %[[IV_ALLOC:.*]] = alloca i32, align 4
44+
45+
// CHECK: omp.loop_nest.region:
46+
// CHECK-NEXT: store i32 %{{.*}}, ptr %[[IV_ALLOC]], align 4
47+
// CHECK-NEXT: store float 0x40091EB860000000, ptr %[[PRIV_VAR_ALLOC]], align 4
48+
// CHECK: }
49+
50+
// -----
51+
52+
llvm.func @foo_free(!llvm.ptr)
53+
54+
omp.private {type = firstprivate} @_QFEpriv_val_dist_firstprivate_f32 : f32 copy {
55+
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
56+
%0 = llvm.load %arg0 : !llvm.ptr -> f32
57+
llvm.store %0, %arg1 : f32, !llvm.ptr
58+
omp.yield(%arg1 : !llvm.ptr)
59+
} dealloc {
60+
^bb0(%arg0: !llvm.ptr):
61+
llvm.call @foo_free(%arg0) : (!llvm.ptr) -> ()
62+
omp.yield
63+
}
64+
65+
llvm.func @_QQmain() {
66+
%0 = llvm.mlir.constant(1 : i64) : i64
67+
%1 = llvm.alloca %0 x f32 {bindc_name = "priv_val_dist"} : (i64) -> !llvm.ptr
68+
%4 = llvm.mlir.constant(3.140000e+00 : f32) : f32
69+
%6 = llvm.mlir.constant(1 : i32) : i32
70+
omp.distribute private(@_QFEpriv_val_dist_firstprivate_f32 %1 -> %arg0 : !llvm.ptr) {
71+
omp.loop_nest (%arg2) : i32 = (%6) to (%6) inclusive step (%6) {
72+
llvm.store %4, %arg0 : f32, !llvm.ptr
73+
omp.yield
74+
}
75+
}
76+
llvm.return
77+
}
78+
79+
// CHECK-LABEL: define void @_QQmain() {
80+
// CHECK: %[[SHARED_VAR_ALLOC:.*]] = alloca float, i64 1, align 4
81+
// CHECK: %[[SHARED_VAR_PTR:.*]] = getelementptr { ptr }, ptr %[[DIST_PARAM:.*]], i32 0, i32 0
82+
// CHECK: store ptr %[[SHARED_VAR_ALLOC]], ptr %[[SHARED_VAR_PTR]], align 8
83+
// CHECK: call void @[[DIST_FUNC:.*]](ptr %[[DIST_PARAM]])
84+
// CHECK-NEXT: br label %distribute.exit
85+
// CHECK: }
86+
87+
// CHECK: define internal void @[[DIST_FUNC]](ptr %[[DIST_ARG:.*]]) {
88+
// CHECK: %[[SHARED_VAR_GEP:.*]] = getelementptr { ptr }, ptr %[[DIST_ARG]], i32 0, i32 0
89+
// CHECK: %[[SHARED_VAR_PTR2:.*]] = load ptr, ptr %[[SHARED_VAR_GEP]], align 8
90+
// CHECK: %[[PRIV_VAR_ALLOC:.*]] = alloca float, align 4
91+
92+
// CHECK: omp.private.copy:
93+
// CHECK-NEXT: %[[SHARED_VAR_VAL:.*]] = load float, ptr %[[SHARED_VAR_PTR2]], align 4
94+
// CHECK-NEXT: store float %[[SHARED_VAR_VAL]], ptr %[[PRIV_VAR_ALLOC]], align 4
95+
96+
// CHECK: omp_loop.after:
97+
// CHECK-NEXT: br label %omp.region.cont
98+
99+
// CHECK: omp.region.cont:
100+
// CHECK-NEXT: call void @foo_free(ptr %[[PRIV_VAR_ALLOC]])
101+
102+
// CHECK: omp.loop_nest.region:
103+
// CHECK-NEXT: store float 0x40091EB860000000, ptr %[[PRIV_VAR_ALLOC]], align 4
104+
// CHECK: }
105+
106+

mlir/test/Target/LLVMIR/openmp-todo.mlir

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -105,21 +105,6 @@ llvm.func @distribute_order(%lb : i32, %ub : i32, %step : i32) {
105105

106106
// -----
107107

108-
omp.private {type = private} @x.privatizer : !llvm.ptr
109-
110-
llvm.func @distribute_private(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
111-
// expected-error@below {{not yet implemented: Unhandled clause privatization in omp.distribute operation}}
112-
// expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
113-
omp.distribute private(@x.privatizer %x -> %arg0 : !llvm.ptr) {
114-
omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
115-
omp.yield
116-
}
117-
}
118-
llvm.return
119-
}
120-
121-
// -----
122-
123108
llvm.func @ordered_region_par_level_simd() {
124109
// expected-error@below {{not yet implemented: Unhandled clause parallelization-level in omp.ordered.region operation}}
125110
// expected-error@below {{LLVM Translation failed for operation: omp.ordered.region}}

0 commit comments

Comments
 (0)