Skip to content

Commit 99cbe2d

Browse files
committed
[MLIR][OpenMP] Support basic materialization for omp.private ops
Adds basic support for materializing delayed privatization. So far, the restrictions on the implementation are: - Only `private` clauses are supported (`firstprivate` support will be added in a later PR). - Only single-block `omp.private -> alloc` regions are supported (multi-block ones will be supported in a later PR).
1 parent 0bb1415 commit 99cbe2d

File tree

2 files changed

+199
-6
lines changed

2 files changed

+199
-6
lines changed

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 108 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1000,6 +1000,26 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
10001000
return success();
10011001
}
10021002

1003+
/// Replace the region arguments of the parallel op (which correspond to private
1004+
/// variables) with the actual private variables they correspond to. This
1005+
/// prepares the parallel op so that it matches what is expected by the
1006+
/// OMPIRBuilder.
1007+
static void prepareOmpParallelForPrivatization(omp::ParallelOp opInst) {
1008+
Region &region = opInst.getRegion();
1009+
auto privateVars = opInst.getPrivateVars();
1010+
1011+
auto privateVarsIt = privateVars.begin();
1012+
// Reduction arguments precede private arguments, so skip them first.
1013+
unsigned privateArgBeginIdx = opInst.getNumReductionVars();
1014+
unsigned privateArgEndIdx = privateArgBeginIdx + privateVars.size();
1015+
for (size_t argIdx = privateArgBeginIdx; argIdx < privateArgEndIdx;
1016+
++argIdx, ++privateVarsIt)
1017+
replaceAllUsesInRegionWith(region.getArgument(argIdx), *privateVarsIt,
1018+
region);
1019+
1020+
region.front().eraseArguments(privateArgBeginIdx, privateVars.size());
1021+
}
1022+
10031023
/// Converts the OpenMP parallel operation to LLVM IR.
10041024
static LogicalResult
10051025
convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
@@ -1008,6 +1028,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
10081028
// TODO: support error propagation in OpenMPIRBuilder and use it instead of
10091029
// relying on captured variables.
10101030
LogicalResult bodyGenStatus = success();
1031+
prepareOmpParallelForPrivatization(opInst);
10111032
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
10121033

10131034
auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
@@ -1092,6 +1113,86 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
10921113
llvm::Value *&replacementValue) -> InsertPointTy {
10931114
replacementValue = &vPtr;
10941115

1116+
// If this is a private value, this lambda will return the corresponding
1117+
// mlir value and its `PrivateClauseOp`. Otherwise, empty values are
1118+
// returned.
1119+
auto [privVar, privatizerClone] =
1120+
[&]() -> std::pair<mlir::Value, omp::PrivateClauseOp> {
1121+
if (!opInst.getPrivateVars().empty()) {
1122+
auto privVars = opInst.getPrivateVars();
1123+
auto privatizers = opInst.getPrivatizers();
1124+
assert(privatizers && privatizers->size() == privVars.size());
1125+
1126+
const auto *privInitIt = privatizers->begin();
1127+
for (auto privVarIt = privVars.begin(); privVarIt != privVars.end();
1128+
++privVarIt, ++privInitIt) {
1129+
// Find the MLIR private variable corresponding to the LLVM value
1130+
// being privatized.
1131+
llvm::Value *llvmPrivVar = moduleTranslation.lookupValue(*privVarIt);
1132+
if (llvmPrivVar != &vPtr)
1133+
continue;
1134+
1135+
SymbolRefAttr privSym = llvm::cast<SymbolRefAttr>(*privInitIt);
1136+
omp::PrivateClauseOp privatizer =
1137+
SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(
1138+
opInst, privSym);
1139+
1140+
assert(privatizer);
1141+
// Clone the privatizer in case it is used by more than one parallel
1142+
// region. The privatizer is processed in-place (see below) before it
1143+
// gets inlined in the parallel region and therefore processing the
1144+
// original op is dangerous.
1145+
return {*privVarIt, privatizer.clone()};
1146+
}
1147+
}
1148+
1149+
return {mlir::Value(), omp::PrivateClauseOp()};
1150+
}();
1151+
1152+
if (privVar) {
1153+
assert(privatizerClone.getDataSharingType() !=
1154+
omp::DataSharingClauseType::FirstPrivate &&
1155+
"TODO: delayed privatization is not supported for `firstprivate` "
1156+
"clauses yet.");
1157+
Region &allocRegion = privatizerClone.getAllocRegion();
1158+
assert(allocRegion.getNumArguments() == 1);
1159+
assert(allocRegion.hasOneBlock() &&
1160+
"TODO: multi-block alloc regions are not supported yet. Seems "
1161+
"like there is a difference in `inlineConvertOmpRegions`'s "
1162+
"pre-conditions for single- and multi-block regions.");
1163+
1164+
// Replace the privatizer block argument with the MLIR value being privatized.
1165+
// This way, the body of the privatizer will be changed from using the
1166+
// region/block argument to the value being privatized.
1167+
auto allocRegionArg = allocRegion.getArgument(0);
1168+
replaceAllUsesInRegionWith(allocRegionArg, privVar, allocRegion);
1169+
1170+
auto oldIP = builder.saveIP();
1171+
builder.restoreIP(allocaIP);
1172+
1173+
// Temporarily unlink the terminator from its parent since
1174+
// `inlineConvertOmpRegions` expects the insertion block to **not**
1175+
// contain a terminator.
1176+
llvm::Instruction &allocaTerminator = builder.GetInsertBlock()->back();
1177+
assert(allocaTerminator.isTerminator());
1178+
allocaTerminator.removeFromParent();
1179+
1180+
SmallVector<llvm::Value *, 1> yieldedValues;
1181+
if (failed(inlineConvertOmpRegions(allocRegion, "omp.privatizer", builder,
1182+
moduleTranslation, &yieldedValues))) {
1183+
opInst.emitError("failed to inline `alloc` region of an `omp.private` "
1184+
"op in the parallel region");
1185+
bodyGenStatus = failure();
1186+
} else {
1187+
assert(yieldedValues.size() == 1);
1188+
replacementValue = yieldedValues.front();
1189+
}
1190+
1191+
allocaTerminator.insertAfter(&builder.GetInsertBlock()->back());
1192+
privatizerClone.erase();
1193+
builder.restoreIP(oldIP);
1194+
}
1195+
10951196
return codeGenIP;
10961197
};
10971198

@@ -3009,12 +3110,13 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
30093110
.Case([&](omp::TargetOp) {
30103111
return convertOmpTarget(*op, builder, moduleTranslation);
30113112
})
3012-
.Case<omp::MapInfoOp, omp::DataBoundsOp>([&](auto op) {
3013-
// No-op, should be handled by relevant owning operations e.g.
3014-
// TargetOp, EnterDataOp, ExitDataOp, DataOp etc. and then
3015-
// discarded
3016-
return success();
3017-
})
3113+
.Case<omp::MapInfoOp, omp::DataBoundsOp, omp::PrivateClauseOp>(
3114+
[&](auto op) {
3115+
// No-op, should be handled by relevant owning operations e.g.
3116+
// TargetOp, EnterDataOp, ExitDataOp, DataOp etc. and then
3117+
// discarded
3118+
return success();
3119+
})
30183120
.Default([&](Operation *inst) {
30193121
return inst->emitError("unsupported OpenMP operation: ")
30203122
<< inst->getName();
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
// Test code-gen for `omp.parallel` ops with delayed privatizers (i.e. using
2+
// `omp.private` ops).
3+
4+
// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
5+
6+
llvm.func @parallel_op_1_private(%arg0: !llvm.ptr) {
7+
omp.parallel private(@x.privatizer %arg0 -> %arg2 : !llvm.ptr) {
8+
%0 = llvm.load %arg2 : !llvm.ptr -> f32
9+
omp.terminator
10+
}
11+
llvm.return
12+
}
13+
14+
// CHECK-LABEL: @parallel_op_1_private
15+
// CHECK-SAME: (ptr %[[ORIG:.*]]) {
16+
// CHECK: %[[OMP_PAR_ARG:.*]] = alloca { ptr }, align 8
17+
// CHECK: %[[ORIG_GEP:.*]] = getelementptr { ptr }, ptr %[[OMP_PAR_ARG]], i32 0, i32 0
18+
// CHECK: store ptr %[[ORIG]], ptr %[[ORIG_GEP]], align 8
19+
// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 1, ptr @parallel_op_1_private..omp_par, ptr %[[OMP_PAR_ARG]])
20+
// CHECK: }
21+
22+
// CHECK-LABEL: void @parallel_op_1_private..omp_par
23+
// CHECK-SAME: (ptr noalias %{{.*}}, ptr noalias %{{.*}}, ptr %[[ARG:.*]])
24+
// CHECK: %[[ORIG_PTR_PTR:.*]] = getelementptr { ptr }, ptr %[[ARG]], i32 0, i32 0
25+
// CHECK: %[[ORIG_PTR:.*]] = load ptr, ptr %[[ORIG_PTR_PTR]], align 8
26+
27+
// Check that the privatizer alloc region was inlined properly.
28+
// CHECK: %[[PRIV_ALLOC:.*]] = alloca float, align 4
29+
// CHECK: %[[ORIG_VAL:.*]] = load float, ptr %[[ORIG_PTR]], align 4
30+
// CHECK: store float %[[ORIG_VAL]], ptr %[[PRIV_ALLOC]], align 4
31+
// CHECK-NEXT: br
32+
33+
// Check that the privatized value is used (rather than the original one).
34+
// CHECK: load float, ptr %[[PRIV_ALLOC]], align 4
35+
// CHECK: }
36+
37+
llvm.func @parallel_op_2_privates(%arg0: !llvm.ptr, %arg1: !llvm.ptr) {
38+
omp.parallel private(@x.privatizer %arg0 -> %arg2 : !llvm.ptr, @y.privatizer %arg1 -> %arg3 : !llvm.ptr) {
39+
%0 = llvm.load %arg2 : !llvm.ptr -> f32
40+
%1 = llvm.load %arg3 : !llvm.ptr -> i32
41+
omp.terminator
42+
}
43+
llvm.return
44+
}
45+
46+
// CHECK-LABEL: @parallel_op_2_privates
47+
// CHECK-SAME: (ptr %[[ORIG1:.*]], ptr %[[ORIG2:.*]]) {
48+
// CHECK: %[[OMP_PAR_ARG:.*]] = alloca { ptr, ptr }, align 8
49+
// CHECK: %[[ORIG1_GEP:.*]] = getelementptr { ptr, ptr }, ptr %[[OMP_PAR_ARG]], i32 0, i32 0
50+
// CHECK: store ptr %[[ORIG1]], ptr %[[ORIG1_GEP]], align 8
51+
// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 1, ptr @parallel_op_2_privates..omp_par, ptr %[[OMP_PAR_ARG]])
52+
// CHECK: }
53+
54+
// CHECK-LABEL: void @parallel_op_2_privates..omp_par
55+
// CHECK-SAME: (ptr noalias %{{.*}}, ptr noalias %{{.*}}, ptr %[[ARG:.*]])
56+
// CHECK: %[[ORIG1_PTR_PTR:.*]] = getelementptr { ptr, ptr }, ptr %[[ARG]], i32 0, i32 0
57+
// CHECK: %[[ORIG1_PTR:.*]] = load ptr, ptr %[[ORIG1_PTR_PTR]], align 8
58+
// CHECK: %[[ORIG2_PTR_PTR:.*]] = getelementptr { ptr, ptr }, ptr %[[ARG]], i32 0, i32 1
59+
// CHECK: %[[ORIG2_PTR:.*]] = load ptr, ptr %[[ORIG2_PTR_PTR]], align 8
60+
61+
// Check that the privatizer alloc region was inlined properly.
62+
// CHECK: %[[PRIV1_ALLOC:.*]] = alloca float, align 4
63+
// CHECK: %[[ORIG1_VAL:.*]] = load float, ptr %[[ORIG1_PTR]], align 4
64+
// CHECK: store float %[[ORIG1_VAL]], ptr %[[PRIV1_ALLOC]], align 4
65+
// CHECK: %[[PRIV2_ALLOC:.*]] = alloca i32, align 4
66+
// CHECK: %[[ORIG2_VAL:.*]] = load i32, ptr %[[ORIG2_PTR]], align 4
67+
// CHECK: store i32 %[[ORIG2_VAL]], ptr %[[PRIV2_ALLOC]], align 4
68+
// CHECK-NEXT: br
69+
70+
// Check that the privatized value is used (rather than the original one).
71+
// CHECK: load float, ptr %[[PRIV1_ALLOC]], align 4
72+
// CHECK: load i32, ptr %[[PRIV2_ALLOC]], align 4
73+
// CHECK: }
74+
75+
omp.private {type = private} @x.privatizer : !llvm.ptr alloc {
76+
^bb0(%arg0: !llvm.ptr):
77+
%c1 = llvm.mlir.constant(1 : i32) : i32
78+
%0 = llvm.alloca %c1 x f32 : (i32) -> !llvm.ptr
79+
%1 = llvm.load %arg0 : !llvm.ptr -> f32
80+
llvm.store %1, %0 : f32, !llvm.ptr
81+
omp.yield(%0 : !llvm.ptr)
82+
}
83+
84+
omp.private {type = private} @y.privatizer : !llvm.ptr alloc {
85+
^bb0(%arg0: !llvm.ptr):
86+
%c1 = llvm.mlir.constant(1 : i32) : i32
87+
%0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
88+
%1 = llvm.load %arg0 : !llvm.ptr -> i32
89+
llvm.store %1, %0 : i32, !llvm.ptr
90+
omp.yield(%0 : !llvm.ptr)
91+
}

0 commit comments

Comments
 (0)