Skip to content

Commit 0e1137a

Browse files
committed
[MLIR][OpenMP] Support basic materialization for omp.private ops
Adds basic support for materializing delayed privatization. So far, the restrictions on the implementation are: (1) only `private` clauses are supported (`firstprivate` support will be added in a later PR); (2) only single-block `omp.private -> alloc` regions are supported (multi-block ones will be supported in a later PR).
1 parent a8a5770 commit 0e1137a

File tree

2 files changed

+205
-7
lines changed

2 files changed

+205
-7
lines changed

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 114 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1000,6 +1000,24 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
10001000
return success();
10011001
}
10021002

1003+
/// Replace the region arguments of the parallel op (which correspond to private
1004+
/// variables) with the actual private variables they correspond to. This
1005+
/// prepares the parallel op so that it matches what is expected by the
1006+
/// OMPIRBuilder.
///
/// Note that this rewrites the uses inside `opInst`'s region in place; the
/// region arguments themselves are not removed here.
1007+
static void prepareOmpParallelForPrivatization(omp::ParallelOp opInst) {
1008+
Region &region = opInst.getRegion();
1009+
auto privateVars = opInst.getPrivateVars();
1010+
1011+
auto privateVarsIt = privateVars.begin();
1012+
// Reductions precede private arguments, so skip them first.
1013+
unsigned privateArgBeginIdx = opInst.getNumReductionVars();
1014+
unsigned privateArgEndIdx = privateArgBeginIdx + privateVars.size();
// Walk the private region arguments and the private operands in lockstep,
// redirecting every use of the argument within the region to the operand.
1015+
for (size_t argIdx = privateArgBeginIdx; argIdx < privateArgEndIdx;
1016+
++argIdx, ++privateVarsIt)
1017+
replaceAllUsesInRegionWith(region.getArgument(argIdx), *privateVarsIt,
1018+
region);
1019+
}
1020+
10031021
/// Converts the OpenMP parallel operation to LLVM IR.
10041022
static LogicalResult
10051023
convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
@@ -1043,6 +1061,8 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
10431061
builder.CreateStore(phis[0], privateReductionVariables[i]);
10441062
}
10451063

1064+
prepareOmpParallelForPrivatization(opInst);
1065+
10461066
// Save the alloca insertion point on ModuleTranslation stack for use in
10471067
// nested regions.
10481068
LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
@@ -1086,12 +1106,98 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
10861106

10871107
// TODO: Perform appropriate actions according to the data-sharing
10881108
// attribute (shared, private, firstprivate, ...) of variables.
1089-
// Currently defaults to shared.
1109+
// Currently shared and private are supported.
10901110
auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
10911111
llvm::Value &, llvm::Value &vPtr,
10921112
llvm::Value *&replacementValue) -> InsertPointTy {
10931113
replacementValue = &vPtr;
10941114

1115+
// If this is a private value, this lambda will return the corresponding
1116+
// mlir value and its `PrivateClauseOp`. Otherwise, empty values are
1117+
// returned.
1118+
auto [privVar, privatizerClone] =
1119+
[&]() -> std::pair<mlir::Value, omp::PrivateClauseOp> {
1120+
if (!opInst.getPrivateVars().empty()) {
1121+
auto privVars = opInst.getPrivateVars();
1122+
auto privatizers = opInst.getPrivatizers();
1123+
1124+
for (auto [privVar, privatizerAttr] :
1125+
llvm::zip_equal(privVars, *privatizers)) {
1126+
// Find the MLIR private variable corresponding to the LLVM value
1127+
// being privatized.
1128+
llvm::Value *llvmPrivVar = moduleTranslation.lookupValue(privVar);
1129+
if (llvmPrivVar != &vPtr)
1130+
continue;
1131+
1132+
SymbolRefAttr privSym = llvm::cast<SymbolRefAttr>(privatizerAttr);
1133+
omp::PrivateClauseOp privatizer =
1134+
SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(
1135+
opInst, privSym);
1136+
1137+
// Clone the privatizer in case it is used by more than one parallel
1138+
// region. The privatizer is processed in-place (see below) before it
1139+
// gets inlined in the parallel region and therefore processing the
1140+
// original op is dangerous.
1141+
return {privVar, privatizer.clone()};
1142+
}
1143+
}
1144+
1145+
return {mlir::Value(), omp::PrivateClauseOp()};
1146+
}();
1147+
1148+
if (privVar) {
1149+
if (privatizerClone.getDataSharingType() ==
1150+
omp::DataSharingClauseType::FirstPrivate) {
1151+
privatizerClone.emitOpError(
1152+
"TODO: delayed privatization is not "
1153+
"supported for `firstprivate` clauses yet.");
1154+
bodyGenStatus = failure();
1155+
return codeGenIP;
1156+
}
1157+
1158+
Region &allocRegion = privatizerClone.getAllocRegion();
1159+
1160+
if (!allocRegion.hasOneBlock()) {
1161+
privatizerClone.emitOpError(
1162+
"TODO: multi-block alloc regions are not supported yet. Seems "
1163+
"like there is a difference in `inlineConvertOmpRegions`'s "
1164+
"pre-conditions for single- and multi-block regions.");
1165+
bodyGenStatus = failure();
1166+
return codeGenIP;
1167+
}
1168+
1169+
// Replace the privatizer block argument with the MLIR value being privatized.
1170+
// This way, the body of the privatizer will be changed from using the
1171+
// region/block argument to the value being privatized.
1172+
auto allocRegionArg = allocRegion.getArgument(0);
1173+
replaceAllUsesInRegionWith(allocRegionArg, privVar, allocRegion);
1174+
1175+
auto oldIP = builder.saveIP();
1176+
builder.restoreIP(allocaIP);
1177+
1178+
// Temporarily unlink the terminator from its parent since
1179+
// `inlineConvertOmpRegions` expects the insertion block to **not**
1180+
// contain a terminator.
1181+
llvm::Instruction &allocaTerminator = builder.GetInsertBlock()->back();
1182+
assert(allocaTerminator.isTerminator());
1183+
allocaTerminator.removeFromParent();
1184+
1185+
SmallVector<llvm::Value *, 1> yieldedValues;
1186+
if (failed(inlineConvertOmpRegions(allocRegion, "omp.privatizer", builder,
1187+
moduleTranslation, &yieldedValues))) {
1188+
opInst.emitError("failed to inline `alloc` region of an `omp.private` "
1189+
"op in the parallel region");
1190+
bodyGenStatus = failure();
1191+
} else {
1192+
assert(yieldedValues.size() == 1);
1193+
replacementValue = yieldedValues.front();
1194+
}
1195+
1196+
allocaTerminator.insertAfter(&builder.GetInsertBlock()->back());
1197+
privatizerClone.erase();
1198+
builder.restoreIP(oldIP);
1199+
}
1200+
10951201
return codeGenIP;
10961202
};
10971203

@@ -3009,12 +3115,13 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
30093115
.Case([&](omp::TargetOp) {
30103116
return convertOmpTarget(*op, builder, moduleTranslation);
30113117
})
3012-
.Case<omp::MapInfoOp, omp::DataBoundsOp>([&](auto op) {
3013-
// No-op, should be handled by relevant owning operations e.g.
3014-
// TargetOp, EnterDataOp, ExitDataOp, DataOp etc. and then
3015-
// discarded
3016-
return success();
3017-
})
3118+
.Case<omp::MapInfoOp, omp::DataBoundsOp, omp::PrivateClauseOp>(
3119+
[&](auto op) {
3120+
// No-op, should be handled by relevant owning operations e.g.
3121+
// TargetOp, EnterDataOp, ExitDataOp, DataOp etc. and then
3122+
// discarded
3123+
return success();
3124+
})
30183125
.Default([&](Operation *inst) {
30193126
return inst->emitError("unsupported OpenMP operation: ")
30203127
<< inst->getName();
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
// Test code-gen for `omp.parallel` ops with delayed privatizers (i.e. using
2+
// `omp.private` ops).
3+
4+
// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
5+
6+
llvm.func @parallel_op_1_private(%arg0: !llvm.ptr) {
7+
omp.parallel private(@x.privatizer %arg0 -> %arg2 : !llvm.ptr) {
8+
%0 = llvm.load %arg2 : !llvm.ptr -> f32
9+
omp.terminator
10+
}
11+
llvm.return
12+
}
13+
14+
// CHECK-LABEL: @parallel_op_1_private
15+
// CHECK-SAME: (ptr %[[ORIG:.*]]) {
16+
// CHECK: %[[OMP_PAR_ARG:.*]] = alloca { ptr }, align 8
17+
// CHECK: %[[ORIG_GEP:.*]] = getelementptr { ptr }, ptr %[[OMP_PAR_ARG]], i32 0, i32 0
18+
// CHECK: store ptr %[[ORIG]], ptr %[[ORIG_GEP]], align 8
19+
// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 1, ptr @parallel_op_1_private..omp_par, ptr %[[OMP_PAR_ARG]])
20+
// CHECK: }
21+
22+
// CHECK-LABEL: void @parallel_op_1_private..omp_par
23+
// CHECK-SAME: (ptr noalias %{{.*}}, ptr noalias %{{.*}}, ptr %[[ARG:.*]])
24+
// CHECK: %[[ORIG_PTR_PTR:.*]] = getelementptr { ptr }, ptr %[[ARG]], i32 0, i32 0
25+
// CHECK: %[[ORIG_PTR:.*]] = load ptr, ptr %[[ORIG_PTR_PTR]], align 8
26+
27+
// Check that the privatizer alloc region was inlined properly.
28+
// CHECK: %[[PRIV_ALLOC:.*]] = alloca float, align 4
29+
// CHECK: %[[ORIG_VAL:.*]] = load float, ptr %[[ORIG_PTR]], align 4
30+
// CHECK: store float %[[ORIG_VAL]], ptr %[[PRIV_ALLOC]], align 4
31+
// CHECK-NEXT: br
32+
33+
// Check that the privatized value is used (rather than the original one).
34+
// CHECK: load float, ptr %[[PRIV_ALLOC]], align 4
35+
// CHECK: }
36+
37+
llvm.func @parallel_op_2_privates(%arg0: !llvm.ptr, %arg1: !llvm.ptr) {
38+
omp.parallel private(@x.privatizer %arg0 -> %arg2 : !llvm.ptr, @y.privatizer %arg1 -> %arg3 : !llvm.ptr) {
39+
%0 = llvm.load %arg2 : !llvm.ptr -> f32
40+
%1 = llvm.load %arg3 : !llvm.ptr -> i32
41+
omp.terminator
42+
}
43+
llvm.return
44+
}
45+
46+
// CHECK-LABEL: @parallel_op_2_privates
47+
// CHECK-SAME: (ptr %[[ORIG1:.*]], ptr %[[ORIG2:.*]]) {
48+
// CHECK: %[[OMP_PAR_ARG:.*]] = alloca { ptr, ptr }, align 8
49+
// CHECK: %[[ORIG1_GEP:.*]] = getelementptr { ptr, ptr }, ptr %[[OMP_PAR_ARG]], i32 0, i32 0
50+
// CHECK: store ptr %[[ORIG1]], ptr %[[ORIG1_GEP]], align 8
51+
// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 1, ptr @parallel_op_2_privates..omp_par, ptr %[[OMP_PAR_ARG]])
52+
// CHECK: }
53+
54+
// CHECK-LABEL: void @parallel_op_2_privates..omp_par
55+
// CHECK-SAME: (ptr noalias %{{.*}}, ptr noalias %{{.*}}, ptr %[[ARG:.*]])
56+
// CHECK: %[[ORIG1_PTR_PTR:.*]] = getelementptr { ptr, ptr }, ptr %[[ARG]], i32 0, i32 0
57+
// CHECK: %[[ORIG1_PTR:.*]] = load ptr, ptr %[[ORIG1_PTR_PTR]], align 8
58+
// CHECK: %[[ORIG2_PTR_PTR:.*]] = getelementptr { ptr, ptr }, ptr %[[ARG]], i32 0, i32 1
59+
// CHECK: %[[ORIG2_PTR:.*]] = load ptr, ptr %[[ORIG2_PTR_PTR]], align 8
60+
61+
// Check that the privatizer alloc region was inlined properly.
62+
// CHECK: %[[PRIV1_ALLOC:.*]] = alloca float, align 4
63+
// CHECK: %[[ORIG1_VAL:.*]] = load float, ptr %[[ORIG1_PTR]], align 4
64+
// CHECK: store float %[[ORIG1_VAL]], ptr %[[PRIV1_ALLOC]], align 4
65+
// CHECK: %[[PRIV2_ALLOC:.*]] = alloca i32, align 4
66+
// CHECK: %[[ORIG2_VAL:.*]] = load i32, ptr %[[ORIG2_PTR]], align 4
67+
// CHECK: store i32 %[[ORIG2_VAL]], ptr %[[PRIV2_ALLOC]], align 4
68+
// CHECK-NEXT: br
69+
70+
// Check that the privatized value is used (rather than the original one).
71+
// CHECK: load float, ptr %[[PRIV1_ALLOC]], align 4
72+
// CHECK: load i32, ptr %[[PRIV2_ALLOC]], align 4
73+
// CHECK: }
74+
75+
omp.private {type = private} @x.privatizer : !llvm.ptr alloc {
76+
^bb0(%arg0: !llvm.ptr):
77+
%c1 = llvm.mlir.constant(1 : i32) : i32
78+
%0 = llvm.alloca %c1 x f32 : (i32) -> !llvm.ptr
79+
%1 = llvm.load %arg0 : !llvm.ptr -> f32
80+
llvm.store %1, %0 : f32, !llvm.ptr
81+
omp.yield(%0 : !llvm.ptr)
82+
}
83+
84+
omp.private {type = private} @y.privatizer : !llvm.ptr alloc {
85+
^bb0(%arg0: !llvm.ptr):
86+
%c1 = llvm.mlir.constant(1 : i32) : i32
87+
%0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
88+
%1 = llvm.load %arg0 : !llvm.ptr -> i32
89+
llvm.store %1, %0 : i32, !llvm.ptr
90+
omp.yield(%0 : !llvm.ptr)
91+
}

0 commit comments

Comments
 (0)