@@ -1000,6 +1000,24 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
1000
1000
return success ();
1001
1001
}
1002
1002
1003
+ // / Replace the region arguments of the parallel op (which correspond to private
1004
+ // / variables) with the actual private varibles they correspond to. This
1005
+ // / prepares the parallel op so that it matches what is expected by the
1006
+ // / OMPIRBuilder.
1007
+ static void prepareOmpParallelForPrivatization (omp::ParallelOp opInst) {
1008
+ Region ®ion = opInst.getRegion ();
1009
+ auto privateVars = opInst.getPrivateVars ();
1010
+
1011
+ auto privateVarsIt = privateVars.begin ();
1012
+ // Reduction precede private arguments, so skip them first.
1013
+ unsigned privateArgBeginIdx = opInst.getNumReductionVars ();
1014
+ unsigned privateArgEndIdx = privateArgBeginIdx + privateVars.size ();
1015
+ for (size_t argIdx = privateArgBeginIdx; argIdx < privateArgEndIdx;
1016
+ ++argIdx, ++privateVarsIt)
1017
+ replaceAllUsesInRegionWith (region.getArgument (argIdx), *privateVarsIt,
1018
+ region);
1019
+ }
1020
+
1003
1021
/// Converts the OpenMP parallel operation to LLVM IR.
1004
1022
static LogicalResult
1005
1023
convertOmpParallel (omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
@@ -1043,6 +1061,8 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
1043
1061
builder.CreateStore (phis[0 ], privateReductionVariables[i]);
1044
1062
}
1045
1063
1064
+ prepareOmpParallelForPrivatization (opInst);
1065
+
1046
1066
// Save the alloca insertion point on ModuleTranslation stack for use in
1047
1067
// nested regions.
1048
1068
LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame (
@@ -1086,12 +1106,98 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
1086
1106
1087
1107
// TODO: Perform appropriate actions according to the data-sharing
1088
1108
// attribute (shared, private, firstprivate, ...) of variables.
1089
- // Currently defaults to shared .
1109
+ // Currently shared and private are supported.
1090
1110
auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
1091
1111
llvm::Value &, llvm::Value &vPtr,
1092
1112
llvm::Value *&replacementValue) -> InsertPointTy {
1093
1113
replacementValue = &vPtr;
1094
1114
1115
+ // If this is a private value, this lambda will return the corresponding
1116
+ // mlir value and its `PrivateClauseOp`. Otherwise, empty values are
1117
+ // returned.
1118
+ auto [privVar, privatizerClone] =
1119
+ [&]() -> std::pair<mlir::Value, omp::PrivateClauseOp> {
1120
+ if (!opInst.getPrivateVars ().empty ()) {
1121
+ auto privVars = opInst.getPrivateVars ();
1122
+ auto privatizers = opInst.getPrivatizers ();
1123
+
1124
+ for (auto [privVar, privatizerAttr] :
1125
+ llvm::zip_equal (privVars, *privatizers)) {
1126
+ // Find the MLIR private variable corresponding to the LLVM value
1127
+ // being privatized.
1128
+ llvm::Value *llvmPrivVar = moduleTranslation.lookupValue (privVar);
1129
+ if (llvmPrivVar != &vPtr)
1130
+ continue ;
1131
+
1132
+ SymbolRefAttr privSym = llvm::cast<SymbolRefAttr>(privatizerAttr);
1133
+ omp::PrivateClauseOp privatizer =
1134
+ SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(
1135
+ opInst, privSym);
1136
+
1137
+ // Clone the privatizer in case it used by more than one parallel
1138
+ // region. The privatizer is processed in-place (see below) before it
1139
+ // gets inlined in the parallel region and therefore processing the
1140
+ // original op is dangerous.
1141
+ return {privVar, privatizer.clone ()};
1142
+ }
1143
+ }
1144
+
1145
+ return {mlir::Value (), omp::PrivateClauseOp ()};
1146
+ }();
1147
+
1148
+ if (privVar) {
1149
+ if (privatizerClone.getDataSharingType () ==
1150
+ omp::DataSharingClauseType::FirstPrivate) {
1151
+ privatizerClone.emitOpError (
1152
+ " TODO: delayed privatization is not "
1153
+ " supported for `firstprivate` clauses yet." );
1154
+ bodyGenStatus = failure ();
1155
+ return codeGenIP;
1156
+ }
1157
+
1158
+ Region &allocRegion = privatizerClone.getAllocRegion ();
1159
+
1160
+ if (!allocRegion.hasOneBlock ()) {
1161
+ privatizerClone.emitOpError (
1162
+ " TODO: multi-block alloc regions are not supported yet. Seems "
1163
+ " like there is a difference in `inlineConvertOmpRegions`'s "
1164
+ " pre-conditions for single- and multi-block regions." );
1165
+ bodyGenStatus = failure ();
1166
+ return codeGenIP;
1167
+ }
1168
+
1169
+ // Replace the privatizer block argument with mlir value being privatized.
1170
+ // This way, the body of the privatizer will be changed from using the
1171
+ // region/block argument to the value being privatized.
1172
+ auto allocRegionArg = allocRegion.getArgument (0 );
1173
+ replaceAllUsesInRegionWith (allocRegionArg, privVar, allocRegion);
1174
+
1175
+ auto oldIP = builder.saveIP ();
1176
+ builder.restoreIP (allocaIP);
1177
+
1178
+ // Temporarily unlink the terminator from its parent since
1179
+ // `inlineConvertOmpRegions` expects the insertion block to **not**
1180
+ // contain a terminator.
1181
+ llvm::Instruction &allocaTerminator = builder.GetInsertBlock ()->back ();
1182
+ assert (allocaTerminator.isTerminator ());
1183
+ allocaTerminator.removeFromParent ();
1184
+
1185
+ SmallVector<llvm::Value *, 1 > yieldedValues;
1186
+ if (failed (inlineConvertOmpRegions (allocRegion, " omp.privatizer" , builder,
1187
+ moduleTranslation, &yieldedValues))) {
1188
+ opInst.emitError (" failed to inline `alloc` region of an `omp.private` "
1189
+ " op in the parallel region" );
1190
+ bodyGenStatus = failure ();
1191
+ } else {
1192
+ assert (yieldedValues.size () == 1 );
1193
+ replacementValue = yieldedValues.front ();
1194
+ }
1195
+
1196
+ allocaTerminator.insertAfter (&builder.GetInsertBlock ()->back ());
1197
+ privatizerClone.erase ();
1198
+ builder.restoreIP (oldIP);
1199
+ }
1200
+
1095
1201
return codeGenIP;
1096
1202
};
1097
1203
@@ -3009,12 +3115,13 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
3009
3115
.Case ([&](omp::TargetOp) {
3010
3116
return convertOmpTarget (*op, builder, moduleTranslation);
3011
3117
})
3012
- .Case <omp::MapInfoOp, omp::DataBoundsOp>([&](auto op) {
3013
- // No-op, should be handled by relevant owning operations e.g.
3014
- // TargetOp, EnterDataOp, ExitDataOp, DataOp etc. and then
3015
- // discarded
3016
- return success ();
3017
- })
3118
+ .Case <omp::MapInfoOp, omp::DataBoundsOp, omp::PrivateClauseOp>(
3119
+ [&](auto op) {
3120
+ // No-op, should be handled by relevant owning operations e.g.
3121
+ // TargetOp, ParallelOp, EnterDataOp, ExitDataOp, DataOp etc. and then
3122
+ // discarded
3123
+ return success ();
3124
+ })
3018
3125
.Default ([&](Operation *inst) {
3019
3126
return inst->emitError (" unsupported OpenMP operation: " )
3020
3127
<< inst->getName ();
0 commit comments