@@ -239,11 +239,12 @@ static void parallelizeRegion(Region &sourceRegion, Region &targetRegion,
239
239
return alloc;
240
240
};
241
241
242
- auto moveToSingle = [&](SingleRegion sr, OpBuilder allocaBuilder,
243
- OpBuilder singleBuilder,
244
- OpBuilder parallelBuilder) -> SmallVector<Value> {
242
+ auto moveToSingle =
243
+ [&](SingleRegion sr, OpBuilder allocaBuilder, OpBuilder singleBuilder,
244
+ OpBuilder parallelBuilder) -> std::pair< bool , SmallVector<Value> > {
245
245
IRMapping singleMapping = rootMapping;
246
246
SmallVector<Value> copyPrivate;
247
+ bool allParallelized = true ;
247
248
248
249
for (Operation &op : llvm::make_range (sr.begin , sr.end )) {
249
250
if (isSafeToParallelize (&op)) {
@@ -267,13 +268,15 @@ static void parallelizeRegion(Region &sourceRegion, Region &targetRegion,
267
268
assert (llvm::all_of (op.getResults (), [&](Value v) {
268
269
return !isTransitivelyUsedOutside (v, sr);
269
270
}));
271
+ allParallelized = false ;
270
272
}
271
273
} else if (auto alloca = dyn_cast<fir::AllocaOp>(&op)) {
272
274
auto hoisted =
273
275
cast<fir::AllocaOp>(allocaBuilder.clone (*alloca, singleMapping));
274
276
rootMapping.map (&*alloca, &*hoisted);
275
277
rootMapping.map (alloca.getResult (), hoisted.getResult ());
276
278
copyPrivate.push_back (hoisted);
279
+ allParallelized = false ;
277
280
} else {
278
281
singleBuilder.clone (op, singleMapping);
279
282
// Prepare reloaded values for results of operations that cannot be
@@ -286,10 +289,11 @@ static void parallelizeRegion(Region &sourceRegion, Region &targetRegion,
286
289
copyPrivate.push_back (alloc);
287
290
}
288
291
}
292
+ allParallelized = false ;
289
293
}
290
294
}
291
295
singleBuilder.create <omp::TerminatorOp>(loc);
292
- return copyPrivate;
296
+ return {allParallelized, copyPrivate} ;
293
297
};
294
298
295
299
for (Block &block : sourceRegion) {
@@ -343,25 +347,35 @@ static void parallelizeRegion(Region &sourceRegion, Region &targetRegion,
343
347
Block *parallelBlock = new Block ();
344
348
parallelBuilder.setInsertionPointToStart (parallelBlock);
345
349
346
- omp::SingleOperands singleOperands;
347
- if (isLast)
348
- singleOperands.nowait = rootBuilder.getUnitAttr ();
349
- singleOperands.copyprivateVars =
350
+ auto [allParallelized, copyprivateVars] =
350
351
moveToSingle (std::get<SingleRegion>(opOrSingle), allocaBuilder,
351
352
singleBuilder, parallelBuilder);
352
- cleanupBlock (singleBlock);
353
- for (auto var : singleOperands.copyprivateVars ) {
354
- mlir::func::FuncOp funcOp =
355
- createCopyFunc (loc, var.getType (), firCopyFuncBuilder);
356
- singleOperands.copyprivateSyms .push_back (SymbolRefAttr::get (funcOp));
353
+ if (allParallelized) {
354
+ // The single region was not required as all operations were safe to
355
+ // parallelize
356
+ assert (copyprivateVars.empty ());
357
+ assert (allocaBlock->empty ());
358
+ delete singleBlock;
359
+ } else {
360
+ omp::SingleOperands singleOperands;
361
+ if (isLast)
362
+ singleOperands.nowait = rootBuilder.getUnitAttr ();
363
+ singleOperands.copyprivateVars = copyprivateVars;
364
+ cleanupBlock (singleBlock);
365
+ for (auto var : singleOperands.copyprivateVars ) {
366
+ mlir::func::FuncOp funcOp =
367
+ createCopyFunc (loc, var.getType (), firCopyFuncBuilder);
368
+ singleOperands.copyprivateSyms .push_back (
369
+ SymbolRefAttr::get (funcOp));
370
+ }
371
+ omp::SingleOp singleOp =
372
+ rootBuilder.create <omp::SingleOp>(loc, singleOperands);
373
+ singleOp.getRegion ().push_back (singleBlock);
374
+ targetRegion.front ().getOperations ().splice (
375
+ singleOp->getIterator (), allocaBlock->getOperations ());
357
376
}
358
- omp::SingleOp singleOp =
359
- rootBuilder.create <omp::SingleOp>(loc, singleOperands);
360
- singleOp.getRegion ().push_back (singleBlock);
361
377
rootBuilder.getInsertionBlock ()->getOperations ().splice (
362
378
rootBuilder.getInsertionPoint (), parallelBlock->getOperations ());
363
- targetRegion.front ().getOperations ().splice (
364
- singleOp->getIterator (), allocaBlock->getOperations ());
365
379
delete allocaBlock;
366
380
delete parallelBlock;
367
381
} else {
0 commit comments