Skip to content

Commit 0e70e0e

Browse files
authored
[reapply (#118463)][OpenMP][OMPIRBuilder] Add delayed privatization support for wsloop (#119170)
This reapplies PR #118463 after introducing a fix for a bug uncovere by the test suite. The problem is that when the alloca block is terminated with a conditional branch, this violates a pre-condition of `allocatePrivateVars` (which assumes the alloca block has a single successor). This new PR includes a test that reproduces the issue. Extend MLIR to LLVM lowering by adding support for `omp.wsloop` for delayed privatization. This also refactors a few bit of code to isolate the logic needed for `firstprivate` initialization in a shared util that can be used across constructs that need it. The same is done for `dealloc` regions.
1 parent b3e4987 commit 0e70e0e

File tree

4 files changed

+289
-140
lines changed

4 files changed

+289
-140
lines changed

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 147 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
268268
checkAllocate(op, result);
269269
checkLinear(op, result);
270270
checkOrder(op, result);
271-
checkPrivate(op, result);
272271
})
273272
.Case([&](omp::ParallelOp op) { checkAllocate(op, result); })
274273
.Case([&](omp::SimdOp op) {
@@ -1302,6 +1301,7 @@ allocatePrivateVars(llvm::IRBuilderBase &builder,
13021301
MutableArrayRef<mlir::Value> mlirPrivateVars,
13031302
llvm::SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
13041303
const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP) {
1304+
llvm::IRBuilderBase::InsertPointGuard guard(builder);
13051305
// Allocate private vars
13061306
llvm::BranchInst *allocaTerminator =
13071307
llvm::cast<llvm::BranchInst>(allocaIP.getBlock()->getTerminator());
@@ -1363,6 +1363,86 @@ allocatePrivateVars(llvm::IRBuilderBase &builder,
13631363
return afterAllocas;
13641364
}
13651365

1366+
static LogicalResult
1367+
initFirstPrivateVars(llvm::IRBuilderBase &builder,
1368+
LLVM::ModuleTranslation &moduleTranslation,
1369+
SmallVectorImpl<mlir::Value> &mlirPrivateVars,
1370+
SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
1371+
SmallVectorImpl<omp::PrivateClauseOp> &privateDecls,
1372+
llvm::BasicBlock *afterAllocas) {
1373+
llvm::IRBuilderBase::InsertPointGuard guard(builder);
1374+
// Apply copy region for firstprivate.
1375+
bool needsFirstprivate =
1376+
llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
1377+
return privOp.getDataSharingType() ==
1378+
omp::DataSharingClauseType::FirstPrivate;
1379+
});
1380+
1381+
if (!needsFirstprivate)
1382+
return success();
1383+
1384+
assert(afterAllocas->getSinglePredecessor());
1385+
1386+
// Find the end of the allocation blocks
1387+
builder.SetInsertPoint(afterAllocas->getSinglePredecessor()->getTerminator());
1388+
llvm::BasicBlock *copyBlock =
1389+
splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
1390+
builder.SetInsertPoint(copyBlock->getFirstNonPHIOrDbgOrAlloca());
1391+
1392+
for (auto [decl, mlirVar, llvmVar] :
1393+
llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
1394+
if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
1395+
continue;
1396+
1397+
// copyRegion implements `lhs = rhs`
1398+
Region &copyRegion = decl.getCopyRegion();
1399+
1400+
// map copyRegion rhs arg
1401+
llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirVar);
1402+
assert(nonPrivateVar);
1403+
moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);
1404+
1405+
// map copyRegion lhs arg
1406+
moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);
1407+
1408+
// in-place convert copy region
1409+
builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
1410+
if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy", builder,
1411+
moduleTranslation)))
1412+
return decl.emitError("failed to inline `copy` region of `omp.private`");
1413+
1414+
// ignore unused value yielded from copy region
1415+
1416+
// clear copy region block argument mapping in case it needs to be
1417+
// re-created with different sources for reuse of the same reduction
1418+
// decl
1419+
moduleTranslation.forgetMapping(copyRegion);
1420+
}
1421+
1422+
return success();
1423+
}
1424+
1425+
static LogicalResult
1426+
cleanupPrivateVars(llvm::IRBuilderBase &builder,
1427+
LLVM::ModuleTranslation &moduleTranslation, Location loc,
1428+
SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
1429+
SmallVectorImpl<omp::PrivateClauseOp> &privateDecls) {
1430+
// private variable deallocation
1431+
SmallVector<Region *> privateCleanupRegions;
1432+
llvm::transform(privateDecls, std::back_inserter(privateCleanupRegions),
1433+
[](omp::PrivateClauseOp privatizer) {
1434+
return &privatizer.getDeallocRegion();
1435+
});
1436+
1437+
if (failed(inlineOmpRegionCleanup(
1438+
privateCleanupRegions, llvmPrivateVars, moduleTranslation, builder,
1439+
"omp.private.dealloc", /*shouldLoadCleanupRegionArg=*/false)))
1440+
return mlir::emitError(loc, "failed to inline `dealloc` region of an "
1441+
"`omp.private` op in");
1442+
1443+
return success();
1444+
}
1445+
13661446
static LogicalResult
13671447
convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
13681448
LLVM::ModuleTranslation &moduleTranslation) {
@@ -1622,50 +1702,10 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
16221702
if (handleError(afterAllocas, *taskOp).failed())
16231703
return llvm::make_error<PreviouslyReportedError>();
16241704

1625-
// Apply copy region for firstprivate
1626-
bool needsFirstPrivate =
1627-
llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
1628-
return privOp.getDataSharingType() ==
1629-
omp::DataSharingClauseType::FirstPrivate;
1630-
});
1631-
if (needsFirstPrivate) {
1632-
// Find the end of the allocation blocks
1633-
assert(afterAllocas.get()->getSinglePredecessor());
1634-
builder.SetInsertPoint(
1635-
afterAllocas.get()->getSinglePredecessor()->getTerminator());
1636-
llvm::BasicBlock *copyBlock =
1637-
splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
1638-
builder.SetInsertPoint(copyBlock->getFirstNonPHIOrDbgOrAlloca());
1639-
}
1640-
for (auto [decl, mlirVar, llvmVar] :
1641-
llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
1642-
if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
1643-
continue;
1644-
1645-
// copyRegion implements `lhs = rhs`
1646-
Region &copyRegion = decl.getCopyRegion();
1647-
1648-
// map copyRegion rhs arg
1649-
llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirVar);
1650-
assert(nonPrivateVar);
1651-
moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);
1652-
1653-
// map copyRegion lhs arg
1654-
moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);
1655-
1656-
// in-place convert copy region
1657-
builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
1658-
if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy",
1659-
builder, moduleTranslation)))
1660-
return llvm::createStringError(
1661-
"failed to inline `copy` region of an `omp.private` op in taskOp");
1662-
1663-
// ignore unused value yielded from copy region
1664-
1665-
// clear copy region block argument mapping in case it needs to be
1666-
// re-created with different source for reuse of the same reduction decl
1667-
moduleTranslation.forgetMapping(copyRegion);
1668-
}
1705+
if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
1706+
llvmPrivateVars, privateDecls,
1707+
afterAllocas.get())))
1708+
return llvm::make_error<PreviouslyReportedError>();
16691709

16701710
// translate the body of the task:
16711711
builder.restoreIP(codegenIP);
@@ -1674,19 +1714,11 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
16741714
if (failed(handleError(continuationBlockOrError, *taskOp)))
16751715
return llvm::make_error<PreviouslyReportedError>();
16761716

1677-
// private variable deallocation
1678-
SmallVector<Region *> privateCleanupRegions;
1679-
llvm::transform(privateDecls, std::back_inserter(privateCleanupRegions),
1680-
[](omp::PrivateClauseOp privatizer) {
1681-
return &privatizer.getDeallocRegion();
1682-
});
1683-
16841717
builder.SetInsertPoint(continuationBlockOrError.get()->getTerminator());
1685-
if (failed(inlineOmpRegionCleanup(
1686-
privateCleanupRegions, llvmPrivateVars, moduleTranslation, builder,
1687-
"omp.private.dealloc", /*shouldLoadCleanupRegionArg=*/false)))
1688-
return llvm::createStringError("failed to inline `dealloc` region of an "
1689-
"`omp.private` op in an omp.task");
1718+
1719+
if (failed(cleanupPrivateVars(builder, moduleTranslation, taskOp.getLoc(),
1720+
llvmPrivateVars, privateDecls)))
1721+
return llvm::make_error<PreviouslyReportedError>();
16901722

16911723
return llvm::Error::success();
16921724
};
@@ -1760,7 +1792,6 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
17601792
return failure();
17611793

17621794
auto loopOp = cast<omp::LoopNestOp>(wsloopOp.getWrappedLoop());
1763-
17641795
llvm::ArrayRef<bool> isByRef = getIsByRef(wsloopOp.getReductionByref());
17651796
assert(isByRef.size() == wsloopOp.getNumReductionVars());
17661797

@@ -1778,22 +1809,61 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
17781809
chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
17791810
}
17801811

1812+
MutableArrayRef<BlockArgument> privateBlockArgs =
1813+
cast<omp::BlockArgOpenMPOpInterface>(*wsloopOp).getPrivateBlockArgs();
1814+
SmallVector<mlir::Value> mlirPrivateVars;
1815+
SmallVector<llvm::Value *> llvmPrivateVars;
1816+
SmallVector<omp::PrivateClauseOp> privateDecls;
1817+
mlirPrivateVars.reserve(privateBlockArgs.size());
1818+
llvmPrivateVars.reserve(privateBlockArgs.size());
1819+
collectPrivatizationDecls(wsloopOp, privateDecls);
1820+
1821+
for (mlir::Value privateVar : wsloopOp.getPrivateVars())
1822+
mlirPrivateVars.push_back(privateVar);
1823+
17811824
SmallVector<omp::DeclareReductionOp> reductionDecls;
17821825
collectReductionDecls(wsloopOp, reductionDecls);
17831826
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
17841827
findAllocaInsertPoint(builder, moduleTranslation);
17851828

17861829
SmallVector<llvm::Value *> privateReductionVariables(
17871830
wsloopOp.getNumReductionVars());
1831+
1832+
splitBB(llvm::OpenMPIRBuilder::InsertPointTy(
1833+
allocaIP.getBlock(),
1834+
allocaIP.getBlock()->getTerminator()->getIterator()),
1835+
true, "omp.region.after_alloca");
1836+
1837+
llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
1838+
builder, moduleTranslation, privateBlockArgs, privateDecls,
1839+
mlirPrivateVars, llvmPrivateVars, allocaIP);
1840+
if (handleError(afterAllocas, opInst).failed())
1841+
return failure();
1842+
17881843
DenseMap<Value, llvm::Value *> reductionVariableMap;
17891844

17901845
MutableArrayRef<BlockArgument> reductionArgs =
17911846
cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();
17921847

1793-
if (failed(allocAndInitializeReductionVars(
1794-
wsloopOp, reductionArgs, builder, moduleTranslation, allocaIP,
1795-
reductionDecls, privateReductionVariables, reductionVariableMap,
1796-
isByRef)))
1848+
SmallVector<DeferredStore> deferredStores;
1849+
1850+
if (failed(allocReductionVars(wsloopOp, reductionArgs, builder,
1851+
moduleTranslation, allocaIP, reductionDecls,
1852+
privateReductionVariables, reductionVariableMap,
1853+
deferredStores, isByRef)))
1854+
return failure();
1855+
1856+
if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
1857+
llvmPrivateVars, privateDecls,
1858+
afterAllocas.get())))
1859+
return failure();
1860+
1861+
assert(afterAllocas.get()->getSinglePredecessor());
1862+
if (failed(initReductionVars(wsloopOp, reductionArgs, builder,
1863+
moduleTranslation,
1864+
afterAllocas.get()->getSinglePredecessor(),
1865+
reductionDecls, privateReductionVariables,
1866+
reductionVariableMap, isByRef, deferredStores)))
17971867
return failure();
17981868

17991869
// TODO: Replace this with proper composite translation support.
@@ -1900,9 +1970,13 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
19001970
builder.restoreIP(afterIP);
19011971

19021972
// Process the reductions if required.
1903-
return createReductionsAndCleanup(wsloopOp, builder, moduleTranslation,
1904-
allocaIP, reductionDecls,
1905-
privateReductionVariables, isByRef);
1973+
if (failed(createReductionsAndCleanup(wsloopOp, builder, moduleTranslation,
1974+
allocaIP, reductionDecls,
1975+
privateReductionVariables, isByRef)))
1976+
return failure();
1977+
1978+
return cleanupPrivateVars(builder, moduleTranslation, wsloopOp.getLoc(),
1979+
llvmPrivateVars, privateDecls);
19061980
}
19071981

19081982
/// Converts the OpenMP parallel operation to LLVM IR.
@@ -1960,52 +2034,12 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
19602034
deferredStores, isByRef)))
19612035
return llvm::make_error<PreviouslyReportedError>();
19622036

1963-
// Apply copy region for firstprivate.
1964-
bool needsFirstprivate =
1965-
llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
1966-
return privOp.getDataSharingType() ==
1967-
omp::DataSharingClauseType::FirstPrivate;
1968-
});
1969-
if (needsFirstprivate) {
1970-
// Find the end of the allocation blocks
1971-
assert(afterAllocas.get()->getSinglePredecessor());
1972-
builder.SetInsertPoint(
1973-
afterAllocas.get()->getSinglePredecessor()->getTerminator());
1974-
llvm::BasicBlock *copyBlock =
1975-
splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
1976-
builder.SetInsertPoint(copyBlock->getFirstNonPHIOrDbgOrAlloca());
1977-
}
1978-
for (auto [decl, mlirVar, llvmVar] :
1979-
llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
1980-
if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
1981-
continue;
1982-
1983-
// copyRegion implements `lhs = rhs`
1984-
Region &copyRegion = decl.getCopyRegion();
1985-
1986-
// map copyRegion rhs arg
1987-
llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirVar);
1988-
assert(nonPrivateVar);
1989-
moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);
1990-
1991-
// map copyRegion lhs arg
1992-
moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);
1993-
1994-
// in-place convert copy region
1995-
builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
1996-
if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy",
1997-
builder, moduleTranslation)))
1998-
return llvm::createStringError(
1999-
"failed to inline `copy` region of `omp.private`");
2000-
2001-
// ignore unused value yielded from copy region
2002-
2003-
// clear copy region block argument mapping in case it needs to be
2004-
// re-created with different sources for reuse of the same reduction
2005-
// decl
2006-
moduleTranslation.forgetMapping(copyRegion);
2007-
}
2037+
if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
2038+
llvmPrivateVars, privateDecls,
2039+
afterAllocas.get())))
2040+
return llvm::make_error<PreviouslyReportedError>();
20082041

2042+
assert(afterAllocas.get()->getSinglePredecessor());
20092043
if (failed(
20102044
initReductionVars(opInst, reductionArgs, builder, moduleTranslation,
20112045
afterAllocas.get()->getSinglePredecessor(),
@@ -2090,17 +2124,9 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
20902124
return llvm::createStringError(
20912125
"failed to inline `cleanup` region of `omp.declare_reduction`");
20922126

2093-
SmallVector<Region *> privateCleanupRegions;
2094-
llvm::transform(privateDecls, std::back_inserter(privateCleanupRegions),
2095-
[](omp::PrivateClauseOp privatizer) {
2096-
return &privatizer.getDeallocRegion();
2097-
});
2098-
2099-
if (failed(inlineOmpRegionCleanup(
2100-
privateCleanupRegions, llvmPrivateVars, moduleTranslation, builder,
2101-
"omp.private.dealloc", /*shouldLoadCleanupRegionArg=*/false)))
2102-
return llvm::createStringError(
2103-
"failed to inline `dealloc` region of `omp.private`");
2127+
if (failed(cleanupPrivateVars(builder, moduleTranslation, opInst.getLoc(),
2128+
llvmPrivateVars, privateDecls)))
2129+
return llvm::make_error<PreviouslyReportedError>();
21042130

21052131
builder.restoreIP(oldIP);
21062132
return llvm::Error::success();

mlir/test/Target/LLVMIR/openmp-todo.mlir

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -635,22 +635,3 @@ llvm.func @wsloop_order(%lb : i32, %ub : i32, %step : i32) {
635635
}
636636
llvm.return
637637
}
638-
639-
// -----
640-
641-
omp.private {type = private} @x.privatizer : !llvm.ptr alloc {
642-
^bb0(%arg0: !llvm.ptr):
643-
%0 = llvm.mlir.constant(1 : i32) : i32
644-
%1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
645-
omp.yield(%1 : !llvm.ptr)
646-
}
647-
llvm.func @wsloop_private(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
648-
// expected-error@below {{not yet implemented: Unhandled clause privatization in omp.wsloop operation}}
649-
// expected-error@below {{LLVM Translation failed for operation: omp.wsloop}}
650-
omp.wsloop private(@x.privatizer %x -> %arg0 : !llvm.ptr) {
651-
omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
652-
omp.yield
653-
}
654-
}
655-
llvm.return
656-
}

0 commit comments

Comments
 (0)