Skip to content

Commit 9c427d7

Browse files
committed
[OpenMP][OMPIRBuilder] Add delayed privatization support for wsloop
Extend the MLIR to LLVM lowering by adding delayed privatization support for `omp.wsloop`. This also refactors a few bits of code to isolate the logic needed for `firstprivate` initialization in a shared util that can be used across the constructs that need it.
1 parent a308530 commit 9c427d7

File tree

3 files changed

+184
-114
lines changed

3 files changed

+184
-114
lines changed

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 105 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
268268
checkAllocate(op, result);
269269
checkLinear(op, result);
270270
checkOrder(op, result);
271-
checkPrivate(op, result);
272271
})
273272
.Case([&](omp::ParallelOp op) { checkAllocate(op, result); })
274273
.Case([&](omp::SimdOp op) {
@@ -1302,6 +1301,7 @@ allocatePrivateVars(llvm::IRBuilderBase &builder,
13021301
MutableArrayRef<mlir::Value> mlirPrivateVars,
13031302
llvm::SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
13041303
const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP) {
1304+
llvm::IRBuilderBase::InsertPointGuard guard(builder);
13051305
// Allocate private vars
13061306
llvm::BranchInst *allocaTerminator =
13071307
llvm::cast<llvm::BranchInst>(allocaIP.getBlock()->getTerminator());
@@ -1363,6 +1363,63 @@ allocatePrivateVars(llvm::IRBuilderBase &builder,
13631363
return afterAllocas;
13641364
}
13651365

1366+
/// Inlines the `copy` region of every `firstprivate` entry in `privateDecls`,
/// so that each private copy in `llvmPrivateVars` is initialized from the LLVM
/// value currently mapped to the matching entry of `mlirPrivateVars`.
///
/// `privateDecls`, `mlirPrivateVars` and `llvmPrivateVars` are iterated in
/// lock-step (`zip_equal`); entries whose data-sharing type is not
/// `FirstPrivate` are skipped. When at least one entry is `firstprivate`, a new
/// block named "omp.private.copy" is split off at the terminator of the single
/// predecessor of `afterAllocas`, and the copy regions are emitted there.
///
/// Returns `failure()` (after emitting an error on the offending decl) if a
/// copy region cannot be inlined, and `success()` otherwise.
static LogicalResult
1367+
initFirstPrivateVars(llvm::IRBuilderBase &builder,
1368+
LLVM::ModuleTranslation &moduleTranslation,
1369+
SmallVectorImpl<mlir::Value> &mlirPrivateVars,
1370+
SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
1371+
SmallVectorImpl<omp::PrivateClauseOp> &privateDecls,
1372+
llvm::BasicBlock *afterAllocas) {
1373+
// This function repositions the builder several times; the guard puts the
// caller's insertion point back when the function returns.
llvm::IRBuilderBase::InsertPointGuard guard(builder);
1374+
// Apply copy region for firstprivate.
1375+
bool needsFirstprivate =
1376+
llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
1377+
return privOp.getDataSharingType() ==
1378+
omp::DataSharingClauseType::FirstPrivate;
1379+
});
1380+
1381+
if (needsFirstprivate) {
1382+
// Find the end of the allocation blocks
// NOTE(review): `getSinglePredecessor()` is dereferenced without a null
// check; this assumes `afterAllocas` has exactly one predecessor -- confirm
// that every caller guarantees it (or assert it here).
1383+
builder.SetInsertPoint(
1384+
afterAllocas->getSinglePredecessor()->getTerminator());
1385+
llvm::BasicBlock *copyBlock =
1386+
splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
1387+
builder.SetInsertPoint(copyBlock->getFirstNonPHIOrDbgOrAlloca());
1388+
}
1389+
1390+
for (auto [decl, mlirVar, llvmVar] :
1391+
llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
1392+
if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
1393+
continue;
1394+
1395+
// copyRegion implements `lhs = rhs`
1396+
Region &copyRegion = decl.getCopyRegion();
1397+
1398+
// map copyRegion rhs arg
1399+
llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirVar);
1400+
assert(nonPrivateVar);
1401+
moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);
1402+
1403+
// map copyRegion lhs arg
1404+
moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);
1405+
1406+
// in-place convert copy region
1407+
builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
1408+
if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy", builder,
1409+
moduleTranslation)))
1410+
return decl.emitError("failed to inline `copy` region of `omp.private`");
1411+
1412+
// ignore unused value yielded from copy region
1413+
1414+
// clear copy region block argument mapping in case it needs to be
1415+
// re-created with different sources for reuse of the same `omp.private`
1416+
// decl
1417+
moduleTranslation.forgetMapping(copyRegion);
1418+
}
1419+
1420+
return success();
1421+
}
1422+
13661423
static LogicalResult
13671424
convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
13681425
LLVM::ModuleTranslation &moduleTranslation) {
@@ -1622,50 +1679,10 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
16221679
if (handleError(afterAllocas, *taskOp).failed())
16231680
return llvm::make_error<PreviouslyReportedError>();
16241681

1625-
// Apply copy region for firstprivate
1626-
bool needsFirstPrivate =
1627-
llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
1628-
return privOp.getDataSharingType() ==
1629-
omp::DataSharingClauseType::FirstPrivate;
1630-
});
1631-
if (needsFirstPrivate) {
1632-
// Find the end of the allocation blocks
1633-
assert(afterAllocas.get()->getSinglePredecessor());
1634-
builder.SetInsertPoint(
1635-
afterAllocas.get()->getSinglePredecessor()->getTerminator());
1636-
llvm::BasicBlock *copyBlock =
1637-
splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
1638-
builder.SetInsertPoint(copyBlock->getFirstNonPHIOrDbgOrAlloca());
1639-
}
1640-
for (auto [decl, mlirVar, llvmVar] :
1641-
llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
1642-
if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
1643-
continue;
1644-
1645-
// copyRegion implements `lhs = rhs`
1646-
Region &copyRegion = decl.getCopyRegion();
1647-
1648-
// map copyRegion rhs arg
1649-
llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirVar);
1650-
assert(nonPrivateVar);
1651-
moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);
1652-
1653-
// map copyRegion lhs arg
1654-
moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);
1655-
1656-
// in-place convert copy region
1657-
builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
1658-
if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy",
1659-
builder, moduleTranslation)))
1660-
return llvm::createStringError(
1661-
"failed to inline `copy` region of an `omp.private` op in taskOp");
1662-
1663-
// ignore unused value yielded from copy region
1664-
1665-
// clear copy region block argument mapping in case it needs to be
1666-
// re-created with different source for reuse of the same reduction decl
1667-
moduleTranslation.forgetMapping(copyRegion);
1668-
}
1682+
if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
1683+
llvmPrivateVars, privateDecls,
1684+
afterAllocas.get())))
1685+
return llvm::make_error<PreviouslyReportedError>();
16691686

16701687
// translate the body of the task:
16711688
builder.restoreIP(codegenIP);
@@ -1777,22 +1794,56 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
17771794
chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
17781795
}
17791796

1797+
MutableArrayRef<BlockArgument> privateBlockArgs =
1798+
cast<omp::BlockArgOpenMPOpInterface>(*wsloopOp).getPrivateBlockArgs();
1799+
SmallVector<mlir::Value> mlirPrivateVars;
1800+
SmallVector<llvm::Value *> llvmPrivateVars;
1801+
SmallVector<omp::PrivateClauseOp> privateDecls;
1802+
mlirPrivateVars.reserve(privateBlockArgs.size());
1803+
llvmPrivateVars.reserve(privateBlockArgs.size());
1804+
collectPrivatizationDecls(wsloopOp, privateDecls);
1805+
1806+
for (mlir::Value privateVar : wsloopOp.getPrivateVars())
1807+
mlirPrivateVars.push_back(privateVar);
1808+
17801809
SmallVector<omp::DeclareReductionOp> reductionDecls;
17811810
collectReductionDecls(wsloopOp, reductionDecls);
17821811
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
17831812
findAllocaInsertPoint(builder, moduleTranslation);
17841813

17851814
SmallVector<llvm::Value *> privateReductionVariables(
17861815
wsloopOp.getNumReductionVars());
1816+
1817+
llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
1818+
builder, moduleTranslation, privateBlockArgs, privateDecls,
1819+
mlirPrivateVars, llvmPrivateVars, allocaIP);
1820+
if (handleError(afterAllocas, opInst).failed())
1821+
return failure();
1822+
17871823
DenseMap<Value, llvm::Value *> reductionVariableMap;
17881824

17891825
MutableArrayRef<BlockArgument> reductionArgs =
17901826
cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();
17911827

1792-
if (failed(allocAndInitializeReductionVars(
1793-
wsloopOp, reductionArgs, builder, moduleTranslation, allocaIP,
1794-
reductionDecls, privateReductionVariables, reductionVariableMap,
1795-
isByRef)))
1828+
SmallVector<DeferredStore> deferredStores;
1829+
1830+
if (failed(allocReductionVars(wsloopOp, reductionArgs, builder,
1831+
moduleTranslation, allocaIP, reductionDecls,
1832+
privateReductionVariables, reductionVariableMap,
1833+
deferredStores, isByRef)))
1834+
return failure();
1835+
1836+
if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
1837+
llvmPrivateVars, privateDecls,
1838+
afterAllocas.get())))
1839+
return failure();
1840+
1841+
assert(afterAllocas.get()->getSinglePredecessor());
1842+
if (failed(initReductionVars(wsloopOp, reductionArgs, builder,
1843+
moduleTranslation,
1844+
afterAllocas.get()->getSinglePredecessor(),
1845+
reductionDecls, privateReductionVariables,
1846+
reductionVariableMap, isByRef, deferredStores)))
17961847
return failure();
17971848

17981849
// TODO: Replace this with proper composite translation support.
@@ -1959,53 +2010,12 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
19592010
deferredStores, isByRef)))
19602011
return llvm::make_error<PreviouslyReportedError>();
19612012

1962-
// Apply copy region for firstprivate.
1963-
bool needsFirstprivate =
1964-
llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
1965-
return privOp.getDataSharingType() ==
1966-
omp::DataSharingClauseType::FirstPrivate;
1967-
});
1968-
if (needsFirstprivate) {
1969-
// Find the end of the allocation blocks
1970-
assert(afterAllocas.get()->getSinglePredecessor());
1971-
builder.SetInsertPoint(
1972-
afterAllocas.get()->getSinglePredecessor()->getTerminator());
1973-
llvm::BasicBlock *copyBlock =
1974-
splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
1975-
builder.SetInsertPoint(copyBlock->getFirstNonPHIOrDbgOrAlloca());
1976-
}
1977-
1978-
for (auto [decl, mlirVar, llvmVar] :
1979-
llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
1980-
if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
1981-
continue;
1982-
1983-
// copyRegion implements `lhs = rhs`
1984-
Region &copyRegion = decl.getCopyRegion();
1985-
1986-
// map copyRegion rhs arg
1987-
llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirVar);
1988-
assert(nonPrivateVar);
1989-
moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);
1990-
1991-
// map copyRegion lhs arg
1992-
moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);
1993-
1994-
// in-place convert copy region
1995-
builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
1996-
if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy",
1997-
builder, moduleTranslation)))
1998-
return llvm::createStringError(
1999-
"failed to inline `copy` region of `omp.private`");
2000-
2001-
// ignore unused value yielded from copy region
2002-
2003-
// clear copy region block argument mapping in case it needs to be
2004-
// re-created with different sources for reuse of the same reduction
2005-
// decl
2006-
moduleTranslation.forgetMapping(copyRegion);
2007-
}
2013+
if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
2014+
llvmPrivateVars, privateDecls,
2015+
afterAllocas.get())))
2016+
return llvm::make_error<PreviouslyReportedError>();
20082017

2018+
assert(afterAllocas.get()->getSinglePredecessor());
20092019
if (failed(
20102020
initReductionVars(opInst, reductionArgs, builder, moduleTranslation,
20112021
afterAllocas.get()->getSinglePredecessor(),

mlir/test/Target/LLVMIR/openmp-todo.mlir

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -635,22 +635,3 @@ llvm.func @wsloop_order(%lb : i32, %ub : i32, %step : i32) {
635635
}
636636
llvm.return
637637
}
638-
639-
// -----
640-
641-
omp.private {type = private} @x.privatizer : !llvm.ptr alloc {
642-
^bb0(%arg0: !llvm.ptr):
643-
%0 = llvm.mlir.constant(1 : i32) : i32
644-
%1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
645-
omp.yield(%1 : !llvm.ptr)
646-
}
647-
llvm.func @wsloop_private(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
648-
// expected-error@below {{not yet implemented: Unhandled clause privatization in omp.wsloop operation}}
649-
// expected-error@below {{LLVM Translation failed for operation: omp.wsloop}}
650-
omp.wsloop private(@x.privatizer %x -> %arg0 : !llvm.ptr) {
651-
omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
652-
omp.yield
653-
}
654-
}
655-
llvm.return
656-
}
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
2+
3+
// tests a wsloop private + firstprivate + reduction to make sure block structure
4+
// is handled properly.
5+
6+
omp.private {type = private} @_QFwsloop_privateEi_private_ref_i32 : !llvm.ptr alloc {
7+
^bb0(%arg0: !llvm.ptr):
8+
%0 = llvm.mlir.constant(1 : i64) : i64
9+
%1 = llvm.alloca %0 x i32 {bindc_name = "i", pinned} : (i64) -> !llvm.ptr
10+
omp.yield(%1 : !llvm.ptr)
11+
}
12+
13+
omp.private {type = firstprivate} @_QFwsloop_privateEc_firstprivate_ref_c8 : !llvm.ptr alloc {
14+
^bb0(%arg0: !llvm.ptr):
15+
%0 = llvm.mlir.constant(1 : i64) : i64
16+
%1 = llvm.alloca %0 x !llvm.array<1 x i8> {bindc_name = "c", pinned} : (i64) -> !llvm.ptr
17+
omp.yield(%1 : !llvm.ptr)
18+
} copy {
19+
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
20+
%0 = llvm.load %arg0 : !llvm.ptr -> !llvm.array<1 x i8>
21+
llvm.store %0, %arg1 : !llvm.array<1 x i8>, !llvm.ptr
22+
omp.yield(%arg1 : !llvm.ptr)
23+
}
24+
25+
omp.declare_reduction @max_f32 : f32 init {
26+
^bb0(%arg0: f32):
27+
%0 = llvm.mlir.constant(-3.40282347E+38 : f32) : f32
28+
omp.yield(%0 : f32)
29+
} combiner {
30+
^bb0(%arg0: f32, %arg1: f32):
31+
%0 = llvm.intr.maxnum(%arg0, %arg1) {fastmathFlags = #llvm.fastmath<contract>} : (f32, f32) -> f32
32+
omp.yield(%0 : f32)
33+
}
34+
35+
llvm.func @wsloop_private_(%arg0: !llvm.ptr {fir.bindc_name = "y"}) attributes {fir.internal_name = "_QPwsloop_private", frame_pointer = #llvm.framePointerKind<all>, target_cpu = "x86-64"} {
36+
%0 = llvm.mlir.constant(1 : i64) : i64
37+
%1 = llvm.alloca %0 x f32 {bindc_name = "x"} : (i64) -> !llvm.ptr
38+
%3 = llvm.alloca %0 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
39+
%5 = llvm.alloca %0 x !llvm.array<1 x i8> {bindc_name = "c"} : (i64) -> !llvm.ptr
40+
%6 = llvm.mlir.constant(1 : i32) : i32
41+
%7 = llvm.mlir.constant(10 : i32) : i32
42+
%8 = llvm.mlir.constant(0 : i32) : i32
43+
omp.parallel {
44+
omp.wsloop private(@_QFwsloop_privateEc_firstprivate_ref_c8 %5 -> %arg1, @_QFwsloop_privateEi_private_ref_i32 %3 -> %arg2 : !llvm.ptr, !llvm.ptr) reduction(@max_f32 %1 -> %arg3 : !llvm.ptr) {
45+
omp.loop_nest (%arg4) : i32 = (%8) to (%7) inclusive step (%6) {
46+
omp.yield
47+
}
48+
}
49+
omp.terminator
50+
}
51+
llvm.return
52+
}
53+
54+
// CHECK: call void {{.*}} @__kmpc_fork_call(ptr @1, i32 1, ptr @[[OUTLINED:.*]], ptr %{{.*}})
55+
56+
// CHECK: define internal void @[[OUTLINED:.*]]{{.*}} {
57+
58+
// First, check that all memory for privates and reductions is allocated.
59+
// CHECK: omp.par.entry:
60+
// CHECK: %[[CHR:.*]] = alloca [1 x i8], i64 1, align 1
61+
// CHECK: %[[INT:.*]] = alloca i32, i64 1, align 4
62+
// CHECK: %[[FLT:.*]] = alloca float, align 4
63+
// CHECK: %[[RED_ARR:.*]] = alloca [1 x ptr], align 8
64+
// CHECK: br label %[[LATE_ALLOC_BB:.*]]
65+
66+
// CHECK: [[LATE_ALLOC_BB]]:
67+
// CHECK: br label %[[PRIVATE_CPY:.*]]
68+
69+
// Second, check that first private was properly copied.
70+
// CHECK: [[PRIVATE_CPY:.*]]:
71+
// CHECK: %[[CHR_VAL:.*]] = load [1 x i8], ptr %{{.*}}, align 1
72+
// CHECK: store [1 x i8] %[[CHR_VAL]], ptr %[[CHR]], align 1
73+
// CHECK: br label %[[RED_INIT:.*]]
74+
75+
// Third, check that reduction init took place.
76+
// CHECK: [[RED_INIT]]:
77+
// CHECK: store float 0x{{.*}}, ptr %[[FLT]], align 4
78+
79+
// CHECK: }

0 commit comments

Comments
 (0)