Skip to content

Commit 47d42cf

Browse files
[mlir][OpenMP] - MLIR to LLVMIR translation support for delayed privatization in omp.target ops. (#109668)
This patch adds support for translating the `private` clause on `omp.target` ops from MLIR to LLVM IR. This first cut handles only non-allocatable variables and applies to delayed privatization.
1 parent f2f9cdd commit 47d42cf

File tree

2 files changed

+187
-23
lines changed

2 files changed

+187
-23
lines changed

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 88 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1356,6 +1356,41 @@ class OmpParallelOpConversionManager {
13561356
unsigned privateArgEndIdx;
13571357
};
13581358

1359+
// Looks up, starting from the operation `from`, the omp::PrivateClauseOp
1360+
// whose symbol name is `symbolName`; asserts that a privatizer is found.
1361+
static omp::PrivateClauseOp findPrivatizer(Operation *from,
1362+
SymbolRefAttr symbolName) {
1363+
omp::PrivateClauseOp privatizer =
1364+
SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(from,
1365+
symbolName);
1366+
assert(privatizer && "privatizer not found in the symbol table");
1367+
return privatizer;
1368+
}
1369+
// Clones the given privatizer. The original privatizer is used as the
1370+
// insert point for the clone, which is renamed to a fresh symbol name.
1371+
static omp::PrivateClauseOp
1372+
clonePrivatizer(LLVM::ModuleTranslation &moduleTranslation,
1373+
omp::PrivateClauseOp privatizer, Operation *fromOperation) {
1374+
MLIRContext &context = moduleTranslation.getContext();
1375+
mlir::IRRewriter opCloner(&context);
1376+
opCloner.setInsertionPoint(privatizer);
1377+
auto clone =
1378+
llvm::cast<mlir::omp::PrivateClauseOp>(opCloner.clone(*privatizer));
1379+
1380+
// Generate a unique name for the clone to avoid symbol table clashes.
1381+
unsigned counter = 0;
1382+
SmallString<256> cloneName = SymbolTable::generateSymbolName<256>(
1383+
privatizer.getSymName(),
1384+
[&](llvm::StringRef candidate) {
1385+
return SymbolTable::lookupNearestSymbolFrom(
1386+
fromOperation, StringAttr::get(&context, candidate)) !=
1387+
nullptr;
1388+
},
1389+
counter);
1390+
1391+
clone.setSymName(cloneName);
1392+
return clone;
1393+
}
13591394
/// Converts the OpenMP parallel operation to LLVM IR.
13601395
static LogicalResult
13611396
convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
@@ -1611,34 +1646,14 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
16111646
continue;
16121647

16131648
SymbolRefAttr privSym = llvm::cast<SymbolRefAttr>(mlirPrivatizerAttr);
1614-
omp::PrivateClauseOp privatizer =
1615-
SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(
1616-
opInst, privSym);
1649+
omp::PrivateClauseOp privatizer = findPrivatizer(opInst, privSym);
16171650

16181651
// Clone the privatizer in case it is used by more than one parallel
16191652
// region. The privatizer is processed in-place (see below) before it
16201653
// gets inlined in the parallel region and therefore processing the
16211654
// original op is dangerous.
1622-
1623-
MLIRContext &context = moduleTranslation.getContext();
1624-
mlir::IRRewriter opCloner(&context);
1625-
opCloner.setInsertionPoint(privatizer);
1626-
auto clone = llvm::cast<mlir::omp::PrivateClauseOp>(
1627-
opCloner.clone(*privatizer));
1628-
1629-
// Unique the clone name to avoid clashes in the symbol table.
1630-
unsigned counter = 0;
1631-
SmallString<256> cloneName = SymbolTable::generateSymbolName<256>(
1632-
privatizer.getSymName(),
1633-
[&](llvm::StringRef candidate) {
1634-
return SymbolTable::lookupNearestSymbolFrom(
1635-
opInst, StringAttr::get(&context, candidate)) !=
1636-
nullptr;
1637-
},
1638-
counter);
1639-
1640-
clone.setSymName(cloneName);
1641-
return {mlirPrivVar, clone};
1655+
return {mlirPrivVar,
1656+
clonePrivatizer(moduleTranslation, privatizer, opInst)};
16421657
}
16431658
}
16441659

@@ -3434,6 +3449,56 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
34343449
const auto &arg = targetRegion.front().getArgument(argIndex);
34353450
moduleTranslation.mapValue(arg, mapOpValue);
34363451
}
3452+
3453+
// Do privatization after moduleTranslation has already recorded
3454+
// mapped values.
3455+
if (!targetOp.getPrivateVars().empty()) {
3456+
builder.restoreIP(allocaIP);
3457+
3458+
OperandRange privateVars = targetOp.getPrivateVars();
3459+
std::optional<ArrayAttr> privateSyms = targetOp.getPrivateSyms();
3460+
unsigned numMapVars = targetOp.getMapVars().size();
3461+
Block &firstTargetBlock = targetRegion.front();
3462+
BlockArgument *blockArgsStart = firstTargetBlock.getArguments().begin();
3463+
BlockArgument *privArgsStart = blockArgsStart + numMapVars;
3464+
BlockArgument *privArgsEnd =
3465+
privArgsStart + targetOp.getPrivateVars().size();
3466+
MutableArrayRef privateBlockArgs(privArgsStart, privArgsEnd);
3467+
3468+
for (auto [privVar, privatizerNameAttr, privBlockArg] :
3469+
llvm::zip_equal(privateVars, *privateSyms, privateBlockArgs)) {
3470+
3471+
SymbolRefAttr privSym = llvm::cast<SymbolRefAttr>(privatizerNameAttr);
3472+
omp::PrivateClauseOp privatizer = findPrivatizer(&opInst, privSym);
3473+
if (privatizer.getDataSharingType() ==
3474+
omp::DataSharingClauseType::FirstPrivate ||
3475+
!privatizer.getDeallocRegion().empty()) {
3476+
opInst.emitError("Translation of omp.target from MLIR to LLVMIR "
3477+
"failed because translation of firstprivate and "
3478+
" private allocatables is not supported yet");
3479+
bodyGenStatus = failure();
3480+
} else {
3481+
Region &allocRegion = privatizer.getAllocRegion();
3482+
BlockArgument allocRegionArg = allocRegion.getArgument(0);
3483+
moduleTranslation.mapValue(allocRegionArg,
3484+
moduleTranslation.lookupValue(privVar));
3485+
SmallVector<llvm::Value *, 1> yieldedValues;
3486+
if (failed(inlineConvertOmpRegions(
3487+
allocRegion, "omp.targetop.privatizer", builder,
3488+
moduleTranslation, &yieldedValues))) {
3489+
opInst.emitError(
3490+
"failed to inline `alloc` region of an `omp.private` "
3491+
"op in the target region");
3492+
bodyGenStatus = failure();
3493+
} else {
3494+
assert(yieldedValues.size() == 1);
3495+
moduleTranslation.mapValue(privBlockArg, yieldedValues.front());
3496+
}
3497+
moduleTranslation.forgetMapping(allocRegion);
3498+
builder.restoreIP(builder.saveIP());
3499+
}
3500+
}
3501+
}
34373502
llvm::BasicBlock *exitBlock = convertOmpOpRegions(
34383503
targetRegion, "omp.target", builder, moduleTranslation, bodyGenStatus);
34393504
builder.SetInsertPoint(exitBlock);
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
2+
3+
omp.private {type = private} @simple_var.privatizer : !llvm.ptr alloc {
4+
^bb0(%arg0: !llvm.ptr):
5+
%0 = llvm.mlir.constant(1 : i64) : i64
6+
%1 = llvm.alloca %0 x i32 {bindc_name = "simple_var", pinned} : (i64) -> !llvm.ptr
7+
omp.yield(%1 : !llvm.ptr)
8+
}
9+
llvm.func @target_map_single_private() attributes {fir.internal_name = "_QPtarget_map_single_private"} {
10+
%0 = llvm.mlir.constant(1 : i64) : i64
11+
%1 = llvm.alloca %0 x i32 {bindc_name = "simple_var"} : (i64) -> !llvm.ptr
12+
%3 = llvm.alloca %0 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
13+
%4 = llvm.mlir.constant(2 : i32) : i32
14+
llvm.store %4, %3 : i32, !llvm.ptr
15+
%5 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = "a"}
16+
omp.target map_entries(%5 -> %arg0 : !llvm.ptr) private(@simple_var.privatizer %1 -> %arg1 : !llvm.ptr) {
17+
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
18+
%6 = llvm.mlir.constant(10 : i32) : i32
19+
%7 = llvm.load %arg0 : !llvm.ptr -> i32
20+
%8 = llvm.add %7, %6 : i32
21+
llvm.store %8, %arg1 : i32, !llvm.ptr
22+
omp.terminator
23+
}
24+
llvm.return
25+
}
26+
// CHECK: define internal void @__omp_offloading_
27+
// CHECK-NOT: define {{.*}}
28+
// CHECK: %[[PRIV_ALLOC:.*]] = alloca i32, i64 1, align 4
29+
// CHECK: %[[ADD:.*]] = add i32 {{.*}}, 10
30+
// CHECK: store i32 %[[ADD]], ptr %[[PRIV_ALLOC]], align 4
31+
32+
omp.private {type = private} @n.privatizer : !llvm.ptr alloc {
33+
^bb0(%arg0: !llvm.ptr):
34+
%0 = llvm.mlir.constant(1 : i64) : i64
35+
%1 = llvm.alloca %0 x f32 {bindc_name = "n", pinned} : (i64) -> !llvm.ptr
36+
omp.yield(%1 : !llvm.ptr)
37+
}
38+
llvm.func @target_map_2_privates() attributes {fir.internal_name = "_QPtarget_map_2_privates"} {
39+
%0 = llvm.mlir.constant(1 : i64) : i64
40+
%1 = llvm.alloca %0 x i32 {bindc_name = "simple_var"} : (i64) -> !llvm.ptr
41+
%3 = llvm.alloca %0 x f32 {bindc_name = "n"} : (i64) -> !llvm.ptr
42+
%5 = llvm.alloca %0 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
43+
%6 = llvm.mlir.constant(2 : i32) : i32
44+
llvm.store %6, %5 : i32, !llvm.ptr
45+
%7 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = "a"}
46+
omp.target map_entries(%7 -> %arg0 : !llvm.ptr) private(@simple_var.privatizer %1 -> %arg1 : !llvm.ptr, @n.privatizer %3 -> %arg2 : !llvm.ptr) {
47+
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr):
48+
%8 = llvm.mlir.constant(1.100000e+01 : f32) : f32
49+
%9 = llvm.mlir.constant(10 : i32) : i32
50+
%10 = llvm.load %arg0 : !llvm.ptr -> i32
51+
%11 = llvm.add %10, %9 : i32
52+
llvm.store %11, %arg1 : i32, !llvm.ptr
53+
%12 = llvm.load %arg1 : !llvm.ptr -> i32
54+
%13 = llvm.sitofp %12 : i32 to f32
55+
%14 = llvm.fadd %13, %8 {fastmathFlags = #llvm.fastmath<contract>} : f32
56+
llvm.store %14, %arg2 : f32, !llvm.ptr
57+
omp.terminator
58+
}
59+
llvm.return
60+
}
61+
62+
63+
// CHECK: define internal void @__omp_offloading_
64+
// CHECK: %[[PRIV_I32_ALLOC:.*]] = alloca i32, i64 1, align 4
65+
// CHECK: %[[PRIV_FLOAT_ALLOC:.*]] = alloca float, i64 1, align 4
66+
// CHECK: %[[ADD_I32:.*]] = add i32 {{.*}}, 10
67+
// CHECK: store i32 %[[ADD_I32]], ptr %[[PRIV_I32_ALLOC]], align 4
68+
// CHECK: %[[LOAD_I32_AGAIN:.*]] = load i32, ptr %[[PRIV_I32_ALLOC]], align 4
69+
// CHECK: %[[CAST_TO_FLOAT:.*]] = sitofp i32 %[[LOAD_I32_AGAIN]] to float
70+
// CHECK: %[[ADD_FLOAT:.*]] = fadd contract float %[[CAST_TO_FLOAT]], 1.100000e+01
71+
// CHECK: store float %[[ADD_FLOAT]], ptr %[[PRIV_FLOAT_ALLOC]], align 4
72+
73+
// An entirely artificial privatizer that is meant to check multi-block
74+
// privatizers. The idea here is to prove that we set the correct
75+
// insertion points for the builder when generating, first, LLVM IR for the
76+
// privatizer and then for the actual target region.
77+
omp.private {type = private} @multi_block.privatizer : !llvm.ptr alloc {
78+
^bb0(%arg0: !llvm.ptr):
79+
%c1 = llvm.mlir.constant(1 : i32) : i32
80+
llvm.br ^bb1(%c1 : i32)
81+
82+
^bb1(%arg1: i32):
83+
%0 = llvm.alloca %arg1 x f32 : (i32) -> !llvm.ptr
84+
omp.yield(%0 : !llvm.ptr)
85+
}
86+
87+
llvm.func @target_op_private_multi_block(%arg0: !llvm.ptr) {
88+
omp.target private(@multi_block.privatizer %arg0 -> %arg2 : !llvm.ptr) {
89+
^bb0(%arg2: !llvm.ptr):
90+
%0 = llvm.load %arg2 : !llvm.ptr -> f32
91+
omp.terminator
92+
}
93+
llvm.return
94+
}
95+
// CHECK: define internal void @__omp_offloading_
96+
// CHECK: %[[ONE:.*]] = phi i32 [ 1, {{.*}} ]
97+
// CHECK: %[[PRIV_ALLOC:.*]] = alloca float, i32 %[[ONE]], align 4
98+
// CHECK: %[[PHI_ALLOCA:.*]] = phi ptr [ %[[PRIV_ALLOC]], {{.*}} ]
99+
// CHECK: %[[RESULT:.*]] = load float, ptr %[[PHI_ALLOCA]], align 4

0 commit comments

Comments
 (0)