Skip to content

[mlir][OpenMP] - MLIR to LLVMIR translation support for delayed privatization in omp.target ops. #109668

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 88 additions & 23 deletions mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1356,6 +1356,41 @@ class OmpParallelOpConversionManager {
unsigned privateArgEndIdx;
};

/// Resolves `symbolName` to an `omp.private` op by performing a nearest-symbol
/// lookup starting at `from`. Asserts that the lookup succeeded.
static omp::PrivateClauseOp findPrivatizer(Operation *from,
                                           SymbolRefAttr symbolName) {
  auto resolved =
      SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(from,
                                                                 symbolName);
  assert(resolved && "privatizer not found in the symbol table");
  return resolved;
}
/// Creates a copy of `privatizer`, using the original op as the insertion
/// point, and renames the copy to a symbol name that does not resolve to any
/// existing symbol when looked up from `fromOperation`.
static omp::PrivateClauseOp
clonePrivatizer(LLVM::ModuleTranslation &moduleTranslation,
                omp::PrivateClauseOp privatizer, Operation *fromOperation) {
  MLIRContext &ctx = moduleTranslation.getContext();

  // Materialize the copy at the original privatizer.
  mlir::IRRewriter rewriter(&ctx);
  rewriter.setInsertionPoint(privatizer);
  Operation *copiedOp = rewriter.clone(*privatizer);
  auto copy = llvm::cast<mlir::omp::PrivateClauseOp>(copiedOp);

  // A candidate name is considered taken when it already resolves to a symbol
  // reachable from `fromOperation`.
  auto isNameTaken = [&](llvm::StringRef candidate) {
    StringAttr candidateAttr = StringAttr::get(&ctx, candidate);
    return SymbolTable::lookupNearestSymbolFrom(fromOperation,
                                                candidateAttr) != nullptr;
  };

  // Pick a unique name for the copy so it does not clash in the symbol table.
  unsigned uniquingCounter = 0;
  SmallString<256> uniqueName = SymbolTable::generateSymbolName<256>(
      privatizer.getSymName(), isNameTaken, uniquingCounter);

  copy.setSymName(uniqueName);
  return copy;
}
/// Converts the OpenMP parallel operation to LLVM IR.
static LogicalResult
convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
Expand Down Expand Up @@ -1611,34 +1646,14 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
continue;

SymbolRefAttr privSym = llvm::cast<SymbolRefAttr>(mlirPrivatizerAttr);
omp::PrivateClauseOp privatizer =
SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(
opInst, privSym);
omp::PrivateClauseOp privatizer = findPrivatizer(opInst, privSym);

// Clone the privatizer in case it is used by more than one parallel
// region. The privatizer is processed in-place (see below) before it
// gets inlined in the parallel region and therefore processing the
// original op is dangerous.

MLIRContext &context = moduleTranslation.getContext();
mlir::IRRewriter opCloner(&context);
opCloner.setInsertionPoint(privatizer);
auto clone = llvm::cast<mlir::omp::PrivateClauseOp>(
opCloner.clone(*privatizer));

// Unique the clone name to avoid clashes in the symbol table.
unsigned counter = 0;
SmallString<256> cloneName = SymbolTable::generateSymbolName<256>(
privatizer.getSymName(),
[&](llvm::StringRef candidate) {
return SymbolTable::lookupNearestSymbolFrom(
opInst, StringAttr::get(&context, candidate)) !=
nullptr;
},
counter);

clone.setSymName(cloneName);
return {mlirPrivVar, clone};
return {mlirPrivVar,
clonePrivatizer(moduleTranslation, privatizer, opInst)};
}
}

Expand Down Expand Up @@ -3435,6 +3450,56 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
const auto &arg = targetRegion.front().getArgument(argIndex);
moduleTranslation.mapValue(arg, mapOpValue);
}

// Do privatization after moduleTranslation has already recorded
// mapped values.
if (!targetOp.getPrivateVars().empty()) {
builder.restoreIP(allocaIP);

OperandRange privateVars = targetOp.getPrivateVars();
std::optional<ArrayAttr> privateSyms = targetOp.getPrivateSyms();
unsigned numMapVars = targetOp.getMapVars().size();
Block &firstTargetBlock = targetRegion.front();
BlockArgument *blockArgsStart = firstTargetBlock.getArguments().begin();
BlockArgument *privArgsStart = blockArgsStart + numMapVars;
BlockArgument *privArgsEnd =
privArgsStart + targetOp.getPrivateVars().size();
MutableArrayRef privateBlockArgs(privArgsStart, privArgsEnd);

for (auto [privVar, privatizerNameAttr, privBlockArg] :
llvm::zip_equal(privateVars, *privateSyms, privateBlockArgs)) {

SymbolRefAttr privSym = llvm::cast<SymbolRefAttr>(privatizerNameAttr);
omp::PrivateClauseOp privatizer = findPrivatizer(&opInst, privSym);
if (privatizer.getDataSharingType() ==
omp::DataSharingClauseType::FirstPrivate ||
!privatizer.getDeallocRegion().empty()) {
opInst.emitError("Translation of omp.target from MLIR to LLVMIR "
"failed because translation of firstprivate and "
" private allocatables is not supported yet");
bodyGenStatus = failure();
} else {
Region &allocRegion = privatizer.getAllocRegion();
BlockArgument allocRegionArg = allocRegion.getArgument(0);
moduleTranslation.mapValue(allocRegionArg,
moduleTranslation.lookupValue(privVar));
SmallVector<llvm::Value *, 1> yieldedValues;
if (failed(inlineConvertOmpRegions(
allocRegion, "omp.targetop.privatizer", builder,
moduleTranslation, &yieldedValues))) {
opInst.emitError(
"failed to inline `alloc` region of an `omp.private` "
"op in the target region");
bodyGenStatus = failure();
} else {
assert(yieldedValues.size() == 1);
moduleTranslation.mapValue(privBlockArg, yieldedValues.front());
}
moduleTranslation.forgetMapping(allocRegion);
builder.restoreIP(builder.saveIP());
}
}
}
llvm::BasicBlock *exitBlock = convertOmpOpRegions(
targetRegion, "omp.target", builder, moduleTranslation, bodyGenStatus);
builder.SetInsertPoint(exitBlock);
Expand Down
99 changes: 99 additions & 0 deletions mlir/test/Target/LLVMIR/openmp-target-private.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s

// Privatizer for an i32 variable. The alloc region does not use its block
// argument: it allocates a single i32 with llvm.alloca and yields the
// resulting pointer.
omp.private {type = private} @simple_var.privatizer : !llvm.ptr alloc {
^bb0(%arg0: !llvm.ptr):
%0 = llvm.mlir.constant(1 : i64) : i64
%1 = llvm.alloca %0 x i32 {bindc_name = "simple_var", pinned} : (i64) -> !llvm.ptr
omp.yield(%1 : !llvm.ptr)
}
// One mapped variable (`a`, initialized to 2) plus one privatized variable
// (`simple_var`). Inside the target region the mapped value is loaded,
// incremented by 10 and stored into the private copy (%arg1).
llvm.func @target_map_single_private() attributes {fir.internal_name = "_QPtarget_map_single_private"} {
%0 = llvm.mlir.constant(1 : i64) : i64
%1 = llvm.alloca %0 x i32 {bindc_name = "simple_var"} : (i64) -> !llvm.ptr
%3 = llvm.alloca %0 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
%4 = llvm.mlir.constant(2 : i32) : i32
llvm.store %4, %3 : i32, !llvm.ptr
%5 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = "a"}
// Block arguments: %arg0 is the map entry, %arg1 is the private variable.
omp.target map_entries(%5 -> %arg0 : !llvm.ptr) private(@simple_var.privatizer %1 -> %arg1 : !llvm.ptr) {
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
%6 = llvm.mlir.constant(10 : i32) : i32
%7 = llvm.load %arg0 : !llvm.ptr -> i32
%8 = llvm.add %7, %6 : i32
llvm.store %8, %arg1 : i32, !llvm.ptr
omp.terminator
}
llvm.return
}
// CHECK: define internal void @__omp_offloading_
// CHECK-NOT: define {{.*}}
// CHECK: %[[PRIV_ALLOC:.*]] = alloca i32, i64 1, align 4
// CHECK: %[[ADD:.*]] = add i32 {{.*}}, 10
// CHECK: store i32 %[[ADD]], ptr %[[PRIV_ALLOC]], align 4

// Privatizer for an f32 variable. The alloc region does not use its block
// argument: it allocates a single f32 with llvm.alloca and yields the
// resulting pointer.
omp.private {type = private} @n.privatizer : !llvm.ptr alloc {
^bb0(%arg0: !llvm.ptr):
%0 = llvm.mlir.constant(1 : i64) : i64
%1 = llvm.alloca %0 x f32 {bindc_name = "n", pinned} : (i64) -> !llvm.ptr
omp.yield(%1 : !llvm.ptr)
}
// One mapped variable (`a`, initialized to 2) plus two privatized variables
// (`simple_var` as i32 and `n` as f32). The target region stores `a + 10`
// into the first private copy, reloads it, converts it to f32, adds 11.0 and
// stores the result into the second private copy.
llvm.func @target_map_2_privates() attributes {fir.internal_name = "_QPtarget_map_2_privates"} {
%0 = llvm.mlir.constant(1 : i64) : i64
%1 = llvm.alloca %0 x i32 {bindc_name = "simple_var"} : (i64) -> !llvm.ptr
%3 = llvm.alloca %0 x f32 {bindc_name = "n"} : (i64) -> !llvm.ptr
%5 = llvm.alloca %0 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
%6 = llvm.mlir.constant(2 : i32) : i32
llvm.store %6, %5 : i32, !llvm.ptr
%7 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = "a"}
// Block arguments: %arg0 is the map entry, %arg1 and %arg2 are the privates.
omp.target map_entries(%7 -> %arg0 : !llvm.ptr) private(@simple_var.privatizer %1 -> %arg1 : !llvm.ptr, @n.privatizer %3 -> %arg2 : !llvm.ptr) {
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr):
%8 = llvm.mlir.constant(1.100000e+01 : f32) : f32
%9 = llvm.mlir.constant(10 : i32) : i32
%10 = llvm.load %arg0 : !llvm.ptr -> i32
%11 = llvm.add %10, %9 : i32
llvm.store %11, %arg1 : i32, !llvm.ptr
%12 = llvm.load %arg1 : !llvm.ptr -> i32
%13 = llvm.sitofp %12 : i32 to f32
%14 = llvm.fadd %13, %8 {fastmathFlags = #llvm.fastmath<contract>} : f32
llvm.store %14, %arg2 : f32, !llvm.ptr
omp.terminator
}
llvm.return
}


// CHECK: define internal void @__omp_offloading_
// CHECK: %[[PRIV_I32_ALLOC:.*]] = alloca i32, i64 1, align 4
// CHECK: %[[PRIV_FLOAT_ALLOC:.*]] = alloca float, i64 1, align 4
// CHECK: %[[ADD_I32:.*]] = add i32 {{.*}}, 10
// CHECK: store i32 %[[ADD_I32]], ptr %[[PRIV_I32_ALLOC]], align 4
// CHECK: %[[LOAD_I32_AGAIN:.*]] = load i32, ptr %[[PRIV_I32_ALLOC]], align 4
// CHECK: %[[CAST_TO_FLOAT:.*]] = sitofp i32 %[[LOAD_I32_AGAIN]] to float
// CHECK: %[[ADD_FLOAT:.*]] = fadd contract float %[[CAST_TO_FLOAT]], 1.100000e+01
// CHECK: store float %[[ADD_FLOAT]], ptr %[[PRIV_FLOAT_ALLOC]], align 4

// An entirely artificial privatizer that is meant to exercise multi-block
// privatizers. The idea here is to prove that we set the correct insertion
// points for the builder when generating LLVM IR, first for the privatizer
// and then for the actual target region.
// Two-block alloc region: the entry block forwards the constant 1 to ^bb1,
// which uses that block argument as the element count of the f32 alloca and
// yields the resulting pointer.
omp.private {type = private} @multi_block.privatizer : !llvm.ptr alloc {
^bb0(%arg0: !llvm.ptr):
%c1 = llvm.mlir.constant(1 : i32) : i32
llvm.br ^bb1(%c1 : i32)

^bb1(%arg1: i32):
%0 = llvm.alloca %arg1 x f32 : (i32) -> !llvm.ptr
omp.yield(%0 : !llvm.ptr)
}

// Target op with a private clause only (no map entries). The region loads an
// f32 through the private block argument produced by the multi-block
// privatizer.
llvm.func @target_op_private_multi_block(%arg0: !llvm.ptr) {
omp.target private(@multi_block.privatizer %arg0 -> %arg2 : !llvm.ptr) {
^bb0(%arg2: !llvm.ptr):
%0 = llvm.load %arg2 : !llvm.ptr -> f32
omp.terminator
}
llvm.return
}
// CHECK: define internal void @__omp_offloading_
// CHECK: %[[ONE:.*]] = phi i32 [ 1, {{.*}} ]
// CHECK: %[[PRIV_ALLOC:.*]] = alloca float, i32 %[[ONE]], align 4
// CHECK: %[[PHI_ALLOCA:.*]] = phi ptr [ %[[PRIV_ALLOC]], {{.*}} ]
// CHECK: %[[RESULT:.*]] = load float, ptr %[[PHI_ALLOCA]], align 4
Loading