Skip to content

[MLIR][SCF] Update scf.parallel lowering to OpenMP (3/5) #89212

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Apr 24, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h
Original file line number Diff line number Diff line change
Expand Up @@ -295,10 +295,9 @@ using TeamsClauseOps =
PrivateClauseOps, ReductionClauseOps, ThreadLimitClauseOps>;

using WsloopClauseOps =
detail::Clauses<AllocateClauseOps, CollapseClauseOps, LinearClauseOps,
LoopRelatedOps, NowaitClauseOps, OrderClauseOps,
OrderedClauseOps, PrivateClauseOps, ReductionClauseOps,
ScheduleClauseOps>;
detail::Clauses<AllocateClauseOps, LinearClauseOps, NowaitClauseOps,
OrderClauseOps, OrderedClauseOps, PrivateClauseOps,
ReductionClauseOps, ScheduleClauseOps>;

} // namespace omp
} // namespace mlir
Expand Down
52 changes: 21 additions & 31 deletions mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -600,29 +600,29 @@ def LoopNestOp : OpenMP_Op<"loop_nest", [SameVariadicOperandSize,
//===----------------------------------------------------------------------===//

def WsloopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
AllTypesMatch<["lowerBound", "upperBound", "step"]>,
DeclareOpInterfaceMethods<LoopWrapperInterface>,
RecursiveMemoryEffects, ReductionClauseInterface]> {
RecursiveMemoryEffects, ReductionClauseInterface,
SingleBlockImplicitTerminator<"TerminatorOp">]> {
let summary = "worksharing-loop construct";
let description = [{
The worksharing-loop construct specifies that the iterations of the loop(s)
will be executed in parallel by threads in the current context. These
iterations are spread across threads that already exist in the enclosing
parallel region. The lower and upper bounds specify a half-open range: the
range includes the lower bound but does not include the upper bound. If the
`inclusive` attribute is specified then the upper bound is also included.
parallel region.

The body region can contain any number of blocks. The region is terminated
by an "omp.yield" instruction without operands.
The body region must consist of a single block, which in turn must contain
exactly one operation followed by a terminator. That operation must be
either another compatible loop wrapper or an `omp.loop_nest`.

```
omp.wsloop <clauses>
for (%i1, %i2) : index = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) {
%a = load %arrA[%i1, %i2] : memref<?x?xf32>
%b = load %arrB[%i1, %i2] : memref<?x?xf32>
%sum = arith.addf %a, %b : f32
store %sum, %arrC[%i1, %i2] : memref<?x?xf32>
omp.yield
omp.wsloop <clauses> {
omp.loop_nest (%i1, %i2) : index = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) {
%a = load %arrA[%i1, %i2] : memref<?x?xf32>
%b = load %arrB[%i1, %i2] : memref<?x?xf32>
%sum = arith.addf %a, %b : f32
store %sum, %arrC[%i1, %i2] : memref<?x?xf32>
omp.yield
}
}
```

Expand Down Expand Up @@ -665,10 +665,7 @@ def WsloopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
passed by reference.
}];

let arguments = (ins Variadic<IntLikeType>:$lowerBound,
Variadic<IntLikeType>:$upperBound,
Variadic<IntLikeType>:$step,
Variadic<AnyType>:$linear_vars,
let arguments = (ins Variadic<AnyType>:$linear_vars,
Variadic<I32>:$linear_step_vars,
Variadic<OpenMP_PointerLikeType>:$reduction_vars,
OptionalAttr<SymbolRefArrayAttr>:$reductions,
Expand All @@ -679,22 +676,16 @@ def WsloopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
UnitAttr:$nowait,
UnitAttr:$byref,
ConfinedAttr<OptionalAttr<I64Attr>, [IntMinValue<0>]>:$ordered_val,
OptionalAttr<OrderKindAttr>:$order_val,
UnitAttr:$inclusive);
OptionalAttr<OrderKindAttr>:$order_val);

let builders = [
OpBuilder<(ins "ValueRange":$lowerBound, "ValueRange":$upperBound,
"ValueRange":$step,
CArg<"ArrayRef<NamedAttribute>", "{}">:$attributes)>,
OpBuilder<(ins CArg<"ArrayRef<NamedAttribute>", "{}">:$attributes)>,
OpBuilder<(ins CArg<"const WsloopClauseOps &">:$clauses)>
];

let regions = (region AnyRegion:$region);

let extraClassDeclaration = [{
/// Returns the number of loops in the worksharing-loop nest.
unsigned getNumLoops() { return getLowerBound().size(); }

/// Returns the number of reduction variables.
unsigned getNumReductionVars() { return getReductionVars().size(); }
}];
Expand All @@ -711,9 +702,8 @@ def WsloopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
|`byref` $byref
|`ordered` `(` $ordered_val `)`
|`order` `(` custom<ClauseAttr>($order_val) `)`
) custom<Wsloop>($region, $lowerBound, $upperBound, $step, type($step),
$reduction_vars, type($reduction_vars), $reductions,
$inclusive) attr-dict
) custom<Wsloop>($region, $reduction_vars, type($reduction_vars),
$reductions) attr-dict
}];
let hasVerifier = 1;
}
Expand Down Expand Up @@ -805,8 +795,8 @@ def SimdOp : OpenMP_Op<"simd", [AttrSizedOperandSegments,

def YieldOp : OpenMP_Op<"yield",
[Pure, ReturnLike, Terminator,
ParentOneOf<["LoopNestOp", "WsloopOp", "DeclareReductionOp",
"AtomicUpdateOp", "PrivateClauseOp"]>]> {
ParentOneOf<["AtomicUpdateOp", "DeclareReductionOp", "LoopNestOp",
"PrivateClauseOp"]>]> {
let summary = "loop yield and termination operation";
let description = [{
"omp.yield" yields SSA values from the OpenMP dialect op region and
Expand Down
52 changes: 40 additions & 12 deletions mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -461,18 +461,51 @@ struct ParallelOpLowering : public OpRewritePattern<scf::ParallelOp> {
// Replace the loop.
{
OpBuilder::InsertionGuard allocaGuard(rewriter);
auto loop = rewriter.create<omp::WsloopOp>(
// Create worksharing loop wrapper.
auto wsloopOp = rewriter.create<omp::WsloopOp>(parallelOp.getLoc());
if (!reductionVariables.empty()) {
wsloopOp.setReductionsAttr(
ArrayAttr::get(rewriter.getContext(), reductionDeclSymbols));
wsloopOp.getReductionVarsMutable().append(reductionVariables);
}
rewriter.create<omp::TerminatorOp>(loc); // omp.parallel terminator.

// The wrapper's entry block arguments will define the reduction
// variables.
llvm::SmallVector<mlir::Type> reductionTypes;
reductionTypes.reserve(reductionVariables.size());
llvm::transform(reductionVariables, std::back_inserter(reductionTypes),
[](mlir::Value v) { return v.getType(); });
rewriter.createBlock(
&wsloopOp.getRegion(), {}, reductionTypes,
llvm::SmallVector<mlir::Location>(reductionVariables.size(),
parallelOp.getLoc()));

rewriter.setInsertionPoint(
rewriter.create<omp::TerminatorOp>(parallelOp.getLoc()));

// Create loop nest and populate region with contents of scf.parallel.
auto loopOp = rewriter.create<omp::LoopNestOp>(
parallelOp.getLoc(), parallelOp.getLowerBound(),
parallelOp.getUpperBound(), parallelOp.getStep());
rewriter.create<omp::TerminatorOp>(loc);

rewriter.inlineRegionBefore(parallelOp.getRegion(), loop.getRegion(),
loop.getRegion().begin());
rewriter.inlineRegionBefore(parallelOp.getRegion(), loopOp.getRegion(),
loopOp.getRegion().begin());

// Remove reduction-related block arguments from omp.loop_nest and
// redirect uses to the corresponding omp.wsloop block argument.
mlir::Block &loopOpEntryBlock = loopOp.getRegion().front();
unsigned numLoops = parallelOp.getNumLoops();
rewriter.replaceAllUsesWith(
loopOpEntryBlock.getArguments().drop_front(numLoops),
wsloopOp.getRegion().getArguments());
loopOpEntryBlock.eraseArguments(
numLoops, loopOpEntryBlock.getNumArguments() - numLoops);

Block *ops = rewriter.splitBlock(&*loop.getRegion().begin(),
loop.getRegion().begin()->begin());
Block *ops = rewriter.splitBlock(&*loopOp.getRegion().begin(),
loopOp.getRegion().begin()->begin());

rewriter.setInsertionPointToStart(&*loop.getRegion().begin());
rewriter.setInsertionPointToStart(&*loopOp.getRegion().begin());

auto scope = rewriter.create<memref::AllocaScopeOp>(parallelOp.getLoc(),
TypeRange());
Expand All @@ -481,11 +514,6 @@ struct ParallelOpLowering : public OpRewritePattern<scf::ParallelOp> {
rewriter.mergeBlocks(ops, scopeBlock);
rewriter.setInsertionPointToEnd(&*scope.getBodyRegion().begin());
rewriter.create<memref::AllocaScopeReturnOp>(loc, ValueRange());
if (!reductionVariables.empty()) {
loop.setReductionsAttr(
ArrayAttr::get(rewriter.getContext(), reductionDeclSymbols));
loop.getReductionVarsMutable().append(reductionVariables);
}
}
}

Expand Down
Loading