-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[flang][OpenMP] Add reduction
clause support to loop
directive
#128849
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,6 +15,8 @@ | |
#include "mlir/Transforms/DialectConversion.h" | ||
|
||
#include <memory> | ||
#include <optional> | ||
#include <type_traits> | ||
|
||
namespace flangomp { | ||
#define GEN_PASS_DEF_GENERICLOOPCONVERSIONPASS | ||
|
@@ -58,7 +60,7 @@ class GenericLoopConversionPattern | |
if (teamsLoopCanBeParallelFor(loopOp)) | ||
rewriteToDistributeParallelDo(loopOp, rewriter); | ||
else | ||
rewriteToDistrbute(loopOp, rewriter); | ||
rewriteToDistribute(loopOp, rewriter); | ||
break; | ||
} | ||
|
||
|
@@ -77,9 +79,6 @@ class GenericLoopConversionPattern | |
if (loopOp.getOrder()) | ||
return todo("order"); | ||
|
||
if (!loopOp.getReductionVars().empty()) | ||
return todo("reduction"); | ||
|
||
return mlir::success(); | ||
} | ||
|
||
|
@@ -168,7 +167,7 @@ class GenericLoopConversionPattern | |
case ClauseBindKind::Parallel: | ||
return rewriteToWsloop(loopOp, rewriter); | ||
case ClauseBindKind::Teams: | ||
return rewriteToDistrbute(loopOp, rewriter); | ||
return rewriteToDistribute(loopOp, rewriter); | ||
case ClauseBindKind::Thread: | ||
return rewriteToSimdLoop(loopOp, rewriter); | ||
} | ||
|
@@ -211,8 +210,9 @@ class GenericLoopConversionPattern | |
loopOp, rewriter); | ||
} | ||
|
||
void rewriteToDistrbute(mlir::omp::LoopOp loopOp, | ||
mlir::ConversionPatternRewriter &rewriter) const { | ||
void rewriteToDistribute(mlir::omp::LoopOp loopOp, | ||
mlir::ConversionPatternRewriter &rewriter) const { | ||
assert(loopOp.getReductionVars().empty()); | ||
rewriteToSingleWrapperOp<mlir::omp::DistributeOp, | ||
mlir::omp::DistributeOperands>(loopOp, rewriter); | ||
} | ||
|
@@ -246,6 +246,12 @@ class GenericLoopConversionPattern | |
Fortran::common::openmp::EntryBlockArgs args; | ||
args.priv.vars = clauseOps.privateVars; | ||
|
||
if constexpr (!std::is_same_v<OpOperandsTy, | ||
mlir::omp::DistributeOperands>) { | ||
populateReductionClauseOps(loopOp, clauseOps); | ||
args.reduction.vars = clauseOps.reductionVars; | ||
} | ||
|
||
auto wrapperOp = rewriter.create<OpTy>(loopOp.getLoc(), clauseOps); | ||
mlir::Block *opBlock = genEntryBlock(rewriter, args, wrapperOp.getRegion()); | ||
|
||
|
@@ -275,8 +281,7 @@ class GenericLoopConversionPattern | |
|
||
auto parallelOp = rewriter.create<mlir::omp::ParallelOp>(loopOp.getLoc(), | ||
parallelClauseOps); | ||
mlir::Block *parallelBlock = | ||
genEntryBlock(rewriter, parallelArgs, parallelOp.getRegion()); | ||
genEntryBlock(rewriter, parallelArgs, parallelOp.getRegion()); | ||
parallelOp.setComposite(true); | ||
rewriter.setInsertionPoint( | ||
rewriter.create<mlir::omp::TerminatorOp>(loopOp.getLoc())); | ||
|
@@ -288,20 +293,54 @@ class GenericLoopConversionPattern | |
rewriter.createBlock(&distributeOp.getRegion()); | ||
|
||
mlir::omp::WsloopOperands wsloopClauseOps; | ||
populateReductionClauseOps(loopOp, wsloopClauseOps); | ||
Fortran::common::openmp::EntryBlockArgs wsloopArgs; | ||
wsloopArgs.reduction.vars = wsloopClauseOps.reductionVars; | ||
|
||
auto wsloopOp = | ||
rewriter.create<mlir::omp::WsloopOp>(loopOp.getLoc(), wsloopClauseOps); | ||
wsloopOp.setComposite(true); | ||
rewriter.createBlock(&wsloopOp.getRegion()); | ||
genEntryBlock(rewriter, wsloopArgs, wsloopOp.getRegion()); | ||
|
||
mlir::IRMapping mapper; | ||
mlir::Block &loopBlock = *loopOp.getRegion().begin(); | ||
|
||
for (auto [loopOpArg, parallelOpArg] : llvm::zip_equal( | ||
loopBlock.getArguments(), parallelBlock->getArguments())) | ||
auto loopBlockInterface = | ||
llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*loopOp); | ||
auto parallelBlockInterface = | ||
llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*parallelOp); | ||
auto wsloopBlockInterface = | ||
llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*wsloopOp); | ||
|
||
for (auto [loopOpArg, parallelOpArg] : | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: Please add a comment to explain why we can match My concern is that, if at some point we introduce any other entry block arguments to the Actually, I think that casting these ops to their There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Used the |
||
llvm::zip_equal(loopBlockInterface.getPrivateBlockArgs(), | ||
parallelBlockInterface.getPrivateBlockArgs())) | ||
mapper.map(loopOpArg, parallelOpArg); | ||
|
||
for (auto [loopOpArg, wsloopOpArg] : | ||
llvm::zip_equal(loopBlockInterface.getReductionBlockArgs(), | ||
wsloopBlockInterface.getReductionBlockArgs())) | ||
mapper.map(loopOpArg, wsloopOpArg); | ||
|
||
rewriter.clone(*loopOp.begin(), mapper); | ||
} | ||
|
||
void | ||
populateReductionClauseOps(mlir::omp::LoopOp loopOp, | ||
mlir::omp::ReductionClauseOps &clauseOps) const { | ||
clauseOps.reductionMod = loopOp.getReductionModAttr(); | ||
clauseOps.reductionVars = loopOp.getReductionVars(); | ||
|
||
std::optional<mlir::ArrayAttr> reductionSyms = loopOp.getReductionSyms(); | ||
if (reductionSyms) | ||
clauseOps.reductionSyms.assign(reductionSyms->begin(), | ||
reductionSyms->end()); | ||
|
||
std::optional<llvm::ArrayRef<bool>> reductionByref = | ||
loopOp.getReductionByref(); | ||
if (reductionByref) | ||
clauseOps.reductionByref.assign(reductionByref->begin(), | ||
reductionByref->end()); | ||
} | ||
}; | ||
|
||
class GenericLoopConversionPass | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -75,7 +75,7 @@ subroutine test_order() | |
subroutine test_reduction() | ||
integer :: i, dummy = 1 | ||
|
||
! CHECK: omp.loop private(@{{.*}} %{{.*}}#0 -> %{{.*}} : !{{.*}}) reduction | ||
! CHECK: omp.simd private(@{{.*}} %{{.*}}#0 -> %{{.*}} : !{{.*}}) reduction | ||
! CHECK-SAME: (@[[RED]] %{{.*}}#0 -> %[[DUMMY_ARG:.*]] : !{{.*}}) { | ||
! CHECK-NEXT: omp.loop_nest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) {{.*}} { | ||
! CHECK: %[[DUMMY_DECL:.*]]:2 = hlfir.declare %[[DUMMY_ARG]] {uniq_name = "_QFtest_reductionEdummy"} | ||
|
@@ -294,3 +294,46 @@ subroutine teams_loop_cannot_be_parallel_for_4 | |
!$omp end parallel | ||
END DO | ||
end subroutine | ||
|
||
! CHECK-LABEL: func.func @_QPloop_parallel_bind_reduction | ||
subroutine loop_parallel_bind_reduction | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you add one of these reduction tests for the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
implicit none | ||
integer :: x, i | ||
|
||
! CHECK: omp.wsloop | ||
! CHECK-SAME: private(@{{[^[:space:]]+}} %{{[^[:space:]]+}}#0 -> %[[PRIV_ARG:[^[:space:]]+]] : !fir.ref<i32>) | ||
! CHECK-SAME: reduction(@add_reduction_i32 %{{.*}}#0 -> %[[RED_ARG:.*]] : !fir.ref<i32>) { | ||
! CHECK-NEXT: omp.loop_nest {{.*}} { | ||
! CHECK-NEXT: hlfir.declare %[[PRIV_ARG]] {uniq_name = "_QF{{.*}}Ei"} | ||
! CHECK-NEXT: hlfir.declare %[[RED_ARG]] {uniq_name = "_QF{{.*}}Ex"} | ||
! CHECK: } | ||
! CHECK: } | ||
!$omp loop bind(parallel) reduction(+: x) | ||
do i = 0, 10 | ||
x = x + i | ||
end do | ||
end subroutine | ||
|
||
! CHECK-LABEL: func.func @_QPloop_teams_loop_reduction | ||
subroutine loop_teams_loop_reduction | ||
implicit none | ||
integer :: x, i | ||
! CHECK: omp.teams { | ||
! CHECK: omp.parallel | ||
! CHECK-SAME: private(@{{[^[:space:]]+}} %{{[^[:space:]]+}}#0 -> %[[PRIV_ARG:[^[:space:]]+]] : !fir.ref<i32>) { | ||
! CHECK: omp.distribute { | ||
! CHECK: omp.wsloop | ||
! CHECK-SAME: reduction(@add_reduction_i32 %{{.*}}#0 -> %[[RED_ARG:.*]] : !fir.ref<i32>) { | ||
! CHECK-NEXT: omp.loop_nest {{.*}} { | ||
! CHECK-NEXT: hlfir.declare %[[PRIV_ARG]] {uniq_name = "_QF{{.*}}Ei"} | ||
! CHECK-NEXT: hlfir.declare %[[RED_ARG]] {uniq_name = "_QF{{.*}}Ex"} | ||
! CHECK: } | ||
! CHECK: } | ||
! CHECK: } | ||
! CHECK: } | ||
! CHECK: } | ||
!$omp teams loop reduction(+: x) | ||
do i = 0, 10 | ||
x = x + i | ||
end do | ||
end subroutine |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm guessing the compound directive splitting in the frontend already makes sure that
`!$omp teams loop reduction(...)`
only adds the `reduction` clause to the `teams` leaf construct, so that this assert is not triggered.

We probably want to make that check in the MLIR verifier, though, since we can't rely on Flang lowering being the only source of OpenMP dialect operations. If it's an `omp.loop` taking the implicit role of an `omp.distribute`, then we need to ensure it doesn't define a `reduction` clause.

We'd probably want to move the checks in `GenericLoopConversionPattern::rewriteStandaloneLoop`, `GenericLoopConversionPattern::matchAndRewrite` and related utility functions from here to an `extraClassDeclaration` of `mlir::omp::LoopOp` to do this properly. Something like `LoopOpRole mlir::omp::LoopOp::getLoopRole()`, where the possible values are `simd`, `wsloop`, `distribute` and `distribute parallel wsloop`.

This can come as a follow-up PR, not something to be implemented here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We have a semantic check for this: #128823. We can look into the verifier improvement in a later PR — makes sense.