[flang][OpenMP] Add reduction clause support to loop directive #128849
Conversation
@llvm/pr-subscribers-flang-openmp

Author: Kareem Ergawy (ergawy)

Changes

Extends `loop` directive transformation by adding support for the `reduction` clause.

Full diff: https://github.com/llvm/llvm-project/pull/128849.diff

3 Files Affected:
- flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
- flang/test/Lower/OpenMP/loop-directive.f90
- flang/test/Transforms/generic-loop-rewriting-todo.mlir
diff --git a/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp b/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
index bf94166edc079..7a9f65982e69c 100644
--- a/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
+++ b/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
@@ -15,6 +15,8 @@
#include "mlir/Transforms/DialectConversion.h"
#include <memory>
+#include <optional>
+#include <type_traits>
namespace flangomp {
#define GEN_PASS_DEF_GENERICLOOPCONVERSIONPASS
@@ -77,9 +79,6 @@ class GenericLoopConversionPattern
if (loopOp.getOrder())
return todo("order");
- if (!loopOp.getReductionVars().empty())
- return todo("reduction");
-
return mlir::success();
}
@@ -213,6 +212,7 @@ class GenericLoopConversionPattern
void rewriteToDistrbute(mlir::omp::LoopOp loopOp,
mlir::ConversionPatternRewriter &rewriter) const {
+ assert(loopOp.getReductionVars().empty());
rewriteToSingleWrapperOp<mlir::omp::DistributeOp,
mlir::omp::DistributeOperands>(loopOp, rewriter);
}
@@ -246,6 +246,12 @@ class GenericLoopConversionPattern
Fortran::common::openmp::EntryBlockArgs args;
args.priv.vars = clauseOps.privateVars;
+ if constexpr (!std::is_same_v<OpOperandsTy,
+ mlir::omp::DistributeOperands>) {
+ populateReductionClauseOps(loopOp, clauseOps);
+ args.reduction.vars = clauseOps.reductionVars;
+ }
+
auto wrapperOp = rewriter.create<OpTy>(loopOp.getLoc(), clauseOps);
mlir::Block *opBlock = genEntryBlock(rewriter, args, wrapperOp.getRegion());
@@ -288,20 +294,51 @@ class GenericLoopConversionPattern
rewriter.createBlock(&distributeOp.getRegion());
mlir::omp::WsloopOperands wsloopClauseOps;
+ populateReductionClauseOps(loopOp, wsloopClauseOps);
+ Fortran::common::openmp::EntryBlockArgs wsloopArgs;
+ wsloopArgs.reduction.vars = wsloopClauseOps.reductionVars;
+
auto wsloopOp =
rewriter.create<mlir::omp::WsloopOp>(loopOp.getLoc(), wsloopClauseOps);
wsloopOp.setComposite(true);
- rewriter.createBlock(&wsloopOp.getRegion());
+ mlir::Block *loopBlock =
+ genEntryBlock(rewriter, wsloopArgs, wsloopOp.getRegion());
mlir::IRMapping mapper;
- mlir::Block &loopBlock = *loopOp.getRegion().begin();
- for (auto [loopOpArg, parallelOpArg] : llvm::zip_equal(
- loopBlock.getArguments(), parallelBlock->getArguments()))
+ auto loopBlockInterface =
+ llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*loopOp);
+
+ for (auto [loopOpArg, parallelOpArg] :
+ llvm::zip_equal(loopBlockInterface.getPrivateBlockArgs(),
+ parallelBlock->getArguments()))
mapper.map(loopOpArg, parallelOpArg);
+ for (auto [loopOpArg, wsloopOpArg] :
+ llvm::zip_equal(loopBlockInterface.getReductionBlockArgs(),
+ loopBlock->getArguments()))
+ mapper.map(loopOpArg, wsloopOpArg);
+
rewriter.clone(*loopOp.begin(), mapper);
}
+
+ template <typename OpOperandsTy>
+ void populateReductionClauseOps(mlir::omp::LoopOp loopOp,
+ OpOperandsTy &clauseOps) const {
+ clauseOps.reductionMod = loopOp.getReductionModAttr();
+ clauseOps.reductionVars = loopOp.getReductionVars();
+
+ std::optional<mlir::ArrayAttr> reductionSyms = loopOp.getReductionSyms();
+ if (reductionSyms)
+ clauseOps.reductionSyms.assign(reductionSyms->begin(),
+ reductionSyms->end());
+
+ std::optional<llvm::ArrayRef<bool>> reductionByref =
+ loopOp.getReductionByref();
+ if (reductionByref)
+ clauseOps.reductionByref.assign(reductionByref->begin(),
+ reductionByref->end());
+ }
};
class GenericLoopConversionPass
diff --git a/flang/test/Lower/OpenMP/loop-directive.f90 b/flang/test/Lower/OpenMP/loop-directive.f90
index ffa4a6ff24f24..7ee23ac5f5679 100644
--- a/flang/test/Lower/OpenMP/loop-directive.f90
+++ b/flang/test/Lower/OpenMP/loop-directive.f90
@@ -75,7 +75,7 @@ subroutine test_order()
subroutine test_reduction()
integer :: i, dummy = 1
- ! CHECK: omp.loop private(@{{.*}} %{{.*}}#0 -> %{{.*}} : !{{.*}}) reduction
+ ! CHECK: omp.simd private(@{{.*}} %{{.*}}#0 -> %{{.*}} : !{{.*}}) reduction
! CHECK-SAME: (@[[RED]] %{{.*}}#0 -> %[[DUMMY_ARG:.*]] : !{{.*}}) {
! CHECK-NEXT: omp.loop_nest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) {{.*}} {
! CHECK: %[[DUMMY_DECL:.*]]:2 = hlfir.declare %[[DUMMY_ARG]] {uniq_name = "_QFtest_reductionEdummy"}
@@ -294,3 +294,39 @@ subroutine teams_loop_cannot_be_parallel_for_4
!$omp end parallel
END DO
end subroutine
+
+! CHECK-LABEL: func.func @_QPloop_parallel_bind_reduction
+subroutine loop_parallel_bind_reduction
+ implicit none
+ integer :: x, i
+
+ ! CHECK: omp.wsloop
+ ! CHECK-SAME: private(@{{[^[:space:]]+}} %{{[^[:space:]]+}}#0 -> %[[PRIV_ARG:[^[:space:]]+]] : !fir.ref<i32>)
+ ! CHECK-SAME: reduction(@add_reduction_i32 %{{.*}}#0 -> %[[RED_ARG:.*]] : !fir.ref<i32>) {
+ ! CHECK-NEXT: omp.loop_nest {{.*}} {
+ ! CHECK-NEXT: hlfir.declare %[[PRIV_ARG]] {uniq_name = "_QF{{.*}}Ei"}
+ ! CHECK-NEXT: hlfir.declare %[[RED_ARG]] {uniq_name = "_QF{{.*}}Ex"}
+ ! CHECK: }
+ ! CHECK: }
+ !$omp loop bind(parallel) reduction(+: x)
+ do i = 0, 10
+ x = x + i
+ end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPteams_loop_reduction
+subroutine teams_loop_reduction
+ implicit none
+ integer :: x, i
+
+ ! CHECK: omp.wsloop
+ ! CHECK-SAME: reduction(@add_reduction_i32 %{{.*}}#0 -> %[[RED_ARG:.*]] : !fir.ref<i32>) {
+ ! CHECK-NEXT: omp.loop_nest {{.*}} {
+ ! CHECK: hlfir.declare %[[RED_ARG]] {uniq_name = "_QF{{.*}}Ex"}
+ ! CHECK: }
+ ! CHECK: }
+ !$omp loop bind(parallel) reduction(+: x)
+ do i = 0, 10
+ x = x + i
+ end do
+end subroutine
diff --git a/flang/test/Transforms/generic-loop-rewriting-todo.mlir b/flang/test/Transforms/generic-loop-rewriting-todo.mlir
index e992296c9a837..64094d61eb9a3 100644
--- a/flang/test/Transforms/generic-loop-rewriting-todo.mlir
+++ b/flang/test/Transforms/generic-loop-rewriting-todo.mlir
@@ -1,24 +1,12 @@
// RUN: fir-opt --omp-generic-loop-conversion -verify-diagnostics %s
-
-omp.declare_reduction @add_reduction_i32 : i32 init {
- ^bb0(%arg0: i32):
- %c0_i32 = arith.constant 0 : i32
- omp.yield(%c0_i32 : i32)
- } combiner {
- ^bb0(%arg0: i32, %arg1: i32):
- %0 = arith.addi %arg0, %arg1 : i32
- omp.yield(%0 : i32)
- }
-
func.func @_QPloop_order() {
omp.teams {
%c0 = arith.constant 0 : i32
%c10 = arith.constant 10 : i32
%c1 = arith.constant 1 : i32
- %sum = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFtest_orderEi"}
- // expected-error@below {{not yet implemented: Unhandled clause reduction in omp.loop operation}}
- omp.loop reduction(@add_reduction_i32 %sum -> %arg2 : !fir.ref<i32>) {
+ // expected-error@below {{not yet implemented: Unhandled clause order in omp.loop operation}}
+ omp.loop order(reproducible:concurrent) {
omp.loop_nest (%arg3) : i32 = (%c0) to (%c10) inclusive step (%c1) {
omp.yield
}
@llvm/pr-subscribers-flang-fir-hlfir — Author: Kareem Ergawy (ergawy). Same summary and full diff as above.
Force-pushed from 8f2c57e to 5b88e7d
Force-pushed from 8ec979d to 20a2501
Thank you Kareem, a couple comments from me.
@@ -213,6 +212,7 @@ class GenericLoopConversionPattern
  void rewriteToDistrbute(mlir::omp::LoopOp loopOp,
                          mlir::ConversionPatternRewriter &rewriter) const {
    assert(loopOp.getReductionVars().empty());
I'm guessing the compound directive splitting in the frontend already makes sure that `!$omp teams loop reduction(...)` only adds the `reduction` clause to the `teams` leaf construct, so that this assert is not triggered.

We probably want to make that check in the MLIR verifier, though, since we can't rely on Flang lowering being the only source of OpenMP dialect operations. If it's an `omp.loop` taking the implicit role of an `omp.distribute`, then we need to ensure it doesn't define a `reduction` clause.

We'd probably want to move the checks in `GenericLoopConversionPattern::rewriteStandaloneLoop`, `GenericLoopConversionPattern::matchAndRewrite` and related utility functions from here to an `extraClassDeclaration` of `mlir::omp::LoopOp` to do this properly. Something like `LoopOpRole mlir::omp::LoopOp::getLoopRole()`, where the possible values are `simd`, `wsloop`, `distribute` and `distribute parallel wsloop`.

This can come as a follow-up PR, not something to be implemented here.
We have a semantic check for this: #128823. Can look into the verifier improvement in a later PR, makes sense.
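For illustration, a rough sketch of the suggested follow-up. Neither `LoopOpRole` nor `getLoopRole()` exists in the OpenMP dialect today; the accessor would really live in an `extraClassDeclaration` of `mlir::omp::LoopOp`, and a real implementation would also have to consult the loop's `bind` clause, which is omitted here:

```cpp
// Hypothetical sketch only; names and dispatch logic are illustrative, not
// the actual OpenMP dialect API.
enum class LoopOpRole { Simd, Wsloop, Distribute, DistributeParallelWsloop };

static LoopOpRole getLoopRole(mlir::omp::LoopOp loopOp) {
  mlir::Operation *parent = loopOp->getParentOp();
  // An omp.loop nested directly inside omp.parallel plays the wsloop role.
  if (llvm::isa_and_nonnull<mlir::omp::ParallelOp>(parent))
    return LoopOpRole::Wsloop;
  // Nested directly inside omp.teams it plays the distribute role, or the
  // composite "distribute parallel wsloop" role when it may be parallelized.
  // Distinguishing those two cases needs the same analysis the conversion
  // pattern already performs, so it is not repeated here.
  if (llvm::isa_and_nonnull<mlir::omp::TeamsOp>(parent))
    return LoopOpRole::DistributeParallelWsloop;
  // Standalone loops are treated as simd by the current conversion.
  return LoopOpRole::Simd;
}
```

A verifier check could then reject an `omp.loop` whose role is `Distribute` but which defines a `reduction` clause, independently of how the IR was produced.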
auto loopBlockInterface =
    llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*loopOp);

for (auto [loopOpArg, parallelOpArg] :
Nit: Please add a comment to explain why we can match `private` entry block arguments to all of the entry block arguments of `omp.parallel`, and the same below with respect to `reduction` and `omp.loop`.

My concern is that, if at some point we introduce any other entry block arguments to the `omp.parallel` or `omp.loop` operations, this here will break and troubleshooting it won't be straightforward.

Actually, I think that casting these ops to their `BlockArgOpenMPOpInterface` and accessing their arguments via `get{Private,Reduction}BlockArgs` makes this much more resilient to future updates.
Used the `BlockArgOpenMPOpInterface` to access the relevant args.
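For reference, a sketch of what that looks like with the rationale spelled out as comments. It mirrors the mapping code in the diff above and assumes the surrounding context of the teams-loop rewrite in GenericLoopConversion.cpp (`loopOp`, `mapper`, `parallelBlock`, and `loopBlock` as defined there):

```cpp
// Go through BlockArgOpenMPOpInterface rather than matching block arguments
// positionally, so the mapping keeps working if omp.loop or omp.parallel
// ever grow additional kinds of entry block arguments.
auto loopBlockInterface =
    llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*loopOp);

// Only the private vars were attached to the new omp.parallel, so its entry
// block arguments correspond exactly to the loop's private block arguments.
for (auto [loopOpArg, parallelOpArg] :
     llvm::zip_equal(loopBlockInterface.getPrivateBlockArgs(),
                     parallelBlock->getArguments()))
  mapper.map(loopOpArg, parallelOpArg);

// The reduction vars were moved onto the new omp.wsloop, so the loop's
// reduction block arguments map to the wsloop entry block arguments.
for (auto [loopOpArg, wsloopOpArg] :
     llvm::zip_equal(loopBlockInterface.getReductionBlockArgs(),
                     loopBlock->getArguments()))
  mapper.map(loopOpArg, wsloopOpArg);
```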
Force-pushed from 20a2501 to 5539c20
LGTM, thanks!
@@ -294,3 +294,22 @@ subroutine teams_loop_cannot_be_parallel_for_4
  !$omp end parallel
  END DO
end subroutine

! CHECK-LABEL: func.func @_QPloop_parallel_bind_reduction
subroutine loop_parallel_bind_reduction
Can you add one of these reduction tests for the `distribute parallel do` case? That way we test the `rewriteToDistributeParallelDo` function.
Done.
Extends `loop` directive transformation by adding support for the `reduction` clause.
Force-pushed from 5539c20 to d096245
…lvm#128849) Extends `loop` directive transformation by adding support for the `reduction` clause.
Extends `loop` directive transformation by adding support for the `reduction` clause.