Skip to content

Commit 6333f84

Browse files
authored
[flang][OpenMP] Move reductions from loop to teams when loop is mapped to distribute (#132920)
Follow-up to #132003, in particular, see #132003 (comment).

This PR extends reduction support for `loop` directives. Consider the following scenario:

```fortran
subroutine bar
  implicit none
  integer :: x, i

  !$omp teams loop reduction(+: x)
  DO i = 1, 5
    call foo()
  END DO
end subroutine
```

Note the following:

* According to the spec, the `reduction` clause will be attached to `loop` during earlier stages in the compiler.
* Additionally, `loop` cannot be mapped to `distribute parallel for` due to the call to a foreign function inside the loop's body.
* Therefore, `loop` must be mapped to `distribute`.
* However, `distribute` does not have `reduction` clauses.
* As a result, we have to move the `reduction`s from the `loop` to its parent `teams` directive, which is what is done by this PR.
1 parent 8e7d6ba commit 6333f84

File tree

2 files changed

+67
-2
lines changed

2 files changed

+67
-2
lines changed

flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,38 @@ class GenericLoopConversionPattern
5757
rewriteToWsloop(loopOp, rewriter);
5858
break;
5959
case GenericLoopCombinedInfo::TeamsLoop:
60-
if (teamsLoopCanBeParallelFor(loopOp))
60+
if (teamsLoopCanBeParallelFor(loopOp)) {
6161
rewriteToDistributeParallelDo(loopOp, rewriter);
62-
else
62+
} else {
63+
auto teamsOp = llvm::cast<mlir::omp::TeamsOp>(loopOp->getParentOp());
64+
auto teamsBlockArgIface =
65+
llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*teamsOp);
66+
auto loopBlockArgIface =
67+
llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*loopOp);
68+
69+
for (unsigned i = 0; i < loopBlockArgIface.numReductionBlockArgs();
70+
++i) {
71+
mlir::BlockArgument loopRedBlockArg =
72+
loopBlockArgIface.getReductionBlockArgs()[i];
73+
mlir::BlockArgument teamsRedBlockArg =
74+
teamsBlockArgIface.getReductionBlockArgs()[i];
75+
rewriter.replaceAllUsesWith(loopRedBlockArg, teamsRedBlockArg);
76+
}
77+
78+
for (unsigned i = 0; i < loopBlockArgIface.numReductionBlockArgs();
79+
++i) {
80+
loopOp.getRegion().eraseArgument(
81+
loopBlockArgIface.getReductionBlockArgsStart());
82+
}
83+
84+
loopOp.removeReductionModAttr();
85+
loopOp.getReductionVarsMutable().clear();
86+
loopOp.removeReductionByrefAttr();
87+
loopOp.removeReductionSymsAttr();
88+
6389
rewriteToDistribute(loopOp, rewriter);
90+
}
91+
6492
break;
6593
}
6694

flang/test/Lower/OpenMP/loop-directive.f90

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,3 +358,40 @@ subroutine multi_block_teams
358358
end select
359359
!$omp end target teams
360360
end subroutine
361+
362+
363+
! Verifies that reductions are hoisted to the parent `teams` directive and removed
364+
! from the `loop` directive when `loop` is mapped to `distribute`.
365+
366+
! CHECK-LABEL: func.func @_QPteams_loop_cannot_be_parallel_for_with_reductions
367+
subroutine teams_loop_cannot_be_parallel_for_with_reductions
368+
implicit none
369+
integer :: x, y, i, p
370+
371+
! CHECK: %[[ADD_RED:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QF{{.*}}Ex"}
372+
! CHECK: %[[MUL_RED:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QF{{.*}}Ey"}
373+
! CHECK: omp.teams reduction(
374+
! CHECK-SAME: @add_reduction_i32 %[[ADD_RED]]#0 -> %[[ADD_RED_ARG:[^[:space:]]*]],
375+
! CHECK-SAME: @multiply_reduction_i32 %[[MUL_RED]]#0 -> %[[MUL_RED_ARG:.*]] : {{.*}}) {
376+
377+
! CHECK: omp.distribute private(@{{.*}} %{{.*}} -> %{{.*}}, @{{.*}} %{{.*}} -> %{{.*}} : {{.*}}) {
378+
! CHECK: %[[ADD_RED_DECL:.*]]:2 = hlfir.declare %[[ADD_RED_ARG]] {uniq_name = "_QF{{.*}}Ex"}
379+
! CHECK: %[[MUL_RED_DECL:.*]]:2 = hlfir.declare %[[MUL_RED_ARG]] {uniq_name = "_QF{{.*}}Ey"}
380+
381+
! CHECK: %[[ADD_RES:.*]] = arith.addi %{{.*}}, %{{.*}} : i32
382+
! CHECK: hlfir.assign %[[ADD_RES]] to %[[ADD_RED_DECL]]#0 : i32, !fir.ref<i32>
383+
384+
! CHECK: %[[MUL_RES:.*]] = arith.muli %{{.*}}, %{{.*}} : i32
385+
! CHECK: hlfir.assign %[[MUL_RES]] to %[[MUL_RED_DECL]]#0 : i32, !fir.ref<i32>
386+
! CHECK: omp.yield
387+
! CHECK: }
388+
! CHECK: omp.terminator
389+
! CHECK: }
390+
!$omp teams loop reduction(+: x) reduction(*: y) private(p)
391+
do i = 1, 5
392+
call foo()
393+
x = x + i
394+
y = y * i
395+
p = 42
396+
end do
397+
end subroutine

0 commit comments

Comments
 (0)