-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[Flang][OpenMP] Allow host evaluation of loop bounds for distribute #127822
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
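@llvm/pr-subscribers-flang-openmp @llvm/pr-subscribers-flang-fir-hlfir Author: Sergio Afonso (skatrak) Changes: This patch adds `target teams distribute [simd]` and equivalent construct nests to the list of cases where loop bounds can be evaluated on the host. Full diff: https://github.com/llvm/llvm-project/pull/127822.diff 2 Files Affected: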
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index bd794033cdf11..8c80453610473 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -562,8 +562,11 @@ static void processHostEvalClauses(lower::AbstractConverter &converter,
[[fallthrough]];
case OMPD_distribute_parallel_do:
case OMPD_distribute_parallel_do_simd:
- cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv);
cp.processNumThreads(stmtCtx, hostInfo.ops);
+ [[fallthrough]];
+ case OMPD_distribute:
+ case OMPD_distribute_simd:
+ cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv);
break;
// Cases where 'teams' clauses might be present, and target SPMD is
@@ -573,10 +576,8 @@ static void processHostEvalClauses(lower::AbstractConverter &converter,
[[fallthrough]];
case OMPD_target_teams:
cp.processNumTeams(stmtCtx, hostInfo.ops);
- processSingleNestedIf([](Directive nestedDir) {
- return nestedDir == OMPD_distribute_parallel_do ||
- nestedDir == OMPD_distribute_parallel_do_simd;
- });
+ processSingleNestedIf(
+ [](Directive nestedDir) { return topDistributeSet.test(nestedDir); });
break;
// Cases where only 'teams' host-evaluated clauses might be present.
@@ -586,6 +587,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter,
[[fallthrough]];
case OMPD_target_teams_distribute:
case OMPD_target_teams_distribute_simd:
+ cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv);
cp.processNumTeams(stmtCtx, hostInfo.ops);
break;
diff --git a/flang/test/Lower/OpenMP/host-eval.f90 b/flang/test/Lower/OpenMP/host-eval.f90
index 32c52462b86a7..65258c91e5daf 100644
--- a/flang/test/Lower/OpenMP/host-eval.f90
+++ b/flang/test/Lower/OpenMP/host-eval.f90
@@ -155,3 +155,106 @@ subroutine distribute_parallel_do_simd()
!$omp end distribute parallel do simd
!$omp end teams
end subroutine distribute_parallel_do_simd
+
+! BOTH-LABEL: func.func @_QPdistribute
+subroutine distribute()
+ ! BOTH: omp.target
+
+ ! HOST-SAME: host_eval(%{{.*}} -> %[[LB:.*]], %{{.*}} -> %[[UB:.*]], %{{.*}} -> %[[STEP:.*]] : i32, i32, i32)
+
+ ! DEVICE-NOT: host_eval({{.*}})
+ ! DEVICE-SAME: {
+
+ ! BOTH: omp.teams
+ !$omp target teams
+
+ ! BOTH: omp.distribute
+ ! BOTH-NEXT: omp.loop_nest
+
+ ! HOST-SAME: (%{{.*}}) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]])
+ !$omp distribute
+ do i=1,10
+ call foo()
+ end do
+ !$omp end distribute
+ !$omp end target teams
+
+ ! BOTH: omp.target
+ ! BOTH-NOT: host_eval({{.*}})
+ ! BOTH-SAME: {
+ ! BOTH: omp.teams
+ !$omp target teams
+ call foo() !< Prevents this from being Generic-SPMD.
+
+ ! BOTH: omp.distribute
+ !$omp distribute
+ do i=1,10
+ call foo()
+ end do
+ !$omp end distribute
+ !$omp end target teams
+
+ ! BOTH: omp.teams
+ !$omp teams
+
+ ! BOTH: omp.distribute
+ !$omp distribute
+ do i=1,10
+ call foo()
+ end do
+ !$omp end distribute
+ !$omp end teams
+end subroutine distribute
+
+! BOTH-LABEL: func.func @_QPdistribute_simd
+subroutine distribute_simd()
+ ! BOTH: omp.target
+
+ ! HOST-SAME: host_eval(%{{.*}} -> %[[LB:.*]], %{{.*}} -> %[[UB:.*]], %{{.*}} -> %[[STEP:.*]] : i32, i32, i32)
+
+ ! DEVICE-NOT: host_eval({{.*}})
+ ! DEVICE-SAME: {
+
+ ! BOTH: omp.teams
+ !$omp target teams
+
+ ! BOTH: omp.distribute
+ ! BOTH-NEXT: omp.simd
+ ! BOTH-NEXT: omp.loop_nest
+
+ ! HOST-SAME: (%{{.*}}) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]])
+ !$omp distribute simd
+ do i=1,10
+ call foo()
+ end do
+ !$omp end distribute simd
+ !$omp end target teams
+
+ ! BOTH: omp.target
+ ! BOTH-NOT: host_eval({{.*}})
+ ! BOTH-SAME: {
+ ! BOTH: omp.teams
+ !$omp target teams
+ call foo() !< Prevents this from being Generic-SPMD.
+
+ ! BOTH: omp.distribute
+ ! BOTH-NEXT: omp.simd
+ !$omp distribute simd
+ do i=1,10
+ call foo()
+ end do
+ !$omp end distribute simd
+ !$omp end target teams
+
+ ! BOTH: omp.teams
+ !$omp teams
+
+ ! BOTH: omp.distribute
+ ! BOTH-NEXT: omp.simd
+ !$omp distribute simd
+ do i=1,10
+ call foo()
+ end do
+ !$omp end distribute simd
+ !$omp end teams
+end subroutine distribute_simd
|
33409d2
to
d5cd2ca
Compare
0e96e97
to
c3db0a3
Compare
d5cd2ca
to
b074390
Compare
c3db0a3
to
25e308a
Compare
b074390
to
36e1b5f
Compare
25e308a
to
de75db2
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
36e1b5f
to
0365152
Compare
de75db2
to
bd05899
Compare
0365152
to
146d4ba
Compare
This patch adds `target teams distribute [simd]` and equivalent construct nests to the list of cases where loop bounds can be evaluated on the host, as they represent Generic-SPMD kernels for which the trip count must also be evaluated in advance of the kernel call.
bd05899
to
23998b5
Compare
This patch adds `target teams distribute [simd]` and equivalent construct nests to the list of cases where loop bounds can be evaluated on the host, as they represent kernels for which the trip count must also be evaluated in advance of the kernel call.