-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[OpenMP] Fix task state and taskteams for serial teams #86859
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
* Serial teams now use a stack (similar to dispatch buffers) * Serial teams always use t_task_team[0] as the task team and the second pointer is a next pointer for the stack t_task_team[2] is interpreted as a stack of task teams where each level is a nested level inner serial team outer serial team [ t_task_team[0] ] -> (task_team) [ t_task_team[0] ] -> (task_team) [ next ] ----------------> [ next ] -> ... * Remove the task state memo stack from thread structure. * Instead of a thread-private stack, use team structure to store th_task_state of the primary thread. When coming out of a parallel, restore the primary thread's task state. The new field in the team structure doesn't cause sizeof(team) to change and is in the cache line which is only read/written by the primary thread. Fixes: llvm#50602 Fixes: llvm#69368 Fixes: llvm#69733 Fixes: llvm#79416
You can test this locally with the following command:git-clang-format --diff b43ec8e62b5f5a39be378c460339217511261400 5da12c265af119013b75185ef02d333f620d2fd5 -- openmp/runtime/test/target/issue-81488.c openmp/runtime/test/tasking/issue-50602.c openmp/runtime/test/tasking/issue-69368.c openmp/runtime/test/tasking/issue-69733.c openmp/runtime/test/tasking/issue-79416.c openmp/runtime/test/tasking/task_teams_stress_test.cpp openmp/runtime/src/kmp.h openmp/runtime/src/kmp_barrier.cpp openmp/runtime/src/kmp_csupport.cpp openmp/runtime/src/kmp_runtime.cpp openmp/runtime/src/kmp_tasking.cpp View the diff from clang-format here.diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index 290869620a..e573c11d1e 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -4164,10 +4164,10 @@ void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team) {
KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
if (team->t.t_task_team[0] == NULL) {
team->t.t_task_team[0] = __kmp_allocate_task_team(this_thr, team);
- KA_TRACE(20,
- ("__kmp_task_team_setup: Primary T#%d created new task_team %p"
- " for serial/root team %p\n",
- __kmp_gtid_from_thread(this_thr), team->t.t_task_team[0], team));
+ KA_TRACE(
+ 20, ("__kmp_task_team_setup: Primary T#%d created new task_team %p"
+ " for serial/root team %p\n",
+ __kmp_gtid_from_thread(this_thr), team->t.t_task_team[0], team));
} else
__kmp_task_team_init(team->t.t_task_team[0], team);
@@ -4197,8 +4197,7 @@ void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team) {
int other_team = 1 - this_thr->th.th_task_state;
KMP_DEBUG_ASSERT(other_team >= 0 && other_team < 2);
if (team->t.t_task_team[other_team] == NULL) { // setup other team as well
- team->t.t_task_team[other_team] =
- __kmp_allocate_task_team(this_thr, team);
+ team->t.t_task_team[other_team] = __kmp_allocate_task_team(this_thr, team);
KA_TRACE(20, ("__kmp_task_team_setup: Primary T#%d created second new "
"task_team %p for team %d at parity=%d\n",
__kmp_gtid_from_thread(this_thr),
diff --git a/openmp/runtime/test/target/issue-81488.c b/openmp/runtime/test/target/issue-81488.c
index 2f79599ba7..6f961aefa5 100644
--- a/openmp/runtime/test/target/issue-81488.c
+++ b/openmp/runtime/test/target/issue-81488.c
@@ -1,5 +1,6 @@
// RUN: %libomp-compile
-// RUN: env OMP_NUM_THREADS=1 LIBOMP_USE_HIDDEN_HELPER_TASK=1 LIBOMP_NUM_HIDDEN_HELPER_THREADS=8 %libomp-run
+// RUN: env OMP_NUM_THREADS=1 LIBOMP_USE_HIDDEN_HELPER_TASK=1
+// LIBOMP_NUM_HIDDEN_HELPER_THREADS=8 %libomp-run
#include <stdio.h>
#include <stdlib.h>
@@ -15,16 +16,17 @@ int main(void) {
for (int k = 0; k < Nz; ++k) {
a[k] = -1;
}
- #pragma omp parallel shared(a)
+#pragma omp parallel shared(a)
{
- #pragma omp single
+#pragma omp single
{
- #pragma omp target teams distribute parallel for nowait device(DEVICE_ID) map(tofrom: a[0:8])
+#pragma omp target teams distribute parallel for nowait device(DEVICE_ID) \
+ map(tofrom : a[0 : 8])
for (int i = 0; i < Nz; ++i) {
a[i] = i;
}
}
- #pragma omp barrier
+#pragma omp barrier
}
for (int k = 0; k < Nz; ++k) {
printf("a[%d] = %d\n", k, a[k]);
diff --git a/openmp/runtime/test/tasking/issue-50602.c b/openmp/runtime/test/tasking/issue-50602.c
index ceada58fca..b691204c48 100644
--- a/openmp/runtime/test/tasking/issue-50602.c
+++ b/openmp/runtime/test/tasking/issue-50602.c
@@ -20,7 +20,7 @@ int main(int argc, char *argv[]) {
int a = 0;
#ifdef USE_HIDDEN_HELPERS
-#pragma omp target map(tofrom: a) nowait
+#pragma omp target map(tofrom : a) nowait
#else
#pragma omp task shared(a) detach(event)
#endif
diff --git a/openmp/runtime/test/tasking/issue-69733.c b/openmp/runtime/test/tasking/issue-69733.c
index 172056ed2e..5775b016b7 100644
--- a/openmp/runtime/test/tasking/issue-69733.c
+++ b/openmp/runtime/test/tasking/issue-69733.c
@@ -38,7 +38,8 @@ void root_team_hidden_helpers() {
void parallel_detached(int nth1) {
a = 0;
- omp_event_handle_t *evs = (omp_event_handle_t*)malloc(sizeof(omp_event_handle_t) * nth1);
+ omp_event_handle_t *evs =
+ (omp_event_handle_t *)malloc(sizeof(omp_event_handle_t) * nth1);
#pragma omp parallel num_threads(nth1)
{
int tid = omp_get_thread_num();
@@ -62,19 +63,20 @@ void parallel_hidden_helpers(int nth1) {
inc_a();
}
if (a != nth1) {
- fprintf(stderr,
- "error: parallel_hidden_helpers(): a (%d) != %d\n", a, nth1);
+ fprintf(stderr, "error: parallel_hidden_helpers(): a (%d) != %d\n", a,
+ nth1);
exit(EXIT_FAILURE);
}
}
void nested_parallel_detached(int nth1, int nth2) {
a = 0;
- omp_event_handle_t **evs = (omp_event_handle_t**)malloc(sizeof(omp_event_handle_t*) * nth1);
+ omp_event_handle_t **evs =
+ (omp_event_handle_t **)malloc(sizeof(omp_event_handle_t *) * nth1);
#pragma omp parallel num_threads(nth1)
{
int tid = omp_get_thread_num();
- evs[tid] = (omp_event_handle_t*)malloc(sizeof(omp_event_handle_t) * nth2);
+ evs[tid] = (omp_event_handle_t *)malloc(sizeof(omp_event_handle_t) * nth2);
#pragma omp parallel num_threads(nth2) shared(tid)
{
int tid2 = omp_get_thread_num();
@@ -134,7 +136,7 @@ int main() {
for (i = 0; i < 10; ++i)
for (nth1 = 1; nth1 <= 4; ++nth1)
for (nth2 = 1; nth2 <= 4; ++nth2)
- nested_parallel_detached(nth1, nth2);
+ nested_parallel_detached(nth1, nth2);
for (i = 0; i < 10; ++i)
for (nth1 = 1; nth1 <= 4; ++nth1)
diff --git a/openmp/runtime/test/tasking/task_teams_stress_test.cpp b/openmp/runtime/test/tasking/task_teams_stress_test.cpp
index d124af6791..e781a895d4 100644
--- a/openmp/runtime/test/tasking/task_teams_stress_test.cpp
+++ b/openmp/runtime/test/tasking/task_teams_stress_test.cpp
@@ -56,7 +56,7 @@ void task_inc_split_a(int *a, int low, int high) {
#ifdef USE_HIDDEN_HELPERS
// Hidden helper tasks force serial regions to create task teams
void task_inc_a_hidden_helper(int *a) {
-#pragma omp target map(tofrom: a[0]) nowait
+#pragma omp target map(tofrom : a[0]) nowait
{
#pragma omp atomic
(*a)++;
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks fine from my perspective, but I'm not the foremost expert on libomp.so
so take that with a grain of salt.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Finally we have this issue sorted out (hopefully)!
@jpeyton52 I think this can also fix #81488. If yes, you can also close it once this is merged. |
Centralized the task team initialization code in its own static inline function. |
t_task_team[0]
as the task team and the second pointer is a next pointer for the stackt_task_team[1]
is interpreted as a stack of task teams where each level is a nested levelFixes: #50602
Fixes: #69368
Fixes: #69733
Fixes: #79416