Skip to content

[OpenMP] Fix task state and taskteams for serial teams #86859

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
May 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 23 additions & 6 deletions openmp/runtime/src/kmp.h
Original file line number Diff line number Diff line change
Expand Up @@ -2871,6 +2871,11 @@ union KMP_ALIGN_CACHE kmp_task_team {
char tt_pad[KMP_PAD(kmp_base_task_team_t, CACHE_LINE)];
};

// Node of a singly linked list of task teams: each node carries one task team
// pointer plus a link to the next node.
// The layout is deliberately exactly two pointers: build asserts elsewhere in
// this header require this struct to match the size and alignment of
// `kmp_task_team_t *[2]`, so do not add, remove, or reorder fields without
// revisiting those asserts.
// NOTE(review): presumably the nodes are managed as a stack through
// __kmp_push_task_team_node() / __kmp_pop_task_team_node() -- confirm against
// their definitions.
typedef struct kmp_task_team_list_t {
kmp_task_team_t *task_team; // task team recorded in this node
kmp_task_team_list_t *next; // following node in the list
} kmp_task_team_list_t;

#if (USE_FAST_MEMORY == 3) || (USE_FAST_MEMORY == 5)
// Free lists keep same-size free memory slots for fast memory allocation
// routines
Expand Down Expand Up @@ -3008,10 +3013,6 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info {
kmp_task_team_t *th_task_team; // Task team struct
kmp_taskdata_t *th_current_task; // Innermost Task being executed
kmp_uint8 th_task_state; // alternating 0/1 for task team identification
kmp_uint8 *th_task_state_memo_stack; // Stack holding memos of th_task_state
// at nested levels
kmp_uint32 th_task_state_top; // Top element of th_task_state_memo_stack
kmp_uint32 th_task_state_stack_sz; // Size of th_task_state_memo_stack
kmp_uint32 th_reap_state; // Non-zero indicates thread is not
// tasking, thus safe to reap

Expand Down Expand Up @@ -3133,6 +3134,7 @@ typedef struct KMP_ALIGN_CACHE kmp_base_team {
kmp_disp_t *t_dispatch; // thread's dispatch data
kmp_task_team_t *t_task_team[2]; // Task team struct; switch between 2
kmp_proc_bind_t t_proc_bind; // bind type for par region
int t_primary_task_state; // primary thread's task state saved
#if USE_ITT_BUILD
kmp_uint64 t_region_time; // region begin timestamp
#endif /* USE_ITT_BUILD */
Expand Down Expand Up @@ -3204,6 +3206,12 @@ typedef struct KMP_ALIGN_CACHE kmp_base_team {
distributedBarrier *b; // Distributed barrier data associated with team
} kmp_base_team_t;

// Compile-time layout check: kmp_task_team_list_t must have exactly the same
// size and alignment as an array of two kmp_task_team_t pointers (the type of
// kmp_base_team_t::t_task_team), so that a list node fits within, and aligns
// with, that double task team pointer.
KMP_BUILD_ASSERT(sizeof(kmp_task_team_t *[2]) == sizeof(kmp_task_team_list_t));
KMP_BUILD_ASSERT(alignof(kmp_task_team_t *[2]) ==
alignof(kmp_task_team_list_t));

union KMP_ALIGN_CACHE kmp_team {
kmp_base_team_t t;
double t_align; /* use worst case alignment */
Expand Down Expand Up @@ -4114,9 +4122,10 @@ extern void __kmp_fulfill_event(kmp_event_t *event);
extern void __kmp_free_task_team(kmp_info_t *thread,
kmp_task_team_t *task_team);
extern void __kmp_reap_task_teams(void);
extern void __kmp_push_task_team_node(kmp_info_t *thread, kmp_team_t *team);
extern void __kmp_pop_task_team_node(kmp_info_t *thread, kmp_team_t *team);
extern void __kmp_wait_to_unref_task_teams(void);
extern void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team,
int always);
extern void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team);
extern void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team);
extern void __kmp_task_team_wait(kmp_info_t *this_thr, kmp_team_t *team
#if USE_ITT_BUILD
Expand All @@ -4127,6 +4136,14 @@ extern void __kmp_task_team_wait(kmp_info_t *this_thr, kmp_team_t *team
int wait = 1);
extern void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread,
int gtid);
#if KMP_DEBUG
// Debug-only check of the task-team invariant for thread `thr` within `team`.
// The assertion passes when any of the following holds:
//   - the tasking mode is not tskm_task_teams,
//   - the team has a single thread (t_nproc == 1),
//   - the thread's th_task_team equals the team's t_task_team entry selected
//     by the thread's th_task_state.
// Each macro parameter is parenthesized in the expansion so that arbitrary
// pointer expressions (e.g. `&obj` or `p + 1`) bind correctly with `->`.
// Both parameters are expanded more than once, so do not pass arguments with
// side effects.
#define KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team, thr)                         \
  KMP_DEBUG_ASSERT(                                                            \
      __kmp_tasking_mode != tskm_task_teams || (team)->t.t_nproc == 1 ||       \
      (thr)->th.th_task_team ==                                                \
          (team)->t.t_task_team[(thr)->th.th_task_state])
#else
// Non-debug builds: the check expands to nothing.
#define KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team, thr) /* Nothing */
#endif

extern int __kmp_is_address_mapped(void *addr);
extern kmp_uint64 __kmp_hardware_timestamp(void);
Expand Down
15 changes: 5 additions & 10 deletions openmp/runtime/src/kmp_barrier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1858,8 +1858,7 @@ static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split,
}

if (KMP_MASTER_TID(tid) && __kmp_tasking_mode != tskm_immediate_exec)
// use 0 to only setup the current team if nthreads > 1
__kmp_task_team_setup(this_thr, team, 0);
__kmp_task_team_setup(this_thr, team);

if (cancellable) {
cancelled = __kmp_linear_barrier_gather_cancellable(
Expand Down Expand Up @@ -2042,7 +2041,7 @@ static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split,
this_thr->th.th_task_team->tt.tt_hidden_helper_task_encountered ==
TRUE);
__kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj));
__kmp_task_team_setup(this_thr, team, 0);
__kmp_task_team_setup(this_thr, team);

#if USE_ITT_BUILD
if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
Expand Down Expand Up @@ -2243,9 +2242,7 @@ void __kmp_join_barrier(int gtid) {
__kmp_gtid_from_thread(this_thr), team_id,
team->t.t_task_team[this_thr->th.th_task_state],
this_thr->th.th_task_team));
if (this_thr->th.th_task_team)
KMP_DEBUG_ASSERT(this_thr->th.th_task_team ==
team->t.t_task_team[this_thr->th.th_task_state]);
KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team, this_thr);
}
#endif /* KMP_DEBUG */

Expand Down Expand Up @@ -2440,10 +2437,8 @@ void __kmp_fork_barrier(int gtid, int tid) {
}
#endif

if (__kmp_tasking_mode != tskm_immediate_exec) {
// 0 indicates setup current task team if nthreads > 1
__kmp_task_team_setup(this_thr, team, 0);
}
if (__kmp_tasking_mode != tskm_immediate_exec)
__kmp_task_team_setup(this_thr, team);

/* The primary thread may have changed its blocktime between join barrier
and fork barrier. Copy the blocktime info to the thread, where
Expand Down
11 changes: 11 additions & 0 deletions openmp/runtime/src/kmp_csupport.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,12 @@ void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
serial_team->t.t_dispatch->th_disp_buffer->next;
__kmp_free(disp_buffer);
}

/* pop the task team stack */
if (serial_team->t.t_serialized > 1) {
__kmp_pop_task_team_node(this_thr, serial_team);
}

this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore

--serial_team->t.t_serialized;
Expand Down Expand Up @@ -692,6 +698,11 @@ void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
this_thr->th.th_current_task->td_flags.executing = 1;

if (__kmp_tasking_mode != tskm_immediate_exec) {
// Restore task state from serial team structure
KMP_DEBUG_ASSERT(serial_team->t.t_primary_task_state == 0 ||
serial_team->t.t_primary_task_state == 1);
this_thr->th.th_task_state =
(kmp_uint8)serial_team->t.t_primary_task_state;
// Copy the task team from the new child / old parent team to the thread.
this_thr->th.th_task_team =
this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
Expand Down
Loading