Skip to content

Commit 41ca910

Browse files
authored
[OpenMP] Fix task state and taskteams for serial teams (#86859)
* Serial teams now use a stack (similar to dispatch buffers).
* Serial teams always use `t_task_team[0]` as the task team, and the second pointer is a next pointer for the stack.

`t_task_team[1]` is interpreted as a stack of task teams, where each level is a nested level:

```
inner serial team                    outer serial team
[ t_task_team[0] ] -> (task_team)    [ t_task_team[0] ] -> (task_team)
[ next           ] ----------------> [ next           ] -> ...
```

* Remove the task state memo stack from the thread structure.
* Instead of a thread-private stack, use the team structure to store th_task_state of the primary thread. When coming out of a parallel region, restore the primary thread's task state. The new field in the team structure doesn't cause sizeof(team) to change and is in the cache line which is only read/written by the primary thread.

Fixes: #50602
Fixes: #69368
Fixes: #69733
Fixes: #79416
1 parent 1d87465 commit 41ca910

File tree

11 files changed

+792
-236
lines changed

11 files changed

+792
-236
lines changed

openmp/runtime/src/kmp.h

Lines changed: 23 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2871,6 +2871,11 @@ union KMP_ALIGN_CACHE kmp_task_team {
28712871
char tt_pad[KMP_PAD(kmp_base_task_team_t, CACHE_LINE)];
28722872
};
28732873

2874+
// Node of a singly linked list of task teams.
// Its size and alignment are required to equal those of
// `kmp_task_team_t *[2]` (see the KMP_BUILD_ASSERTs that follow
// kmp_base_team_t), i.e. one node occupies exactly the storage of a team's
// two-entry t_task_team array.
// NOTE(review): presumably nodes are linked/unlinked by
// __kmp_push_task_team_node / __kmp_pop_task_team_node for nested serialized
// regions -- confirm against their definitions.
typedef struct kmp_task_team_list_t {
2875+
kmp_task_team_t *task_team; // task team stored in this node
2876+
kmp_task_team_list_t *next; // following node in the list, if any
2877+
} kmp_task_team_list_t;
2878+
28742879
#if (USE_FAST_MEMORY == 3) || (USE_FAST_MEMORY == 5)
28752880
// Free lists keep same-size free memory slots for fast memory allocation
28762881
// routines
@@ -3008,10 +3013,6 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info {
30083013
kmp_task_team_t *th_task_team; // Task team struct
30093014
kmp_taskdata_t *th_current_task; // Innermost Task being executed
30103015
kmp_uint8 th_task_state; // alternating 0/1 for task team identification
3011-
kmp_uint8 *th_task_state_memo_stack; // Stack holding memos of th_task_state
3012-
// at nested levels
3013-
kmp_uint32 th_task_state_top; // Top element of th_task_state_memo_stack
3014-
kmp_uint32 th_task_state_stack_sz; // Size of th_task_state_memo_stack
30153016
kmp_uint32 th_reap_state; // Non-zero indicates thread is not
30163017
// tasking, thus safe to reap
30173018

@@ -3133,6 +3134,7 @@ typedef struct KMP_ALIGN_CACHE kmp_base_team {
31333134
kmp_disp_t *t_dispatch; // thread's dispatch data
31343135
kmp_task_team_t *t_task_team[2]; // Task team struct; switch between 2
31353136
kmp_proc_bind_t t_proc_bind; // bind type for par region
3137+
int t_primary_task_state; // primary thread's task state saved
31363138
#if USE_ITT_BUILD
31373139
kmp_uint64 t_region_time; // region begin timestamp
31383140
#endif /* USE_ITT_BUILD */
@@ -3204,6 +3206,12 @@ typedef struct KMP_ALIGN_CACHE kmp_base_team {
32043206
distributedBarrier *b; // Distributed barrier data associated with team
32053207
} kmp_base_team_t;
32063208

3209+
// Assert that the list structure fits and aligns within
3210+
// the double task team pointer
3211+
KMP_BUILD_ASSERT(sizeof(kmp_task_team_t *[2]) == sizeof(kmp_task_team_list_t));
3212+
KMP_BUILD_ASSERT(alignof(kmp_task_team_t *[2]) ==
3213+
alignof(kmp_task_team_list_t));
3214+
32073215
union KMP_ALIGN_CACHE kmp_team {
32083216
kmp_base_team_t t;
32093217
double t_align; /* use worst case alignment */
@@ -4114,9 +4122,10 @@ extern void __kmp_fulfill_event(kmp_event_t *event);
41144122
extern void __kmp_free_task_team(kmp_info_t *thread,
41154123
kmp_task_team_t *task_team);
41164124
extern void __kmp_reap_task_teams(void);
4125+
extern void __kmp_push_task_team_node(kmp_info_t *thread, kmp_team_t *team);
4126+
extern void __kmp_pop_task_team_node(kmp_info_t *thread, kmp_team_t *team);
41174127
extern void __kmp_wait_to_unref_task_teams(void);
4118-
extern void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team,
4119-
int always);
4128+
extern void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team);
41204129
extern void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team);
41214130
extern void __kmp_task_team_wait(kmp_info_t *this_thr, kmp_team_t *team
41224131
#if USE_ITT_BUILD
@@ -4127,6 +4136,14 @@ extern void __kmp_task_team_wait(kmp_info_t *this_thr, kmp_team_t *team
41274136
int wait = 1);
41284137
extern void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread,
41294138
int gtid);
4139+
// KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team, thr)
// Debug-only consistency check between a thread and its team.
// The assertion is satisfied when any one of the following holds:
//   - __kmp_tasking_mode is not tskm_task_teams,
//   - team->t.t_nproc == 1,
//   - thr->th.th_task_team is the entry of team->t.t_task_team selected by
//     thr->th.th_task_state.
// When KMP_DEBUG is not set the macro expands to nothing.
// Usage note: `team` and `thr` are substituted without parentheses and each is
// expanded more than once, so pass plain pointer variables with no side
// effects.
#if KMP_DEBUG
4140+
#define KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team, thr) \
4141+
KMP_DEBUG_ASSERT( \
4142+
__kmp_tasking_mode != tskm_task_teams || team->t.t_nproc == 1 || \
4143+
thr->th.th_task_team == team->t.t_task_team[thr->th.th_task_state])
4144+
#else
4145+
#define KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team, thr) /* Nothing */
4146+
#endif
41304147

41314148
extern int __kmp_is_address_mapped(void *addr);
41324149
extern kmp_uint64 __kmp_hardware_timestamp(void);

openmp/runtime/src/kmp_barrier.cpp

Lines changed: 5 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1858,8 +1858,7 @@ static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split,
18581858
}
18591859

18601860
if (KMP_MASTER_TID(tid) && __kmp_tasking_mode != tskm_immediate_exec)
1861-
// use 0 to only setup the current team if nthreads > 1
1862-
__kmp_task_team_setup(this_thr, team, 0);
1861+
__kmp_task_team_setup(this_thr, team);
18631862

18641863
if (cancellable) {
18651864
cancelled = __kmp_linear_barrier_gather_cancellable(
@@ -2042,7 +2041,7 @@ static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split,
20422041
this_thr->th.th_task_team->tt.tt_hidden_helper_task_encountered ==
20432042
TRUE);
20442043
__kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj));
2045-
__kmp_task_team_setup(this_thr, team, 0);
2044+
__kmp_task_team_setup(this_thr, team);
20462045

20472046
#if USE_ITT_BUILD
20482047
if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
@@ -2243,9 +2242,7 @@ void __kmp_join_barrier(int gtid) {
22432242
__kmp_gtid_from_thread(this_thr), team_id,
22442243
team->t.t_task_team[this_thr->th.th_task_state],
22452244
this_thr->th.th_task_team));
2246-
if (this_thr->th.th_task_team)
2247-
KMP_DEBUG_ASSERT(this_thr->th.th_task_team ==
2248-
team->t.t_task_team[this_thr->th.th_task_state]);
2245+
KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team, this_thr);
22492246
}
22502247
#endif /* KMP_DEBUG */
22512248

@@ -2440,10 +2437,8 @@ void __kmp_fork_barrier(int gtid, int tid) {
24402437
}
24412438
#endif
24422439

2443-
if (__kmp_tasking_mode != tskm_immediate_exec) {
2444-
// 0 indicates setup current task team if nthreads > 1
2445-
__kmp_task_team_setup(this_thr, team, 0);
2446-
}
2440+
if (__kmp_tasking_mode != tskm_immediate_exec)
2441+
__kmp_task_team_setup(this_thr, team);
24472442

24482443
/* The primary thread may have changed its blocktime between join barrier
24492444
and fork barrier. Copy the blocktime info to the thread, where

openmp/runtime/src/kmp_csupport.cpp

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -654,6 +654,12 @@ void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
654654
serial_team->t.t_dispatch->th_disp_buffer->next;
655655
__kmp_free(disp_buffer);
656656
}
657+
658+
/* pop the task team stack */
659+
if (serial_team->t.t_serialized > 1) {
660+
__kmp_pop_task_team_node(this_thr, serial_team);
661+
}
662+
657663
this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
658664

659665
--serial_team->t.t_serialized;
@@ -692,6 +698,11 @@ void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
692698
this_thr->th.th_current_task->td_flags.executing = 1;
693699

694700
if (__kmp_tasking_mode != tskm_immediate_exec) {
701+
// Restore task state from serial team structure
702+
KMP_DEBUG_ASSERT(serial_team->t.t_primary_task_state == 0 ||
703+
serial_team->t.t_primary_task_state == 1);
704+
this_thr->th.th_task_state =
705+
(kmp_uint8)serial_team->t.t_primary_task_state;
695706
// Copy the task team from the new child / old parent team to the thread.
696707
this_thr->th.th_task_team =
697708
this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];

0 commit comments

Comments
 (0)