Skip to content

Commit 77ad061

Browse files
authored
[OpenMP] Update OpenMP runtime to adopt taskgraph clause from 6.0 Specs (#130751)
Updating OpenMP runtime taskgraph support(record/replay mechanism): - Adds a `graph_reset` bit in `kmp_taskgraph_flags_t` to discard existing TDG records. - Switches from a strict index-based TDG ID/IDX to a more flexible integer-based, which can be any integer (e.g. hashed). - Adds helper functions like `__kmp_find_tdg`, `__kmp_alloc_tdg`, and `__kmp_free_tdg` to manage TDGs by their IDs. These changes pave the way for the integration of OpenMP taskgraph (spec 6.0). Taskgraphs are still recorded in an array with a lookup efficiency reduced to O(n), where n ≤ `__kmp_max_tdgs`. This can be optimized by moving the TDGs to a hashtable, making lookups more efficient. The provided helper routines facilitate easier future optimizations.
1 parent 467ad6a commit 77ad061

File tree

5 files changed

+189
-44
lines changed

5 files changed

+189
-44
lines changed

openmp/runtime/src/kmp.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2606,7 +2606,9 @@ typedef struct {
26062606
typedef struct kmp_taskgraph_flags { /*This needs to be exactly 32 bits */
26072607
unsigned nowait : 1;
26082608
unsigned re_record : 1;
2609-
unsigned reserved : 30;
2609+
unsigned graph_reset : 1; /* 1==discard taskgraph record, 0==use taskgraph
2610+
record */
2611+
unsigned reserved : 29;
26102612
} kmp_taskgraph_flags_t;
26112613

26122614
/// Represents a TDG node
@@ -2650,7 +2652,7 @@ typedef struct kmp_tdg_info {
26502652
extern int __kmp_tdg_dot;
26512653
extern kmp_int32 __kmp_max_tdgs;
26522654
extern kmp_tdg_info_t **__kmp_global_tdgs;
2653-
extern kmp_int32 __kmp_curr_tdg_idx;
2655+
extern kmp_tdg_info_t *__kmp_curr_tdg;
26542656
extern kmp_int32 __kmp_successors_size;
26552657
extern std::atomic<kmp_int32> __kmp_tdg_task_id;
26562658
extern kmp_int32 __kmp_num_tdg;

openmp/runtime/src/kmp_global.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -554,8 +554,7 @@ int *__kmp_nesting_nth_level;
554554
int __kmp_tdg_dot = 0;
555555
kmp_int32 __kmp_max_tdgs = 100;
556556
kmp_tdg_info_t **__kmp_global_tdgs = NULL;
557-
kmp_int32 __kmp_curr_tdg_idx =
558-
0; // Id of the current TDG being recorded or executed
557+
kmp_tdg_info_t *__kmp_curr_tdg = NULL; // Current TDG being recorded or executed
559558
kmp_int32 __kmp_num_tdg = 0;
560559
kmp_int32 __kmp_successors_size = 10; // Initial succesor size list for
561560
// recording

openmp/runtime/src/kmp_tasking.cpp

Lines changed: 90 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1651,11 +1651,11 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
16511651
}
16521652

16531653
#if OMPX_TASKGRAPH
1654-
kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx);
1654+
kmp_tdg_info_t *tdg = __kmp_curr_tdg;
16551655
if (tdg && __kmp_tdg_is_recording(tdg->tdg_status) &&
16561656
(task_entry != (kmp_routine_entry_t)__kmp_taskloop_task)) {
16571657
taskdata->is_taskgraph = 1;
1658-
taskdata->tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
1658+
taskdata->tdg = tdg;
16591659
taskdata->td_task_id = KMP_GEN_TASK_ID();
16601660
taskdata->td_tdg_task_id = KMP_ATOMIC_INC(&__kmp_tdg_task_id);
16611661
}
@@ -2577,14 +2577,11 @@ without help of the runtime library.
25772577
*/
25782578
void *__kmpc_task_reduction_init(int gtid, int num, void *data) {
25792579
#if OMPX_TASKGRAPH
2580-
kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx);
2580+
kmp_tdg_info_t *tdg = __kmp_curr_tdg;
25812581
if (tdg && __kmp_tdg_is_recording(tdg->tdg_status)) {
2582-
kmp_tdg_info_t *this_tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
2583-
this_tdg->rec_taskred_data =
2584-
__kmp_allocate(sizeof(kmp_task_red_input_t) * num);
2585-
this_tdg->rec_num_taskred = num;
2586-
KMP_MEMCPY(this_tdg->rec_taskred_data, data,
2587-
sizeof(kmp_task_red_input_t) * num);
2582+
tdg->rec_taskred_data = __kmp_allocate(sizeof(kmp_task_red_input_t) * num);
2583+
tdg->rec_num_taskred = num;
2584+
KMP_MEMCPY(tdg->rec_taskred_data, data, sizeof(kmp_task_red_input_t) * num);
25882585
}
25892586
#endif
25902587
return __kmp_task_reduction_init(gtid, num, (kmp_task_red_input_t *)data);
@@ -2604,14 +2601,11 @@ has two parameters, pointer to object to be initialized and pointer to omp_orig
26042601
*/
26052602
void *__kmpc_taskred_init(int gtid, int num, void *data) {
26062603
#if OMPX_TASKGRAPH
2607-
kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx);
2604+
kmp_tdg_info_t *tdg = __kmp_curr_tdg;
26082605
if (tdg && __kmp_tdg_is_recording(tdg->tdg_status)) {
2609-
kmp_tdg_info_t *this_tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
2610-
this_tdg->rec_taskred_data =
2611-
__kmp_allocate(sizeof(kmp_task_red_input_t) * num);
2612-
this_tdg->rec_num_taskred = num;
2613-
KMP_MEMCPY(this_tdg->rec_taskred_data, data,
2614-
sizeof(kmp_task_red_input_t) * num);
2606+
tdg->rec_taskred_data = __kmp_allocate(sizeof(kmp_task_red_input_t) * num);
2607+
tdg->rec_num_taskred = num;
2608+
KMP_MEMCPY(tdg->rec_taskred_data, data, sizeof(kmp_task_red_input_t) * num);
26152609
}
26162610
#endif
26172611
return __kmp_task_reduction_init(gtid, num, (kmp_taskred_input_t *)data);
@@ -2662,8 +2656,7 @@ void *__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data) {
26622656

26632657
#if OMPX_TASKGRAPH
26642658
if ((thread->th.th_current_task->is_taskgraph) &&
2665-
(!__kmp_tdg_is_recording(
2666-
__kmp_global_tdgs[__kmp_curr_tdg_idx]->tdg_status))) {
2659+
(!__kmp_tdg_is_recording(__kmp_curr_tdg->tdg_status))) {
26672660
tg = thread->th.th_current_task->td_taskgroup;
26682661
KMP_ASSERT(tg != NULL);
26692662
KMP_ASSERT(tg->reduce_data != NULL);
@@ -5452,7 +5445,6 @@ bool __kmpc_omp_has_task_team(kmp_int32 gtid) {
54525445

54535446
#if OMPX_TASKGRAPH
54545447
// __kmp_find_tdg: identify a TDG through its ID
5455-
// gtid: Global Thread ID
54565448
// tdg_id: ID of the TDG
54575449
// returns: If a TDG corresponding to this ID is found and not
54585450
// its initial state, return the pointer to it, otherwise nullptr
@@ -5465,12 +5457,71 @@ static kmp_tdg_info_t *__kmp_find_tdg(kmp_int32 tdg_id) {
54655457
__kmp_global_tdgs = (kmp_tdg_info_t **)__kmp_allocate(
54665458
sizeof(kmp_tdg_info_t *) * __kmp_max_tdgs);
54675459

5468-
if ((__kmp_global_tdgs[tdg_id]) &&
5469-
(__kmp_global_tdgs[tdg_id]->tdg_status != KMP_TDG_NONE))
5470-
res = __kmp_global_tdgs[tdg_id];
5460+
for (kmp_int32 tdg_idx = 0; tdg_idx < __kmp_max_tdgs; tdg_idx++) {
5461+
if (__kmp_global_tdgs[tdg_idx] &&
5462+
__kmp_global_tdgs[tdg_idx]->tdg_id == tdg_id) {
5463+
if (__kmp_global_tdgs[tdg_idx]->tdg_status != KMP_TDG_NONE)
5464+
res = __kmp_global_tdgs[tdg_idx];
5465+
break;
5466+
}
5467+
}
54715468
return res;
54725469
}
54735470

5471+
// __kmp_alloc_tdg: Allocates a TDG if it doesn't already exist.
5472+
// tdg_id: ID of the TDG.
5473+
// returns: A pointer to the TDG if it already exists. Otherwise,
5474+
// allocates a new TDG if the maximum limit has not been reached.
5475+
// Returns nullptr if no TDG can be allocated.
5476+
static kmp_tdg_info_t *__kmp_alloc_tdg(kmp_int32 tdg_id) {
5477+
kmp_tdg_info_t *res = nullptr;
5478+
if ((res = __kmp_find_tdg(tdg_id)))
5479+
return res;
5480+
5481+
if (__kmp_num_tdg > __kmp_max_tdgs)
5482+
return res;
5483+
5484+
for (kmp_int32 tdg_idx = 0; tdg_idx < __kmp_max_tdgs; tdg_idx++) {
5485+
if (!__kmp_global_tdgs[tdg_idx]) {
5486+
kmp_tdg_info_t *tdg =
5487+
(kmp_tdg_info_t *)__kmp_allocate(sizeof(kmp_tdg_info_t));
5488+
__kmp_global_tdgs[tdg_idx] = tdg;
5489+
__kmp_curr_tdg = tdg;
5490+
res = __kmp_global_tdgs[tdg_idx];
5491+
break;
5492+
}
5493+
}
5494+
return res;
5495+
}
5496+
5497+
// __kmp_free_tdg: Frees a TDG if it exists.
5498+
// tdg_id: ID of the TDG to be freed.
5499+
// returns: true if a TDG with the given ID was found and successfully freed,
5500+
// false if no such TDG exists.
5501+
static bool __kmp_free_tdg(kmp_int32 tdg_id) {
5502+
kmp_tdg_info_t *tdg = nullptr;
5503+
if (__kmp_global_tdgs == NULL)
5504+
return false;
5505+
5506+
for (kmp_int32 tdg_idx = 0; tdg_idx < __kmp_max_tdgs; tdg_idx++) {
5507+
if (__kmp_global_tdgs[tdg_idx] &&
5508+
__kmp_global_tdgs[tdg_idx]->tdg_id == tdg_id) {
5509+
tdg = __kmp_global_tdgs[tdg_idx];
5510+
for (kmp_int map_idx = 0; map_idx < tdg->map_size; map_idx++) {
5511+
__kmp_free(tdg->record_map[map_idx].successors);
5512+
}
5513+
__kmp_free(tdg->record_map);
5514+
if (tdg->root_tasks)
5515+
__kmp_free(tdg->root_tasks);
5516+
5517+
__kmp_free(tdg);
5518+
__kmp_global_tdgs[tdg_idx] = NULL;
5519+
return true;
5520+
}
5521+
}
5522+
return false;
5523+
}
5524+
54745525
// __kmp_print_tdg_dot: prints the TDG to a dot file
54755526
// tdg: ID of the TDG
54765527
// gtid: Global Thread ID
@@ -5505,7 +5556,7 @@ void __kmp_print_tdg_dot(kmp_tdg_info_t *tdg, kmp_int32 gtid) {
55055556
KA_TRACE(10, ("__kmp_print_tdg_dot(exit): T#%d tdg_id=%d \n", gtid, tdg_id));
55065557
}
55075558

5508-
// __kmp_start_record: launch the execution of a previous
5559+
// __kmp_exec_tdg: launch the execution of a previous
55095560
// recorded TDG
55105561
// gtid: Global Thread ID
55115562
// tdg: ID of the TDG
@@ -5565,9 +5616,7 @@ void __kmp_exec_tdg(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
55655616
static inline void __kmp_start_record(kmp_int32 gtid,
55665617
kmp_taskgraph_flags_t *flags,
55675618
kmp_int32 tdg_id) {
5568-
kmp_tdg_info_t *tdg =
5569-
(kmp_tdg_info_t *)__kmp_allocate(sizeof(kmp_tdg_info_t));
5570-
__kmp_global_tdgs[__kmp_curr_tdg_idx] = tdg;
5619+
kmp_tdg_info_t *tdg = __kmp_alloc_tdg(tdg_id);
55715620
// Initializing the TDG structure
55725621
tdg->tdg_id = tdg_id;
55735622
tdg->map_size = INIT_MAPSIZE;
@@ -5592,42 +5641,42 @@ static inline void __kmp_start_record(kmp_int32 gtid,
55925641
KMP_ATOMIC_ST_RLX(&this_record_map[i].npredecessors_counter, 0);
55935642
}
55945643

5595-
__kmp_global_tdgs[__kmp_curr_tdg_idx]->record_map = this_record_map;
5644+
tdg->record_map = this_record_map;
55965645
}
55975646

55985647
// __kmpc_start_record_task: Wrapper around __kmp_start_record to mark
55995648
// the beginning of the record process of a task region
56005649
// loc_ref: Location of TDG, not used yet
56015650
// gtid: Global Thread ID of the encountering thread
56025651
// input_flags: Flags associated with the TDG
5603-
// tdg_id: ID of the TDG to record, for now, incremental integer
5652+
// tdg_id: ID of the TDG to record
56045653
// returns: 1 if we record, otherwise, 0
56055654
kmp_int32 __kmpc_start_record_task(ident_t *loc_ref, kmp_int32 gtid,
56065655
kmp_int32 input_flags, kmp_int32 tdg_id) {
5607-
56085656
kmp_int32 res;
56095657
kmp_taskgraph_flags_t *flags = (kmp_taskgraph_flags_t *)&input_flags;
5610-
KA_TRACE(10,
5611-
("__kmpc_start_record_task(enter): T#%d loc=%p flags=%d tdg_id=%d\n",
5612-
gtid, loc_ref, input_flags, tdg_id));
5658+
KA_TRACE(10, ("__kmpc_start_record_task(enter): T#%d loc=%p flags=%d "
5659+
"tdg_id=%d\n",
5660+
gtid, loc_ref, input_flags, tdg_id));
56135661

56145662
if (__kmp_max_tdgs == 0) {
5615-
KA_TRACE(
5616-
10,
5617-
("__kmpc_start_record_task(abandon): T#%d loc=%p flags=%d tdg_id = %d, "
5618-
"__kmp_max_tdgs = 0\n",
5619-
gtid, loc_ref, input_flags, tdg_id));
5663+
KA_TRACE(10, ("__kmpc_start_record_task(abandon): T#%d loc=%p flags=%d "
5664+
"tdg_id = %d, __kmp_max_tdgs = 0\n",
5665+
gtid, loc_ref, input_flags, tdg_id));
56205666
return 1;
56215667
}
56225668

56235669
__kmpc_taskgroup(loc_ref, gtid);
5670+
if (flags->graph_reset) {
5671+
__kmp_free_tdg(tdg_id);
5672+
__kmp_num_tdg--;
5673+
}
56245674
if (kmp_tdg_info_t *tdg = __kmp_find_tdg(tdg_id)) {
56255675
// TODO: use re_record flag
56265676
__kmp_exec_tdg(gtid, tdg);
56275677
res = 0;
56285678
} else {
5629-
__kmp_curr_tdg_idx = tdg_id;
5630-
KMP_DEBUG_ASSERT(__kmp_curr_tdg_idx < __kmp_max_tdgs);
5679+
KMP_DEBUG_ASSERT(__kmp_num_tdg < __kmp_max_tdgs);
56315680
__kmp_start_record(gtid, flags, tdg_id);
56325681
__kmp_num_tdg++;
56335682
res = 1;
@@ -5690,10 +5739,11 @@ void __kmpc_end_record_task(ident_t *loc_ref, kmp_int32 gtid,
56905739
kmp_int32 input_flags, kmp_int32 tdg_id) {
56915740
kmp_tdg_info_t *tdg = __kmp_find_tdg(tdg_id);
56925741

5742+
KMP_DEBUG_ASSERT(tdg != NULL);
56935743
KA_TRACE(10, ("__kmpc_end_record_task(enter): T#%d loc=%p finishes recording"
56945744
" tdg=%d with flags=%d\n",
56955745
gtid, loc_ref, tdg_id, input_flags));
5696-
if (__kmp_max_tdgs) {
5746+
if (__kmp_max_tdgs && tdg) {
56975747
// TODO: use input_flags->nowait
56985748
__kmpc_end_taskgroup(loc_ref, gtid);
56995749
if (__kmp_tdg_is_recording(tdg->tdg_status))
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
// REQUIRES: ompx_taskgraph
2+
// RUN: %libomp-cxx-compile-and-run
3+
#include <iostream>
4+
#include <cassert>
5+
#define NT 10
6+
7+
// Compiler-generated code (emulation)
8+
typedef struct ident {
9+
void *dummy;
10+
} ident_t;
11+
12+
#ifdef __cplusplus
13+
extern "C" {
14+
int __kmpc_global_thread_num(ident_t *);
15+
int __kmpc_start_record_task(ident_t *, int, int, int);
16+
void __kmpc_end_record_task(ident_t *, int, int, int);
17+
}
18+
#endif
19+
20+
static void func(int *num_exec) { (*num_exec)++; }
21+
22+
int main() {
23+
int num_exec = 0;
24+
int num_tasks = 0;
25+
int hash_id = 135343854;
26+
#pragma omp parallel
27+
#pragma omp single
28+
for (int iter = 0; iter < NT; ++iter) {
29+
int gtid = __kmpc_global_thread_num(nullptr);
30+
int res = __kmpc_start_record_task(nullptr, gtid, /* kmp_tdg_flags */ 0,
31+
/* tdg_id */ hash_id);
32+
if (res) {
33+
num_tasks++;
34+
#pragma omp task
35+
func(&num_exec);
36+
}
37+
__kmpc_end_record_task(nullptr, gtid, /* kmp_tdg_flags */ 0,
38+
/* tdg_id */ hash_id);
39+
}
40+
41+
assert(num_tasks == 1);
42+
assert(num_exec == NT);
43+
44+
std::cout << "Passed" << std::endl;
45+
return 0;
46+
}
47+
// CHECK: Passed
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
// REQUIRES: ompx_taskgraph
2+
// RUN: %libomp-cxx-compile-and-run
3+
#include <iostream>
4+
#include <cassert>
5+
#define NT 10
6+
7+
// Compiler-generated code (emulation)
8+
typedef struct ident {
9+
void *dummy;
10+
} ident_t;
11+
12+
#ifdef __cplusplus
13+
extern "C" {
14+
int __kmpc_global_thread_num(ident_t *);
15+
int __kmpc_start_record_task(ident_t *, int, int, int);
16+
void __kmpc_end_record_task(ident_t *, int, int, int);
17+
}
18+
#endif
19+
20+
static void func(int *num_exec) { (*num_exec)++; }
21+
22+
int main() {
23+
int num_exec = 0;
24+
int num_tasks = 0;
25+
int flags = 1 << 2;
26+
#pragma omp parallel
27+
#pragma omp single
28+
for (int iter = 0; iter < NT; ++iter) {
29+
int gtid = __kmpc_global_thread_num(nullptr);
30+
int res = __kmpc_start_record_task(nullptr, gtid, /* kmp_tdg_flags */ flags,
31+
/* tdg_id */ 0);
32+
if (res) {
33+
num_tasks++;
34+
#pragma omp task
35+
func(&num_exec);
36+
}
37+
__kmpc_end_record_task(nullptr, gtid, /* kmp_tdg_flags */ 0,
38+
/* tdg_id */ 0);
39+
}
40+
41+
assert(num_tasks == NT);
42+
assert(num_exec == NT);
43+
44+
std::cout << "Passed" << std::endl;
45+
return 0;
46+
}
47+
// CHECK: Passed

0 commit comments

Comments
 (0)