Skip to content

Commit d2e518e

Browse files
issixx and issi
authored and committed
ggml-cpu : fix ggml_graph_compute_thread did not terminate on abort. (ggml/1065)
Some threads kept looping and failed to terminate properly after an abort during CPU execution.

Co-authored-by: issi <[email protected]>
1 parent b636228 commit d2e518e

File tree

1 file changed

+5
-5
lines changed

1 file changed

+5
-5
lines changed

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1302,7 +1302,7 @@ struct ggml_threadpool {
13021302
// these are atomic as an annotation for thread-sanitizer
13031303
atomic_bool stop; // Used for stopping the threadpool altogether
13041304
atomic_bool pause; // Used for pausing the threadpool or individual threads
1305-
atomic_bool abort; // Used for aborting processing of a graph
1305+
atomic_int abort; // Used for aborting processing of a graph
13061306

13071307
struct ggml_compute_state * workers; // per thread state
13081308
int n_threads_max; // number of threads in the pool
@@ -13851,14 +13851,14 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
1385113851
/*.threadpool=*/ tp,
1385213852
};
1385313853

13854-
for (int node_n = 0; node_n < cgraph->n_nodes && !tp->abort; node_n++) {
13854+
for (int node_n = 0; node_n < cgraph->n_nodes && atomic_load_explicit(&tp->abort, memory_order_relaxed) != node_n; node_n++) {
1385513855
struct ggml_tensor * node = cgraph->nodes[node_n];
1385613856

1385713857
ggml_compute_forward(&params, node);
1385813858

1385913859
if (state->ith == 0 && cplan->abort_callback &&
1386013860
cplan->abort_callback(cplan->abort_callback_data)) {
13861-
tp->abort = true;
13861+
atomic_store_explicit(&tp->abort, node_n + 1, memory_order_relaxed);
1386213862
tp->ec = GGML_STATUS_ABORTED;
1386313863
}
1386413864

@@ -14031,7 +14031,7 @@ static struct ggml_threadpool * ggml_threadpool_new_impl(
1403114031
threadpool->current_chunk = 0;
1403214032
threadpool->stop = false;
1403314033
threadpool->pause = tpp->paused;
14034-
threadpool->abort = false;
14034+
threadpool->abort = -1;
1403514035
threadpool->workers = NULL;
1403614036
threadpool->n_threads_max = tpp->n_threads;
1403714037
threadpool->n_threads_cur = tpp->n_threads;
@@ -14110,7 +14110,7 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
1411014110
threadpool->cgraph = cgraph;
1411114111
threadpool->cplan = cplan;
1411214112
threadpool->current_chunk = 0;
14113-
threadpool->abort = false;
14113+
threadpool->abort = -1;
1411414114
threadpool->ec = GGML_STATUS_SUCCESS;
1411514115
}
1411614116

0 commit comments

Comments
 (0)