@@ -1948,6 +1948,9 @@ struct llama_context {
     std::vector<uint8_t> buf_compute_meta;
     ggml_backend_sched_t sched = nullptr;
 
+    ggml_abort_callback abort_callback      = nullptr;
+    void *              abort_callback_data = nullptr;
+
     // input tensors
     ggml_backend_buffer_t buf_input = nullptr;
     ggml_context * ctx_input = nullptr;
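For reference, `ggml_abort_callback` used by the new fields is the callback type declared in ggml.h: a predicate that ggml polls during computation, where returning `true` requests that the compute in progress stop:

```c
// Declared in ggml.h: if the callback returns true, the graph computation is aborted.
typedef bool (*ggml_abort_callback)(void * data);
```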
@@ -7847,6 +7850,7 @@ static void llama_graph_compute(
 
     if (lctx.backend_cpu != nullptr) {
         ggml_backend_cpu_set_n_threads(lctx.backend_cpu, n_threads);
+        ggml_backend_cpu_set_abort_callback(lctx.backend_cpu, lctx.abort_callback, lctx.abort_callback_data);
     }
 
     ggml_backend_sched_graph_compute(lctx.sched, gf);
@@ -11644,6 +11648,8 @@ struct llama_context_params llama_context_default_params() {
         /*.embedding           =*/ false,
         /*.offload_kqv         =*/ true,
         /*.do_pooling          =*/ true,
+        /*.abort_callback      =*/ nullptr,
+        /*.abort_callback_data =*/ nullptr,
     };
 
     return result;
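With the two new fields in `llama_context_params`, the callback can be installed at context-creation time. A minimal sketch, where `my_abort_cb` and `my_flag` are illustrative caller-side names, not part of this patch:

```c
struct llama_context_params cparams = llama_context_default_params();
cparams.abort_callback      = my_abort_cb; // hypothetical bool (*)(void *) supplied by the caller
cparams.abort_callback_data = &my_flag;    // opaque pointer handed back to the callback
struct llama_context * ctx = llama_new_context_with_model(model, cparams);
```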
@@ -11835,8 +11841,11 @@ struct llama_context * llama_new_context_with_model(
     LLAMA_LOG_INFO("%s: freq_base  = %.1f\n", __func__, cparams.rope_freq_base);
     LLAMA_LOG_INFO("%s: freq_scale = %g\n",   __func__, cparams.rope_freq_scale);
 
-    ctx->rng                 = std::mt19937(params.seed);
-    ctx->logits_all          = params.logits_all;
+    ctx->abort_callback      = params.abort_callback;
+    ctx->abort_callback_data = params.abort_callback_data;
+
+    ctx->rng                 = std::mt19937(params.seed);
+    ctx->logits_all          = params.logits_all;
 
     const ggml_type type_k = params.type_k;
     const ggml_type type_v = params.type_v;
@@ -12809,6 +12818,11 @@ void llama_set_n_threads(struct llama_context * ctx, uint32_t n_threads, uint32_
     ctx->cparams.n_threads_batch = n_threads_batch;
 }
 
+void llama_set_abort_callback(struct llama_context * ctx, bool (*abort_callback)(void * data), void * abort_callback_data) {
+    ctx->abort_callback      = abort_callback;
+    ctx->abort_callback_data = abort_callback_data;
+}
+
 struct llama_batch llama_batch_get_one(
              llama_token * tokens,
                  int32_t   n_tokens,
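A minimal sketch of using the new setter to cancel a long-running `llama_decode` from another thread; `my_abort_callback` and `g_should_abort` are illustrative names, not part of this patch:

```c
#include <stdatomic.h>

static atomic_bool g_should_abort = false;

// Polled by ggml during graph computation; returning true aborts the compute.
static bool my_abort_callback(void * data) {
    return atomic_load((atomic_bool *) data);
}

// After creating the context:
llama_set_abort_callback(ctx, my_abort_callback, &g_should_abort);

// Later, from a cancel handler on another thread:
atomic_store(&g_should_abort, true);
```

Note that `llama_graph_compute` only wires the callback into the CPU backend here, so GPU-offloaded portions of the graph will not observe the abort request.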