
Commit 326e4d9

llama: propagating the results of graph_compute to the user interface
1 parent: 1d48e98
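
For context, the new return type comes from ggml's status enum. The sketch below reproduces enum ggml_status as it appears in contemporary ggml.h headers; the exact values shown here are an assumption for reference and are not part of this commit:

// ggml_status as defined in ggml.h around the time of this change
// (values shown here are an assumption, not part of the diff below)
enum ggml_status {
    GGML_STATUS_ALLOC_FAILED = -2,
    GGML_STATUS_FAILED       = -1,
    GGML_STATUS_SUCCESS      =  0,
    GGML_STATUS_ABORTED      =  1,
};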


src/llama.cpp

Lines changed: 28 additions & 4 deletions
@@ -16612,7 +16612,7 @@ static void llama_output_reorder(struct llama_context * ctx) {
     }
 }
 
-static void llama_graph_compute(
+static enum ggml_status llama_graph_compute(
           llama_context & lctx,
             ggml_cgraph * gf,
                     int   n_threads,
@@ -16634,9 +16634,11 @@ static void llama_graph_compute(
     }
 #endif
 
-    ggml_backend_sched_graph_compute_async(lctx.sched, gf);
+    auto status = ggml_backend_sched_graph_compute_async(lctx.sched, gf);
 
     // fprintf(stderr, "splits: %d\n", ggml_backend_sched_get_n_splits(lctx.sched));
+
+    return status;
 }
 
 // decode a batch of tokens by evaluating the transformer
@@ -16818,7 +16820,18 @@ static int llama_decode_internal(
 
         llama_set_inputs(lctx, ubatch);
 
-        llama_graph_compute(lctx, gf, n_threads, threadpool);
+        const auto compute_status = llama_graph_compute(lctx, gf, n_threads, threadpool);
+        switch (compute_status) {
+            case GGML_STATUS_SUCCESS:
+                break;
+            case GGML_STATUS_ABORTED:
+                return 2;
+            case GGML_STATUS_ALLOC_FAILED:
+                return -2;
+            case GGML_STATUS_FAILED:
+            default:
+                return -3;
+        }
 
         // update the kv ring buffer
         {
@@ -17038,7 +17051,18 @@ static int llama_encode_internal(
 
     llama_set_inputs(lctx, ubatch);
 
-    llama_graph_compute(lctx, gf, n_threads, threadpool);
+    const auto compute_status = llama_graph_compute(lctx, gf, n_threads, threadpool);
+    switch (compute_status) {
+        case GGML_STATUS_SUCCESS:
+            break;
+        case GGML_STATUS_ABORTED:
+            return 2;
+        case GGML_STATUS_ALLOC_FAILED:
+            return -2;
+        case GGML_STATUS_FAILED:
+        default:
+            return -3;
+    }
 
     // extract embeddings
     if (embd) {
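
With the change above, llama_decode_internal and llama_encode_internal surface graph-compute failures as distinct integer codes instead of silently discarding them. Assuming the public llama_decode wrapper forwards llama_decode_internal's return value unchanged (which is how llama.cpp is structured at this point), a caller could distinguish the outcomes roughly as in the sketch below; decode_or_report and its error messages are illustrative, not part of the commit:

// Minimal caller-side sketch (hypothetical helper, not part of this commit).
// Assumes llama_decode() returns llama_decode_internal()'s code unchanged
// and that ctx/batch were prepared elsewhere (e.g. via llama_batch_get_one).
#include <cstdio>
#include "llama.h"

static bool decode_or_report(llama_context * ctx, llama_batch batch) {
    const int32_t ret = llama_decode(ctx, batch);
    switch (ret) {
        case 0:
            return true;   // graph compute succeeded (GGML_STATUS_SUCCESS)
        case 1:
            std::fprintf(stderr, "decode: could not find a KV cache slot for the batch\n");
            return false;
        case 2:
            std::fprintf(stderr, "decode: aborted (GGML_STATUS_ABORTED)\n");
            return false;
        case -2:
            std::fprintf(stderr, "decode: compute buffer allocation failed (GGML_STATUS_ALLOC_FAILED)\n");
            return false;
        default: // -3 (GGML_STATUS_FAILED) and any other negative code
            std::fprintf(stderr, "decode: failed, ret = %d\n", ret);
            return false;
    }
}

Positive codes (1, 2) indicate conditions a caller may recover from (retry with a smaller batch, or an intentional abort), while negative codes keep the existing "hard error" convention.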
