Commit 5e354e3

llama: propagating the results of graph_compute to the user interface
1 parent 6423c65


src/llama.cpp

Lines changed: 30 additions & 6 deletions
@@ -17181,7 +17181,7 @@ static void llama_output_reorder(struct llama_context * ctx) {
     }
 }
 
-static void llama_graph_compute(
+static enum ggml_status llama_graph_compute(
         llama_context & lctx,
         ggml_cgraph * gf,
         int n_threads,
@@ -17196,12 +17196,14 @@ static void llama_graph_compute(
         set_n_threads_fn.second(set_n_threads_fn.first, n_threads);
     }
 
-    auto err = ggml_backend_sched_graph_compute_async(lctx.sched.get(), gf);
-    if (err != GGML_STATUS_SUCCESS) {
-        LLAMA_LOG_ERROR("%s: ggml_backend_sched_graph_compute_async failed with error %d\n", __func__, err);
+    auto status = ggml_backend_sched_graph_compute_async(lctx.sched.get(), gf);
+    if (status != GGML_STATUS_SUCCESS) {
+        LLAMA_LOG_ERROR("%s: ggml_backend_sched_graph_compute_async failed with error %d\n", __func__, status);
     }
 
     // fprintf(stderr, "splits: %d\n", ggml_backend_sched_get_n_splits(lctx.sched));
+
+    return status;
 }
 
 // decode a batch of tokens by evaluating the transformer
@@ -17387,7 +17389,18 @@ static int llama_decode_internal(
 
         llama_set_inputs(lctx, ubatch);
 
-        llama_graph_compute(lctx, gf, n_threads, threadpool);
+        const auto compute_status = llama_graph_compute(lctx, gf, n_threads, threadpool);
+        switch (compute_status) {
+            case GGML_STATUS_SUCCESS:
+                break;
+            case GGML_STATUS_ABORTED:
+                return 2;
+            case GGML_STATUS_ALLOC_FAILED:
+                return -2;
+            case GGML_STATUS_FAILED:
+            default:
+                return -3;
+        }
 
         // update the kv ring buffer
         {
@@ -17624,7 +17637,18 @@ static int llama_encode_internal(
 
     llama_set_inputs(lctx, ubatch);
 
-    llama_graph_compute(lctx, gf, n_threads, threadpool);
+    const auto compute_status = llama_graph_compute(lctx, gf, n_threads, threadpool);
+    switch (compute_status) {
+        case GGML_STATUS_SUCCESS:
+            break;
+        case GGML_STATUS_ABORTED:
+            return 2;
+        case GGML_STATUS_ALLOC_FAILED:
+            return -2;
+        case GGML_STATUS_FAILED:
+        default:
+            return -3;
+    }
 
     // extract embeddings
     if (embd) {
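
For illustration, here is a minimal caller-side sketch of how the propagated codes could be consumed, assuming the public llama_decode forwards llama_decode_internal's return value unchanged; the run_decode helper and its log messages are hypothetical and not part of this commit.

// Hedged sketch (not part of this commit): handling the status codes that
// llama_decode_internal now surfaces, assuming llama_decode forwards them
// unchanged. run_decode is a hypothetical helper.
#include "llama.h"
#include <stdio.h>

static int run_decode(struct llama_context * ctx, struct llama_batch batch) {
    const int ret = llama_decode(ctx, batch);
    switch (ret) {
        case 0:
            return 0;                             // GGML_STATUS_SUCCESS
        case 2:
            fprintf(stderr, "decode aborted\n");  // GGML_STATUS_ABORTED
            return ret;
        case -2:
            fprintf(stderr, "alloc failed\n");    // GGML_STATUS_ALLOC_FAILED
            return ret;
        case -3:
            fprintf(stderr, "compute failed\n");  // GGML_STATUS_FAILED
            return ret;
        default:
            return ret;                           // other pre-existing codes
    }
}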
