Commit a34900a — "restrict to nsplit=2"
(1 parent: b7956a8)

1 file changed: +4 additions, -2 deletions

src/llama.cpp (4 additions, 2 deletions)

```diff
@@ -14556,10 +14556,12 @@ static int llama_decode_internal(
     ggml_set_cached_graph(lctx.sched,lctx.cached_graph.is_active);

     // Disable future graph caching in presence of env var,
-    // if there are multiple devices, or if batch size is greater than 1
+    // if there are multiple devices, if batch size is greater than 1,
+    // or if nsplits is not 2.
     // TO DO enable graph caching for these cases
     bool disable_cached_ggml_graph = (getenv("GGML_DISABLE_GRAPH_CACHING") != nullptr)
-        || (llama_get_device_count(model) > 1);
+        || (llama_get_device_count(model) > 1)
+        || (ggml_backend_sched_get_n_splits(lctx.sched) != 2);
     for (int i = 0 ; i < gf->n_nodes; i++) {
         if (gf->nodes[i]->op == GGML_OP_ADD && gf->nodes[i]->src[1] && gf->nodes[i]->src[1]->ne[1] > 1) {
             disable_cached_ggml_graph = true;
```

Comments: 0