File tree Expand file tree Collapse file tree 1 file changed +4
-2
lines changed Expand file tree Collapse file tree 1 file changed +4
-2
lines changed Original file line number Diff line number Diff line change @@ -14556,10 +14556,12 @@ static int llama_decode_internal(
14556
14556
ggml_set_cached_graph(lctx.sched,lctx.cached_graph.is_active);
14557
14557
14558
14558
// Disable future graph caching in the presence of the env var,
14559
- // if there are multiple devices, or if batch size is greater than 1
14559
+ // if there are multiple devices, if batch size is greater than 1,
14560
+ // or if nsplits is not 2.
14560
14561
// TODO: enable graph caching for these cases
14561
14562
bool disable_cached_ggml_graph = (getenv("GGML_DISABLE_GRAPH_CACHING") != nullptr)
14562
- || (llama_get_device_count(model) > 1);
14563
+ || (llama_get_device_count(model) > 1)
14564
+ || (ggml_backend_sched_get_n_splits(lctx.sched) != 2);
14563
14565
for (int i = 0 ; i < gf->n_nodes; i++) {
14564
14566
if (gf->nodes[i]->op == GGML_OP_ADD && gf->nodes[i]->src[1] && gf->nodes[i]->src[1]->ne[1] > 1) {
14565
14567
disable_cached_ggml_graph = true;
You can’t perform that action at this time.
0 commit comments