@@ -1018,10 +1018,6 @@ static bool ggml_is_view_op(enum ggml_op op) {
1018
1018
#define GGML_SCHED_MAX_BACKENDS 16
1019
1019
#endif
1020
1020
1021
- #ifndef GGML_SCHED_MAX_SPLITS
1022
- #define GGML_SCHED_MAX_SPLITS 2048
1023
- #endif
1024
-
1025
1021
#ifndef GGML_SCHED_MAX_SPLIT_INPUTS
1026
1022
#define GGML_SCHED_MAX_SPLIT_INPUTS GGML_MAX_SRC
1027
1023
#endif
@@ -1125,7 +1121,8 @@ static int ggml_backend_sched_backend_from_buffer(ggml_backend_sched_t sched, co
1125
1121
}
1126
1122
1127
1123
#if 0
1128
- static char causes [GGML_DEFAULT_GRAPH_SIZE * 16 + GGML_SCHED_MAX_SPLITS * GGML_SCHED_MAX_SPLIT_INPUTS ][128 ]; // debug only
1124
+ #define GGML_SCHED_MAX_SPLITS_DEBUG 4096
1125
+ static char causes [GGML_DEFAULT_GRAPH_SIZE * 16 + GGML_SCHED_MAX_SPLITS_DEBUG * GGML_SCHED_MAX_SPLIT_INPUTS ][128 ]; // debug only
1129
1126
#define SET_CAUSE (node , ...) sprintf(causes[hash_id(node)], __VA_ARGS__)
1130
1127
#define GET_CAUSE (node ) causes[hash_id(node)]
1131
1128
#else
@@ -1549,7 +1546,6 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
1549
1546
sched -> splits = realloc (sched -> splits , sched -> splits_capacity * sizeof (struct ggml_backend_sched_split ));
1550
1547
GGML_ASSERT (sched -> splits != NULL );
1551
1548
}
1552
- GGML_ASSERT (i_split < GGML_SCHED_MAX_SPLITS );
1553
1549
split = & sched -> splits [i_split ];
1554
1550
split -> backend_id = node_backend_id ;
1555
1551
split -> i_start = i ;
@@ -1865,13 +1861,14 @@ ggml_backend_sched_t ggml_backend_sched_new(
1865
1861
sched -> hv_tensor_backend_ids = malloc (sched -> hash_set .size * sizeof (sched -> hv_tensor_backend_ids [0 ]));
1866
1862
sched -> hv_tensor_copies = malloc (sched -> hash_set .size * sched -> n_backends * sched -> n_copies * sizeof (struct ggml_tensor * ));
1867
1863
1868
- const size_t nodes_size = graph_size + GGML_SCHED_MAX_SPLITS * GGML_SCHED_MAX_SPLIT_INPUTS * 2 ;
1864
+ const size_t ggml_sched_max_splits = graph_size ; // at most there is one split for each node in the graph
1865
+ const size_t nodes_size = graph_size + ggml_sched_max_splits * GGML_SCHED_MAX_SPLIT_INPUTS * 2 ;
1869
1866
sched -> node_backend_ids = calloc (nodes_size , sizeof (sched -> node_backend_ids [0 ]));
1870
1867
sched -> leaf_backend_ids = calloc (nodes_size , sizeof (sched -> leaf_backend_ids [0 ]));
1871
1868
sched -> prev_node_backend_ids = calloc (nodes_size , sizeof (sched -> prev_node_backend_ids [0 ]));
1872
1869
sched -> prev_leaf_backend_ids = calloc (nodes_size , sizeof (sched -> prev_leaf_backend_ids [0 ]));
1873
1870
1874
- sched -> context_buffer_size = GGML_SCHED_MAX_SPLITS * GGML_SCHED_MAX_SPLIT_INPUTS * 2 * sizeof (struct ggml_tensor ) + ggml_graph_overhead_custom (graph_size , false);
1871
+ sched -> context_buffer_size = ggml_sched_max_splits * GGML_SCHED_MAX_SPLIT_INPUTS * 2 * sizeof (struct ggml_tensor ) + ggml_graph_overhead_custom (graph_size , false);
1875
1872
sched -> context_buffer = malloc (sched -> context_buffer_size );
1876
1873
1877
1874
const int initial_splits_capacity = 16 ;
0 commit comments