Commit cc9299c

update backends
ggml-ci
1 parent 0661e6a

File tree

6 files changed (+6 lines, -6 lines)

ggml-cuda.cu

Lines changed: 1 addition & 5 deletions
@@ -7791,10 +7791,6 @@ struct cuda_pool_alloc {
 
 static bool g_cublas_loaded = false;
 
-static bool ggml_cublas_loaded(void) {
-    return g_cublas_loaded;
-}
-
 static void ggml_init_cublas() {
     static bool initialized = false;
 
@@ -11381,7 +11377,7 @@ GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, cons
 GGML_CALL static bool ggml_backend_cuda_offload_op(ggml_backend_t backend, const ggml_tensor * op) {
     const int min_batch_size = 32;
 
-    return op->ne[1] > min_batch_size && op->op != GGML_OP_GET_ROWS;
+    return op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS;
 
     UNUSED(backend);
 }
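
The behavioral change here is the comparison in ggml_backend_cuda_offload_op: a batch of exactly min_batch_size (32) tokens now qualifies for offloading. Below is a minimal sketch of the same predicate in isolation; the fake_* types are illustrative stand-ins, not the real ggml API.

#include <cstdio>

// Illustrative stand-ins for the ggml types; not the real ggml API.
enum fake_op { FAKE_OP_MUL_MAT, FAKE_OP_GET_ROWS };
struct fake_tensor { long ne[4]; fake_op op; };

// Mirrors the patched predicate: offload once the batch dimension (ne[1])
// reaches the threshold, except for GET_ROWS, which does not benefit.
static bool should_offload(const fake_tensor * t) {
    const int min_batch_size = 32;
    return t->ne[1] >= min_batch_size && t->op != FAKE_OP_GET_ROWS;
}

int main() {
    fake_tensor t = { {4096, 32, 1, 1}, FAKE_OP_MUL_MAT };
    // Under the old ">" comparison a 32-token batch stayed put;
    // with ">=" it is now offloaded.
    printf("offload: %s\n", should_offload(&t) ? "yes" : "no");
    return 0;
}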

ggml-kompute.cpp

Lines changed: 1 addition & 0 deletions
@@ -1951,6 +1951,7 @@ static struct ggml_backend_i kompute_backend_i = {
     /* .graph_plan_compute = */ NULL,
     /* .graph_compute = */ ggml_backend_kompute_graph_compute,
     /* .supports_op = */ ggml_backend_kompute_supports_op,
+    /* .offload_op = */ NULL,
     /* .event_new = */ NULL,
     /* .event_free = */ NULL,
     /* .event_record = */ NULL,

ggml-metal.m

Lines changed: 1 addition & 0 deletions
@@ -2837,6 +2837,7 @@ GGML_CALL static bool ggml_backend_metal_supports_op(ggml_backend_t backend, con
     /* .graph_plan_compute = */ NULL,
     /* .graph_compute = */ ggml_backend_metal_graph_compute,
     /* .supports_op = */ ggml_backend_metal_supports_op,
+    /* .offload_op = */ NULL,
     /* .event_new = */ NULL,
     /* .event_free = */ NULL,
     /* .event_record = */ NULL,

ggml-sycl.cpp

Lines changed: 1 addition & 0 deletions
@@ -17390,6 +17390,7 @@ static ggml_backend_i ggml_backend_sycl_interface = {
     /* .graph_plan_compute = */ NULL,
     /* .graph_compute = */ ggml_backend_sycl_graph_compute,
     /* .supports_op = */ ggml_backend_sycl_supports_op,
+    /* .offload_op = */ NULL,
     /* .event_new = */ NULL,
     /* .event_free = */ NULL,
     /* .event_record = */ NULL,

ggml-vulkan.cpp

Lines changed: 1 addition & 0 deletions
@@ -5693,6 +5693,7 @@ static ggml_backend_i ggml_backend_vk_interface = {
     /* .graph_plan_compute = */ NULL,
     /* .graph_compute = */ ggml_backend_vk_graph_compute,
     /* .supports_op = */ ggml_backend_vk_supports_op,
+    /* .offload_op = */ NULL,
     /* .event_new = */ NULL,
     /* .event_free = */ NULL,
     /* .event_record = */ NULL,

llama.cpp

Lines changed: 1 addition & 1 deletion
@@ -8614,7 +8614,7 @@ static struct ggml_cgraph * llama_build_graph(
         // norm may be automatically assigned to the backend of the previous layer, increasing data transfer between backends
         // FIXME: fix in ggml_backend_sched
         const bool full_offload = lctx.model.n_gpu_layers > (int)lctx.model.hparams.n_layer;
-        if (batch.n_tokens <= 32 || full_offload) {
+        if (batch.n_tokens < 32 || full_offload) {
             if (il != -1 && strcmp(name, "norm") == 0) {
                 for (auto * backend : lctx.backends) {
                     if (ggml_backend_buft_supports_backend(lctx.model.buft_layer[il].buft, backend)) {
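
Taken together with the CUDA change above, the two thresholds stay complementary: ggml-cuda.cu offloads at n_tokens >= 32, while this llama.cpp special case for norm tensors now applies only when n_tokens < 32, so the boundary case of exactly 32 tokens moves to the offload side in both files. A small self-check of that partition, using simplified predicates rather than the real call sites:

#include <cassert>
#include <cstdio>

// Simplified stand-ins for the two patched conditions.
static bool cuda_offload(int n_tokens)    { return n_tokens >= 32; } // ggml-cuda.cu
static bool keep_norm_local(int n_tokens) { return n_tokens <  32; } // llama.cpp

int main() {
    // Exactly one of the two policies applies at every batch size;
    // before this commit, n_tokens == 32 satisfied neither offload gate.
    for (int n = 1; n <= 64; ++n) {
        assert(cuda_offload(n) != keep_norm_local(n));
    }
    printf("thresholds are complementary\n");
    return 0;
}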
