Skip to content

Commit e73fcdf

Browse files
ggerganov, slaren
authored and committed
ggml : add metal backend registry / device (ggml-org#9713)
* ggml : add metal backend registry / device ggml-ci
* metal : fix names [no ci]
* metal : global registry and device instances ggml-ci
* cont : alternative initialization of global objects ggml-ci
* llama : adapt to backend changes ggml-ci
* fixes
* metal : fix indent
* metal : fix build when MTLGPUFamilyApple3 is not available ggml-ci
* fix merge
* metal : avoid unnecessary singleton accesses ggml-ci
* metal : minor fix [no ci]
* metal : g_state -> g_ggml_ctx_dev_main [no ci]
* metal : avoid reference of device context in the backend context ggml-ci
* metal : minor [no ci]
* metal : fix maxTransferRate check
* metal : remove transfer rate stuff
---------
Co-authored-by: slaren <[email protected]>
1 parent ae65769 commit e73fcdf

File tree

6 files changed

+535
-284
lines changed

6 files changed

+535
-284
lines changed

ggml/include/ggml-backend.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -127,6 +127,8 @@ extern "C" {
127127
bool async;
128128
// pinned host buffer
129129
bool host_buffer;
130+
// creating buffers from host ptr
131+
bool buffer_from_host_ptr;
130132
// event synchronization
131133
bool events;
132134
};

ggml/include/ggml-metal.h

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,9 @@ GGML_API ggml_backend_t ggml_backend_metal_init(void);
4343

4444
GGML_API bool ggml_backend_is_metal(ggml_backend_t backend);
4545

46-
GGML_API ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size);
46+
GGML_DEPRECATED(
47+
GGML_API ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size),
48+
"obsoleted by the new device interface - https://github.com/ggerganov/llama.cpp/pull/9713");
4749

4850
GGML_API void ggml_backend_metal_set_abort_callback(ggml_backend_t backend, ggml_abort_callback abort_callback, void * user_data);
4951

@@ -57,6 +59,8 @@ GGML_API bool ggml_backend_metal_supports_family(ggml_backend_t backend, int fam
5759
// capture all command buffers committed the next time `ggml_backend_graph_compute` is called
5860
GGML_API void ggml_backend_metal_capture_next_compute(ggml_backend_t backend);
5961

62+
GGML_API ggml_backend_reg_t ggml_backend_metal_reg(void);
63+
6064
#ifdef __cplusplus
6165
}
6266
#endif

ggml/src/ggml-backend.cpp

Lines changed: 17 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -463,6 +463,7 @@ enum ggml_backend_dev_type ggml_backend_dev_type(ggml_backend_dev_t device) {
463463
}
464464

465465
void ggml_backend_dev_get_props(ggml_backend_dev_t device, struct ggml_backend_dev_props * props) {
466+
memset(props, 0, sizeof(*props));
466467
device->iface.get_props(device, props);
467468
}
468469

@@ -479,6 +480,10 @@ ggml_backend_buffer_type_t ggml_backend_dev_buffer_type(ggml_backend_dev_t devic
479480
}
480481

481482
ggml_backend_buffer_type_t ggml_backend_dev_host_buffer_type(ggml_backend_dev_t device) {
483+
if (device->iface.get_host_buffer_type == NULL) {
484+
return NULL;
485+
}
486+
482487
return device->iface.get_host_buffer_type(device);
483488
}
484489

@@ -525,6 +530,10 @@ void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * na
525530
#include "ggml-cuda.h"
526531
#endif
527532

533+
#ifdef GGML_USE_METAL
534+
#include "ggml-metal.h"
535+
#endif
536+
528537
struct ggml_backend_registry {
529538
std::vector<ggml_backend_reg_t> backends;
530539
std::vector<ggml_backend_dev_t> devices;
@@ -533,10 +542,13 @@ struct ggml_backend_registry {
533542
#ifdef GGML_USE_CUDA
534543
register_backend(ggml_backend_cuda_reg());
535544
#endif
545+
#ifdef GGML_USE_METAL
546+
register_backend(ggml_backend_metal_reg());
547+
#endif
536548

537549
register_backend(ggml_backend_cpu_reg());
538550

539-
// TODO: sycl, metal, vulkan, kompute, cann
551+
// TODO: sycl, vulkan, kompute, cann
540552
}
541553

542554
void register_backend(ggml_backend_reg_t reg) {
@@ -1118,9 +1130,10 @@ static void ggml_backend_cpu_device_get_props(ggml_backend_dev_t dev, struct ggm
11181130
props->type = ggml_backend_cpu_device_get_type(dev);
11191131
ggml_backend_cpu_device_get_memory(dev, &props->memory_free, &props->memory_total);
11201132
props->caps = {
1121-
/* async */ false,
1122-
/* host_buffer */ false,
1123-
/* events */ false,
1133+
/* .async = */ false,
1134+
/* .host_buffer = */ false,
1135+
/* .buffer_from_host_ptr = */ true,
1136+
/* .events = */ false,
11241137
};
11251138
}
11261139

ggml/src/ggml-cuda.cu

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2920,9 +2920,10 @@ static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_back
29202920
#endif
29212921

29222922
props->caps = {
2923-
/* async */ true,
2924-
/* host_buffer */ host_buffer,
2925-
/* events */ events,
2923+
/* .async = */ true,
2924+
/* .host_buffer = */ host_buffer,
2925+
/* .buffer_from_host_ptr = */ false,
2926+
/* .events = */ events,
29262927
};
29272928
}
29282929

0 commit comments

Comments
 (0)