Skip to content

Commit 0ffd861

Browse files
committed
ggml : add backend registry / device interfaces to BLAS backend
1 parent 71967c2 commit 0ffd861

File tree

8 files changed

+287
-95
lines changed

8 files changed

+287
-95
lines changed

ggml/include/ggml-backend.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ extern "C" {
168168

169169
// Functions that may be obtained using ggml_backend_reg_get_proc_address
170170
typedef ggml_backend_buffer_type_t (*ggml_backend_split_buffer_type_t)(const float *);
171+
typedef void (*ggml_backend_set_n_threads_t)(ggml_backend_t, int);
171172

172173
//
173174
// Backend registry

ggml/include/ggml-blas.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ GGML_API bool ggml_backend_is_blas(ggml_backend_t backend);
1717
// for OpenBLAS and BLIS, this will also set the number of threads used for BLAS operations
1818
GGML_API void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);
1919

20+
GGML_API ggml_backend_reg_t ggml_backend_blas_reg(void);
21+
2022

2123
#ifdef __cplusplus
2224
}

ggml/src/CMakeLists.txt

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -190,22 +190,24 @@ if (GGML_BLAS)
190190
# see https://gitlab.kitware.com/cmake/cmake/-/issues/20268
191191
find_package(PkgConfig REQUIRED)
192192
if (${GGML_BLAS_VENDOR} MATCHES "Generic")
193-
pkg_check_modules(DepBLAS REQUIRED blas)
193+
pkg_check_modules(DepBLAS blas)
194194
elseif (${GGML_BLAS_VENDOR} MATCHES "OpenBLAS")
195195
# As of openblas v0.3.22, the 64-bit pkg-config file is named openblas64.pc
196196
pkg_check_modules(DepBLAS openblas64)
197197
if (NOT DepBLAS_FOUND)
198-
pkg_check_modules(DepBLAS REQUIRED openblas)
198+
pkg_check_modules(DepBLAS openblas)
199199
endif()
200200
elseif (${GGML_BLAS_VENDOR} MATCHES "FLAME")
201-
pkg_check_modules(DepBLAS REQUIRED blis)
201+
add_compile_definitions(GGML_BLAS_USE_BLIS)
202+
pkg_check_modules(DepBLAS blis)
202203
elseif (${GGML_BLAS_VENDOR} MATCHES "ATLAS")
203-
pkg_check_modules(DepBLAS REQUIRED blas-atlas)
204+
pkg_check_modules(DepBLAS blas-atlas)
204205
elseif (${GGML_BLAS_VENDOR} MATCHES "FlexiBLAS")
205-
pkg_check_modules(DepBLAS REQUIRED flexiblas_api)
206+
pkg_check_modules(DepBLAS flexiblas_api)
206207
elseif (${GGML_BLAS_VENDOR} MATCHES "Intel")
208+
add_compile_definitions(GGML_BLAS_USE_MKL)
207209
# all Intel* libraries share the same include path
208-
pkg_check_modules(DepBLAS REQUIRED mkl-sdl)
210+
pkg_check_modules(DepBLAS mkl-sdl)
209211
elseif (${GGML_BLAS_VENDOR} MATCHES "NVHPC")
210212
# this doesn't provide pkg-config
211213
# it is suggested to set BLAS_INCLUDE_DIRS manually

ggml/src/ggml-backend-impl.h

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ extern "C" {
8888

8989
void (*free)(ggml_backend_t backend);
9090

91+
// Will be moved to the device interface
9192
// buffer allocation
9293
ggml_backend_buffer_type_t (*get_default_buffer_type)(ggml_backend_t backend);
9394

@@ -112,17 +113,9 @@ extern "C" {
112113

113114
// IMPORTANT: these functions have been moved to the device interface and will be removed from the backend interface
114115
// new backends should implement the device interface instead
115-
116116
// These functions are being moved to the device interface
117-
// check if the backend can compute an operation
118117
bool (*supports_op) (ggml_backend_t backend, const struct ggml_tensor * op);
119-
120-
// check if the backend can use tensors allocated in a buffer type
121118
bool (*supports_buft)(ggml_backend_t backend, ggml_backend_buffer_type_t buft);
122-
123-
// check if the backend wants to run an operation, even if the weights are allocated in a CPU buffer
124-
// these should be expensive operations with large batch sizes that may benefit from running on this backend
125-
// even if the weight has to be copied from the CPU temporarily
126119
bool (*offload_op) (ggml_backend_t backend, const struct ggml_tensor * op);
127120

128121
// (optional) event synchronization
@@ -184,9 +177,8 @@ extern "C" {
184177
// check if the backend can use tensors allocated in a buffer type
185178
bool (*supports_buft)(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft);
186179

187-
// check if the backend wants to run an operation, even if the weights are allocated in a CPU buffer
188-
// these should be expensive operations with large batch sizes that may benefit from running on this backend
189-
// even if the weight has to be copied from the CPU temporarily
180+
// (optional) check if the backend wants to run an operation, even if the weights are allocated in an incompatible buffer
181+
// these should be expensive operations that may benefit from running on this backend instead of the CPU backend
190182
bool (*offload_op)(ggml_backend_dev_t dev, const struct ggml_tensor * op);
191183

192184
// (optional) event synchronization

ggml/src/ggml-backend.cpp

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -495,7 +495,11 @@ bool ggml_backend_dev_supports_buft(ggml_backend_dev_t device, ggml_backend_buff
495495
}
496496

497497
bool ggml_backend_dev_offload_op(ggml_backend_dev_t device, const struct ggml_tensor * op) {
498-
return device->iface.offload_op(device, op);
498+
if (device->iface.offload_op != NULL) {
499+
return device->iface.offload_op(device, op);
500+
}
501+
502+
return false;
499503
}
500504

501505
// Backend (reg)
@@ -525,6 +529,10 @@ void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * na
525529
#include "ggml-cuda.h"
526530
#endif
527531

532+
#ifdef GGML_USE_BLAS
533+
#include "ggml-blas.h"
534+
#endif
535+
528536
struct ggml_backend_registry {
529537
std::vector<ggml_backend_reg_t> backends;
530538
std::vector<ggml_backend_dev_t> devices;
@@ -534,6 +542,10 @@ struct ggml_backend_registry {
534542
register_backend(ggml_backend_cuda_reg());
535543
#endif
536544

545+
#ifdef GGML_USE_BLAS
546+
register_backend(ggml_backend_blas_reg());
547+
#endif
548+
537549
register_backend(ggml_backend_cpu_reg());
538550

539551
// TODO: sycl, metal, vulkan, kompute, cann
@@ -1216,16 +1228,22 @@ static ggml_backend_dev_t ggml_backend_cpu_reg_get_device(ggml_backend_reg_t reg
12161228
};
12171229

12181230
return &ggml_backend_cpu_device;
1231+
}
1232+
1233+
static void * ggml_backend_cpu_get_proc_address(ggml_backend_reg_t reg, const char * name) {
1234+
if (strcmp(name, "ggml_backend_set_n_threads") == 0) {
1235+
return (void *)ggml_backend_cpu_set_n_threads;
1236+
}
1237+
return NULL;
12191238

12201239
GGML_UNUSED(reg);
1221-
GGML_UNUSED(index);
12221240
}
12231241

12241242
static const struct ggml_backend_reg_i ggml_backend_cpu_reg_i = {
12251243
/* .get_name = */ ggml_backend_cpu_reg_get_name,
12261244
/* .get_device_count = */ ggml_backend_cpu_reg_get_device_count,
12271245
/* .get_device = */ ggml_backend_cpu_reg_get_device,
1228-
/* .get_proc_address = */ NULL,
1246+
/* .get_proc_address = */ ggml_backend_cpu_get_proc_address,
12291247
};
12301248

12311249
ggml_backend_reg_t ggml_backend_cpu_reg(void) {

0 commit comments

Comments
 (0)