Skip to content

ggml : add backend registry / device interfaces to BLAS backend #9752

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Oct 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ggml/include/ggml-backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ extern "C" {

// Functions that may be obtained using ggml_backend_reg_get_proc_address
typedef ggml_backend_buffer_type_t (*ggml_backend_split_buffer_type_t)(const float *);
typedef void (*ggml_backend_set_n_threads_t)(ggml_backend_t, int);

//
// Backend registry
Expand Down
2 changes: 2 additions & 0 deletions ggml/include/ggml-blas.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ GGML_API bool ggml_backend_is_blas(ggml_backend_t backend);
// for openblas and blis, this will also set the number of threads used for blas operations
GGML_API void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);

GGML_API ggml_backend_reg_t ggml_backend_blas_reg(void);


#ifdef __cplusplus
}
Expand Down
14 changes: 8 additions & 6 deletions ggml/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -190,22 +190,24 @@ if (GGML_BLAS)
# see https://gitlab.kitware.com/cmake/cmake/-/issues/20268
find_package(PkgConfig REQUIRED)
if (${GGML_BLAS_VENDOR} MATCHES "Generic")
pkg_check_modules(DepBLAS REQUIRED blas)
pkg_check_modules(DepBLAS blas)
elseif (${GGML_BLAS_VENDOR} MATCHES "OpenBLAS")
# As of openblas v0.3.22, the 64-bit is named openblas64.pc
pkg_check_modules(DepBLAS openblas64)
if (NOT DepBLAS_FOUND)
pkg_check_modules(DepBLAS REQUIRED openblas)
pkg_check_modules(DepBLAS openblas)
endif()
elseif (${GGML_BLAS_VENDOR} MATCHES "FLAME")
pkg_check_modules(DepBLAS REQUIRED blis)
add_compile_definitions(GGML_BLAS_USE_BLIS)
pkg_check_modules(DepBLAS blis)
elseif (${GGML_BLAS_VENDOR} MATCHES "ATLAS")
pkg_check_modules(DepBLAS REQUIRED blas-atlas)
pkg_check_modules(DepBLAS blas-atlas)
elseif (${GGML_BLAS_VENDOR} MATCHES "FlexiBLAS")
pkg_check_modules(DepBLAS REQUIRED flexiblas_api)
pkg_check_modules(DepBLAS flexiblas_api)
elseif (${GGML_BLAS_VENDOR} MATCHES "Intel")
add_compile_definitions(GGML_BLAS_USE_MKL)
# all Intel* libraries share the same include path
pkg_check_modules(DepBLAS REQUIRED mkl-sdl)
pkg_check_modules(DepBLAS mkl-sdl)
elseif (${GGML_BLAS_VENDOR} MATCHES "NVHPC")
# this doesn't provide pkg-config
# suggest to assign BLAS_INCLUDE_DIRS on your own
Expand Down
14 changes: 3 additions & 11 deletions ggml/src/ggml-backend-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ extern "C" {

void (*free)(ggml_backend_t backend);

// Will be moved to the device interface
// buffer allocation
ggml_backend_buffer_type_t (*get_default_buffer_type)(ggml_backend_t backend);

Expand All @@ -112,17 +113,9 @@ extern "C" {

// IMPORTANT: these functions have been moved to the device interface and will be removed from the backend interface
// new backends should implement the device interface instead

// These functions are being moved to the device interface
// check if the backend can compute an operation
bool (*supports_op) (ggml_backend_t backend, const struct ggml_tensor * op);

// check if the backend can use tensors allocated in a buffer type
bool (*supports_buft)(ggml_backend_t backend, ggml_backend_buffer_type_t buft);

// check if the backend wants to run an operation, even if the weights are allocated in a CPU buffer
// these should be expensive operations with large batch sizes that may benefit from running on this backend
// even if the weight has to be copied from the CPU temporarily
bool (*offload_op) (ggml_backend_t backend, const struct ggml_tensor * op);

// (optional) event synchronization
Expand Down Expand Up @@ -184,9 +177,8 @@ extern "C" {
// check if the backend can use tensors allocated in a buffer type
bool (*supports_buft)(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft);

// check if the backend wants to run an operation, even if the weights are allocated in a CPU buffer
// these should be expensive operations with large batch sizes that may benefit from running on this backend
// even if the weight has to be copied from the CPU temporarily
// (optional) check if the backend wants to run an operation, even if the weights are allocated in an incompatible buffer
// these should be expensive operations that may benefit from running on this backend instead of the CPU backend
bool (*offload_op)(ggml_backend_dev_t dev, const struct ggml_tensor * op);

// (optional) event synchronization
Expand Down
27 changes: 22 additions & 5 deletions ggml/src/ggml-backend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,11 @@ bool ggml_backend_dev_supports_buft(ggml_backend_dev_t device, ggml_backend_buff
}

bool ggml_backend_dev_offload_op(ggml_backend_dev_t device, const struct ggml_tensor * op) {
return device->iface.offload_op(device, op);
if (device->iface.offload_op != NULL) {
return device->iface.offload_op(device, op);
}

return false;
}

// Backend (reg)
Expand Down Expand Up @@ -534,6 +538,10 @@ void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * na
#include "ggml-metal.h"
#endif

#ifdef GGML_USE_BLAS
#include "ggml-blas.h"
#endif

struct ggml_backend_registry {
std::vector<ggml_backend_reg_t> backends;
std::vector<ggml_backend_dev_t> devices;
Expand All @@ -545,10 +553,13 @@ struct ggml_backend_registry {
#ifdef GGML_USE_METAL
register_backend(ggml_backend_metal_reg());
#endif

register_backend(ggml_backend_cpu_reg());
#ifdef GGML_USE_BLAS
register_backend(ggml_backend_blas_reg());
#endif

// TODO: sycl, vulkan, kompute, cann

register_backend(ggml_backend_cpu_reg());
}

void register_backend(ggml_backend_reg_t reg) {
Expand Down Expand Up @@ -1229,16 +1240,22 @@ static ggml_backend_dev_t ggml_backend_cpu_reg_get_device(ggml_backend_reg_t reg
};

return &ggml_backend_cpu_device;
}

static void * ggml_backend_cpu_get_proc_address(ggml_backend_reg_t reg, const char * name) {
if (strcmp(name, "ggml_backend_set_n_threads") == 0) {
return (void *)ggml_backend_cpu_set_n_threads;
}
return NULL;

GGML_UNUSED(reg);
GGML_UNUSED(index);
}

static const struct ggml_backend_reg_i ggml_backend_cpu_reg_i = {
/* .get_name = */ ggml_backend_cpu_reg_get_name,
/* .get_device_count = */ ggml_backend_cpu_reg_get_device_count,
/* .get_device = */ ggml_backend_cpu_reg_get_device,
/* .get_proc_address = */ NULL,
/* .get_proc_address = */ ggml_backend_cpu_get_proc_address,
};

ggml_backend_reg_t ggml_backend_cpu_reg(void) {
Expand Down
Loading
Loading