ggml-org
diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
Lines changed: 1 addition & 0 deletions
diff --git a/ggml/include/ggml-blas.h b/ggml/include/ggml-blas.h
Lines changed: 2 additions & 0 deletions
diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
Lines changed: 8 additions & 6 deletions
diff --git a/ggml/src/ggml-backend-impl.h b/ggml/src/ggml-backend-impl.h
Lines changed: 3 additions & 11 deletions
diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp
Lines changed: 21 additions & 3 deletions
@@ -168,6 +168,7 @@ extern "C" {
 
     // Functions that may be obtained using ggml_backend_reg_get_proc_address
     typedef ggml_backend_buffer_type_t (*ggml_backend_split_buffer_type_t)(const float *);
+    typedef void (*ggml_backend_set_n_threads_t)(ggml_backend_t, int);
 
     //
     // Backend registry
 
@@ -17,6 +17,8 @@ GGML_API bool ggml_backend_is_blas(ggml_backend_t backend);
 // for openblas and blis, this will also set the number of threads used for blas operations
 GGML_API void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);
 
+GGML_API ggml_backend_reg_t ggml_backend_blas_reg(void);
+
 
 #ifdef  __cplusplus
 }
 
@@ -190,22 +190,24 @@ if (GGML_BLAS)
             # see https://gitlab.kitware.com/cmake/cmake/-/issues/20268
             find_package(PkgConfig REQUIRED)
             if (${GGML_BLAS_VENDOR} MATCHES "Generic")
-                pkg_check_modules(DepBLAS REQUIRED blas)
+                pkg_check_modules(DepBLAS blas)
             elseif (${GGML_BLAS_VENDOR} MATCHES "OpenBLAS")
                 # As of openblas v0.3.22, the 64-bit is named openblas64.pc
                 pkg_check_modules(DepBLAS openblas64)
                 if (NOT DepBLAS_FOUND)
-                    pkg_check_modules(DepBLAS REQUIRED openblas)
+                    pkg_check_modules(DepBLAS openblas)
                 endif()
             elseif (${GGML_BLAS_VENDOR} MATCHES "FLAME")
-                pkg_check_modules(DepBLAS REQUIRED blis)
+                add_compile_definitions(GGML_BLAS_USE_BLIS)
+                pkg_check_modules(DepBLAS blis)
             elseif (${GGML_BLAS_VENDOR} MATCHES "ATLAS")
-                pkg_check_modules(DepBLAS REQUIRED blas-atlas)
+                pkg_check_modules(DepBLAS blas-atlas)
             elseif (${GGML_BLAS_VENDOR} MATCHES "FlexiBLAS")
-                pkg_check_modules(DepBLAS REQUIRED flexiblas_api)
+                pkg_check_modules(DepBLAS flexiblas_api)
             elseif (${GGML_BLAS_VENDOR} MATCHES "Intel")
+                add_compile_definitions(GGML_BLAS_USE_MKL)
                 # all Intel* libraries share the same include path
-                pkg_check_modules(DepBLAS REQUIRED mkl-sdl)
+                pkg_check_modules(DepBLAS mkl-sdl)
             elseif (${GGML_BLAS_VENDOR} MATCHES "NVHPC")
                 # this doesn't provide pkg-config
                 # suggest to assign BLAS_INCLUDE_DIRS on your own
 
@@ -88,6 +88,7 @@ extern "C" {
 
         void (*free)(ggml_backend_t backend);
 
+        // Will be moved to the device interface
         // buffer allocation
         ggml_backend_buffer_type_t (*get_default_buffer_type)(ggml_backend_t backend);
 
@@ -112,17 +113,9 @@ extern "C" {
 
         // IMPORTANT: these functions have been moved to the device interface and will be removed from the backend interface
         //            new backends should implement the device interface instead
-
         // These functions are being moved to the device interface
-        // check if the backend can compute an operation
         bool (*supports_op)  (ggml_backend_t backend, const struct ggml_tensor * op);
-
-        // check if the backend can use tensors allocated in a buffer type
         bool (*supports_buft)(ggml_backend_t backend, ggml_backend_buffer_type_t buft);
-
-        // check if the backend wants to run an operation, even if the weights are allocated in a CPU buffer
-        // these should be expensive operations with large batch sizes that may benefit from running on this backend
-        // even if the weight has to be copied from the CPU temporarily
         bool (*offload_op)   (ggml_backend_t backend, const struct ggml_tensor * op);
 
         // (optional) event synchronization
@@ -184,9 +177,8 @@ extern "C" {
         // check if the backend can use tensors allocated in a buffer type
         bool (*supports_buft)(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft);
 
-        // check if the backend wants to run an operation, even if the weights are allocated in a CPU buffer
-        // these should be expensive operations with large batch sizes that may benefit from running on this backend
-        // even if the weight has to be copied from the CPU temporarily
+        // (optional) check if the backend wants to run an operation, even if the weights are allocated in an incompatible buffer
+        // these should be expensive operations that may benefit from running on this backend instead of the CPU backend
         bool (*offload_op)(ggml_backend_dev_t dev, const struct ggml_tensor * op);
 
         // (optional) event synchronization
 
@@ -495,7 +495,11 @@ bool ggml_backend_dev_supports_buft(ggml_backend_dev_t device, ggml_backend_buff
 }
 
 bool ggml_backend_dev_offload_op(ggml_backend_dev_t device, const struct ggml_tensor * op) {
-    return device->iface.offload_op(device, op);
+    if (device->iface.offload_op != NULL) {
+        return device->iface.offload_op(device, op);
+    }
+
+    return false;
 }
 
 // Backend (reg)
@@ -525,6 +529,10 @@ void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * na
 #include "ggml-cuda.h"
 #endif
 
+#ifdef GGML_USE_BLAS
+#include "ggml-blas.h"
+#endif
+
 struct ggml_backend_registry {
     std::vector<ggml_backend_reg_t> backends;
     std::vector<ggml_backend_dev_t> devices;
@@ -534,6 +542,10 @@ struct ggml_backend_registry {
         register_backend(ggml_backend_cuda_reg());
 #endif
 
+#ifdef GGML_USE_BLAS
+        register_backend(ggml_backend_blas_reg());
+#endif
+
         register_backend(ggml_backend_cpu_reg());
 
         // TODO: sycl, metal, vulkan, kompute, cann
@@ -1216,16 +1228,22 @@ static ggml_backend_dev_t ggml_backend_cpu_reg_get_device(ggml_backend_reg_t reg
     };
 
     return &ggml_backend_cpu_device;
+}
+
+static void * ggml_backend_cpu_get_proc_address(ggml_backend_reg_t reg, const char * name) {
+    if (strcmp(name, "ggml_backend_set_n_threads") == 0) {
+        return (void *)ggml_backend_cpu_set_n_threads;
+    }
+    return NULL;
 
     GGML_UNUSED(reg);
-    GGML_UNUSED(index);
 }
 
 static const struct ggml_backend_reg_i ggml_backend_cpu_reg_i = {
     /* .get_name         = */ ggml_backend_cpu_reg_get_name,
     /* .get_device_count = */ ggml_backend_cpu_reg_get_device_count,
     /* .get_device       = */ ggml_backend_cpu_reg_get_device,
-    /* .get_proc_address = */ NULL,
+    /* .get_proc_address = */ ggml_backend_cpu_get_proc_address,
 };
 
 ggml_backend_reg_t ggml_backend_cpu_reg(void) {
Original file line number	Diff line number	Diff line change
`@@ -17,6 +17,8 @@ GGML_API bool ggml_backend_is_blas(ggml_backend_t backend);`
`17`	`17`	`// for openblas and blis, this will also set the number of threads used for blas operations`
`18`	`18`	`GGML_API void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);`
`19`	`19`
	`20`	`+GGML_API ggml_backend_reg_t ggml_backend_blas_reg(void);`
	`21`	`+`
`20`	`22`
`21`	`23`	`#ifdef __cplusplus`
`22`	`24`	`}`