Skip to content

Commit 731d688

Browse files
committed
prototyping the idea that supports running on CPU for a GGML_USE_CUBLAS=on build
1 parent 6e08281 commit 731d688

File tree

3 files changed

+18
-2
lines changed

3 files changed

+18
-2
lines changed

ggml-cuda.cu

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5724,6 +5724,11 @@ static void ggml_cuda_pool_free(void * ptr, size_t size) {
57245724
CUDA_CHECK(cudaFree(ptr));
57255725
}
57265726

5727+
static bool g_cublas_loaded = false;
5728+
5729+
bool ggml_cublas_loaded(void) {
5730+
return g_cublas_loaded;
5731+
}
57275732

57285733
void ggml_init_cublas() {
57295734
static bool initialized = false;
@@ -5737,7 +5742,12 @@ void ggml_init_cublas() {
57375742
CUDA_CHECK(cudaDeviceSynchronize());
57385743
#endif
57395744

5740-
CUDA_CHECK(cudaGetDeviceCount(&g_device_count));
5745+
if (cudaGetDeviceCount(&g_device_count) != cudaSuccess) {
5746+
initialized = true;
5747+
g_cublas_loaded = false;
5748+
return;
5749+
}
5750+
57415751
GGML_ASSERT(g_device_count <= GGML_CUDA_MAX_DEVICES);
57425752
int64_t total_vram = 0;
57435753
#if defined(GGML_CUDA_FORCE_MMQ)
@@ -7059,6 +7069,8 @@ static void ggml_cuda_rms_norm(const ggml_tensor * src0, const ggml_tensor * src
70597069
}
70607070

70617071
bool ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) {
7072+
if (!g_cublas_loaded) return false;
7073+
70627074
const int64_t ne10 = src1->ne[0];
70637075

70647076
const int64_t ne0 = dst->ne[0];
@@ -7722,6 +7734,8 @@ void ggml_cuda_free_scratch() {
77227734
}
77237735

77247736
bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
7737+
if (!g_cublas_loaded) return false;
7738+
77257739
ggml_cuda_func_t func;
77267740
const bool any_on_device = tensor->backend == GGML_BACKEND_GPU
77277741
|| (tensor->src[0] != nullptr && (tensor->src[0]->backend == GGML_BACKEND_GPU || tensor->src[0]->backend == GGML_BACKEND_GPU_SPLIT))

ggml-cuda.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ extern "C" {
1818
#define GGML_CUDA_MAX_DEVICES 16
1919

2020
GGML_API void ggml_init_cublas(void);
21+
GGML_API bool ggml_cublas_loaded(void);
22+
2123
GGML_API void * ggml_cuda_host_malloc(size_t size);
2224
GGML_API void ggml_cuda_host_free(void * ptr);
2325

ggml.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19981,7 +19981,7 @@ int ggml_cpu_has_blas(void) {
1998119981

1998219982
int ggml_cpu_has_cublas(void) {
1998319983
#if defined(GGML_USE_CUBLAS)
19984-
return 1;
19984+
return ggml_cublas_loaded();
1998519985
#else
1998619986
return 0;
1998719987
#endif

0 commit comments

Comments
 (0)