@@ -5724,6 +5724,11 @@ static void ggml_cuda_pool_free(void * ptr, size_t size) {
     CUDA_CHECK(cudaFree(ptr));
 }
 
+static bool g_cublas_loaded = false;
+
+bool ggml_cublas_loaded(void) {
+    return g_cublas_loaded;
+}
 
 void ggml_init_cublas() {
     static bool initialized = false;
@@ -5737,7 +5742,12 @@ void ggml_init_cublas() {
         CUDA_CHECK(cudaDeviceSynchronize());
 #endif
 
-        CUDA_CHECK(cudaGetDeviceCount(&g_device_count));
+        if (cudaGetDeviceCount(&g_device_count) != cudaSuccess) {
+            initialized = true;
+            g_cublas_loaded = false;
+            return;
+        }
+
         GGML_ASSERT(g_device_count <= GGML_CUDA_MAX_DEVICES);
         int64_t total_vram = 0;
 #if defined(GGML_CUDA_FORCE_MMQ)
@@ -5785,6 +5795,7 @@ void ggml_init_cublas() {
         // CUBLAS_CHECK(cublasLoggerConfigure(1, 1, 0, nullptr));
 
         initialized = true;
+        g_cublas_loaded = true;
     }
 }
 
@@ -7059,6 +7070,8 @@ static void ggml_cuda_rms_norm(const ggml_tensor * src0, const ggml_tensor * src
 }
 
 bool ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) {
+    if (!g_cublas_loaded) return false;
+
     const int64_t ne10 = src1->ne[0];
 
     const int64_t ne0 = dst->ne[0];
@@ -7722,6 +7735,8 @@ void ggml_cuda_free_scratch() {
 }
 
 bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
+    if (!g_cublas_loaded) return false;
+
     ggml_cuda_func_t func;
     const bool any_on_device = tensor->backend == GGML_BACKEND_GPU
         || (tensor->src[0] != nullptr && (tensor->src[0]->backend == GGML_BACKEND_GPU || tensor->src[0]->backend == GGML_BACKEND_GPU_SPLIT))
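
Taken together, these hunks let ggml_init_cublas() mark itself initialized and return instead of aborting through CUDA_CHECK when no CUDA device can be enumerated, and expose the outcome through the new ggml_cublas_loaded() accessor, which the matmul and forward-compute entry points then use to bail out to the CPU path. A minimal caller-side sketch is shown below; it is not part of the patch, and the header name and CPU-fallback handling are assumptions for illustration only.

// Hypothetical usage sketch (not from the patch), assuming ggml-cuda.h
// declares ggml_init_cublas() and the newly added ggml_cublas_loaded().
// It initializes cuBLAS and falls back to CPU-only execution when no
// CUDA device was found, mirroring the guards added above.
#include <cstdio>
#include "ggml-cuda.h"

int main(void) {
    ggml_init_cublas();                 // no longer hard-fails without a GPU
    if (!ggml_cublas_loaded()) {
        fprintf(stderr, "cuBLAS unavailable, running on CPU only\n");
        // ... keep tensors on the CPU backend and skip GPU offload ...
    }
    return 0;
}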