@@ -5724,11 +5724,6 @@ static void ggml_cuda_pool_free(void * ptr, size_t size) {
     CUDA_CHECK(cudaFree(ptr));
 }

-static bool g_cublas_loaded = false;
-
-bool ggml_cublas_loaded(void) {
-    return g_cublas_loaded;
-}

 void ggml_init_cublas() {
     static bool initialized = false;
@@ -5742,12 +5737,7 @@ void ggml_init_cublas() {
         CUDA_CHECK(cudaDeviceSynchronize());
 #endif

-        if (cudaGetDeviceCount(&g_device_count) != cudaSuccess) {
-            initialized = true;
-            g_cublas_loaded = false;
-            return;
-        }
-
+        CUDA_CHECK(cudaGetDeviceCount(&g_device_count));
         GGML_ASSERT(g_device_count <= GGML_CUDA_MAX_DEVICES);
         int64_t total_vram = 0;
 #if defined(GGML_CUDA_FORCE_MMQ)
@@ -5795,7 +5785,6 @@ void ggml_init_cublas() {
         // CUBLAS_CHECK(cublasLoggerConfigure(1, 1, 0, nullptr));

         initialized = true;
-        g_cublas_loaded = true;
     }
 }

@@ -7070,8 +7059,6 @@ static void ggml_cuda_rms_norm(const ggml_tensor * src0, const ggml_tensor * src
 }

 bool ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) {
-    if (!g_cublas_loaded) return false;
-
     const int64_t ne10 = src1->ne[0];

     const int64_t ne0 = dst->ne[0];
@@ -7735,8 +7722,6 @@ void ggml_cuda_free_scratch() {
 }

 bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
-    if (!g_cublas_loaded) return false;
-
     ggml_cuda_func_t func;
     const bool any_on_device = tensor->backend == GGML_BACKEND_GPU
         || (tensor->src[0] != nullptr && (tensor->src[0]->backend == GGML_BACKEND_GPU || tensor->src[0]->backend == GGML_BACKEND_GPU_SPLIT))
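
For context, here is a minimal, self-contained sketch (not taken from this patch) of the two initialization styles the hunks above switch between: probing cudaGetDeviceCount() and recording a soft "CUDA unavailable" result versus aborting through a CUDA_CHECK-style macro. CHECK_ABORT, init_soft, and init_hard are hypothetical names used only for illustration; the real CUDA_CHECK in ggml-cuda.cu may differ in detail.

// Build with something like: nvcc -o init_styles init_styles.cu   (hypothetical file name)
#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>

// Hypothetical stand-in for a CUDA_CHECK-style macro: abort on any error.
#define CHECK_ABORT(call)                                               \
    do {                                                                \
        cudaError_t err_ = (call);                                      \
        if (err_ != cudaSuccess) {                                      \
            fprintf(stderr, "CUDA error: %s at %s:%d\n",                \
                    cudaGetErrorString(err_), __FILE__, __LINE__);      \
            exit(1);                                                    \
        }                                                               \
    } while (0)

// Soft-fail style (the removed code): report failure and let callers
// fall back to CPU-only paths.
static bool init_soft(int * device_count) {
    return cudaGetDeviceCount(device_count) == cudaSuccess;
}

// Hard-fail style (the code kept by this patch): an unusable CUDA
// runtime is treated as a fatal error during initialization.
static void init_hard(int * device_count) {
    CHECK_ABORT(cudaGetDeviceCount(device_count));
}

int main() {
    int n = 0;
    if (init_soft(&n)) {
        printf("soft init: %d device(s)\n", n);
    } else {
        printf("soft init: CUDA unavailable, would fall back to CPU\n");
    }
    init_hard(&n); // aborts the process if the runtime cannot be queried
    printf("hard init: %d device(s)\n", n);
    return 0;
}

The soft-fail style lets a CUDA-enabled binary keep running on machines without a usable driver; the hard-fail style surfaces a broken runtime immediately at initialization time.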