
Commit ca8fa3b

Commit message: update
1 parent ad5856d, commit ca8fa3b

File tree: 1 file changed (+36, -18 lines)


llama.cpp

Lines changed: 36 additions & 18 deletions
@@ -570,19 +570,37 @@ static void ggml_graph_compute_helper(std::vector<uint8_t> & buf, ggml_cgraph *
 // llama helpers
 //

+static inline void * llama_host_malloc(size_t n) {
 #ifdef GGML_USE_CUBLAS
-#   define llama_host_malloc(n)  ggml_cuda_host_malloc(n)
-#   define llama_host_free(data) ggml_cuda_host_free(data)
+    if (ggml_cublas_loaded()) {
+        return ggml_cuda_host_malloc(n);
+    } else {
+        return malloc(n);
+    }
+#elif GGML_USE_METAL
+    return ggml_metal_host_malloc(n);
+#elif GGML_USE_CPU_HBM
+    return hbw_malloc(n);
+#else
+    return malloc(n);
+#endif
+}
+
+static inline void llama_host_free(void * ptr) {
+#ifdef GGML_USE_CUBLAS
+    if (ggml_cublas_loaded()) {
+        return ggml_cuda_host_free(ptr);
+    } else {
+        return free(ptr);
+    }
 #elif GGML_USE_METAL
-#   define llama_host_malloc(n)  ggml_metal_host_malloc(n)
-#   define llama_host_free(data) ggml_metal_host_free(data)
+    return ggml_metal_host_free(ptr);
 #elif GGML_USE_CPU_HBM
-#   define llama_host_malloc(n)  hbw_malloc(n)
-#   define llama_host_free(data) if (data != NULL) hbw_free(data)
+    return hbw_free(ptr);
 #else
-#   define llama_host_malloc(n)  malloc(n)
-#   define llama_host_free(data) free(data)
+    return free(ptr);
 #endif
+}

 #if defined(_WIN32)
 static std::string llama_format_win_err(DWORD err) {
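The switch from per-backend macros to real inline functions is what enables the runtime fallback: in a GGML_USE_CUBLAS build, llama_host_malloc can now test ggml_cublas_loaded() and hand out pinned CUDA host memory only when a device was actually initialized, falling back to plain malloc otherwise. A minimal sketch of how a caller might use the pair (the wrapper type below is illustrative, not the one in llama.cpp):

    // Hypothetical RAII-style wrapper over the new helpers.
    struct host_buffer {
        void * data = nullptr;
        size_t size = 0;

        void resize(size_t n) {
            if (data) {
                llama_host_free(data);   // CUDA pinned free, Metal free, hbw_free, or free
            }
            data = llama_host_malloc(n); // pinned host memory when ggml_cublas_loaded(), else malloc
            size = (data != nullptr) ? n : 0;
        }

        ~host_buffer() {
            if (data) {
                llama_host_free(data);
            }
        }
    };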
@@ -1160,12 +1178,12 @@ struct llama_kv_cache {
             ggml_free(ctx);
         }

-        if (ggml_cpu_has_cublas()) {
 #ifdef GGML_USE_CUBLAS
+        if (ggml_cublas_loaded()) {
             ggml_cuda_free_data(k);
             ggml_cuda_free_data(v);
-#endif
         }
+#endif
     }
 };
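This hunk and the ones below all apply the same fix: the compile-time guard moves outside and the runtime check moves inside, replacing ggml_cpu_has_cublas() (a build-capability query) with ggml_cublas_loaded() (was a CUDA device actually initialized?). The recurring pattern, shown on a hypothetical helper:

    // Sketch of the guard ordering; release_gpu_copy is illustrative, not from the diff.
    static void release_gpu_copy(struct ggml_tensor * t) {
        (void) t; // unused in non-CUDA builds

    #ifdef GGML_USE_CUBLAS
        if (ggml_cublas_loaded()) {   // runtime: device present and initialized
            ggml_cuda_free_data(t);   // free the tensor's VRAM copy
        }
    #endif
    }

In a non-CUDA build the block compiles away entirely; in a CUDA build that ends up running on a machine without a usable GPU, the free is skipped because nothing was ever uploaded.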

@@ -1264,14 +1282,14 @@ struct llama_model {
             ggml_free(ctx);
         }

-        if (ggml_cpu_has_cublas()) {
 #ifdef GGML_USE_CUBLAS
+        if (ggml_cublas_loaded()) {
             for (size_t i = 0; i < tensors_by_name.size(); ++i) {
                 ggml_cuda_free_data(tensors_by_name[i].second);
             }
             ggml_cuda_free_scratch();
-#endif
         }
+#endif

 #if defined(GGML_USE_CLBLAST)
         for (size_t i = 0; i < tensors_by_name.size(); ++i) {
@@ -1386,8 +1404,8 @@ static bool llama_kv_cache_init(

     (void) n_gpu_layers;

-    if (ggml_cpu_has_cublas()) {
 #ifdef GGML_USE_CUBLAS
+    if (ggml_cublas_loaded()) {
         size_t vram_kv_cache = 0;

         if (n_gpu_layers > (int)n_layer + 1) {
@@ -1403,8 +1421,8 @@ static bool llama_kv_cache_init(
         if (vram_kv_cache > 0) {
             LLAMA_LOG_INFO("%s: VRAM kv self = %.2f MB\n", __func__, vram_kv_cache / 1024.0 / 1024.0);
         }
-#endif
     }
+#endif

     return true;
 }
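For scale, the vram_kv_cache figure logged here is the combined size of the K and V caches for the offloaded layers. A back-of-envelope example (assumptions: f16 cache elements, all layers offloaded, and 7B-class dimensions, which are illustrative and not taken from this diff):

    // 2 tensors (K and V) * n_layer * n_ctx * n_embd * sizeof(f16)
    size_t n_layer = 32, n_ctx = 4096, n_embd = 4096;
    size_t vram_kv_cache = 2 * n_layer * n_ctx * n_embd * 2;
    // = 2147483648 bytes -> logged as "VRAM kv self = 2048.00 MB"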
@@ -2468,19 +2486,19 @@ static void llm_load_tensors(
     enum ggml_backend_type llama_backend_offload = GGML_BACKEND_CPU;
     enum ggml_backend_type llama_backend_offload_split = GGML_BACKEND_CPU;

-    if (ggml_cpu_has_cublas()) {
 #ifdef GGML_USE_CUBLAS
+    if (ggml_cublas_loaded()) {
         LLAMA_LOG_INFO("%s: using " GGML_CUDA_NAME " for GPU acceleration\n", __func__);
         ggml_cuda_set_main_device(main_gpu);

         llama_backend_offload = GGML_BACKEND_GPU;
         llama_backend_offload_split = GGML_BACKEND_GPU_SPLIT;
-#endif
-    } else if (ggml_cpu_has_clblast()) {
+    }
+#elif GGML_USE_CLBLAST
         LLAMA_LOG_INFO("%s: using OpenCL for GPU acceleration\n", __func__);
         llama_backend_offload = GGML_BACKEND_GPU;
         llama_backend_offload_split = GGML_BACKEND_GPU;
-    }
+#endif

     // prepare memory for the weights
     size_t vram_weights = 0;
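Condensed, backend selection after this hunk resolves at compile time first and at runtime second: if ggml_cublas_loaded() returns false, both offload variables keep their GGML_BACKEND_CPU defaults, so a CUDA-enabled binary still runs on a CPU-only machine. A sketch of the resulting decision, not the literal code:

    enum ggml_backend_type offload = GGML_BACKEND_CPU; // default: everything stays on CPU
    #ifdef GGML_USE_CUBLAS
        if (ggml_cublas_loaded()) {   // runtime probe
            offload = GGML_BACKEND_GPU;
        }                             // else: CUDA build, CPU execution
    #elif GGML_USE_CLBLAST
        offload = GGML_BACKEND_GPU;   // OpenCL path is compile-time only
    #endif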
