Skip to content

Commit 59ee00a

Browse files
committed
fix mmap usage when using host buffers
1 parent 0f3e091 commit 59ee00a

File tree

1 file changed

+2
-1
lines changed

1 file changed

+2
-1
lines changed

src/llama.cpp

Lines changed: 2 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -8903,7 +8903,8 @@ static bool llm_load_tensors(
89038903
bufs.reserve(n_max_backend_buffer);
89048904

89058905
// check if this backend device supports buffer_from_host_ptr
8906-
ggml_backend_dev_t dev = ggml_backend_buft_get_device(buft);
8906+
// when using a host buffer as the CPU backend buffer, use the CPU device to prioritize using buffer_from_host_ptr over the host buffer
8907+
ggml_backend_dev_t dev = ggml_backend_buft_get_device(buft == llama_default_buffer_type_cpu(model, true) ? ggml_backend_cpu_buffer_type() : buft);
89078908
bool buffer_from_host_ptr_supported = false;
89088909
if (dev) {
89098910
ggml_backend_dev_props props;

0 commit comments

Comments
 (0)