1 parent 0f3e091 commit 59ee00a
src/llama.cpp
@@ -8903,7 +8903,8 @@ static bool llm_load_tensors(
         bufs.reserve(n_max_backend_buffer);

         // check if this backend device supports buffer_from_host_ptr
-        ggml_backend_dev_t dev = ggml_backend_buft_get_device(buft);
+        // when using a host buffer as the CPU backend buffer, use the CPU device to prioritize using buffer_from_host_ptr over the host buffer
+        ggml_backend_dev_t dev = ggml_backend_buft_get_device(buft == llama_default_buffer_type_cpu(model, true) ? ggml_backend_cpu_buffer_type() : buft);
         bool buffer_from_host_ptr_supported = false;
         if (dev) {
             ggml_backend_dev_props props;
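For context, the check this hunk feeds into reads the selected device's properties and only takes the zero-copy path when the device reports buffer_from_host_ptr support. Below is a minimal sketch of that pattern against the public ggml-backend API; alloc_weights_buffer is a hypothetical helper for illustration, not code from this commit:

    // Minimal sketch, assuming the public ggml-backend API;
    // alloc_weights_buffer is a hypothetical helper, not part of this commit.
    #include "ggml-backend.h"

    static ggml_backend_buffer_t alloc_weights_buffer(ggml_backend_dev_t dev,
                                                      void * host_ptr, size_t size,
                                                      size_t max_tensor_size) {
        // query the capabilities of the device selected above
        ggml_backend_dev_props props;
        ggml_backend_dev_get_props(dev, &props);

        if (props.caps.buffer_from_host_ptr) {
            // zero-copy path: expose an existing host allocation
            // (e.g. mmap-ed model weights) directly as a backend buffer
            return ggml_backend_dev_buffer_from_host_ptr(dev, host_ptr, size, max_tensor_size);
        }

        // fallback: allocate a fresh buffer of the device's default
        // buffer type and copy the data in separately
        return ggml_backend_buft_alloc_buffer(ggml_backend_dev_buffer_type(dev), size);
    }

This is why the change above matters: when buft is the default CPU buffer type (which may actually be a backend's pinned-host buffer type), resolving the device through ggml_backend_cpu_buffer_type() makes the capability check run against the CPU device, so buffer_from_host_ptr is preferred over staging through a host buffer.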