
Commit d9024df

Fixed mmap prefetch for GPU offloading
1 parent 86c3219 commit d9024df

2 files changed: +4 -4 lines

llama-util.h

Lines changed: 1 addition & 1 deletion
@@ -219,7 +219,7 @@ struct llama_mmap {
         // prefetch/readahead impairs performance on NUMA systems
         if (numa) { prefetch = 0; }
 #ifdef __linux__
-        if (prefetch) { flags |= MAP_POPULATE; }
+        if (prefetch >= file->size) { flags |= MAP_POPULATE; }
 #endif
         addr = mmap(NULL, file->size, PROT_READ, flags, fd, 0);
         if (addr == MAP_FAILED) {
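The old condition requested MAP_POPULATE whenever any prefetching was wanted, which faults the entire file into memory even when most tensors are offloaded to the GPU. Treating `prefetch` as a byte count and requiring `prefetch >= file->size` restricts MAP_POPULATE to full-file prefetches. A minimal sketch of how a partial prefetch could still be served after `mmap()` returns; the `posix_madvise` call and the helper are assumptions for illustration and are not part of this diff:

#include <cstddef>
#include <cstdio>
#include <sys/mman.h>

// Sketch (assumption, not in this commit): after mmap() succeeds, ask the
// kernel to read ahead only the first `prefetch` bytes when less than the
// whole file will be touched; MAP_POPULATE already covers the
// prefetch >= file_size case handled above.
static void prefetch_prefix(void * addr, size_t prefetch, size_t file_size) {
    if (prefetch > 0 && prefetch < file_size) {
        if (posix_madvise(addr, prefetch, POSIX_MADV_WILLNEED) != 0) {
            fprintf(stderr, "warning: posix_madvise(.., POSIX_MADV_WILLNEED) failed\n");
        }
    }
}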

llama.cpp

Lines changed: 3 additions & 3 deletions
@@ -747,12 +747,12 @@ struct llama_model_loader {

     void load_all_data(llama_progress_callback progress_callback, void * progress_callback_user_data, llama_mlock * lmlock) {
         size_t data_size = 0;
-        size_t prefetch_size = 0;
+        size_t prefetch_size = file_loader->file.size;
         size_t lock_size = 0;
         for (const llama_load_tensor & lt : tensors_map.tensors) {
             data_size += lt.size;
-            if (lt.ggml_tensor->backend == GGML_BACKEND_CPU) {
-                prefetch_size += lt.size;
+            if (lt.ggml_tensor->backend != GGML_BACKEND_CPU) {
+                prefetch_size -= lt.size;
             }
         }
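Paired with the llama-util.h change, `prefetch_size` now starts at the full file size and subtracts every tensor that leaves the CPU, instead of summing only the CPU tensors. One practical difference is that the file header and tensor metadata are counted too, so with no offloading `prefetch_size` equals `file.size` and the new `prefetch >= file->size` check enables MAP_POPULATE again. A self-contained sketch of the new accounting; the stand-in types below are hypothetical, not the loader's real ones:

#include <cstddef>
#include <vector>

// Hypothetical stand-ins for the loader's types (illustration only).
enum backend_t { BACKEND_CPU, BACKEND_GPU };
struct tensor_info { backend_t backend; size_t size; };

// Mirrors the new accounting: start from the whole file and subtract every
// tensor that will not stay on the CPU, so headers, metadata, and
// CPU-resident tensors all count toward the prefetch budget.
size_t prefetch_bytes(size_t file_size, const std::vector<tensor_info> & tensors) {
    size_t prefetch = file_size;
    for (const tensor_info & t : tensors) {
        if (t.backend != BACKEND_CPU) {
            prefetch -= t.size;
        }
    }
    return prefetch;
}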
