
Commit cd4167b

llama_mmap : avoid unmapping the same fragments again in the destructor
1 parent 6a72c7f commit cd4167b

File tree

1 file changed: +76 -46 lines changed


llama.cpp

Lines changed: 76 additions & 46 deletions
@@ -816,33 +816,22 @@ struct llama_mmap {
 
     llama_mmap(const llama_mmap &) = delete;
 
-    static void align_offset(size_t * offset, size_t * len, size_t page_size) {
-        // align offset to the next page
-        size_t offset_in_page = *offset & (page_size - 1);
-        size_t offset_to_page = offset_in_page == 0 ? 0 : page_size - offset_in_page;
-        *offset += offset_to_page;
-
-        if (offset_to_page >= *len) {
-            *len = 0;
-        } else {
-            *len -= offset_to_page;
-            // align len to the previous page
-            *len -= *len & (page_size - 1);
-        }
-    }
-
 #ifdef _POSIX_MAPPED_FILES
     static constexpr bool SUPPORTED = true;
 
+    // list of mapped fragments (first_offset, last_offset)
+    std::vector<std::pair<size_t, size_t>> mapped_fragments;
+
     llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */, bool numa = false) {
         size = file->size;
         int fd = fileno(file->fp);
         int flags = MAP_SHARED;
         // prefetch/readahead impairs performance on NUMA systems
         if (numa) { prefetch = 0; }
 #ifdef __linux__
+        // advise the kernel to read the file sequentially (increases readahead)
         if (posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL)) {
-            fprintf(stderr, "warning: posix_fadvise(.., POSIX_FADV_SEQUENTIAL) failed: %s\n",
+            LLAMA_LOG_WARN("warning: posix_fadvise(.., POSIX_FADV_SEQUENTIAL) failed: %s\n",
                     strerror(errno));
         }
         if (prefetch) { flags |= MAP_POPULATE; }
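
Aside (illustrative, not part of the commit): the fadvise/MAP_POPULATE combination in this hunk is the usual POSIX readahead pattern. A minimal standalone sketch, assuming Linux and a hypothetical file name:

#include <cerrno>
#include <cstdio>
#include <cstring>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

int main() {
    int fd = open("model.gguf", O_RDONLY); // hypothetical file name
    if (fd < 0) { return 1; }

    struct stat st;
    if (fstat(fd, &st)) { close(fd); return 1; }

    // advise the kernel to read the file sequentially (increases readahead)
    if (posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL)) {
        fprintf(stderr, "warning: posix_fadvise failed: %s\n", strerror(errno));
    }

    // MAP_POPULATE (Linux-specific) pre-faults the pages up front,
    // playing the same role as the prefetch path above
    void * addr = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED | MAP_POPULATE, fd, 0);
    if (addr == MAP_FAILED) { close(fd); return 1; }

    munmap(addr, st.st_size);
    close(fd);
    return 0;
}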
@@ -853,42 +842,91 @@ struct llama_mmap {
         }
 
         if (prefetch > 0) {
-            // Advise the kernel to preload the mapped memory
+            // advise the kernel to preload the mapped memory
             if (posix_madvise(addr, std::min(file->size, prefetch), POSIX_MADV_WILLNEED)) {
-                fprintf(stderr, "warning: posix_madvise(.., POSIX_MADV_WILLNEED) failed: %s\n",
+                LLAMA_LOG_WARN("warning: posix_madvise(.., POSIX_MADV_WILLNEED) failed: %s\n",
                         strerror(errno));
             }
         }
         if (numa) {
             // advise the kernel not to use readahead
             // (because the next page might not belong on the same node)
             if (posix_madvise(addr, file->size, POSIX_MADV_RANDOM)) {
-                fprintf(stderr, "warning: posix_madvise(.., POSIX_MADV_RANDOM) failed: %s\n",
+                LLAMA_LOG_WARN("warning: posix_madvise(.., POSIX_MADV_RANDOM) failed: %s\n",
                         strerror(errno));
             }
         }
+
+        // initialize list of mapped_fragments
+        mapped_fragments.emplace_back(0, file->size);
+    }
+
+    static void align_range(size_t * first, size_t * last, size_t page_size) {
+        // align first to the next page
+        size_t offset_in_page = *first & (page_size - 1);
+        size_t offset_to_page = offset_in_page == 0 ? 0 : page_size - offset_in_page;
+        *first += offset_to_page;
+
+        // align last to the previous page
+        *last = *last & ~(page_size - 1);
+
+        if (*last <= *first) {
+            *last = *first;
+        }
     }
 
-    void unmap(size_t offset, size_t len) {
+    // partially unmap the file in the range [first, last)
+    void unmap_fragment(size_t first, size_t last) {
+        // note: this function must not be called multiple times with overlapping ranges
+        // otherwise, there is a risk of invalidating addresses that have been repurposed for other mappings
         int page_size = sysconf(_SC_PAGESIZE);
-        align_offset(&offset, &len, page_size);
-        if (len < (size_t)page_size) {
+        align_range(&first, &last, page_size);
+        size_t len = last - first;
+
+        if (len == 0) {
             return;
         }
 
-        void * next_page_start = (uint8_t *) addr + offset;
-        // unmap and discard the pages
+        GGML_ASSERT(first % page_size == 0);
+        GGML_ASSERT(last % page_size == 0);
+        GGML_ASSERT(last > first);
+
+        void * next_page_start = (uint8_t *) addr + first;
+
+        // unmap the range
         if (munmap(next_page_start, len)) {
-            fprintf(stderr, "warning: munmap failed: %s\n", strerror(errno));
-        }
-        if (posix_madvise(next_page_start, len, POSIX_MADV_DONTNEED)) {
-            fprintf(stderr, "warning: posix_madvise(.., POSIX_MADV_DONTNEED) failed: %s\n",
-                    strerror(errno));
+            LLAMA_LOG_WARN("warning: munmap failed: %s\n", strerror(errno));
         }
+
+        // update the list of mapped fragments to avoid unmapping the same range again in the destructor
+        std::vector<std::pair<size_t, size_t>> new_mapped_fragments;
+        for (const auto & frag : mapped_fragments) {
+            if (frag.first < first && frag.second > last) {
+                // the range is in the middle of the fragment, split it
+                new_mapped_fragments.emplace_back(frag.first, first);
+                new_mapped_fragments.emplace_back(last, frag.second);
+            } else if (frag.first < first && frag.second > first) {
+                // the range starts in the middle of the fragment
+                new_mapped_fragments.emplace_back(frag.first, first);
+            } else if (frag.first < last && frag.second > last) {
+                // the range ends in the middle of the fragment
+                new_mapped_fragments.emplace_back(last, frag.second);
+            } else if (frag.first >= first && frag.second <= last) {
+                // the range covers the entire fragment
+            } else {
+                // the range is outside the fragment
+                new_mapped_fragments.push_back(frag);
+            }
         }
+        mapped_fragments = std::move(new_mapped_fragments);
     }
 
     ~llama_mmap() {
-        munmap(addr, size);
+        for (const auto & frag : mapped_fragments) {
+            if (munmap((char *) addr + frag.first, frag.second - frag.first)) {
+                LLAMA_LOG_WARN("warning: munmap failed: %s\n", strerror(errno));
+            }
+        }
     }
 #elif defined(_WIN32)
     static constexpr bool SUPPORTED = true;
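
Aside (illustrative, not part of the commit): align_range shrinks a byte range inward to page boundaries, so a partial unmap never touches a page shared with data that must stay mapped. A standalone sketch that copies the function from the diff and exercises it with a hypothetical 4 KiB page size and unaligned inputs:

#include <cstddef>
#include <cstdio>

// copied from the diff above
static void align_range(size_t * first, size_t * last, size_t page_size) {
    // align first to the next page
    size_t offset_in_page = *first & (page_size - 1);
    size_t offset_to_page = offset_in_page == 0 ? 0 : page_size - offset_in_page;
    *first += offset_to_page;

    // align last to the previous page
    *last = *last & ~(page_size - 1);

    // an empty or sub-page range collapses to zero length
    if (*last <= *first) {
        *last = *first;
    }
}

int main() {
    size_t first = 100, last = 10000;    // hypothetical unaligned range
    align_range(&first, &last, 4096);
    printf("[%zu, %zu)\n", first, last); // prints [4096, 8192)
    return 0;
}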
@@ -936,18 +974,10 @@ struct llama_mmap {
         }
     }
 
-    void unmap(size_t offset, size_t len) {
-        SYSTEM_INFO si;
-        GetSystemInfo(&si);
-        DWORD page_size = si.dwAllocationGranularity;
-        align_offset(&offset, &len, page_size);
-
-        if (len < (size_t)page_size) {
-            return;
-        }
-
-        void * next_page_start = (uint8_t *) addr + offset;
-        VirtualAlloc(next_page_start, len, MEM_RESET, PAGE_NOACCESS);
+    void unmap_fragment(size_t first, size_t last) {
+        // not supported
+        GGML_UNUSED(first);
+        GGML_UNUSED(last);
     }
 
     ~llama_mmap() {
@@ -2429,11 +2459,11 @@ struct llama_model_loader {
             size_done += ggml_nbytes(cur);
         }
 
-        // unmap GPU tensors
+        // unmap offloaded tensors and metadata
        if (use_mmap && mapping) {
-            mapping->unmap(0, mmap_first);
-            mapping->unmap(mmap_last, mapping->size - mmap_last);
+            mapping->unmap_fragment(0, mmap_first);
+            mapping->unmap_fragment(mmap_last, mmap_last);
+            mapping->unmap_fragment(mmap_last, mapping->size);
         }
 
         if (progress_callback) {
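
Aside (illustrative, not part of the commit): with the loader change, the head [0, mmap_first) and tail [mmap_last, size) of the mapping are released early, and the destructor only munmaps what remains. (The unmap_fragment(mmap_last, mmap_last) call is an empty range: after align_range it has zero length and returns immediately.) The sketch below replays the fragment-splitting loop from unmap_fragment through a hypothetical helper drop_range, with made-up, page-aligned offsets for a 64 KiB file:

#include <cstddef>
#include <cstdio>
#include <utility>
#include <vector>

// same splitting logic as in unmap_fragment above, minus the munmap call;
// first/last are assumed to be already page-aligned here
static void drop_range(std::vector<std::pair<size_t, size_t>> & frags, size_t first, size_t last) {
    std::vector<std::pair<size_t, size_t>> out;
    for (const auto & frag : frags) {
        if (frag.first < first && frag.second > last) {
            out.emplace_back(frag.first, first); // range in the middle: keep head
            out.emplace_back(last, frag.second); //                      keep tail
        } else if (frag.first < first && frag.second > first) {
            out.emplace_back(frag.first, first); // range starts mid-fragment: trim the end
        } else if (frag.first < last && frag.second > last) {
            out.emplace_back(last, frag.second); // range ends mid-fragment: trim the start
        } else if (frag.first >= first && frag.second <= last) {
            // range covers the entire fragment: drop it
        } else {
            out.push_back(frag);                 // range outside the fragment: keep as-is
        }
    }
    frags = std::move(out);
}

int main() {
    std::vector<std::pair<size_t, size_t>> frags = {{0, 65536}}; // whole hypothetical file
    drop_range(frags, 0, 8192);      // head: metadata before the first kept tensor
    drop_range(frags, 57344, 65536); // tail: data after the last kept tensor
    for (const auto & f : frags) {
        printf("[%zu, %zu)\n", f.first, f.second); // prints [8192, 57344)
    }
    return 0;
}

Only the surviving fragment [8192, 57344) would be munmapped by the destructor, which is exactly the double-unmap the commit avoids.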
