@@ -816,33 +816,22 @@ struct llama_mmap {
816
816
817
817
llama_mmap (const llama_mmap &) = delete ;
818
818
819
- static void align_offset (size_t * offset, size_t * len, size_t page_size) {
820
- // align offset to the next page
821
- size_t offset_in_page = *offset & (page_size - 1 );
822
- size_t offset_to_page = offset_in_page == 0 ? 0 : page_size - offset_in_page;
823
- *offset += offset_to_page;
824
-
825
- if (offset_to_page >= *len) {
826
- *len = 0 ;
827
- } else {
828
- *len -= offset_to_page;
829
- // align len to the previous page
830
- *len -= *len & (page_size - 1 );
831
- }
832
- }
833
-
834
819
#ifdef _POSIX_MAPPED_FILES
835
820
static constexpr bool SUPPORTED = true ;
836
821
822
+ // list of mapped fragments (first_offset, last_offset)
823
+ std::vector<std::pair<size_t , size_t >> mapped_fragments;
824
+
837
825
llama_mmap (struct llama_file * file, size_t prefetch = (size_t ) -1 /* -1 = max value */ , bool numa = false) {
838
826
size = file->size ;
839
827
int fd = fileno (file->fp );
840
828
int flags = MAP_SHARED;
841
829
// prefetch/readahead impairs performance on NUMA systems
842
830
if (numa) { prefetch = 0 ; }
843
831
#ifdef __linux__
832
+ // advise the kernel to read the file sequentially (increases readahead)
844
833
if (posix_fadvise (fd, 0 , 0 , POSIX_FADV_SEQUENTIAL)) {
845
- fprintf (stderr, " warning: posix_fadvise(.., POSIX_FADV_SEQUENTIAL) failed: %s\n " ,
834
+ LLAMA_LOG_WARN ( " warning: posix_fadvise(.., POSIX_FADV_SEQUENTIAL) failed: %s\n " ,
846
835
strerror (errno));
847
836
}
848
837
if (prefetch) { flags |= MAP_POPULATE; }
@@ -853,42 +842,91 @@ struct llama_mmap {
853
842
}
854
843
855
844
if (prefetch > 0 ) {
856
- // Advise the kernel to preload the mapped memory
845
+ // advise the kernel to preload the mapped memory
857
846
if (posix_madvise (addr, std::min (file->size , prefetch), POSIX_MADV_WILLNEED)) {
858
- fprintf (stderr, " warning: posix_madvise(.., POSIX_MADV_WILLNEED) failed: %s\n " ,
847
+ LLAMA_LOG_WARN ( " warning: posix_madvise(.., POSIX_MADV_WILLNEED) failed: %s\n " ,
859
848
strerror (errno));
860
849
}
861
850
}
862
851
if (numa) {
863
852
// advise the kernel not to use readahead
864
853
// (because the next page might not belong on the same node)
865
854
if (posix_madvise (addr, file->size , POSIX_MADV_RANDOM)) {
866
- fprintf (stderr, " warning: posix_madvise(.., POSIX_MADV_RANDOM) failed: %s\n " ,
855
+ LLAMA_LOG_WARN ( " warning: posix_madvise(.., POSIX_MADV_RANDOM) failed: %s\n " ,
867
856
strerror (errno));
868
857
}
869
858
}
859
+
860
+ // initialize list of mapped_fragments
861
+ mapped_fragments.emplace_back (0 , file->size );
862
+ }
863
+
864
// shrink the span [*first, *last) inward to whole-page boundaries:
// *first is rounded up to the next page boundary, *last is rounded down
// to the previous one; if the aligned range becomes empty, *last is
// clamped to *first so that (last - first) is a valid zero length.
// page_size must be a power of two (the mask arithmetic relies on it).
static void align_range(size_t * first, size_t * last, size_t page_size) {
    // align first to the next page
    size_t offset_in_page = *first & (page_size - 1);
    size_t offset_to_page = offset_in_page == 0 ? 0 : page_size - offset_in_page;
    *first += offset_to_page;

    // align last to the previous page
    *last = *last & ~(page_size - 1);

    if (*last <= *first) {
        *last = *first;
    }
}
871
877
872
- void unmap (size_t offset, size_t len) {
878
+ // partially unmap the file in the range [first, last)
879
+ void unmap_fragment (size_t first, size_t last) {
880
+ // note: this function must not be called multiple times with overlapping ranges
881
+ // otherwise, there is a risk of invalidating addresses that have been repurposed for other mappings
873
882
int page_size = sysconf (_SC_PAGESIZE);
874
- align_offset (&offset, &len, page_size);
875
- if (len < (size_t )page_size) {
883
+ align_range (&first, &last, page_size);
884
+ size_t len = last - first;
885
+
886
+ if (len == 0 ) {
876
887
return ;
877
888
}
878
889
879
- void * next_page_start = (uint8_t *) addr + offset;
880
- // unmap and discard the pages
890
+ GGML_ASSERT (first % page_size == 0 );
891
+ GGML_ASSERT (last % page_size == 0 );
892
+ GGML_ASSERT (last > first);
893
+
894
+ void * next_page_start = (uint8_t *) addr + first;
895
+
896
+ // unmap the range
881
897
if (munmap (next_page_start, len)) {
882
- fprintf (stderr, " warning: munmap failed: %s\n " , strerror (errno));
883
- }
884
- if (posix_madvise (next_page_start, len, POSIX_MADV_DONTNEED)) {
885
- fprintf (stderr, " warning: posix_madvise(.., POSIX_MADV_DONTNEED) failed: %s\n " ,
886
- strerror (errno));
898
+ LLAMA_LOG_WARN (" warning: munmap failed: %s\n " , strerror (errno));
899
+ }
900
+
901
+ // update the list of mapped fragments to avoid unmapping the same range again in the destructor
902
+ std::vector<std::pair<size_t , size_t >> new_mapped_fragments;
903
+ for (const auto & frag : mapped_fragments) {
904
+ if (frag.first < first && frag.second > last) {
905
+ // the range is in the middle of the fragment, split it
906
+ new_mapped_fragments.emplace_back (frag.first , first);
907
+ new_mapped_fragments.emplace_back (last, frag.second );
908
+ } else if (frag.first < first && frag.second > first) {
909
+ // the range starts in the middle of the fragment
910
+ new_mapped_fragments.emplace_back (frag.first , first);
911
+ } else if (frag.first < last && frag.second > last) {
912
+ // the range ends in the middle of the fragment
913
+ new_mapped_fragments.emplace_back (last, frag.second );
914
+ } else if (frag.first >= first && frag.second <= last) {
915
+ // the range covers the entire fragment
916
+ } else {
917
+ // the range is outside the fragment
918
+ new_mapped_fragments.push_back (frag);
919
+ }
887
920
}
921
+ mapped_fragments = std::move (new_mapped_fragments);
888
922
}
889
923
890
924
~llama_mmap() {
    // unmap only the fragments that are still mapped; ranges already
    // released via unmap_fragment() were removed from mapped_fragments,
    // so they are never passed to munmap a second time
    for (const auto & frag : mapped_fragments) {
        if (munmap((char *) addr + frag.first, frag.second - frag.first)) {
            LLAMA_LOG_WARN("warning: munmap failed: %s\n", strerror(errno));
        }
    }
}
893
931
#elif defined(_WIN32)
894
932
static constexpr bool SUPPORTED = true ;
@@ -936,18 +974,10 @@ struct llama_mmap {
936
974
}
937
975
}
938
976
939
- void unmap (size_t offset, size_t len) {
940
- SYSTEM_INFO si;
941
- GetSystemInfo (&si);
942
- DWORD page_size = si.dwAllocationGranularity ;
943
- align_offset (&offset, &len, page_size);
944
-
945
- if (len < (size_t )page_size) {
946
- return ;
947
- }
948
-
949
- void * next_page_start = (uint8_t *) addr + offset;
950
- VirtualAlloc (next_page_start, len, MEM_RESET, PAGE_NOACCESS);
977
+ void unmap_fragment (size_t first, size_t last) {
978
+ // not supported
979
+ GGML_UNUSED (first);
980
+ GGML_UNUSED (last);
951
981
}
952
982
953
983
~llama_mmap () {
@@ -2429,11 +2459,11 @@ struct llama_model_loader {
2429
2459
size_done += ggml_nbytes (cur);
2430
2460
}
2431
2461
2432
- // unmap GPU tensors
2462
+ // unmap offloaded tensors and metadata
2433
2463
if (use_mmap && mapping) {
2434
- // unmap offloaded tensors and metadata
2435
- mapping->unmap ( 0 , mmap_first );
2436
- mapping->unmap (mmap_last, mapping->size - mmap_last );
2464
+            mapping->unmap_fragment(0, mmap_first);
+            mapping->unmap_fragment(mmap_last, mapping->size);
2437
2467
}
2438
2468
2439
2469
if (progress_callback) {
0 commit comments