@@ -149,6 +149,10 @@ struct llama_model {
149
149
// the model memory buffer
150
150
std::vector<uint8_t > buf;
151
151
152
+ // model memory mapped file
153
+ void * mm_addr;
154
+ size_t mm_length;
155
+
152
156
// tensors
153
157
int n_loaded;
154
158
std::unordered_map<std::string, struct ggml_tensor *> tensors;
@@ -296,22 +300,32 @@ struct llama_context_params llama_context_default_params() {
296
300
// model loading
297
301
//
298
302
299
// Memory-map the file `fname` read-only.
//
// On success, `mm_addr` receives the base address of the mapping and
// `mm_length` the number of bytes mapped (the file size reported by lseek).
// On any failure - or on platforms where MAP_FAILED is not defined -
// `mm_addr` is set to NULL and `mm_length` to 0, so callers can rely on
// testing `mm_addr == NULL` regardless of platform.
//
// A successful mapping should later be released with munmap_file().
static void mmap_file(const char * fname, void * &mm_addr, size_t &mm_length) {
    // give the outputs a defined value on every path, including the
    // non-POSIX branch below which cannot map anything
    mm_addr   = NULL;
    mm_length = 0;

#if defined(MAP_FAILED)
    // POSIX
    const int fd = open(fname, O_RDONLY);
    if (fd == -1) {
        perror("open failed");
        return;
    }

    // lseek returns (off_t) -1 on error; check before converting to size_t,
    // otherwise the error value would turn into an enormous length
    const off_t file_size = lseek(fd, 0, SEEK_END);
    if (file_size < 0) {
        perror("lseek failed");
        close(fd);
        return;
    }

    void * addr = mmap(NULL, static_cast<size_t>(file_size), PROT_READ, MAP_SHARED, fd, 0);
    if (addr == MAP_FAILED) {
        // report before close() so that errno still belongs to mmap
        perror("mmap failed");
        close(fd);
        return;
    }

    // the mapping stays valid after the descriptor is closed
    close(fd);

    mm_addr   = addr;
    mm_length = static_cast<size_t>(file_size);
#else
    // TODO: windows support
    (void)(fname); // suppress warnings
#endif
}
320
+
321
// Release a mapping previously created by mmap_file().
//
// `addr` and `length` are passed to munmap() as-is. A NULL address or a
// zero length is treated as "nothing mapped" and ignored. A failing
// munmap() is reported on stderr; no error is returned to the caller.
static void munmap_file(void * addr, size_t length) {
#if defined(MAP_FAILED)
    // POSIX
    if (addr == NULL || length == 0) {
        // munmap rejects a zero length with EINVAL - nothing to release
        return;
    }
    if (munmap(addr, length) != 0) {
        perror("munmap failed");
    }
#else
    // TODO: windows support
    (void)(addr); // suppress warnings
    (void)(length);
#endif
}
317
331
@@ -480,12 +494,15 @@ static bool llama_model_load(
480
494
bool use_mmap = (n_parts == 1 );
481
495
482
496
// try to memory map the model file
483
- void * mm_addr = NULL ;
497
+ void * mm_addr = NULL ;
484
498
if (use_mmap) {
485
- mm_addr = mmap_file (fname.c_str ());
486
- if (mm_addr == NULL ) {
499
+ mmap_file (fname.c_str (), model. mm_addr , model. mm_length );
500
+ if (model. mm_addr == NULL ) {
487
501
use_mmap = false ;
488
502
}
503
+ else {
504
+ mm_addr = model.mm_addr ;
505
+ }
489
506
}
490
507
491
508
auto & ctx = model.ctx ;
@@ -1750,6 +1767,10 @@ void llama_free(struct llama_context * ctx) {
1750
1767
ggml_free (ctx->model .ctx );
1751
1768
}
1752
1769
1770
+ if (ctx->model .mm_addr ) {
1771
+ munmap_file (ctx->model .mm_addr , ctx->model .mm_length );
1772
+ }
1773
+
1753
1774
delete ctx;
1754
1775
}
1755
1776
0 commit comments