Skip to content

Commit 276e5b7

Browse files
slarenjart
authored and committed
Unmap the file in llama_free
1 parent d68c5dc commit 276e5b7

File tree

1 file changed

+31 -10 lines changed

1 file changed

+31 -10 lines changed

llama.cpp

Lines changed: 31 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -149,6 +149,10 @@ struct llama_model {
149 149
// the model memory buffer
150 150
std::vector<uint8_t> buf;
151 151

152+
// model memory mapped file
153+
void * mm_addr;
154+
size_t mm_length;
155+
152 156
// tensors
153 157
int n_loaded;
154 158
std::unordered_map<std::string, struct ggml_tensor *> tensors;
@@ -296,22 +300,32 @@ struct llama_context_params llama_context_default_params() {
296 300
// model loading
297 301
//
298 302

299-
static void * mmap_file(const char* fname) {
303+
static void mmap_file(const char* fname, void * &mm_addr, size_t &mm_length) {
300 304
#if defined(MAP_FAILED)
301-
// POSIX mmap
305+
// POSIX
302 306
int fd = open(fname, O_RDONLY);
303-
size_t len = lseek(fd, 0, SEEK_END);
304-
void * mm_addr = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
307+
mm_length = lseek(fd, 0, SEEK_END);
308+
mm_addr = mmap(NULL, mm_length, PROT_READ, MAP_SHARED, fd, 0);
309+
close(fd);
305 310
if (mm_addr == MAP_FAILED) {
306 311
perror("mmap failed");
307 312
mm_addr = NULL;
313+
mm_length = 0;
308 314
}
309-
close(fd);
310-
return mm_addr;
311 315
#else
312 316
// TODO: windows support
313 317
(void)(fname); // suppress warnings
314-
return NULL;
318+
#endif
319+
}
320+
321+
static void munmap_file(void * addr, size_t length) {
322+
#if defined(MAP_FAILED)
323+
// POSIX
324+
munmap(addr, length);
325+
#else
326+
// TODO: windows support
327+
(void)(addr); // suppress warnings
328+
(void)(length);
315 329
#endif
316 330
}
317 331

@@ -480,12 +494,15 @@ static bool llama_model_load(
480 494
bool use_mmap = (n_parts == 1);
481 495

482 496
// try to memory map the model file
483-
void* mm_addr = NULL;
497+
void * mm_addr = NULL;
484 498
if (use_mmap) {
485-
mm_addr = mmap_file(fname.c_str());
486-
if (mm_addr == NULL) {
499+
mmap_file(fname.c_str(), model.mm_addr, model.mm_length);
500+
if (model.mm_addr == NULL) {
487 501
use_mmap = false;
488 502
}
503+
else {
504+
mm_addr = model.mm_addr;
505+
}
489 506
}
490 507

491 508
auto & ctx = model.ctx;
@@ -1750,6 +1767,10 @@ void llama_free(struct llama_context * ctx) {
1750 1767
ggml_free(ctx->model.ctx);
1751 1768
}
1752 1769

1770+
if (ctx->model.mm_addr) {
1771+
munmap_file(ctx->model.mm_addr, ctx->model.mm_length);
1772+
}
1773+
1753 1774
delete ctx;
1754 1775
}
1755 1776

0 commit comments

Comments (0)