@@ -23,6 +23,7 @@ def __init__(
         f16_kv: bool = False,
         logits_all: bool = False,
         vocab_only: bool = False,
+        use_mmap: bool = True,
         use_mlock: bool = False,
         embedding: bool = False,
         n_threads: Optional[int] = None,
@@ -40,6 +41,7 @@ def __init__(
             f16_kv: Use half-precision for key/value cache.
             logits_all: Return logits for all tokens, not just the last token.
             vocab_only: Only load the vocabulary no weights.
+            use_mmap: Use mmap if possible.
             use_mlock: Force the system to keep the model in RAM.
             embedding: Embedding mode only.
             n_threads: Number of threads to use. If None, the number of threads is automatically determined.
@@ -63,6 +65,7 @@ def __init__(
         self.params.f16_kv = f16_kv
         self.params.logits_all = logits_all
         self.params.vocab_only = vocab_only
+        self.params.use_mmap = use_mmap
         self.params.use_mlock = use_mlock
         self.params.embedding = embedding
 
@@ -661,6 +664,7 @@ def __getstate__(self):
             f16_kv=self.params.f16_kv,
             logits_all=self.params.logits_all,
             vocab_only=self.params.vocab_only,
+            use_mmap=self.params.use_mmap,
             use_mlock=self.params.use_mlock,
             embedding=self.params.embedding,
             last_n_tokens_size=self.last_n_tokens_size,
@@ -679,6 +683,7 @@ def __setstate__(self, state):
             f16_kv=state["f16_kv"],
             logits_all=state["logits_all"],
             vocab_only=state["vocab_only"],
+            use_mmap=state["use_mmap"],
             use_mlock=state["use_mlock"],
             embedding=state["embedding"],
             n_threads=state["n_threads"],
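For context, a minimal sketch of how the new flag is exercised from the Python API. This is illustrative, not part of the change: the model path is a placeholder, and it assumes the package is importable as `llama_cpp` with a local GGML model file available. `use_mmap` defaults to `True`, so only callers who want to opt out need to pass it.

```python
import pickle

from llama_cpp import Llama

# Disable mmap so the whole model is read into memory up front instead
# of being paged in lazily. The path below is a placeholder.
llm = Llama(model_path="./models/7B/ggml-model.bin", use_mmap=False)

# The last two hunks thread the flag through __getstate__/__setstate__,
# so it survives a pickle round trip (which reloads the model).
llm2 = pickle.loads(pickle.dumps(llm))
assert llm2.params.use_mmap == llm.params.use_mmap
```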