Skip to content

Commit 1f67ad2

Browse files
committed
Add use_mmap option
1 parent d41cb0e commit 1f67ad2

File tree

1 file changed

+5
-0
lines changed

1 file changed

+5
-0
lines changed

llama_cpp/llama.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ def __init__(
2323
f16_kv: bool = False,
2424
logits_all: bool = False,
2525
vocab_only: bool = False,
26+
use_mmap: bool = True,
2627
use_mlock: bool = False,
2728
embedding: bool = False,
2829
n_threads: Optional[int] = None,
@@ -40,6 +41,7 @@ def __init__(
4041
f16_kv: Use half-precision for key/value cache.
4142
logits_all: Return logits for all tokens, not just the last token.
4243
vocab_only: Only load the vocabulary no weights.
44+
use_mmap: Use mmap if possible.
4345
use_mlock: Force the system to keep the model in RAM.
4446
embedding: Embedding mode only.
4547
n_threads: Number of threads to use. If None, the number of threads is automatically determined.
@@ -63,6 +65,7 @@ def __init__(
6365
self.params.f16_kv = f16_kv
6466
self.params.logits_all = logits_all
6567
self.params.vocab_only = vocab_only
68+
self.params.use_mmap = use_mmap
6669
self.params.use_mlock = use_mlock
6770
self.params.embedding = embedding
6871

@@ -661,6 +664,7 @@ def __getstate__(self):
661664
f16_kv=self.params.f16_kv,
662665
logits_all=self.params.logits_all,
663666
vocab_only=self.params.vocab_only,
667+
use_mmap=self.params.use_mmap,
664668
use_mlock=self.params.use_mlock,
665669
embedding=self.params.embedding,
666670
last_n_tokens_size=self.last_n_tokens_size,
@@ -679,6 +683,7 @@ def __setstate__(self, state):
679683
f16_kv=state["f16_kv"],
680684
logits_all=state["logits_all"],
681685
vocab_only=state["vocab_only"],
686+
use_mmap=state["use_mmap"],
682687
use_mlock=state["use_mlock"],
683688
embedding=state["embedding"],
684689
n_threads=state["n_threads"],

0 commit comments

Comments (0)