Skip to content

Commit 620cd2f

Browse files
committed
Merge branch 'main' of github.com:abetlen/llama_cpp_python into main
2 parents 5788f1f + 6dfb981 commit 620cd2f

File tree

3 files changed

+3
-1
lines changed

3 files changed

+3
-1
lines changed

llama_cpp/llama.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,7 @@ def __init__(
238238
n_ctx: Maximum context size.
239239
n_parts: Number of parts to split the model into. If -1, the number of parts is automatically determined.
240240
seed: Random seed. -1 for random.
241+
n_gpu_layers: Number of layers to offload to GPU (-ngl). If -1, all layers are offloaded.
241242
f16_kv: Use half-precision for key/value cache.
242243
logits_all: Return logits for all tokens, not just the last token.
243244
vocab_only: Only load the vocabulary no weights.
@@ -266,7 +267,7 @@ def __init__(
266267

267268
self.params = llama_cpp.llama_context_default_params()
268269
self.params.n_ctx = n_ctx
269-
self.params.n_gpu_layers = n_gpu_layers
270+
self.params.n_gpu_layers = 0x7FFFFFFF if n_gpu_layers == -1 else n_gpu_layers # 0x7FFFFFFF is INT32 max, will be auto set to all layers
270271
self.params.seed = seed
271272
self.params.f16_kv = f16_kv
272273
self.params.logits_all = logits_all

llama_cpp/py.typed

Whitespace-only changes.

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
author_email="[email protected]",
1616
license="MIT",
1717
package_dir={"llama_cpp": "llama_cpp", "llama_cpp.server": "llama_cpp/server"},
18+
package_data={"llama_cpp": ["py.typed"]},
1819
packages=["llama_cpp", "llama_cpp.server"],
1920
install_requires=["typing-extensions>=4.5.0", "numpy>=1.20.0", "diskcache>=5.6.1"],
2021
extras_require={

0 commit comments

Comments (0)