Skip to content

Commit d015bdb

Browse files
committed
Add mul_mat_q option
1 parent f6a7850 commit d015bdb

File tree

2 files changed

+8
-0
lines changed

2 files changed

+8
-0
lines changed

llama_cpp/llama.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,7 @@ def __init__(
227227
rope_freq_scale: float = 1.0,
228228
n_gqa: Optional[int] = None, # (TEMPORARY) must be 8 for llama2 70b
229229
rms_norm_eps: Optional[float] = None, # (TEMPORARY)
230+
mul_mat_q: Optional[bool] = None, # (TEMPORARY)
230231
verbose: bool = True,
231232
):
232233
"""Load a llama.cpp model from `model_path`.
@@ -293,6 +294,9 @@ def __init__(
293294
if rms_norm_eps is not None:
294295
self.params.rms_norm_eps = rms_norm_eps
295296

297+
if mul_mat_q is not None:
298+
self.params.mul_mat_q = mul_mat_q
299+
296300
self.last_n_tokens_size = last_n_tokens_size
297301
self.n_batch = min(n_ctx, n_batch)
298302

llama_cpp/server/app.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,10 @@ class Settings(BaseSettings):
103103
default=None,
104104
description="TEMPORARY",
105105
)
106+
mul_mat_q: Optional[bool] = Field(
107+
default=None,
108+
description="TEMPORARY",
109+
)
106110

107111

108112
class ErrorResponse(TypedDict):

0 commit comments

Comments
 (0)