1 parent ee64dc6 commit 06b946e
examples/models/llama/model_args.py
@@ -14,7 +14,7 @@ class ModelArgs:
     multiple_of: int = 256  # make SwiGLU hidden layer size multiple of large power of 2
     ffn_dim_multiplier: Optional[float] = None
     norm_eps: float = 1e-5
-    max_batch_size: int = 32
+    max_batch_size: int = 1
     max_seq_len: int = 2048
     max_context_len: int = 2048
     moe: bool = False  # True to enable the MoE (Mixture of Experts)
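For context, a minimal sketch of how the affected fields look after this change. It reproduces only the fields visible in the hunk from `examples/models/llama/model_args.py`; the rest of the `ModelArgs` dataclass and any imports beyond those shown are assumptions, not part of the commit.

```python
# Sketch, not the full file: only the fields shown in the diff hunk are included.
from dataclasses import dataclass
from typing import Optional


@dataclass
class ModelArgs:
    multiple_of: int = 256  # make SwiGLU hidden layer size multiple of large power of 2
    ffn_dim_multiplier: Optional[float] = None
    norm_eps: float = 1e-5
    max_batch_size: int = 1  # changed by this commit; previously 32
    max_seq_len: int = 2048
    max_context_len: int = 2048
    moe: bool = False  # True to enable the MoE (Mixture of Experts)


# With the new default, constructing ModelArgs without arguments yields batch size 1.
args = ModelArgs()
assert args.max_batch_size == 1
```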