commit 2a1ace5 · 2 parents: 1c87ce3 + fa35c88
examples/models/llama/llama_transformer.py
@@ -85,7 +85,7 @@ class ModelArgs:
     n_kv_heads: Optional[int] = None
     vocab_size: int = -1  # defined later by tokenizer
     hidden_dim: Optional[int] = None
-    head_dim: Optional[int] = None
+    head_dim: Optional[int] = None  # Optional customized head_dim
     multiple_of: int = 256  # make SwiGLU hidden layer size multiple of large power of 2
     ffn_dim_multiplier: Optional[float] = None
     norm_eps: float = 1e-5
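
For context, here is a minimal sketch of how an optional `head_dim` field like this is typically resolved in Llama-style model configs. The `__post_init__` fallback shown below is an assumption based on the common convention of defaulting to `dim // n_heads`; it is not necessarily the exact logic used elsewhere in `llama_transformer.py`.

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class ModelArgs:
    dim: int = 4096
    n_heads: int = 32
    head_dim: Optional[int] = None  # Optional customized head_dim

    def __post_init__(self):
        # If no custom head_dim is provided, fall back to the conventional
        # Llama default of dim // n_heads (assumption: attention layers
        # read the resolved value from this field).
        if self.head_dim is None:
            self.head_dim = self.dim // self.n_heads


default_args = ModelArgs(dim=2048, n_heads=16)            # head_dim resolves to 128
custom_args = ModelArgs(dim=2048, n_heads=16, head_dim=64)  # stays 64
```

Keeping `head_dim` optional this way lets most checkpoints omit it while still supporting architectures whose per-head dimension is decoupled from `dim // n_heads`.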