commit 2a1ace5 · 2 parents: 1c87ce3 + fa35c88
examples/models/llama/llama_transformer.py
@@ -85,7 +85,7 @@ class ModelArgs:
     n_kv_heads: Optional[int] = None
     vocab_size: int = -1  # defined later by tokenizer
     hidden_dim: Optional[int] = None
-    head_dim: Optional[int] = None
+    head_dim: Optional[int] = None  # Optional customized head_dim
     multiple_of: int = 256  # make SwiGLU hidden layer size multiple of large power of 2
     ffn_dim_multiplier: Optional[float] = None
     norm_eps: float = 1e-5
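
For context, here is a minimal sketch of how an optional `head_dim` field like this is typically resolved in Llama-style model configs. The `__post_init__` fallback shown below is an assumption based on the common convention of defaulting to `dim // n_heads`; it is not necessarily the exact logic used elsewhere in `llama_transformer.py`.

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class ModelArgs:
    dim: int = 4096
    n_heads: int = 32
    head_dim: Optional[int] = None  # Optional customized head_dim

    def __post_init__(self):
        # If no custom head_dim is provided, fall back to the conventional
        # Llama default of dim // n_heads (assumption: attention layers
        # read the resolved value from this field).
        if self.head_dim is None:
            self.head_dim = self.dim // self.n_heads


default_args = ModelArgs(dim=2048, n_heads=16)            # head_dim resolves to 128
custom_args = ModelArgs(dim=2048, n_heads=16, head_dim=64)  # stays 64
```

Keeping `head_dim` optional this way lets most checkpoints omit it while still supporting architectures whose per-head dimension is decoupled from `dim // n_heads`.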