
Commit 692e834

Add additional supported models to config/data/models.json (#329)
1 parent 21dbd1e · commit 692e834

File tree

6 files changed: +76 additions, −25 deletions

build/builder.py

Lines changed: 9 additions & 5 deletions
@@ -14,6 +14,7 @@
 import torch
 import torch._dynamo.config
 import torch._inductor.config
+
 from config.model_config import resolve_model_config
 from quantize import name_to_dtype, quantize_model
 
@@ -76,6 +77,7 @@ def from_args(cls, args): # -> BuilderArgs:
         checkpoint_dir = args.checkpoint_dir
 
         checkpoint_path = args.checkpoint_path
+        params_table = args.params_table
         if args.model: # Using a named, well-known model
             model_config = resolve_model_config(args.model)
 
@@ -84,6 +86,9 @@ def from_args(cls, args): # -> BuilderArgs:
                 / model_config.name
                 / model_config.checkpoint_file
             )
+            # The transformers config is keyed on the last section
+            # of the name/path.
+            params_table = model_config.transformer_params_key or model_config.name.split("/")[-1]
 
         is_chat_model = False
         if args.is_chat_model:
@@ -108,7 +113,7 @@ def from_args(cls, args): # -> BuilderArgs:
             checkpoint_dir=checkpoint_dir,
             checkpoint_path=checkpoint_path,
             params_path=args.params_path,
-            params_table=args.params_table,
+            params_table=params_table,
             gguf_path=args.gguf_path,
             gguf_kwargs=None,
             dso_path=args.dso_path,
@@ -147,9 +152,8 @@ def from_args(cls, args): # -> TokenizerArgs:
             tokenizer_path = args.tokenizer_path
         elif args.model: # Using a named, well-known model
             model_config = resolve_model_config(args.model)
-            tokenizer_path = (
-                Path(args.model_directory) / model_config.name / "tokenizer.model"
-            )
+            tokenizer_path = Path(args.model_directory) / model_config.name / model_config.tokenizer_file
+
         elif args.checkpoint_path:
             tokenizer_path = args.checkpoint_path.parent / "tokenizer.model"
         elif hasattr(args, "checkpoint_dir") and args.checkpoint_dir:
@@ -234,7 +238,7 @@ def _load_model_default(builder_args):
     if builder_args.params_path:
         model = Transformer.from_params(builder_args.params_path)
     elif builder_args.params_table:
-        model = Transformer.from_table(builder_args.params_path)
+        model = Transformer.from_table(builder_args.params_table)
     else:
         model = Transformer.from_name(builder_args.checkpoint_path.parent.name)
 
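Net effect in builder.py: for a named model, the params-table key is now derived from the model config rather than taken verbatim from the CLI, and the from_table call is fixed to pass params_table instead of params_path. A minimal standalone sketch of the new key resolution, with names mirroring the diff (the helper itself is illustrative, not part of the commit):

def resolve_params_table(cli_params_table, model_config=None):
    # Start from the CLI-provided value (may be None).
    params_table = cli_params_table
    if model_config is not None:  # a named, well-known model was requested
        # The transformers config is keyed on the last section of the
        # name/path, unless the config supplies transformer_params_key.
        params_table = (
            model_config.transformer_params_key
            or model_config.name.split("/")[-1]
        )
    return params_table

For example, "meta-llama/Llama-2-13b-chat-hf" resolves to "13B" via its transformer_params_key, while "meta-llama/Meta-Llama-3-8B", which has no key, falls back to the last path segment, "Meta-Llama-3-8B".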

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+{"n_layers": 8, "n_heads": 8, "dim": 512, "hidden_dim": 1376}

build/model.py

Lines changed: 0 additions & 1 deletion
@@ -112,7 +112,6 @@ def from_name(cls, name: str):
         return ModelArgs.from_params(config_path / f"{config[0]}.json")
 
 
-
 class KVCache(nn.Module):
     def __init__(self, max_batch_size, max_seq_length, n_heads, head_dim, dtype=None):
         super().__init__()

config/data/models.json

Lines changed: 46 additions & 6 deletions
@@ -1,13 +1,38 @@
 {
-  "meta-llama/Meta-Llama-3-8B-Instruct": {
-    "aliases": ["llama3", "llama3-8b"],
+  "meta-llama/Llama-2-7b-hf": {
+    "aliases": ["llama2", "llama2-7b"],
     "distribution_channel": "HuggingFaceSnapshot",
-    "distribution_path": "meta-llama/Meta-Llama-3-8B-Instruct"
+    "distribution_path": "meta-llama/Llama-2-7b-hf",
+    "transformer_params_key": "7B"
   },
   "meta-llama/Llama-2-7b-chat-hf": {
-    "aliases": ["llama2", "llama2-7b"],
+    "aliases": ["llama2-chat", "llama2-7b-chat"],
+    "distribution_channel": "HuggingFaceSnapshot",
+    "distribution_path": "meta-llama/Llama-2-7b-chat-hf",
+    "transformer_params_key": "7B"
+  },
+  "meta-llama/Llama-2-13b-chat-hf": {
+    "aliases": ["llama2-13b-chat"],
     "distribution_channel": "HuggingFaceSnapshot",
-    "distribution_path": "meta-llama/Llama-2-7b-chat-hf"
+    "distribution_path": "meta-llama/Llama-2-13b-chat-hf",
+    "transformer_params_key": "13B"
+  },
+  "meta-llama/Llama-2-70b-chat-hf": {
+    "aliases": ["llama2-70b-chat"],
+    "distribution_channel": "HuggingFaceSnapshot",
+    "distribution_path": "meta-llama/Llama-2-70b-chat-hf",
+    "transformer_params_key": "70B"
+  },
+  "meta-llama/Meta-Llama-3-8B": {
+    "aliases": ["llama3"],
+    "distribution_channel": "HuggingFaceSnapshot",
+    "distribution_path": "meta-llama/Meta-Llama-3-8B"
+  },
+  "meta-llama/Meta-Llama-3-8B-Instruct": {
+    "aliases": ["llama3-chat", "llama3-instruct"],
+    "distribution_channel": "HuggingFaceSnapshot",
+    "distribution_path": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "transformer_params_key": "Meta-Llama-3-8B"
   },
   "meta-llama/CodeLlama-7b-Python-hf": {
     "aliases": ["codellama", "codellama-7b"],
@@ -17,7 +42,14 @@
   "mistralai/Mistral-7B-Instruct-v0.2": {
     "aliases": ["mistral-7b", "mistral-7b-instruct"],
     "distribution_channel": "HuggingFaceSnapshot",
-    "distribution_path": "mistralai/Mistral-7B-Instruct-v0.2"
+    "distribution_path": "mistralai/Mistral-7B-Instruct-v0.2",
+    "transformer_params_key": "Mistral-7B"
+  },
+  "openlm-research/open_llama_7b": {
+    "aliases": ["open-llama", "open-llama-7b"],
+    "distribution_channel": "HuggingFaceSnapshot",
+    "distribution_path": "openlm-research/open_llama_7b",
+    "transformer_params_key": "7B"
   },
   "stories15M": {
     "distribution_channel": "DirectDownload",
@@ -27,6 +59,14 @@
     ],
     "checkpoint_file": "stories15M.pt"
   },
+  "stories42M": {
+    "distribution_channel": "DirectDownload",
+    "distribution_path": [
+      "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories42M.pt",
+      "https://github.com/karpathy/llama2.c/raw/master/tokenizer.model"
+    ],
+    "checkpoint_file": "stories42M.pt"
+  },
   "stories110M": {
     "distribution_channel": "DirectDownload",
     "distribution_path": [

config/model_config.py

Lines changed: 2 additions & 0 deletions
@@ -44,6 +44,8 @@ class ModelConfig:
         default=ModelDistributionChannel.HuggingFaceSnapshot
     )
     checkpoint_file: str = field(default="model.pth")
+    tokenizer_file: str = field(default="tokenizer.model")
+    transformer_params_key: str = field(default=None)
 
 
 # Keys are stored in lowercase.
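For context, the dataclass now carries both artifact names and the params-table key. A self-contained sketch of its shape after this change (only the last three fields appear in this diff; the others are inferred from their use elsewhere in the commit, and the enum is stubbed in):

from dataclasses import dataclass, field
from enum import Enum
from typing import Optional, Sequence, Union

class ModelDistributionChannel(Enum):
    # Stub for illustration; the real enum lives in config/model_config.py.
    HuggingFaceSnapshot = "HuggingFaceSnapshot"
    DirectDownload = "DirectDownload"

@dataclass
class ModelConfig:
    name: str = ""  # e.g. "meta-llama/Llama-2-7b-hf"
    aliases: Sequence[str] = field(default_factory=list)
    distribution_path: Union[str, Sequence[str]] = ""
    distribution_channel: ModelDistributionChannel = field(
        default=ModelDistributionChannel.HuggingFaceSnapshot
    )
    checkpoint_file: str = field(default="model.pth")
    tokenizer_file: str = field(default="tokenizer.model")  # new in this commit
    transformer_params_key: Optional[str] = field(default=None)  # new in this commit

The diff declares transformer_params_key as str with a None default; the sketch uses Optional[str], since that is the effective type.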

download.py

Lines changed: 18 additions & 13 deletions
@@ -9,49 +9,54 @@
 from typing import Optional, Sequence
 
 from build.convert_hf_checkpoint import convert_hf_checkpoint
-from config.model_config import ModelDistributionChannel, resolve_model_config
+from config.model_config import (
+    ModelConfig,
+    ModelDistributionChannel,
+    resolve_model_config,
+)
 
 from requests.exceptions import HTTPError
 
 
-def _download_and_convert_hf_snapshot(
-    model: str, models_dir: Path, hf_token: Optional[str]
+def _download_hf_snapshot(
+    model_config: ModelConfig, models_dir: Path, hf_token: Optional[str]
 ):
-    model_dir = models_dir / model
+    model_dir = models_dir / model_config.name
     os.makedirs(model_dir, exist_ok=True)
 
     from huggingface_hub import snapshot_download
 
     # Download and store the HF model artifacts.
-    print(f"Downloading {model} from Hugging Face...")
+    print(f"Downloading {model_config.name} from HuggingFace...")
     try:
         snapshot_download(
-            model,
+            model_config.distribution_path,
             local_dir=model_dir,
             local_dir_use_symlinks=False,
             token=hf_token,
            ignore_patterns="*safetensors*",
         )
     except HTTPError as e:
         if e.response.status_code == 401:
+            os.rmdir(model_dir)
             raise RuntimeError(
                 "Access denied. Run huggingface-cli login to authenticate."
             )
-            os.rmdir(model_dir)
         else:
             raise e
 
+
     # Convert the model to the torchchat format.
-    print(f"Converting {model} to torchchat format...")
-    convert_hf_checkpoint(model_dir=model_dir, model_name=model, remove_bin_files=True)
+    print(f"Converting {model_config.name} to torchchat format...")
+    convert_hf_checkpoint(model_dir=model_dir, model_name=model_config.name, remove_bin_files=True)
 
 
 def _download_direct(
-    model: str,
+    model_config: ModelConfig,
     urls: Sequence[str],
     models_dir: Path,
 ):
-    model_dir = models_dir / model
+    model_dir = models_dir / model_config.name
     os.makedirs(model_dir, exist_ok=True)
 
     for url in urls:
@@ -70,9 +75,9 @@ def download_and_convert(
         model_config.distribution_channel
         == ModelDistributionChannel.HuggingFaceSnapshot
     ):
-        _download_and_convert_hf_snapshot(model_config.name, models_dir, hf_token)
+        _download_hf_snapshot(model_config, models_dir, hf_token)
     elif model_config.distribution_channel == ModelDistributionChannel.DirectDownload:
-        _download_direct(model_config.name, model_config.distribution_path, models_dir)
+        _download_direct(model_config, model_config.distribution_path, models_dir)
     else:
         raise RuntimeError(
             f"Unknown distribution channel {model_config.distribution_channel}."
