
Commit 055cf7b

Merge branch 'main' into main
2 parents: 8a9e672 + e30aaa0

File tree

12 files changed: +282 lines added, -69 lines removed


README.md

Lines changed: 1 addition & 1 deletion
@@ -575,7 +575,7 @@ We really value our community and the contributions made by our wonderful users.
 
 To connect with us and other community members, we invite you to join our Slack community by filling out this [form](https://docs.google.com/forms/d/e/1FAIpQLSeADnUNW36fjKjYzyHDOzEB_abKQE9b6gqqW9NXse6O0MWh0A/viewform). Once you've joined, you can:
 * Head to the `#torchchat-general` channel for general questions, discussion, and community support.
-* Join the `#torchchat-contribution` channel if you're interested in contributing directly to project development.
+* Join the `#torchchat-contributors` channel if you're interested in contributing directly to project development.
 
 Looking forward to discussing with you about torchchat future!

tokenizer/base.py

Lines changed: 32 additions & 0 deletions
@@ -0,0 +1,32 @@ (new file)
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""
Abstract base class for all tokenizer classes in python matching c++ interface.
"""

# Standard
from abc import ABC, abstractmethod
from typing import List


class TokenizerBase(ABC):
    __doc__ = __doc__

    @abstractmethod
    def encode(self, s: str, *, bos: bool = False, eos: bool = False) -> List[int]:
        """Encode the given string and optionally include bos/eos tokens"""

    @abstractmethod
    def decode(self, ids: List[int]) -> str:
        """Decode the given token ids into a string"""

    @abstractmethod
    def bos_id(self) -> int:
        """The id of the begin-of-string token"""

    @abstractmethod
    def eos_id(self) -> int:
        """The id of the end-of-string token"""

tokenizer/hf_tokenizer.py

Lines changed: 92 additions & 0 deletions
@@ -0,0 +1,92 @@ (new file)
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Standard
from typing import List, Optional
import json
import os

# Third Party
from tokenizers import Tokenizer

# Local
from .base import TokenizerBase


class HFTokenizer(TokenizerBase):
    """
    Wrapper around the Huggingface `tokenizers` library for API compatibility
    """

    def __init__(self, file_path: str):
        # If the path is a directory, look for "tokenizer.json" which is
        # standard for transformers checkpoints and also look for the
        # "tokenizer_config.json" file to parse eos/bos tokens
        if os.path.isdir(file_path):
            tokenizer_path = os.path.join(file_path, "tokenizer.json")
            tokenizer_config_path = os.path.join(file_path, "tokenizer_config.json")
        else:
            tokenizer_path = file_path
            tokenizer_config_path = os.path.join(os.path.dirname(file_path), "tokenizer_config.json")
        if not os.path.isfile(tokenizer_path):
            tokenizer_config_path = None

        # Load the tokenizer itself
        self._tokenizer = Tokenizer.from_file(tokenizer_path)

        # If available, parse bos/eos tokens from the tokenizer config
        self._bos_id, self._eos_id = None, None
        if tokenizer_config_path is not None:
            with open(tokenizer_config_path, "r") as handle:
                tok_config = json.load(handle)
            bos_token = tok_config.get("bos_token")
            eos_token = tok_config.get("eos_token")
            if bos_token is not None:
                self._bos_id = self._tokenizer.token_to_id(bos_token)
            if eos_token is not None:
                self._eos_id = self._tokenizer.token_to_id(eos_token)

        # If no eos/bos tokens found, go looking for them!
        if None in [self._bos_id, self._eos_id]:
            tok_content = json.loads(self._tokenizer.to_str())
            if self._bos_id is None:
                self._bos_id = self._look_for_special_token(tok_content, ["begin", "text"])
            if self._eos_id is None:
                self._eos_id = self._look_for_special_token(tok_content, ["end", "text"])

        assert None not in [self._bos_id, self._eos_id], "Unable to find BOS/EOS tokens"

    @staticmethod
    def _look_for_special_token(added_tokens: dict, search_strs: List[str]) -> Optional[int]:
        candidate_toks = added_tokens
        for search_str in search_strs:
            candidate_toks = [
                tok for tok in candidate_toks
                if tok["special"] and search_str in tok["content"]
            ]
        if len(candidate_toks) == 1:
            return candidate_toks[0]["id"]

    def encode(
        self,
        s: str,
        *,
        bos: bool = False,
        eos: bool = False,
    ) -> List[int]:
        res = self._tokenizer.encode(s, add_special_tokens=bos).ids
        if eos and (not res or res[-1] != self._eos_id):
            res.append(self._eos_id)
        return res

    def decode(self, ids: List[int]) -> str:
        return self._tokenizer.decode(ids)

    def bos_id(self) -> int:
        return self._bos_id

    def eos_id(self) -> int:
        return self._eos_id
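
As a usage sketch for the new wrapper (the checkpoint path is a placeholder; any transformers-style directory containing tokenizer.json, and optionally tokenizer_config.json, should work):

from tokenizer.hf_tokenizer import HFTokenizer

# Placeholder path, not taken from the commit.
tok = HFTokenizer("checkpoints/my-model")

ids = tok.encode("Hello, torchchat!", bos=True, eos=True)
assert ids[-1] == tok.eos_id()  # encode() appends EOS only when requested
print(tok.decode(ids))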

tokenizer/tiktoken.py

Lines changed: 3 additions & 1 deletion
@@ -23,6 +23,8 @@
 import tiktoken
 from tiktoken.load import load_tiktoken_bpe
 
+from .base import TokenizerBase
+
 
 logger = getLogger(__name__)
 
@@ -38,7 +40,7 @@ class Message(TypedDict):
 Dialog = Sequence[Message]
 
 
-class Tokenizer:
+class Tokenizer(TokenizerBase):
     """
     tokenizing and encoding/decoding text using the Tiktoken tokenizer.
     """

torchchat/cli/builder.py

Lines changed: 55 additions & 5 deletions
@@ -215,6 +215,7 @@ class TokenizerArgs:
     tokenizer_path: Optional[Union[Path, str]] = None
     is_sentencepiece: bool = False
     is_tiktoken: bool = False
+    is_hf_tokenizer: bool = False
     t: Optional[Any] = None
 
     def __post_init__(self):
@@ -224,6 +225,7 @@ def __post_init__(self):
             self.t = TiktokenTokenizer(model_path=str(self.tokenizer_path))
             self.is_tiktoken = True
             self.is_sentencepiece = False
+            self.is_hf_tokenizer = False
             return
         except:
             pass
@@ -234,12 +236,25 @@
             self.t = SentencePieceProcessor(model_file=str(self.tokenizer_path))
             self.is_tiktoken = False
             self.is_sentencepiece = True
+            self.is_hf_tokenizer = False
+            return
+        except:
+            pass
+
+        try:
+            from tokenizer.hf_tokenizer import HFTokenizer
+
+            self.t = HFTokenizer(str(self.tokenizer_path))
+            self.is_tiktoken = False
+            self.is_sentencepiece = False
+            self.is_hf_tokenizer = True
             return
         except:
             pass
 
         self.is_tiktoken = False
         self.is_sentencepiece = False
+        self.is_hf_tokenizer = False
         self.t = None
         return
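
The cascade above probes candidate tokenizers in a fixed order (tiktoken, then SentencePiece, then the Huggingface `tokenizers` wrapper) and keeps the first one that loads; a loader that raises simply falls through to the next. The same pattern in isolation, as a sketch (the helper and import paths are assumptions for illustration):

from typing import Any, Optional, Tuple


def detect_tokenizer(path: str) -> Tuple[Optional[str], Optional[Any]]:
    """Sketch: return (kind, tokenizer) for the first loader that succeeds."""

    def load_tiktoken():
        from tokenizer.tiktoken import Tokenizer as TiktokenTokenizer
        return TiktokenTokenizer(model_path=path)

    def load_sentencepiece():
        from sentencepiece import SentencePieceProcessor
        return SentencePieceProcessor(model_file=path)

    def load_hf():
        from tokenizer.hf_tokenizer import HFTokenizer
        return HFTokenizer(path)

    for kind, load in [
        ("tiktoken", load_tiktoken),
        ("sentencepiece", load_sentencepiece),
        ("hf_tokenizer", load_hf),
    ]:
        try:
            return kind, load()
        except Exception:
            continue
    return None, None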

@@ -251,16 +266,27 @@ def validate_model(
         if model is None:
             return
 
-        if self.is_tiktoken == self.is_sentencepiece:
+        if sum([self.is_tiktoken, self.is_hf_tokenizer, self.is_sentencepiece]) != 1:
             raise RuntimeError(f"no tokenizer was found at {self.tokenizer_path}")
 
         is_tiktoken = self.is_tiktoken
         is_sentencepiece = self.is_sentencepiece
+        is_hf_tokenizer = self.is_hf_tokenizer
         use_tiktoken = model.config.use_tiktoken
+        use_hf_tokenizer = model.config.use_hf_tokenizer
+        use_sentencepiece = not (use_tiktoken or use_hf_tokenizer)
 
-        if not (is_tiktoken == use_tiktoken) or not (is_sentencepiece != use_tiktoken):
+        if (
+            (is_tiktoken and not use_tiktoken) or
+            (is_hf_tokenizer and not use_hf_tokenizer) or
+            (is_sentencepiece and not use_sentencepiece)
+        ):
             raise RuntimeError(
-                f"model-specified tokenizer ({tokenizer_setting_to_name(use_tiktoken)}) does not match provided tokenizer ({tokenizer_setting_to_name(is_tiktoken)}) for {model_description}"
+                "model-specified tokenizer ({}) does not match provided tokenizer ({}) for {}".format(
+                    tokenizer_setting_to_name(use_tiktoken, use_hf_tokenizer),
+                    tokenizer_setting_to_name(is_tiktoken, is_hf_tokenizer),
+                    model_description,
+                )
             )
 
         return
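
To make the new validation concrete, a worked example with hypothetical flag values, where the model config expects tiktoken but a SentencePiece tokenizer was detected:

# Hypothetical values, for illustration only.
is_tiktoken, is_hf_tokenizer, is_sentencepiece = False, False, True
use_tiktoken, use_hf_tokenizer = True, False
use_sentencepiece = not (use_tiktoken or use_hf_tokenizer)  # False

mismatch = (
    (is_tiktoken and not use_tiktoken)
    or (is_hf_tokenizer and not use_hf_tokenizer)
    or (is_sentencepiece and not use_sentencepiece)
)
print(mismatch)  # True: validate_model would raise the RuntimeError above
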
@@ -510,6 +536,15 @@ def _load_model(builder_args: BuilderArgs) -> Model:
         model = _load_model_default(builder_args)
     # model = _maybe_parallelize_model(model, builder_args, world_mesh, parallel_dims)
 
+    if builder_args.dso_path or builder_args.aoti_package_path:
+        # AOTI-compiled model will load its own weights.
+        # Release weights here to avoid OOM
+        import gc
+        if hasattr(model, "model"):
+            model.model = None
+        gc.collect()
+        torch.cuda.empty_cache()
+
     model = model.to(device=builder_args.device, dtype=builder_args.precision)
     return model.eval()

@@ -558,6 +593,12 @@ def _initialize_model(
         # attributes will NOT be seen by the AOTI-compiled forward
         # function, e.g. calling model.setup_cache will NOT touch
         # AOTI compiled and maintained model buffers such as kv_cache.
+        # Using the cpp runner to run the AOTI-compiled model is recommended.
+
+        def do_nothing(max_batch_size, max_seq_length):
+            pass
+        model.setup_caches = do_nothing
+
         model.forward = torch._export.aot_load(
             str(builder_args.dso_path.absolute()), builder_args.device
         )
@@ -591,6 +632,11 @@
         aoti_compiled_model = load_package(
             str(builder_args.aoti_package_path.absolute())
         )
+
+        def do_nothing(max_batch_size, max_seq_length):
+            pass
+        model.setup_caches = do_nothing
+
         model.forward = aoti_compiled_model
         metadata = aoti_compiled_model.get_metadata()
         builder_args.device = metadata["AOTI_DEVICE_KEY"]
@@ -655,5 +701,9 @@
     return model
 
 
-def tokenizer_setting_to_name(tiktoken: bool = False) -> str:
-    return "TikToken" if tiktoken else "SentencePiece"
+def tokenizer_setting_to_name(tiktoken: bool, tokenizers: bool) -> str:
+    if tiktoken:
+        return "TikToken"
+    if tokenizers:
+        return "Tokenizers"
+    return "SentencePiece"

torchchat/cli/cli.py

Lines changed: 13 additions & 7 deletions
@@ -5,20 +5,16 @@
 # LICENSE file in the root directory of this source tree.
 
 import argparse
+import importlib.metadata
 import json
 import logging
 import os
 import sys
 from pathlib import Path
 
-import torch
-
-from torchchat.cli.download import download_and_convert, is_model_downloaded
-
 from torchchat.utils.build_utils import (
     allowable_dtype_names,
     allowable_params_table,
-    get_device_str,
 )
 
 logging.basicConfig(level=logging.INFO, format="%(message)s")
@@ -42,6 +38,9 @@
 
 # Handle CLI arguments that are common to a majority of subcommands.
 def check_args(args, verb: str) -> None:
+    # Local import to avoid unnecessary expensive imports
+    from torchchat.cli.download import download_and_convert, is_model_downloaded
+
     # Handle model download. Skip this for download, since it has slightly
     # different semantics.
     if (
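
The moves above defer `torch` and the download helpers until a subcommand actually needs them, so CLI startup no longer pays their import cost. A rough way to measure what such an import costs (a sketch, assuming torch is installed):

import importlib
import time

start = time.perf_counter()
importlib.import_module("torch")
print(f"import torch: {time.perf_counter() - start:.2f}s")
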
@@ -498,9 +497,10 @@ def _add_speculative_execution_args(parser) -> None:
 
 
 def arg_init(args):
-    if not (torch.__version__ > "2.3"):
+    torch_version = importlib.metadata.version("torch")
+    if not torch_version or (torch_version <= "2.3"):
         raise RuntimeError(
-            f"You are using PyTorch {torch.__version__}. At this time, torchchat uses the latest PyTorch technology with high-performance kernels only available in PyTorch nightly until the PyTorch 2.4 release"
+            f"You are using PyTorch {torch_version}. At this time, torchchat uses the latest PyTorch technology with high-performance kernels only available in PyTorch nightly until the PyTorch 2.4 release"
         )
 
     if sys.version_info.major != 3 or sys.version_info.minor < 10:
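
One caveat with the new check: comparing version strings lexicographically misorders multi-digit components (for example, "2.10" <= "2.3" is True as strings). A more robust variant could parse the versions first, e.g. with the third-party packaging library (an assumption; this diff does not use it):

from importlib.metadata import version

from packaging.version import Version  # assumption: packaging is installed

if Version(version("torch")) <= Version("2.3"):
    raise RuntimeError("torchchat requires PyTorch newer than 2.3")
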
@@ -521,6 +521,9 @@ def arg_init(args):
             raise RuntimeError("Device not supported by ExecuTorch")
         args.device = "cpu"
     else:
+        # Localized import to minimize expensive imports
+        from torchchat.utils.build_utils import get_device_str
+
         args.device = get_device_str(
             args.quantize.get("executor", {}).get("accelerator", args.device)
         )
@@ -534,5 +537,8 @@
         vars(args)["compile_prefill"] = False
 
     if hasattr(args, "seed") and args.seed:
+        # Localized import to minimize expensive imports
+        import torch
+
         torch.manual_seed(args.seed)
     return args
