
import torch

-default_device = "cpu"
+# CPU is always available and also exportable to ExecuTorch
+default_device = "cpu"  # 'cuda' if torch.cuda.is_available() else 'cpu'

def check_args(args, name: str) -> None:
    pass

+def add_arguments_for_chat(parser):
+    # Only chat specific options should be here
+    _add_arguments_common(parser)
+
+
+def add_arguments_for_browser(parser):
+    # Only browser specific options should be here
+    _add_arguments_common(parser)
+    parser.add_argument(
+        "--port",
+        type=int,
+        default=5000,
+        help="Port for the web server in browser mode",
+    )
+
+
def add_arguments_for_download(parser):
    # Only download specific options should be here
    _add_arguments_common(parser)
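The new `default_device` comment above points at opt-in accelerator selection. A minimal sketch of what that could look like, assuming a standard `torch` install (illustrative only, not part of this change; CPU stays the fallback because it is always available and exportable to ExecuTorch):

import torch

# Illustrative helper (not in this diff): prefer an accelerator when present,
# falling back to CPU, which is always available and ExecuTorch-exportable.
def resolve_default_device() -> str:
    if torch.cuda.is_available():
        return "cuda"
    if torch.backends.mps.is_available():  # Apple Silicon
        return "mps"
    return "cpu"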
@@ -33,158 +51,204 @@ def add_arguments_for_export(parser):
    # Only export specific options should be here
    _add_arguments_common(parser)

-def add_arguments_for_browser(parser):
-    # Only browser specific options should be here
-    _add_arguments_common(parser)
-    parser.add_argument(
-        "--port",
-        type=int,
-        default=5000,
-        help="Port for the web server for browser mode."
-    )

def _add_arguments_common(parser):
    # Model specification. TODO Simplify this.
    # A model can be specified using a positional model name or HuggingFace
    # path. Alternatively, the model can be specified via --gguf-path or via
    # an explicit --checkpoint-dir, --checkpoint-path, or --tokenizer-path.
-
    parser.add_argument(
        "model",
        type=str,
        nargs="?",
        default=None,
-        help="Model name for well-known models.",
+        help="Model name for well-known models",
    )

+
+def add_arguments(parser):
    # TODO: Refactor this so that only common options are here
-    # and subcommand-specific options are inside individual
+    # and command-specific options are inside individual
    # add_arguments_for_generate, add_arguments_for_export etc.
+
    parser.add_argument(
-        "--seed",
-        type=int,
-        default=1234,  # set None for release
-        help="Initialize torch seed",
-    )
-    parser.add_argument(
-        "--prompt", type=str, default="Hello, my name is", help="Input prompt."
+        "--chat",
+        action="store_true",
+        help="Whether to start an interactive chat session",
    )
    parser.add_argument(
-        "--tiktoken",
+        "--gui",
        action="store_true",
-        help="Whether to use tiktoken tokenizer.",
+        help="Whether to use a web UI for an interactive chat session",
    )
    parser.add_argument(
-        "--chat",
-        action="store_true",
-        help="Use torchchat for an interactive chat session.",
+        "--prompt",
+        type=str,
+        default="Hello, my name is",
+        help="Input prompt",
    )
    parser.add_argument(
        "--is-chat-model",
        action="store_true",
-        help="Indicate that the model was trained to support chat functionality.",
+        help="Indicate that the model was trained to support chat functionality",
    )
    parser.add_argument(
-        "--gui",
+        "--seed",
+        type=int,
+        default=None,
+        help="Initialize torch seed",
+    )
+    parser.add_argument(
+        "--tiktoken",
        action="store_true",
-        help="Use torchchat to for an interactive gui-chat session.",
+        help="Whether to use tiktoken tokenizer",
+    )
+    parser.add_argument(
+        "--num-samples",
+        type=int,
+        default=1,
+        help="Number of samples",
+    )
+    parser.add_argument(
+        "--max-new-tokens",
+        type=int,
+        default=200,
+        help="Maximum number of new tokens",
    )
-    parser.add_argument("--num-samples", type=int, default=1, help="Number of samples.")
    parser.add_argument(
-        "--max-new-tokens", type=int, default=200, help="Maximum number of new tokens."
+        "--top-k",
+        type=int,
+        default=200,
+        help="Top-k for sampling",
    )
-    parser.add_argument("--top-k", type=int, default=200, help="Top-k for sampling.")
    parser.add_argument(
-        "--temperature", type=float, default=0.8, help="Temperature for sampling."
+        "--temperature",
+        type=float,
+        default=0.8,
+        help="Temperature for sampling",
    )
    parser.add_argument(
-        "--compile", action="store_true", help="Whether to compile the model."
+        "--compile",
+        action="store_true",
+        help="Whether to compile the model with torch.compile",
    )
    parser.add_argument(
        "--compile-prefill",
        action="store_true",
-        help="Whether to compile the prefill (improves prefill perf, but higher compile times)",
+        help="Whether to compile the prefill. Improves prefill perf, but has higher compile times.",
+    )
+    parser.add_argument(
+        "--profile",
+        type=Path,
+        default=None,
+        help="Profile path.",
    )
-    parser.add_argument("--profile", type=Path, default=None, help="Profile path.")
    parser.add_argument(
-        "--speculate-k", type=int, default=5, help="Speculative execution depth."
+        "--speculate-k",
+        type=int,
+        default=5,
+        help="Speculative execution depth",
    )
    parser.add_argument(
        "--draft-checkpoint-path",
        type=Path,
        default=None,
-        help="Draft checkpoint path.",
+        help="Use the specified draft checkpoint path",
    )
    parser.add_argument(
        "--checkpoint-path",
        type=Path,
        default="not_specified",
-        help="Model checkpoint path.",
+        help="Use the specified model checkpoint path",
    )
-    # parser.add_argument(
-    #     "--checkpoint-dir",
-    #     type=Path,
-    #     default=None,
-    #     help="Model checkpoint directory.",
-    # )
    parser.add_argument(
        "--params-path",
        type=Path,
        default=None,
-        help="Parameter file path.",
+        help="Use the specified parameter file",
    )
    parser.add_argument(
        "--gguf-path",
        type=Path,
        default=None,
-        help="GGUF file path.",
+        help="Use the specified GGUF model file",
    )
    parser.add_argument(
        "--tokenizer-path",
        type=Path,
        default=None,
-        help="Model checkpoint path.",
+        help="Use the specified model tokenizer file",
+    )
+    parser.add_argument(
+        "--output-pte-path",
+        type=str,
+        default=None,
+        help="Output to the specified ExecuTorch .pte model file",
+    )
+    parser.add_argument(
+        "--output-dso-path",
+        type=str,
+        default=None,
+        help="Output to the specified AOT Inductor .dso model file",
    )
-    parser.add_argument("--output-pte-path", type=str, default=None, help="Filename")
-    parser.add_argument("--output-dso-path", type=str, default=None, help="Filename")
    parser.add_argument(
-        "--dso-path", type=Path, default=None, help="Use the specified AOTI DSO model."
+        "--dso-path",
+        type=Path,
+        default=None,
+        help="Use the specified AOT Inductor .dso model file",
    )
    parser.add_argument(
        "--pte-path",
        type=Path,
        default=None,
-        help="Use the specified Executorch PTE model.",
+        help="Use the specified ExecuTorch .pte model file",
    )
    parser.add_argument(
-        "-d",
-        "--dtype",
+        "-d", "--dtype",
        default="float32",
        help="Override the dtype of the model (default is the checkpoint dtype). Options: bf16, fp16, fp32",
    )
-    parser.add_argument("-v", "--verbose", action="store_true")
    parser.add_argument(
-        "--quantize", type=str, default="{ }", help="Quantization options."
+        "-v", "--verbose",
+        action="store_true",
+        help="Verbose output",
+    )
+    parser.add_argument(
+        "--quantize",
+        type=str,
+        default="{ }",
+        help="Quantization options",
+    )
+    parser.add_argument(
+        "--params-table",
+        type=str,
+        default=None,
+        help="Parameter table to use",
    )
-    parser.add_argument("--params-table", type=str, default=None, help="Device to use")
    parser.add_argument(
-        "--device", type=str, default=default_device, help="Device to use"
+        "--device",
+        type=str,
+        default=default_device,
+        help="Hardware device to use. Options: cpu, cuda, mps",
    )
    parser.add_argument(
        "--tasks",
        nargs="+",
        type=str,
        default=["hellaswag"],
-        help="list of lm-eluther tasks to evaluate usage: --tasks task1 task2",
+        help="List of lm-eluther tasks to evaluate. Usage: --tasks task1 task2",
    )
    parser.add_argument(
-        "--limit", type=int, default=None, help="number of samples to evaluate"
+        "--limit",
+        type=int,
+        default=None,
+        help="Number of samples to evaluate",
    )
    parser.add_argument(
        "--max-seq-length",
        type=int,
        default=None,
-        help="maximum length sequence to evaluate",
+        help="Maximum sequence length to evaluate",
    )
    parser.add_argument(
        "--hf-token",
@@ -201,7 +265,6 @@ def _add_arguments_common(parser):


def arg_init(args):
-
    if Path(args.quantize).is_file():
        with open(args.quantize, "r") as f:
            args.quantize = json.loads(f.read())
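As the `arg_init` hunk above shows, `--quantize` accepts a path to a JSON file and replaces it in place with the parsed dict (an inline JSON string takes the other branch, which is elided here). A hedged usage sketch; the file name and empty config are placeholders, while `add_arguments` and `arg_init` are the functions from this diff:

import argparse
import json
import tempfile
from pathlib import Path

# Placeholder config file; real quantization schemes are not spelled out here.
cfg = Path(tempfile.mkdtemp()) / "quantize.json"
cfg.write_text(json.dumps({}))

parser = argparse.ArgumentParser()
add_arguments(parser)
args = parser.parse_args(["--quantize", str(cfg)])
arg_init(args)  # args.quantize now holds the parsed dict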
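For orientation, the per-command helpers (`add_arguments_for_chat`, `add_arguments_for_browser`, `add_arguments_for_download`, ...) fit naturally under argparse subparsers. The wiring below is an assumption for illustration, not code from this commit:

import argparse

# Assumed wiring (not from this diff): one subparser per torchchat command.
def build_cli_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(prog="torchchat")
    subparsers = parser.add_subparsers(dest="command")
    add_arguments_for_chat(subparsers.add_parser("chat"))
    add_arguments_for_browser(subparsers.add_parser("browser"))
    add_arguments_for_download(subparsers.add_parser("download"))
    return parser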