Skip to content

Commit bc2c9a3

Browse files
mikekgfb
authored and malfet committed
Remove tiktoken flag (#426)
* remove need for tiktoken flag * can't pass self to a function * remove tiktoken cli flag * eliminate need to load entire model when we only need model.config
1 parent d1d3091 commit bc2c9a3

File tree

2 files changed

+21
-18
lines changed

2 files changed

+21
-18
lines changed

build/builder.py

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,6 @@ def __post_init__(self):
173173
self.t = None
174174
return
175175

176-
177176
def validate_model(
178177
self,
179178
model: Transformer,
@@ -186,7 +185,7 @@ def validate_model(
186185

187186
if condition:
188187
raise RuntimeError(
189-
"test" # f"model-specified tokenizer ({tokenizer_setting_to_name(model.config.use_tiktoken)} does not match provided tokenizer ({tokenizer_setting_to_name(self.is_tiktoken)} for {model_description}"
188+
f"model-specified tokenizer ({tokenizer_setting_to_name(model.config.use_tiktoken)} does not match provided tokenizer ({tokenizer_setting_to_name(self.is_tiktoken)} for {model_description}"
190189
)
191190

192191
return
@@ -256,7 +255,7 @@ def _unset_gguf_kwargs(builder_args):
256255
builder_args.gguf_kwargs = None
257256

258257

259-
def _load_model_gguf(builder_args):
258+
def _load_model_gguf(builder_args, only_config=False):
260259
assert builder_args.gguf_path
261260
if builder_args.gguf_kwargs is None:
262261
kwargs = {}
@@ -266,7 +265,7 @@ def _load_model_gguf(builder_args):
266265
return model
267266

268267

269-
def _load_model_default(builder_args):
268+
def _load_model_default(builder_args, only_config=False):
270269
assert not builder_args.gguf_path
271270

272271
with torch.device("meta"):
@@ -319,7 +318,7 @@ def _load_model_default(builder_args):
319318
return model
320319

321320

322-
def _load_model(builder_args):
321+
def _load_model(builder_args, only_config=False):
323322
if builder_args.gguf_path:
324323
model = _load_model_gguf(builder_args)
325324
else:
@@ -341,7 +340,6 @@ def _initialize_model(
341340
tokenizer=None,
342341
):
343342
print("Loading model ...")
344-
t0 = time.time()
345343

346344
if builder_args.gguf_path and (builder_args.dso_path or builder_args.pte_path):
347345
print("Setting gguf_kwargs for generate.")
@@ -354,16 +352,17 @@ def _initialize_model(
354352
# (no unpack available)
355353
_set_gguf_kwargs(builder_args, is_et=is_pte, context="generate")
356354

357-
model_ = _load_model(builder_args)
358-
device_sync(device=builder_args.device)
359-
print(f"Time to load model: {time.time() - t0:.02f} seconds")
360-
361355
if builder_args.dso_path:
362356
assert (
363357
quantize is None or quantize == "{ }"
364358
), "quantize not valid for exported DSO model. Specify quantization during export."
359+
360+
t0 = time.time()
361+
model = _load_model(builder_args, only_config=True)
362+
device_sync(device=builder_args.device)
363+
print(f"Time to load model: {time.time() - t0:.02f} seconds")
364+
365365
try:
366-
model = model_
367366
# Replace model forward with the AOT-compiled forward
368367
# This is a hacky way to quickly demo AOTI's capability.
369368
# model is still a Python object, and any mutation to its
@@ -379,14 +378,23 @@ def _initialize_model(
379378
assert (
380379
quantize is None or quantize == "{ }"
381380
), "quantize not valid for exported PTE model. Specify quantization during export."
381+
382+
t0 = time.time()
383+
model = _load_model(builder_args, only_config=True)
384+
device_sync(device=builder_args.device)
385+
print(f"Time to load model: {time.time() - t0:.02f} seconds")
386+
382387
try:
383388
from build.model_et import PTEModel
384389

385-
model = PTEModel(model_.config, builder_args.pte_path)
390+
model = PTEModel(model.config, builder_args.pte_path)
386391
except Exception:
387392
raise RuntimeError(f"Failed to load ET compiled {builder_args.pte_path}")
388393
else:
389-
model = model_
394+
t0 = time.time()
395+
model = _load_model(builder_args)
396+
device_sync(device=builder_args.device)
397+
print(f"Time to load model: {time.time() - t0:.02f} seconds")
390398

391399
if quantize:
392400
t0q = time.time()

cli.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -112,11 +112,6 @@ def add_arguments(parser):
112112
default=None,
113113
help="Initialize torch seed",
114114
)
115-
parser.add_argument(
116-
"--tiktoken",
117-
action="store_true",
118-
help="Whether to use tiktoken tokenizer",
119-
)
120115
parser.add_argument(
121116
"--num-samples",
122117
type=int,

0 commit comments

Comments (0)