pytorch · jackzhxng · Oct 9, 2024
@@ -149,7 +149,7 @@ def main() -> None:
         root_dir_path=get_root_dir_path(), conda_env_name=args.conda_environment_name
     )
 
-    model, example_inputs, _ = EagerModelFactory.create_model(
+    model, example_inputs, _, _ = EagerModelFactory.create_model(
         *MODEL_NAME_TO_MODEL[args.model_name]
     )
 

@@ -158,7 +158,7 @@ def main():
             f"Valid compute units are {valid_compute_units}."
         )
 
-    model, example_inputs, _ = EagerModelFactory.create_model(
+    model, example_inputs, _, _ = EagerModelFactory.create_model(
         *MODEL_NAME_TO_MODEL[args.model_name]
     )
 

@@ -152,7 +152,7 @@ def get_model_config(args):
         raise RuntimeError(f"Available models are {list(MODEL_NAME_TO_MODEL.keys())}.")
 
     model_config = get_model_config(args)
-    model, example_inputs, _ = EagerModelFactory.create_model(**model_config)
+    model, example_inputs, _, _ = EagerModelFactory.create_model(**model_config)
 
     model = model.eval()
 

@@ -50,7 +50,7 @@ def get_model_and_inputs_from_name(model_name: str):
         logging.warning(
             "Using a model from examples/models not all of these are currently supported"
         )
-        model, example_inputs, _ = EagerModelFactory.create_model(
+        model, example_inputs, _, _ = EagerModelFactory.create_model(
             *MODEL_NAME_TO_MODEL[model_name]
         )
     # Case 3: Model is in an external python file loaded as a module.

@@ -139,7 +139,7 @@ def main() -> None:
             f"Available models are {list(MODEL_NAME_TO_MODEL.keys())}."
         )
 
-    model, example_inputs, _ = EagerModelFactory.create_model(
+    model, example_inputs, _, _ = EagerModelFactory.create_model(
         *MODEL_NAME_TO_MODEL[args.model_name]
     )
 

@@ -74,7 +74,7 @@ def main() -> None:
             f"Available models are {list(MODEL_NAME_TO_MODEL.keys())}."
         )
 
-    model, example_inputs, _ = EagerModelFactory.create_model(
+    model, example_inputs, _, _ = EagerModelFactory.create_model(
         *MODEL_NAME_TO_MODEL[args.model_name]
     )
 

@@ -774,7 +774,7 @@ def _load_llama_model(
     logging.info(
         f"Loading model with checkpoint={checkpoint}, params={params_path}, use_kv_cache={use_kv_cache}, weight_type={weight_type}"
     )
-    model, example_inputs, _ = EagerModelFactory.create_model(
+    model, example_inputs, example_kwarg_inputs, _ = EagerModelFactory.create_model(
         "llama2",
         "Llama2Model",
         checkpoint=checkpoint,
@@ -824,6 +824,7 @@ def _load_llama_model(
         use_kv_cache=use_kv_cache,
         generate_full_logits=generate_full_logits,
         example_inputs=example_inputs,
+        example_kwarg_inputs=example_kwarg_inputs,
         enable_dynamic_shape=enable_dynamic_shape,
         calibration_tasks=calibration_tasks,
         calibration_limit=calibration_limit,

@@ -31,7 +31,7 @@ def __init__(self, args):
             **params,
         )
         super().__init__(tokenizer_path=args.tokenizer, model_args=model_args)
-        self.model, _, _ = EagerModelFactory.create_model(
+        self.model, _, _, _ = EagerModelFactory.create_model(
             "llama2",
             "Llama2Model",
             checkpoint=args.checkpoint,

@@ -6,7 +6,7 @@
 
 import importlib
 import os
-from typing import Any, Tuple
+from typing import Any, Dict, Tuple
 
 import torch
 
@@ -19,7 +19,7 @@ class EagerModelFactory:
     @staticmethod
     def create_model(
         module_name, model_class_name, **kwargs
-    ) -> Tuple[torch.nn.Module, Any, Any]:
+    ) -> Tuple[torch.nn.Module, Tuple[Any], Dict[str, Any], Any]:
         """
         Create an instance of a model class that implements EagerModelBase and retrieve related data.
 
@@ -42,14 +42,18 @@ def create_model(
         if hasattr(module, model_class_name):
             model_class = getattr(module, model_class_name)
             model = model_class(**kwargs)
+            example_kwarg_inputs = None
+            dynamic_shapes = None
+            if hasattr(model, "get_example_kwarg_inputs"):
+                example_kwarg_inputs = model.get_example_kwarg_inputs()
             if hasattr(model, "get_dynamic_shapes"):
-                return (
-                    model.get_eager_model(),
-                    model.get_example_inputs(),
-                    model.get_dynamic_shapes(),
-                )
-            else:
-                return model.get_eager_model(), model.get_example_inputs(), None
+                dynamic_shapes = model.get_dynamic_shapes()
+            return (
+                model.get_eager_model(),
+                model.get_example_inputs(),
+                example_kwarg_inputs,
+                dynamic_shapes,
+            )
 
         raise ValueError(
             f"Model class '{model_class_name}' not found in module '{module_name}'."

@@ -69,7 +69,7 @@ def validate_tensor_allclose(
         return self.assertTrue(result)
 
     def test_mv3_export_to_executorch(self):
-        eager_model, example_inputs, _ = EagerModelFactory.create_model(
+        eager_model, example_inputs, _, _ = EagerModelFactory.create_model(
             *MODEL_NAME_TO_MODEL["mv3"]
         )
         eager_output, executorch_output = self.collect_executorch_and_eager_outputs(
@@ -81,7 +81,7 @@ def test_mv3_export_to_executorch(self):
         )
 
     def test_mv2_export_to_executorch(self):
-        eager_model, example_inputs, _ = EagerModelFactory.create_model(
+        eager_model, example_inputs, _, _ = EagerModelFactory.create_model(
             *MODEL_NAME_TO_MODEL["mv2"]
         )
         eager_output, executorch_output = self.collect_executorch_and_eager_outputs(
@@ -90,7 +90,7 @@ def test_mv2_export_to_executorch(self):
         self.validate_tensor_allclose(eager_output, executorch_output[0])
 
     def test_vit_export_to_executorch(self):
-        eager_model, example_inputs, _ = EagerModelFactory.create_model(
+        eager_model, example_inputs, _, _ = EagerModelFactory.create_model(
             *MODEL_NAME_TO_MODEL["vit"]
         )
         eager_output, executorch_output = self.collect_executorch_and_eager_outputs(
@@ -102,7 +102,7 @@ def test_vit_export_to_executorch(self):
         )
 
     def test_w2l_export_to_executorch(self):
-        eager_model, example_inputs, _ = EagerModelFactory.create_model(
+        eager_model, example_inputs, _, _ = EagerModelFactory.create_model(
             *MODEL_NAME_TO_MODEL["w2l"]
         )
         eager_output, executorch_output = self.collect_executorch_and_eager_outputs(
@@ -111,7 +111,7 @@ def test_w2l_export_to_executorch(self):
         self.validate_tensor_allclose(eager_output, executorch_output[0])
 
     def test_ic3_export_to_executorch(self):
-        eager_model, example_inputs, _ = EagerModelFactory.create_model(
+        eager_model, example_inputs, _, _ = EagerModelFactory.create_model(
             *MODEL_NAME_TO_MODEL["ic3"]
         )
         eager_output, executorch_output = self.collect_executorch_and_eager_outputs(
@@ -123,7 +123,7 @@ def test_ic3_export_to_executorch(self):
         )
 
     def test_resnet18_export_to_executorch(self):
-        eager_model, example_inputs, _ = EagerModelFactory.create_model(
+        eager_model, example_inputs, _, _ = EagerModelFactory.create_model(
             *MODEL_NAME_TO_MODEL["resnet18"]
         )
         eager_output, executorch_output = self.collect_executorch_and_eager_outputs(
@@ -132,7 +132,7 @@ def test_resnet18_export_to_executorch(self):
         self.validate_tensor_allclose(eager_output, executorch_output[0])
 
     def test_resnet50_export_to_executorch(self):
-        eager_model, example_inputs, _ = EagerModelFactory.create_model(
+        eager_model, example_inputs, _, _ = EagerModelFactory.create_model(
             *MODEL_NAME_TO_MODEL["resnet50"]
         )
         eager_output, executorch_output = self.collect_executorch_and_eager_outputs(
@@ -141,7 +141,7 @@ def test_resnet50_export_to_executorch(self):
         self.validate_tensor_allclose(eager_output, executorch_output[0])
 
     def test_dl3_export_to_executorch(self):
-        eager_model, example_inputs, _ = EagerModelFactory.create_model(
+        eager_model, example_inputs, _, _ = EagerModelFactory.create_model(
             *MODEL_NAME_TO_MODEL["dl3"]
         )
         eager_output, executorch_output = self.collect_executorch_and_eager_outputs(

@@ -58,7 +58,7 @@ def main() -> None:
             f"Available models are {list(MODEL_NAME_TO_MODEL.keys())}."
         )
 
-    model, example_inputs, dynamic_shapes = EagerModelFactory.create_model(
+    model, example_inputs, _, dynamic_shapes = EagerModelFactory.create_model(
         *MODEL_NAME_TO_MODEL[args.model_name]
     )
 

@@ -57,7 +57,7 @@ def export_composite_module_with_lower_graph():
         "Running the example to export a composite module with lowered graph..."
     )
 
-    m, m_inputs, _ = EagerModelFactory.create_model(*MODEL_NAME_TO_MODEL["add_mul"])
+    m, m_inputs, _, _ = EagerModelFactory.create_model(*MODEL_NAME_TO_MODEL["add_mul"])
     m_compile_spec = m.get_compile_spec()
 
     # pre-autograd export. eventually this will become torch.export
@@ -166,7 +166,7 @@ def export_and_lower_the_whole_graph():
     """
     logging.info("Running the example to export and lower the whole graph...")
 
-    m, m_inputs, _ = EagerModelFactory.create_model(*MODEL_NAME_TO_MODEL["add_mul"])
+    m, m_inputs, _, _ = EagerModelFactory.create_model(*MODEL_NAME_TO_MODEL["add_mul"])
     m_compile_spec = m.get_compile_spec()
 
     m_inputs = m.get_example_inputs()

@@ -58,7 +58,7 @@ def main() -> None:
             f"Available models are {list(MODEL_NAME_TO_MODEL.keys())}."
         )
 
-    model, example_inputs, _ = EagerModelFactory.create_model(
+    model, example_inputs, _, _ = EagerModelFactory.create_model(
         *MODEL_NAME_TO_MODEL[args.model_name]
     )
 

@@ -79,7 +79,7 @@
             f"Available models are {list(MODEL_NAME_TO_OPTIONS.keys())}."
         )
 
-    model, example_inputs, _ = EagerModelFactory.create_model(
+    model, example_inputs, _, _ = EagerModelFactory.create_model(
         *MODEL_NAME_TO_MODEL[args.model_name]
     )
 

@@ -162,7 +162,7 @@ def main() -> None:
         )
 
     start = time.perf_counter()
-    model, example_inputs, _ = EagerModelFactory.create_model(
+    model, example_inputs, _, _ = EagerModelFactory.create_model(
         *MODEL_NAME_TO_MODEL[args.model_name]
     )
     end = time.perf_counter()

@@ -26,6 +26,8 @@
 def _to_core_aten(
     model: Union[torch.fx.GraphModule, torch.nn.Module],
     example_inputs: Tuple[Value, ...],
+    *,
+    example_kwarg_inputs: Optional[Dict] = None,
     dynamic_shapes: Optional[Union[Dict[str, Any], Tuple[Any]]] = None,
     strict=True,
     verbose=True,
@@ -38,7 +40,11 @@ def _to_core_aten(
             f"Expected passed in model to be an instance of fx.GraphModule, got {type(model)}"
         )
     core_aten_ep = export(
-        model, example_inputs, dynamic_shapes=dynamic_shapes, strict=strict
+        model,
+        example_inputs,
+        example_kwarg_inputs,
+        dynamic_shapes=dynamic_shapes,
+        strict=strict,
     )
     if verbose:
         logging.info(f"Core ATen graph:\n{core_aten_ep.graph}")
@@ -69,14 +75,21 @@ def _core_aten_to_edge(
 def export_to_edge(
     model: Union[torch.fx.GraphModule, torch.nn.Module],
     example_inputs: Tuple[Value, ...],
+    *,
+    example_kwarg_inputs: Optional[Dict] = None,
     dynamic_shapes: Optional[Union[Dict[str, Any], Tuple[Any]]] = None,
     edge_constant_methods: Optional[Dict[str, Any]] = None,
     edge_compile_config=_EDGE_COMPILE_CONFIG,
     strict=True,
     verbose=True,
 ) -> EdgeProgramManager:
     core_aten_ep = _to_core_aten(
-        model, example_inputs, dynamic_shapes, strict=strict, verbose=verbose
+        model,
+        example_inputs,
+        example_kwarg_inputs=example_kwarg_inputs,
+        dynamic_shapes=dynamic_shapes,
+        strict=strict,
+        verbose=verbose,
     )
     return _core_aten_to_edge(
         core_aten_ep, edge_constant_methods, edge_compile_config, verbose=verbose
@@ -86,6 +99,8 @@ def export_to_edge(
 def export_to_exec_prog(
     model: Union[torch.fx.GraphModule, torch.nn.Module],
     example_inputs: Tuple[Value, ...],
+    *,
+    example_kwarg_inputs: Optional[Dict[str, Any]] = None,
     dynamic_shapes: Optional[Union[Dict[str, Any], Tuple[Any]]] = None,
     edge_constant_methods: Optional[Dict[str, Any]] = None,
     edge_compile_config=_EDGE_COMPILE_CONFIG,
@@ -96,7 +111,13 @@ def export_to_exec_prog(
     # pre-autograd export. eventually this will become torch.export
     m = export_for_training(m, example_inputs).module()
 
-    core_aten_ep = _to_core_aten(m, example_inputs, dynamic_shapes, strict=strict)
+    core_aten_ep = _to_core_aten(
+        m,
+        example_inputs,
+        example_kwarg_inputs=example_kwarg_inputs,
+        dynamic_shapes=dynamic_shapes,
+        strict=strict,
+    )
 
     edge_m = _core_aten_to_edge(
         core_aten_ep, edge_constant_methods, edge_compile_config

@@ -10,7 +10,7 @@
 
 import logging
 from enum import Enum
-from typing import Any, Callable, List, Optional
+from typing import Any, Callable, Dict, List, Optional
 
 import torch
 from executorch.backends.transforms.duplicate_dynamic_quant_chain import (
@@ -68,6 +68,7 @@ def __init__(
         dtype,
         use_kv_cache,
         example_inputs,
+        example_kwarg_inputs: Optional[Dict] = None,
         args: Optional[Any] = None,
         enable_dynamic_shape: bool = False,
         generate_full_logits: bool = False,
@@ -87,6 +88,7 @@ def __init__(
         self.max_seq_len = max_seq_len
         self.dtype = dtype
         self.example_inputs = example_inputs
+        self.example_kwarg_inputs = example_kwarg_inputs
         self.use_kv_cache = use_kv_cache
         self.generate_full_logits = generate_full_logits
         self.enable_dynamic_shape = enable_dynamic_shape
@@ -186,12 +188,16 @@ def capture_pre_autograd_graph(self) -> "LLMEdgeManager":
                 self.pre_autograd_graph_module = torch.export.export(
                     self.model,
                     self.example_inputs,
+                    self.example_kwarg_inputs,
                     dynamic_shapes=dynamic_shape,
                     strict=True,
                 ).module()
             else:
                 self.pre_autograd_graph_module = capture_pre_autograd_graph(
-                    self.model, self.example_inputs, dynamic_shapes=dynamic_shape
+                    self.model,
+                    self.example_inputs,
+                    kwargs=self.example_kwarg_inputs,
+                    dynamic_shapes=dynamic_shape,
                 )
 
         return self
@@ -340,6 +346,7 @@ def export_to_edge(self) -> "LLMEdgeManager":
             self.edge_manager = export_to_edge(
                 self.pre_autograd_graph_module,  # pyre-fixme[6]
                 self.example_inputs,
+                example_kwarg_inputs=self.example_kwarg_inputs,
                 dynamic_shapes=dynamic_shape,
                 edge_constant_methods=self.metadata,
                 edge_compile_config=edge_config,