Commit 6cd759d
Add kwarg example inputs to eager model base
1 parent 6c53356

4 files changed: +44 −12 lines

examples/models/llama2/model.py
Lines changed: 8 additions & 5 deletions

@@ -250,32 +250,35 @@ def get_eager_model(self):
         # switch all to FP32
         return self.model_.to(torch.float32)
 
-    def get_example_inputs(self):
+    def get_example_inputs(self) -> Tuple[Tuple, Dict]:
         if self.use_kv_cache:
             return self.get_example_inputs_kvcache_sdpa()
         else:
-            return (
+            positional_inputs = (
                 torch.tensor(
                     [[1, 2, 3]], dtype=torch.long
                 ),  # tokens; without kv cache the example input is a short prompt.
             )
+            return (positional_inputs, {})
 
     # Assumption: the custom op doesn't support dynamic shape right now. It might, but it's untested, so let's first get static shape working.
-    def get_example_inputs_kvcache_sdpa(self):
+    def get_example_inputs_kvcache_sdpa(self) -> Tuple[Tuple, Dict]:
         if self.enable_dynamic_shape:
-            return (
+            positional_inputs = (
                 torch.tensor([[2, 3, 4]], dtype=torch.long),
                 torch.tensor([0], dtype=torch.long),
             )
+            return (positional_inputs, {})
         else:
-            return (
+            positional_inputs = (
                 torch.tensor(
                     [[1]], dtype=torch.long
                 ),  # tokens; with kv cache our input token length is always just 1 token.
                 torch.tensor(
                     [0], dtype=torch.long
                 ),  # start_pos: which output token we are on.
             )
+            return (positional_inputs, {})
 
     def _transform_for_pre_quantization(self, checkpoint):
         assert hasattr(self.args, "preq_mode"), "preq_mode must be specified"
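
For context, a minimal self-contained sketch of the convention this hunk adopts: example inputs become a (positional_inputs, kwarg_inputs) pair that callers can splat into an eager call or an export entry point. The free function get_example_inputs below is a hypothetical stand-in for the method above, not code from this commit.

import torch

def get_example_inputs():  # hypothetical stand-in for the method above
    # Mirrors the non-kv-cache branch: one token tensor, no kwarg inputs.
    positional_inputs = (torch.tensor([[1, 2, 3]], dtype=torch.long),)
    return (positional_inputs, {})

example_inputs, example_kwarg_inputs = get_example_inputs()
# Both halves can be splatted into any eager or export call:
#     model(*example_inputs, **example_kwarg_inputs)
print(example_inputs[0].shape, example_kwarg_inputs)  # torch.Size([1, 3]) {}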

examples/models/model_base.py
Lines changed: 3 additions & 2 deletions

@@ -5,6 +5,7 @@
 # LICENSE file in the root directory of this source tree.
 
 from abc import ABC, abstractmethod
+from typing import Dict, Tuple
 
 import torch
 
@@ -37,11 +38,11 @@ def get_eager_model(self) -> torch.nn.Module:
         raise NotImplementedError("get_eager_model")
 
     @abstractmethod
-    def get_example_inputs(self):
+    def get_example_inputs(self) -> Tuple[Tuple, Dict]:
         """
         Abstract method to provide example inputs for the model.
 
         Returns:
-            Any: Example inputs that can be used for testing and tracing.
+            Tuple[Tuple, Dict]: The positional inputs (Tuple) and the kwarg inputs (Dict).
         """
         raise NotImplementedError("get_example_inputs")
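
To illustrate the updated contract, here is a hedged sketch of a concrete subclass. The local EagerModelBase mirrors the two abstract methods shown in the hunk rather than importing executorch (the enclosing class name sits outside the diff context), and IdentityModel is invented for the example.

from abc import ABC, abstractmethod
from typing import Dict, Tuple

import torch

class EagerModelBase(ABC):  # local mirror of the ABC in model_base.py
    @abstractmethod
    def get_eager_model(self) -> torch.nn.Module: ...

    @abstractmethod
    def get_example_inputs(self) -> Tuple[Tuple, Dict]: ...

class IdentityModel(EagerModelBase):  # made-up example subclass
    def get_eager_model(self) -> torch.nn.Module:
        return torch.nn.Identity()

    def get_example_inputs(self) -> Tuple[Tuple, Dict]:
        # Positional inputs in the tuple, kwarg inputs in the dict.
        return ((torch.ones(1, 4),), {})

m = IdentityModel()
args, kwargs = m.get_example_inputs()
print(m.get_eager_model()(*args, **kwargs).shape)  # torch.Size([1, 4])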

extension/export_util/utils.py
Lines changed: 24 additions & 3 deletions

@@ -26,6 +26,8 @@
 def _to_core_aten(
     model: Union[torch.fx.GraphModule, torch.nn.Module],
     example_inputs: Tuple[Value, ...],
+    *,
+    example_kwarg_inputs: Optional[Dict] = None,
     dynamic_shapes: Optional[Union[Dict[str, Any], Tuple[Any]]] = None,
     strict=True,
     verbose=True,
@@ -38,7 +40,11 @@ def _to_core_aten(
             f"Expected passed in model to be an instance of fx.GraphModule, got {type(model)}"
         )
     core_aten_ep = export(
-        model, example_inputs, dynamic_shapes=dynamic_shapes, strict=strict
+        model,
+        example_inputs,
+        example_kwarg_inputs,
+        dynamic_shapes=dynamic_shapes,
+        strict=strict,
     )
     if verbose:
         logging.info(f"Core ATen graph:\n{core_aten_ep.graph}")
@@ -69,14 +75,21 @@ def _core_aten_to_edge(
 def export_to_edge(
     model: Union[torch.fx.GraphModule, torch.nn.Module],
     example_inputs: Tuple[Value, ...],
+    *,
+    example_kwarg_inputs: Optional[Dict] = None,
     dynamic_shapes: Optional[Union[Dict[str, Any], Tuple[Any]]] = None,
     edge_constant_methods: Optional[Dict[str, Any]] = None,
     edge_compile_config=_EDGE_COMPILE_CONFIG,
     strict=True,
     verbose=True,
 ) -> EdgeProgramManager:
     core_aten_ep = _to_core_aten(
-        model, example_inputs, dynamic_shapes, strict=strict, verbose=verbose
+        model,
+        example_inputs,
+        example_kwarg_inputs=example_kwarg_inputs,
+        dynamic_shapes=dynamic_shapes,
+        strict=strict,
+        verbose=verbose,
     )
     return _core_aten_to_edge(
         core_aten_ep, edge_constant_methods, edge_compile_config, verbose=verbose
@@ -86,6 +99,8 @@ def export_to_edge(
 def export_to_exec_prog(
     model: Union[torch.fx.GraphModule, torch.nn.Module],
     example_inputs: Tuple[Value, ...],
+    *,
+    example_kwarg_inputs: Optional[Dict[str, Any]] = None,
     dynamic_shapes: Optional[Union[Dict[str, Any], Tuple[Any]]] = None,
     edge_constant_methods: Optional[Dict[str, Any]] = None,
     edge_compile_config=_EDGE_COMPILE_CONFIG,
@@ -96,7 +111,13 @@ def export_to_exec_prog(
     # pre-autograd export. eventually this will become torch.export
     m = export_for_training(m, example_inputs).module()
 
-    core_aten_ep = _to_core_aten(m, example_inputs, dynamic_shapes, strict=strict)
+    core_aten_ep = _to_core_aten(
+        m,
+        example_inputs,
+        example_kwarg_inputs=example_kwarg_inputs,
+        dynamic_shapes=dynamic_shapes,
+        strict=strict,
+    )
 
     edge_m = _core_aten_to_edge(
         core_aten_ep, edge_constant_methods, edge_compile_config
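
The plumbing above ultimately lands in torch.export.export, whose third argument carries kwarg example inputs. A minimal sketch of that underlying behavior, using a toy module (AddBias is invented for this sketch; in this repo the caller-facing entry point would be export_to_edge with example_kwarg_inputs=...):

import torch

class AddBias(torch.nn.Module):  # toy module invented for this sketch
    def forward(self, x: torch.Tensor, *, bias: torch.Tensor) -> torch.Tensor:
        return x + bias

ep = torch.export.export(
    AddBias(),
    (torch.randn(2, 2),),         # positional example inputs
    {"bias": torch.zeros(2, 2)},  # kwarg example inputs, now forwarded by export_to_edge
)
print(ep.graph)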

extension/llm/export/builder.py
Lines changed: 9 additions & 2 deletions

@@ -10,7 +10,7 @@
 
 import logging
 from enum import Enum
-from typing import Any, Callable, List, Optional
+from typing import Any, Callable, Dict, List, Optional
 
 import torch
 from executorch.backends.transforms.duplicate_dynamic_quant_chain import (
@@ -68,6 +68,7 @@ def __init__(
         dtype,
         use_kv_cache,
         example_inputs,
+        example_kwarg_inputs: Optional[Dict] = None,
         args: Optional[Any] = None,
         enable_dynamic_shape: bool = False,
         generate_full_logits: bool = False,
@@ -87,6 +88,7 @@ def __init__(
         self.max_seq_len = max_seq_len
         self.dtype = dtype
         self.example_inputs = example_inputs
+        self.example_kwarg_inputs = example_kwarg_inputs
         self.use_kv_cache = use_kv_cache
         self.generate_full_logits = generate_full_logits
         self.enable_dynamic_shape = enable_dynamic_shape
@@ -186,12 +188,16 @@ def capture_pre_autograd_graph(self) -> "LLMEdgeManager":
             self.pre_autograd_graph_module = torch.export.export(
                 self.model,
                 self.example_inputs,
+                self.example_kwarg_inputs,
                 dynamic_shapes=dynamic_shape,
                 strict=True,
             ).module()
         else:
             self.pre_autograd_graph_module = capture_pre_autograd_graph(
-                self.model, self.example_inputs, dynamic_shapes=dynamic_shape
+                self.model,
+                self.example_inputs,
+                kwargs=self.example_kwarg_inputs,
+                dynamic_shapes=dynamic_shape,
             )
 
         return self
@@ -340,6 +346,7 @@ def export_to_edge(self) -> "LLMEdgeManager":
         self.edge_manager = export_to_edge(
             self.pre_autograd_graph_module,  # pyre-fixme[6]
             self.example_inputs,
+            example_kwarg_inputs=self.example_kwarg_inputs,
             dynamic_shapes=dynamic_shape,
             edge_constant_methods=self.metadata,
             edge_compile_config=edge_config,
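
Finally, a hedged end-to-end sketch of the builder-side pattern: store kwarg example inputs at construction time and forward them when capturing the graph. TinyEdgeManager and AddOne are stand-ins invented for this example, not the real LLMEdgeManager API or its constructor signature.

from typing import Dict, Optional, Tuple

import torch

class TinyEdgeManager:  # invented stand-in for LLMEdgeManager
    def __init__(
        self,
        model: torch.nn.Module,
        example_inputs: Tuple,
        example_kwarg_inputs: Optional[Dict] = None,
    ):
        self.model = model
        self.example_inputs = example_inputs
        self.example_kwarg_inputs = example_kwarg_inputs

    def capture(self) -> torch.export.ExportedProgram:
        # torch.export.export treats None kwargs as "no kwarg inputs".
        return torch.export.export(
            self.model, self.example_inputs, self.example_kwarg_inputs, strict=True
        )

class AddOne(torch.nn.Module):
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return x + 1

ep = TinyEdgeManager(AddOne(), (torch.randn(2),)).capture()
print(ep.graph)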
