@@ -224,32 +224,35 @@ def get_eager_model(self) -> torch.nn.Module:
         # switch all to FP32
         return self.model_.to(torch.float32)
 
-    def get_example_inputs(self):
+    def get_example_inputs(self) -> Tuple[Tuple, Dict]:
         if self.use_kv_cache:
             return self.get_example_inputs_kvcache_sdpa()
         else:
-            return (
+            positional_inputs = (
                 torch.tensor(
                     [[1, 2, 3]], dtype=torch.long
                 ),  # tokens, with kv cache our input token length is always just 1 token.
             )
+            return (positional_inputs, {})
 
     # Assumption: the custom op doesn't support dynamic shape right now. It might, but it's untested, so let's get static shape working first.
-    def get_example_inputs_kvcache_sdpa(self):
+    def get_example_inputs_kvcache_sdpa(self) -> Tuple[Tuple, Dict]:
         if self.enable_dynamic_shape:
-            return (
+            positional_inputs = (
                 torch.tensor([[2, 3, 4]], dtype=torch.long),
                 torch.tensor([0], dtype=torch.long),
             )
+            return (positional_inputs, {})
         else:
-            return (
+            positional_inputs = (
                 torch.tensor(
                     [[1]], dtype=torch.long
                 ),  # tokens, with kv cache our input token length is always just 1 token.
                 torch.tensor(
                     [0], dtype=torch.long
                 ),  # start_pos, what token of output are we on.
             )
+            return (positional_inputs, {})
 
     def _transform_for_pre_quantization(self, checkpoint):
         assert hasattr(self.args, "preq_mode"), "preq_mode must be specified"
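The new return convention pairs the positional example inputs with a (possibly empty) kwargs dict, which lines up with the `(args, kwargs)` arguments of `torch.export.export`. Below is a minimal sketch of how a caller could consume the pair; `export_with_example_inputs` and `model_manager` are illustrative names assumed for this example, not the repo's actual call site.

```python
# Sketch only: assumes `model_manager` exposes the methods shown in the diff
# (get_eager_model / get_example_inputs); the surrounding export flow is
# illustrative, not this repo's real export path.
import torch
from torch.export import export, ExportedProgram


def export_with_example_inputs(model_manager) -> ExportedProgram:
    eager_model = model_manager.get_eager_model()  # FP32 eager module
    example_args, example_kwargs = model_manager.get_example_inputs()
    # The (args, kwargs) pair maps directly onto torch.export.export's
    # signature; an empty kwargs dict is harmless for tensor-only models.
    return export(eager_model, example_args, kwargs=example_kwargs)
```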