Support kwargs inputs in save

cehongwang · cehongwang · commit bac30990e4d8 · 2024-07-12T15:17:46.000-07:00
diff --git a/py/torch_tensorrt/_compile.py b/py/torch_tensorrt/_compile.py
@@ -351,7 +351,7 @@ def convert_method_to_trt_engine(
         torchtrt_inputs = prepare_inputs(inputs)
         exp_program = torch_tensorrt.dynamo.trace(module, torchtrt_inputs, **kwargs)
 
-        return dynamo_convert_module_to_trt_engine(
+        return dynamo_convert_module_to_trt_engine(  # type: ignore
             exp_program,
             inputs=tuple(inputs),
             enabled_precisions=enabled_precisions_set,
@@ -408,6 +408,7 @@ def save(
     *,
     output_format: str = "exported_program",
     inputs: Optional[Sequence[torch.Tensor]] = None,
+    kwargs_inputs: Optional[dict[str, Any]] = None,
     retrace: bool = False,
 ) -> None:
     """
@@ -428,6 +429,10 @@ def save(
         raise ValueError(
             "Not all inputs provided are torch.tensors. Please provide torch.tensors as inputs"
         )
+    if kwargs_inputs is not None and not all(
+        value is not None for value in kwargs_inputs.values()
+    ):
+        raise ValueError("kwargs should not include None.")
     if output_format not in accepted_formats:
         raise ValueError(
             f"Provided output_format {output_format} is not supported. Supported options are exported_program | torchscript"
@@ -460,19 +465,21 @@ def save(
             )
         # The module type is torch.fx.GraphModule
         if output_format == "torchscript":
-            module_ts = torch.jit.trace(module, inputs)
+            module_ts = torch.jit.trace(
+                module, inputs, example_kwarg_inputs=kwargs_inputs
+            )
             torch.jit.save(module_ts, file_path)
         else:
             if not retrace:
                 from torch_tensorrt.dynamo._exporter import export
 
-                exp_program = export(module, inputs)
+                exp_program = export(module, inputs, kwargs_inputs)
                 torch.export.save(exp_program, file_path)
             else:
                 from torch._higher_order_ops.torchbind import enable_torchbind_tracing
 
                 with enable_torchbind_tracing():
                     exp_program = torch.export.export(
-                        module, tuple(inputs), strict=False
+                        module, tuple(inputs), kwargs=kwargs_inputs, strict=False
                     )
                     torch.export.save(exp_program, file_path)
diff --git a/py/torch_tensorrt/dynamo/_exporter.py b/py/torch_tensorrt/dynamo/_exporter.py
@@ -1,6 +1,6 @@
 import copy
 import operator
-from typing import Any, Dict, Sequence, Tuple, cast
+from typing import Any, Dict, Optional, Sequence, Tuple, cast
 
 import torch
 from torch._guards import detect_fake_mode
@@ -22,20 +22,23 @@
 def export(
     gm: torch.fx.GraphModule,
     inputs: Sequence[torch.Tensor],
+    kwargs_inputs: Optional[dict[str, Any]] = None,
 ) -> ExportedProgram:
     """Export the result of TensorRT compilation into the desired output format.
 
     Arguments:
         gm (torch.fx.GraphModule): Compiled Torch-TensorRT module, generated by ``torch_tensorrt.dynamo.compile``
         inputs (torch.Tensor): Torch input tensors
     """
-    patched_module = transform(gm, inputs)
+    patched_module = transform(gm, inputs, kwargs_inputs)
     exp_program = create_trt_exp_program(patched_module)
     return exp_program
 
 
 def transform(
-    gm: torch.fx.GraphModule, inputs: Sequence[torch.Tensor]
+    gm: torch.fx.GraphModule,
+    inputs: Sequence[torch.Tensor],
+    kwargs_inputs: Optional[dict[str, Any]] = None,
 ) -> torch.fx.GraphModule:
     """
     Transforms the graphmodule by inlining Pytorch and TensorRT submodules.
@@ -53,7 +56,7 @@ def transform(
     gm = copy.deepcopy(gm)
 
     # Run shape analysis
-    _, outputs_map = partitioning.run_shape_analysis(gm, inputs)
+    _, outputs_map = partitioning.run_shape_analysis(gm, inputs, kwargs_inputs)
 
     # Inline TensorRT submodules
     inline_trt_modules(gm, outputs_map)
diff --git a/py/torch_tensorrt/dynamo/partitioning/common.py b/py/torch_tensorrt/dynamo/partitioning/common.py
@@ -129,7 +129,9 @@ def construct_submodule_inputs(module: torch.fx.GraphModule) -> Sequence[Input]:
 
 
 def run_shape_analysis(
-    parent_module: torch.fx.GraphModule, inputs: Sequence[Input]
+    parent_module: torch.fx.GraphModule,
+    inputs: Sequence[Input],
+    kwargs_inputs: Optional[dict[str, Any]] = None,
 ) -> Tuple[Dict[Any, Sequence[Any]], Dict[Any, Sequence[Any]]]:
     submod_inputs_shape_map: Dict[Any, Sequence[Any]] = {}
     submod_outputs_shape_map: Dict[Any, Sequence[Any]] = {}
@@ -149,7 +151,7 @@ def get_submodule_io(
     for name, _ in parent_module.named_children():
         submodule = getattr(parent_module, name)
         handle = submodule.register_forward_hook(get_submodule_io)
-        parent_module(*inputs)
+        parent_module(*inputs, **kwargs_inputs)
         handle.remove()
         submod_inputs_shape_map[name] = (
             [input.shape for input in sub_inputs]