Added kwarg support for dynamo.compile

cehongwang · cehongwang · commit 75dc485ebf09 · 2024-07-12T15:12:33.000-07:00
diff --git a/py/torch_tensorrt/dynamo/_compiler.py b/py/torch_tensorrt/dynamo/_compiler.py
@@ -49,6 +49,7 @@ def compile(
     exported_program: ExportedProgram,
     inputs: Tuple[Any, ...],
     *,
+    kwarg_inputs: Any = None,
     device: Optional[Union[Device, torch.device, str]] = _defaults.DEVICE,
     disable_tf32: bool = _defaults.DISABLE_TF32,
     assume_dynamic_shape_support: bool = _defaults.ASSUME_DYNAMIC_SHAPE_SUPPORT,
@@ -148,7 +149,6 @@ def compile(
 
     if debug:
         set_log_level(logger.parent, logging.DEBUG)
-
     if "truncate_long_and_double" in kwargs.keys():
         if truncate_double is not _defaults.TRUNCATE_DOUBLE:
             raise ValueError(
@@ -173,6 +173,8 @@ def compile(
         else:
             make_refitable = kwargs["refit"]
 
+    if kwarg_inputs is None:
+        kwarg_inputs = {}
     engine_capability = EngineCapability._from(engine_capability)
 
     if torch_executed_modules is not None and torch_executed_modules:
@@ -186,22 +188,22 @@ def compile(
 
     # Prepare torch_trt inputs
     inputs = prepare_inputs(inputs)
-    torch_inputs = get_torch_inputs(inputs, device)
+    kwarg_inputs = prepare_inputs(kwarg_inputs)
     device = to_torch_tensorrt_device(device)
     enabled_precisions = {dtype._from(p) for p in enabled_precisions}
 
     if not isinstance(exported_program, ExportedProgram):
         raise AssertionError(
             f"Input graph should be an ExportedProgram but got type {type(exported_program)}"
         )
-    exported_program = pre_export_lowering(exported_program, torch_inputs)
+    exported_program = pre_export_lowering(exported_program, None)
     exported_program = exported_program.run_decompositions(
         get_decompositions(enable_experimental_decompositions)
     )
     gm = exported_program.module()
     logger.debug("Input graph: " + str(gm.graph))
     # Apply lowering on the graph module
-    gm = post_lowering(gm, torch_inputs)
+    gm = post_lowering(gm, None)
     logger.debug("Lowered Input graph: " + str(gm.graph))
 
     compilation_options = {
@@ -240,13 +242,14 @@ def compile(
 
     settings = CompilationSettings(**compilation_options)
     logger.info("Compilation Settings: %s\n", settings)
-    trt_gm = compile_module(gm, inputs, settings)
+    trt_gm = compile_module(gm, inputs, kwarg_inputs, settings)
     return trt_gm
 
 
 def compile_module(
     gm: torch.fx.GraphModule,
     sample_inputs: Sequence[Input],
+    sample_kwarg_inputs: Any = None,
     settings: CompilationSettings = CompilationSettings(),
 ) -> torch.fx.GraphModule:
     """Compile a traced FX module
@@ -261,7 +264,8 @@ def compile_module(
         Compiled FX GraphModule
     """
     dryrun_tracker = DryRunTracker()
-
+    if sample_kwarg_inputs is None:
+        sample_kwarg_inputs = {}
     # Assume converters support dynamic shapes and disable validation
     CONVERTERS.set_dynamic_shape_support(settings.assume_dynamic_shape_support)
 
@@ -437,9 +441,13 @@ def contains_metadata(gm: torch.fx.GraphModule) -> bool:
 
             trt_modules[name] = trt_module
 
-    sample_outputs = gm(
-        *get_torch_inputs(sample_inputs, to_torch_device(settings.device))
+    torch_sample_inputs = get_torch_inputs(
+        sample_inputs, to_torch_device(settings.device)
+    )
+    torch_sample_kwarg_inputs = get_torch_inputs(
+        sample_kwarg_inputs, to_torch_device(settings.device)
     )
+    sample_outputs = gm(*torch_sample_inputs, **torch_sample_kwarg_inputs)
 
     if not isinstance(sample_outputs, (list, tuple)):
         sample_outputs = [sample_outputs]
diff --git a/py/torch_tensorrt/dynamo/utils.py b/py/torch_tensorrt/dynamo/utils.py
@@ -128,24 +128,52 @@ def input_is_dynamic(inputs: Sequence[Union[Input, torch.Tensor]]) -> bool:
 
 
 def get_torch_inputs(
-    inputs: Sequence[Input], device: Union[Device, torch.device, str], mode: str = ""
-) -> Sequence[torch.tensor]:
+    inputs: Union[Sequence[Input], Dict[Any, Any]],
+    device: Union[Device, torch.device, str],
+    mode: str = "",
+) -> Union[Sequence[torch.tensor], Dict[Any, Any]]:
     """
     Return the torch_tensor from the Input object. If mode is set, this implies
     user is using dynamic shaped inputs and return the corresponding input based
     on the mode requested.
+
+    Args:
+        inputs: A sequence of Input objects, or a dict whose values are Input
+            objects or nested lists/tuples/dicts (converted recursively).
+        device: Target device for the tensors; normalized via to_torch_device.
+        mode: When non-empty, tensors come from Input.example_tensor(mode);
+            otherwise from Input.torch_tensor.
+
+    Returns:
+        A dict with the same keys when ``inputs`` is a dict, otherwise a list.
+        For sequences, non-Input entries are dropped when ``mode`` is set and
+        passed through unchanged when it is not.
     """
     device = to_torch_device(device)
+
+    if isinstance(inputs, dict):
+        result = {}
+        for k, v in inputs.items():
+            if isinstance(v, (list, tuple, dict)):
+                # Forward mode so nested containers honor the requested mode
+                # instead of silently falling back to torch_tensor.
+                result[k] = get_torch_inputs(v, device, mode)
+            elif mode:
+                result[k] = v.example_tensor(mode).to(device)
+            else:
+                result[k] = v.torch_tensor.to(device)
+        return result
+
     if mode:
         return [
             input.example_tensor(mode).to(device)
             for input in inputs
             if isinstance(input, Input)
         ]
     return [
         input.torch_tensor.to(device) if isinstance(input, Input) else input
         for input in inputs
     ]
 
 
 def set_log_level(parent_logger: Any, level: Any) -> None:
diff --git a/tests/py/dynamo/models/test_models_export_kwargs.py b/tests/py/dynamo/models/test_models_export_kwargs.py
@@ -0,0 +1,81 @@
+# type: ignore
+import unittest
+
+import pytest
+import timm
+import torch
+import torch.nn.functional as F
+import torch_tensorrt as torchtrt
+import torchvision.models as models
+from torch import nn
+from torch_tensorrt.dynamo.utils import COSINE_THRESHOLD, cosine_similarity
+from transformers import BertModel
+from transformers.utils.fx import symbolic_trace as transformers_trace
+
+assertions = unittest.TestCase()
+
+
+@pytest.mark.unit
+def test_custom_model():
+    """Compile an exported model that takes tensor and nested-dict kwargs.
+
+    The model is exported with positional ``args`` plus ``kwargs`` and compiled
+    via ``torchtrt.dynamo.compile`` with ``kwarg_inputs``. The cosine similarity
+    between the eager and compiled outputs must exceed COSINE_THRESHOLD.
+    """
+
+    class net(nn.Module):
+        def __init__(self):
+            super().__init__()
+            self.conv1 = nn.Conv2d(3, 12, 3, padding=1)
+            self.bn = nn.BatchNorm2d(12)
+            self.conv2 = nn.Conv2d(12, 12, 3, padding=1)
+            self.fc1 = nn.Linear(12 * 56 * 56, 10)
+
+        def forward(self, x, b=5, c=None, d=None):
+            x = self.conv1(x)
+            x = F.relu(x)
+            x = self.bn(x)
+            x = F.max_pool2d(x, (2, 2))
+            x = self.conv2(x)
+            x = F.relu(x)
+            x = F.max_pool2d(x, (2, 2))
+            x = torch.flatten(x, 1)
+            x = x + b
+            if c is not None:
+                x = x * c
+            if d is not None:
+                x = x - d["value"]
+            return self.fc1(x)
+
+    model = net().eval().to("cuda")
+    args = [torch.rand((1, 3, 224, 224)).to("cuda")]
+    # "c" is not supplied, so forward uses its default of None.
+    kwargs = {
+        "b": torch.tensor(6).to("cuda"),
+        "d": {"value": torch.tensor(8).to("cuda")},
+    }
+
+    compile_spec = {
+        "inputs": args,
+        "kwarg_inputs": kwargs,
+        "device": torchtrt.Device("cuda:0"),
+        "enabled_precisions": {torch.float},
+        "pass_through_build_failures": True,
+        "optimization_level": 1,
+        "min_block_size": 1,
+        "ir": "dynamo",
+    }
+    # TODO: Support torchtrt.compile
+    # trt_mod = torchtrt.compile(model, **compile_spec)
+
+    exp_program = torch.export.export(model, args=tuple(args), kwargs=kwargs)
+    trt_mod = torchtrt.dynamo.compile(exp_program, **compile_spec)
+    cos_sim = cosine_similarity(model(*args, **kwargs), trt_mod(*args, **kwargs)[0])
+    assertions.assertTrue(
+        cos_sim > COSINE_THRESHOLD,
+        msg=f"Custom model TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
+    )
+
+    # Clean up model env
+    torch._dynamo.reset()