 import torchvision.models as models
 from torch import nn
 from torch_tensorrt.dynamo._compiler import convert_module_to_trt_engine
-from torch_tensorrt.dynamo.utils import COSINE_THRESHOLD, cosine_similarity
+from torch_tensorrt.dynamo.utils import (
+    COSINE_THRESHOLD,
+    cosine_similarity,
+    prepare_inputs,
+)

 assertions = unittest.TestCase()


+# @pytest.mark.unit
+# def test_custom_model():
+#     class net(nn.Module):
+#         def __init__(self):
+#             super().__init__()
+#             self.conv1 = nn.Conv2d(3, 12, 3, padding=1)
+#             self.bn = nn.BatchNorm2d(12)
+#             self.conv2 = nn.Conv2d(12, 12, 3, padding=1)
+#             self.fc1 = nn.Linear(12 * 56 * 56, 10)
+
+#         def forward(self, x, b=5, c=None, d=None):
+#             x = self.conv1(x)
+#             x = F.relu(x)
+#             x = self.bn(x)
+#             x = F.max_pool2d(x, (2, 2))
+#             x = self.conv2(x)
+#             x = F.relu(x)
+#             x = F.max_pool2d(x, (2, 2))
+#             x = torch.flatten(x, 1)
+#             x = x + b
+#             if c is not None:
+#                 x = x * c
+#             if d is not None:
+#                 x = x - d["value"]
+#             return self.fc1(x)
+
+#     model = net().eval().to("cuda")
+#     args = [torch.rand((1, 3, 224, 224)).to("cuda")]
+#     kwargs = {
+#         "b": torch.tensor(6).to("cuda"),
+#         "d": {"value": torch.tensor(8).to("cuda")},
+#     }
+
+#     compile_spec = {
+#         "inputs": args,
+#         "kwarg_inputs": kwargs,
+#         "device": torchtrt.Device("cuda:0"),
+#         "enabled_precisions": {torch.float},
+#         "pass_through_build_failures": True,
+#         "optimization_level": 1,
+#         "min_block_size": 1,
+#         "ir": "dynamo",
+#     }
+
+#     exp_program = torch.export.export(model, args=tuple(args), kwargs=kwargs)
+#     trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec)
+#     cos_sim = cosine_similarity(model(*args, **kwargs), trt_gm(*args, **kwargs)[0])
+#     assertions.assertTrue(
+#         cos_sim > COSINE_THRESHOLD,
+#         msg=f"CustomKwargs Module TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
+#     )
+
+#     # Save the module
+#     trt_ep_path = os.path.join(tempfile.gettempdir(), "compiled.ep")
+#     torchtrt.save(trt_gm, trt_ep_path, inputs=args, kwargs_inputs=kwargs)
+#     # Clean up model env
+#     torch._dynamo.reset()
+
+
+# @pytest.mark.unit
+# def test_custom_model_with_dynamo_trace():
+#     class net(nn.Module):
+#         def __init__(self):
+#             super().__init__()
+#             self.conv1 = nn.Conv2d(3, 12, 3, padding=1)
+#             self.bn = nn.BatchNorm2d(12)
+#             self.conv2 = nn.Conv2d(12, 12, 3, padding=1)
+#             self.fc1 = nn.Linear(12 * 56 * 56, 10)
+
+#         def forward(self, x, b=5, c=None, d=None):
+#             x = self.conv1(x)
+#             x = F.relu(x)
+#             x = self.bn(x)
+#             x = F.max_pool2d(x, (2, 2))
+#             x = self.conv2(x)
+#             x = F.relu(x)
+#             x = F.max_pool2d(x, (2, 2))
+#             x = torch.flatten(x, 1)
+#             x = x + b
+#             if c is not None:
+#                 x = x * c
+#             if d is not None:
+#                 x = x - d["value"]
+#             return self.fc1(x)
+
+#     model = net().eval().to("cuda")
+#     args = [torch.rand((1, 3, 224, 224)).to("cuda")]
+#     kwargs = {
+#         "b": torch.tensor(6).to("cuda"),
+#         "d": {"value": torch.tensor(8).to("cuda")},
+#     }
+
+#     compile_spec = {
+#         "inputs": prepare_inputs(args),
+#         "kwarg_inputs": prepare_inputs(kwargs),
+#         "device": torchtrt.Device("cuda:0"),
+#         "enabled_precisions": {torch.float},
+#         "pass_through_build_failures": True,
+#         "optimization_level": 1,
+#         "min_block_size": 1,
+#         "ir": "dynamo",
+#     }
+
+#     exp_program = torchtrt.dynamo.trace(model, **compile_spec)
+#     trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec)
+#     cos_sim = cosine_similarity(model(*args, **kwargs), trt_gm(*args, **kwargs)[0])
+#     assertions.assertTrue(
+#         cos_sim > COSINE_THRESHOLD,
+#         msg=f"CustomKwargs Module TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
+#     )
+
+#     # Save the module
+#     trt_ep_path = os.path.join(tempfile.gettempdir(), "compiled.ep")
+#     torchtrt.save(trt_gm, trt_ep_path, inputs=args, kwargs_inputs=kwargs)
+#     # Clean up model env
+#     torch._dynamo.reset()
+
+
 @pytest.mark.unit
-def test_custom_model():
+def test_custom_model_with_dynamo_trace_dynamic():
     class net(nn.Module):
         def __init__(self):
             super().__init__()
@@ -50,8 +172,17 @@ def forward(self, x, b=5, c=None, d=None):
     }

     compile_spec = {
-        "inputs": args,
-        "kwarg_inputs": kwargs,
+        # "arg_inputs": prepare_inputs(args),
+        "inputs": [
+            torchtrt.Input(
+                min_shape=(1, 3, 224, 224),
+                opt_shape=(4, 3, 224, 224),
+                max_shape=(8, 3, 224, 224),
+                dtype=torch.float32,
+                name="x",
+            )
+        ],
+        "kwarg_inputs": prepare_inputs(kwargs),
         "device": torchtrt.Device("cuda:0"),
         "enabled_precisions": {torch.float},
         "pass_through_build_failures": True,
@@ -60,7 +191,88 @@ def forward(self, x, b=5, c=None, d=None):
         "ir": "dynamo",
     }

-    exp_program = torch.export.export(model, args=tuple(args), kwargs=kwargs)
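+    # torchtrt.dynamo.trace() replaces the direct torch.export.export() call and
+    # exports the model using the (dynamic) Input specs carried in compile_spec.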
+    exp_program = torchtrt.dynamo.trace(model, **compile_spec)
+    trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec)
+    cos_sim = cosine_similarity(model(*args, **kwargs), trt_gm(*args, **kwargs)[0])
+    assertions.assertTrue(
+        cos_sim > COSINE_THRESHOLD,
+        msg=f"CustomKwargs Module TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
+    )
+
+    # Save the module
+    trt_ep_path = os.path.join(tempfile.gettempdir(), "compiled.ep")
+    torchtrt.save(trt_gm, trt_ep_path, inputs=args, kwargs_inputs=kwargs)
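+    # The saved exported program could presumably be reloaded later, e.g. via
+    # torch.export.load(trt_ep_path); reloading is not exercised in this test.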
+    # Clean up model env
+    torch._dynamo.reset()
+
+
+@pytest.mark.unit
+def test_custom_model_with_dynamo_trace_dynamic_complex():
+    ir = "dynamo"
+
+    class net(nn.Module):
+        def __init__(self):
+            super().__init__()
+            self.conv1 = nn.Conv2d(3, 12, 3, padding=1)
+            self.bn = nn.BatchNorm2d(12)
+            self.conv2 = nn.Conv2d(12, 12, 3, padding=1)
+            self.fc1 = nn.Linear(12 * 56 * 56, 10)
+
+        def forward(self, x, b=None, c=None, d=None, e=[]):
+            x = self.conv1(x)
+            x = F.relu(x)
+            x = self.bn(x)
+            x = F.max_pool2d(x, (2, 2))
+            x = self.conv2(x)
+            x = F.relu(x)
+            x = F.max_pool2d(x, (2, 2))
+            x = torch.flatten(x, 1)
+            x = x @ b
+            if c is not None:
+                x = x * c
+            if d is not None:
+                x = x - d["value"]
+            for n in e:
+                x += n
+            return x
+
+    model = net().eval().to("cuda")
+    args = [torch.rand((1, 3, 224, 224)).to("cuda")]
+    kwargs = {
+        "b": torch.rand((37632, 10)).to("cuda"),
+        "d": {"value": torch.tensor(8).to("cuda")},
+        "e": [torch.tensor(8).to("cuda"), torch.tensor(10).to("cuda")],
+    }
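+    # Run the eager model once as a sanity check that it accepts these kwargs.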
+    model(*args, **kwargs)
+    kwarg_torchtrt_input = prepare_inputs(kwargs)
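+    # "b" feeds the matmul above, so its entry is overridden with a torchtrt.Input
+    # that lets the second dimension vary (1 to 10) rather than the spec that
+    # prepare_inputs() derived from the example tensor.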
+    kwarg_torchtrt_input["b"] = torchtrt.Input(
+        min_shape=(37632, 1),
+        opt_shape=(37632, 5),
+        max_shape=(37632, 10),
+        dtype=torch.float32,
+        name="b",
+    )
+    compile_spec = {
+        # "arg_inputs": prepare_inputs(args),
+        "inputs": [
+            torchtrt.Input(
+                min_shape=(1, 3, 224, 224),
+                opt_shape=(4, 3, 224, 224),
+                max_shape=(8, 3, 224, 224),
+                dtype=torch.float32,
+                name="x",
+            ),
+        ],
+        "kwarg_inputs": kwarg_torchtrt_input,
+        "device": torchtrt.Device("cuda:0"),
+        "enabled_precisions": {torch.float},
+        "pass_through_build_failures": True,
+        "optimization_level": 1,
+        "min_block_size": 1,
+        "ir": "dynamo",
+    }
+
+    exp_program = torchtrt.dynamo.trace(model, **compile_spec)
     trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec)
     cos_sim = cosine_similarity(model(*args, **kwargs), trt_gm(*args, **kwargs)[0])
     assertions.assertTrue(