fix: Add test cases and improve backend

gs-olive · gs-olive · commit 6d2e01a02924 · 2023-04-11T10:32:39.000-07:00
- Add support for Input objects, add utilities
- Add modeling e2e test cases for Dynamo backend
- Improve defaults and settings in Dynamo class
diff --git a/py/torch_tensorrt/_Input.py b/py/torch_tensorrt/_Input.py
@@ -237,6 +237,27 @@ def _supported_input_size_type(input_size: Any) -> bool:
         else:
             return False
 
+    @staticmethod
+    def _dtype_to_torch_type(dtype: _enums.dtype) -> torch.dtype:
+        """Return the ``torch.dtype`` equivalent of an ``_enums.dtype`` member."""
+        # A non-enum argument is a ValueError; an unmapped member is a TypeError.
+        if not isinstance(dtype, _enums.dtype):
+            raise ValueError("Did not provide an _enums.dtype type as input.")
+        # Candidates are compared with ``==`` one at a time, in this order.
+        for enum_member, torch_type in (
+            (_enums.dtype.long, torch.long),
+            (_enums.dtype.int32, torch.int32),
+            (_enums.dtype.half, torch.half),
+            (_enums.dtype.float, torch.float),
+            (_enums.dtype.bool, torch.bool),
+        ):
+            if dtype == enum_member:
+                return torch_type
+        raise TypeError(
+            "Provided an unsupported data type as an input data type (support: bool, int32, long, half, float), got: "
+            + str(dtype)
+        )
+
     @staticmethod
     def _parse_dtype(dtype: Any) -> _enums.dtype:
         if isinstance(dtype, torch.dtype):
@@ -416,8 +437,10 @@ def example_tensor(self, optimization_profile_field: str = None) -> torch.Tensor
             )
 
         if self.shape_mode == Input._ShapeMode.STATIC:
-            return torch.randn(self.shape).to(dtype=self.dtype)
+            return torch.rand(self.shape).to(
+                dtype=Input._dtype_to_torch_type(self.dtype)
+            )
         else:
-            return torch.randn(self.shape[optimization_profile_field]).to(
-                dtype=self.dtype
+            return torch.rand(self.shape[optimization_profile_field]).to(
+                dtype=Input._dtype_to_torch_type(self.dtype)
             )
diff --git a/py/torch_tensorrt/dynamo/__init__.py b/py/torch_tensorrt/dynamo/__init__.py
@@ -1,13 +1,15 @@
 import torch
 import logging
+import collections.abc
 import torch_tensorrt
 from functools import partial
 
-from typing import Sequence, Any
+from typing import Any
 from torch_tensorrt import EngineCapability, Device
 from torch_tensorrt.fx.utils import LowerPrecision
 
 from torch_tensorrt.dynamo._settings import CompilationSettings
+from torch_tensorrt.dynamo.utils import prepare_inputs, prepare_device
 from torch_tensorrt.dynamo.backends import tensorrt_backend
 from torch_tensorrt.dynamo._defaults import (
     PRECISION,
@@ -22,7 +24,7 @@
 
 def compile(
     gm: torch.nn.Module,
-    example_inputs: Sequence[Any],
+    inputs: Any,
     *,
     device=Device._current_device(),
     disable_tf32=False,
@@ -51,6 +53,11 @@ def compile(
         + "{enabled_precisions, debug, workspace_size, max_num_trt_engines}"
     )
 
+    if not isinstance(inputs, collections.abc.Sequence):
+        inputs = [inputs]
+
+    inputs = prepare_inputs(inputs, prepare_device(device))
+
     if (
         torch.float16 in enabled_precisions
         or torch_tensorrt.dtype.half in enabled_precisions
@@ -79,7 +86,7 @@ def compile(
     model = torch.compile(gm, backend=custom_backend)
 
     # Ensure compilation occurs by calling the function with provided inputs
-    model(*example_inputs)
+    model(*inputs)
 
     return model
 
diff --git a/py/torch_tensorrt/dynamo/_defaults.py b/py/torch_tensorrt/dynamo/_defaults.py
@@ -4,4 +4,4 @@
 PRECISION = LowerPrecision.FP32
 DEBUG = False
 MAX_WORKSPACE_SIZE = 20 << 30
-MAX_NUM_TRT_ENGINES = 10
+MAX_NUM_TRT_ENGINES = 200
diff --git a/py/torch_tensorrt/dynamo/_settings.py b/py/torch_tensorrt/dynamo/_settings.py
@@ -11,7 +11,7 @@
 
 @dataclass(frozen=True)
 class CompilationSettings:
-    precision: LowerPrecision = (PRECISION,)
-    debug: bool = (DEBUG,)
-    workspace_size: int = (MAX_WORKSPACE_SIZE,)
-    max_num_trt_engines: int = (MAX_NUM_TRT_ENGINES,)
+    precision: LowerPrecision = PRECISION
+    debug: bool = DEBUG
+    workspace_size: int = MAX_WORKSPACE_SIZE
+    max_num_trt_engines: int = MAX_NUM_TRT_ENGINES
diff --git a/py/torch_tensorrt/dynamo/utils.py b/py/torch_tensorrt/dynamo/utils.py
@@ -0,0 +1,66 @@
+import torch
+
+from typing import Any, Union, Sequence, Dict
+from torch_tensorrt import _Input, Device
+
+
+def prepare_inputs(
+    inputs: Union[_Input.Input, torch.Tensor, Sequence, Dict],
+    device: torch.device = torch.device("cuda"),
+) -> Any:
+    """Recursively replace ``Input`` objects in ``inputs`` with example tensors.
+
+    Args:
+        inputs: A ``torch_tensorrt.Input``, a ``torch.Tensor``, or a list, tuple
+            or dict (possibly nested) of those.
+        device: Device on which tensors generated from ``Input`` objects are
+            placed; ``None`` falls back to ``torch.device("cuda")``.
+
+    Returns:
+        The same structure with every ``Input`` replaced by an example tensor
+        on ``device``; ``torch.Tensor`` objects are returned unchanged.
+
+    Raises:
+        ValueError: If an object of any other type is encountered.
+    """
+    if device is None:
+        device = torch.device("cuda")
+
+    if isinstance(inputs, _Input.Input):
+        # A dict-valued shape needs a profile field; "opt_shape" is used.
+        if isinstance(inputs.shape, dict):
+            example = inputs.example_tensor(optimization_profile_field="opt_shape")
+            return example.to(device)
+        return inputs.example_tensor().to(device)
+
+    if isinstance(inputs, torch.Tensor):
+        return inputs
+
+    # Forward ``device`` so nested ``Input`` objects are placed on it as well.
+    if isinstance(inputs, list):
+        return [prepare_inputs(item, device) for item in inputs]
+    if isinstance(inputs, tuple):
+        return tuple(prepare_inputs(item, device) for item in inputs)
+    if isinstance(inputs, dict):
+        return {key: prepare_inputs(item, device) for key, item in inputs.items()}
+
+    raise ValueError(
+        f"Invalid input type {type(inputs)} encountered in the torch_compile input parsing. "
+        + "Allowed input types: {torch_tensorrt.Input, torch.Tensor, list, tuple, dict}"
+    )
+
+
+def prepare_device(device: Union[Device, torch.device]) -> torch.device:
+    """Return ``device`` as a ``torch.device``.
+
+    Raises ``ValueError`` for a ``Device`` with ``gpu_id == -1`` or another type.
+    """
+    if isinstance(device, torch.device):
+        return device
+    if not isinstance(device, Device):
+        raise ValueError(
+            "Invalid device provided. Supported options: torch.device | torch_tensorrt.Device"
+        )
+    if device.gpu_id == -1:
+        raise ValueError("Invalid GPU ID provided for the CUDA device provided")
+    return torch.device(device.gpu_id)
diff --git a/tests/py/api/test_dynamo_backend.py b/tests/py/api/test_dynamo_backend.py
@@ -0,0 +1,136 @@
+import unittest
+import torch
+import timm
+
+import torch_tensorrt as torchtrt
+import torchvision.models as models
+
+from transformers import BertModel
+from utils import COSINE_THRESHOLD, cosine_similarity
+
+
+class TestModels(unittest.TestCase):
+    """End-to-end accuracy tests for ``torchtrt.dynamo.compile``.
+
+    Each test compiles a pretrained model on "cuda" and asserts that the cosine
+    similarity between eager and compiled outputs exceeds ``COSINE_THRESHOLD``.
+    """
+
+    def test_resnet18(self):
+        """torchvision ResNet-18 with FP32 enabled."""
+        self.model = models.resnet18(pretrained=True).eval().to("cuda")
+        self.input = torch.randn((1, 3, 224, 224)).to("cuda")
+        compile_spec = {
+            "inputs": [
+                torchtrt.Input(
+                    self.input.shape, dtype=torch.float, format=torch.contiguous_format
+                )
+            ],
+            "device": torchtrt.Device("cuda:0"),
+            "enabled_precisions": {torch.float},
+        }
+
+        trt_mod = torchtrt.dynamo.compile(self.model, **compile_spec)
+        cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input))
+        self.assertTrue(
+            cos_sim > COSINE_THRESHOLD,
+            msg=f"Resnet18 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
+        )
+
+    def test_mobilenet_v2(self):
+        """torchvision MobileNet v2 with FP32 enabled."""
+        self.model = models.mobilenet_v2(pretrained=True).eval().to("cuda")
+        self.input = torch.randn((1, 3, 224, 224)).to("cuda")
+        compile_spec = {
+            "inputs": [
+                torchtrt.Input(
+                    self.input.shape, dtype=torch.float, format=torch.contiguous_format
+                )
+            ],
+            "device": torchtrt.Device("cuda:0"),
+            "enabled_precisions": {torch.float},
+        }
+
+        trt_mod = torchtrt.dynamo.compile(self.model, **compile_spec)
+        cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input))
+        self.assertTrue(
+            cos_sim > COSINE_THRESHOLD,
+            msg=f"Mobilenet v2 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
+        )
+
+    def test_efficientnet_b0(self):
+        """timm EfficientNet-B0 with FP32 enabled."""
+        self.model = (
+            timm.create_model("efficientnet_b0", pretrained=True).eval().to("cuda")
+        )
+        self.input = torch.randn((1, 3, 224, 224)).to("cuda")
+        compile_spec = {
+            "inputs": [
+                torchtrt.Input(
+                    self.input.shape, dtype=torch.float, format=torch.contiguous_format
+                )
+            ],
+            "device": torchtrt.Device("cuda:0"),
+            "enabled_precisions": {torch.float},
+        }
+
+        trt_mod = torchtrt.dynamo.compile(self.model, **compile_spec)
+        cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input))
+        self.assertTrue(
+            cos_sim > COSINE_THRESHOLD,
+            msg=f"EfficientNet-B0 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
+        )
+
+    def test_bert_base_uncased(self):
+        """HuggingFace BERT base-uncased with two int32 inputs, FP32 enabled."""
+        self.model = BertModel.from_pretrained("bert-base-uncased").cuda().eval()
+        # randint's upper bound is exclusive: (0, 2) samples from {0, 1}.
+        self.input = torch.randint(0, 2, (1, 14), dtype=torch.int32).to("cuda")
+        self.input2 = torch.randint(0, 2, (1, 14), dtype=torch.int32).to("cuda")
+        compile_spec = {
+            "inputs": [
+                torchtrt.Input(
+                    tensor.shape, dtype=tensor.dtype, format=torch.contiguous_format
+                )
+                for tensor in (self.input, self.input2)
+            ],
+            "device": torchtrt.Device("cuda:0"),
+            "enabled_precisions": {torch.float},
+            "truncate_long_and_double": True,
+            "debug": True,
+        }
+        trt_mod = torchtrt.dynamo.compile(self.model, **compile_spec)
+
+        model_outputs = self.model(self.input, self.input2)
+        trt_model_outputs = trt_mod(self.input, self.input2)
+        for key in model_outputs.keys():
+            cos_sim = cosine_similarity(model_outputs[key], trt_model_outputs[key])
+            self.assertTrue(
+                cos_sim > COSINE_THRESHOLD,
+                msg=f"HF BERT base-uncased TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
+            )
+
+    def test_resnet18_half(self):
+        """torchvision ResNet-18 converted to half precision, FP16 enabled."""
+        self.model = models.resnet18(pretrained=True).eval().to("cuda").half()
+        self.input = torch.randn((1, 3, 224, 224)).to("cuda").half()
+        compile_spec = {
+            "inputs": [
+                torchtrt.Input(
+                    self.input.shape, dtype=torch.half, format=torch.contiguous_format
+                )
+            ],
+            "device": torchtrt.Device("cuda:0"),
+            "enabled_precisions": {torch.half},
+        }
+
+        trt_mod = torchtrt.dynamo.compile(self.model, **compile_spec)
+        cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input))
+        self.assertTrue(
+            cos_sim > COSINE_THRESHOLD,
+            msg=f"Resnet18 Half TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()  # run this module's tests when executed as a script