
Commit 09b099a

Merge remote-tracking branch 'origin/dynamo_tensor_freeze' into export_prototype
2 parents: 980dc1c + dfc4899

File tree

4 files changed: +109, -29 lines

py/torch_tensorrt/fx/converters/acc_ops_converters.py

Lines changed: 13 additions & 16 deletions
@@ -3,27 +3,30 @@
 import math
 import operator
 import warnings
-from typing import Dict, Optional, Sequence, Tuple, Union, cast
+from typing import cast, Dict, Optional, Sequence, Tuple, Union

 import numpy as np

 # @manual=//deeplearning/trt/python:py_tensorrt
 import tensorrt as trt
 import torch
+
+from ..converter_registry import tensorrt_converter
+
+from ..tracer.acc_tracer import acc_ops
+from ..types import *  # noqa: F403
 from torch.fx.immutable_collections import immutable_list
 from torch.fx.node import Argument, Target
-from torch_tensorrt.fx.converters.impl import activation, convolution
+
+from ..utils import get_dynamic_dims, unified_dtype_converter, Frameworks
+
+from .converter_utils import *  # noqa: F403
 from torch_tensorrt.fx.passes.lower_basic_pass import (
     trt_transposed_linear,
     trt_transposed_matmul,
 )
 from torch_tensorrt.fx.tracer.acc_tracer.acc_ops import contiguous
-
-from ..converter_registry import tensorrt_converter
-from ..tracer.acc_tracer import acc_ops
-from ..types import *  # noqa: F403
-from ..utils import Frameworks, get_dynamic_dims, unified_dtype_converter
-from .converter_utils import *  # noqa: F403
+from torch_tensorrt.fx.converters.impl import activation, convolution

 _LOGGER: logging.Logger = logging.getLogger(__name__)

@@ -2711,14 +2714,8 @@ def acc_ops_linear(
             "dim for linear and it can't be the last dim."
         )

-    if isinstance(kwargs["weight"], (torch.Tensor, np.ndarray)):
-        weight = get_trt_tensor(
-            network,
-            kwargs["weight"].t()
-            if isinstance(kwargs["weight"], torch.Tensor)
-            else kwargs["weight"].T,
-            f"{name}_weight",
-        )
+    if isinstance(kwargs["weight"], torch.Tensor):
+        weight = get_trt_tensor(network, kwargs["weight"].t(), f"{name}_weight")
     if target not in (acc_ops.linear, torch.ops.aten.linear):
         weight_op = trt.MatrixOperation.TRANSPOSE
     else:

py/torch_tensorrt/fx/converters/converter_utils.py

Lines changed: 4 additions & 4 deletions
@@ -1,8 +1,8 @@
 import operator
 import warnings
-from enum import Enum, auto
 from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union

+from enum import Enum, auto
 import numpy as np

 # @manual=//deeplearning/trt/python:py_tensorrt
@@ -20,7 +20,7 @@
     TRTPluginFieldCollection,
     TRTTensor,
 )
-from ..utils import Frameworks, unified_dtype_converter
+from ..utils import unified_dtype_converter, Frameworks


 class SourceIR(Enum):
@@ -271,7 +271,7 @@ def create_constant(
     """
     constant = network.add_constant(
         (1,) if isinstance(value, (int, float)) else value.shape,
-        to_numpy(value, dtype).copy(),
+        to_numpy(value, dtype),
     )
     constant.name = name
     return constant.get_output(0)
@@ -311,7 +311,7 @@ def get_trt_tensor(
     elif isinstance(input_val, np.ndarray) and (
         input_val.dtype == np.bool_ or input_val.dtype == np.int64
     ):
-        input_val = input_val.astype(np.int32)
+        input_val = input_val.to(np.int32)

     if isinstance(input_val, (torch.Tensor, np.ndarray, int, float)):
         return create_constant(network, input_val, name, dtype)

py/torch_tensorrt/fx/converters/impl/convolution.py

Lines changed: 12 additions & 8 deletions
@@ -1,23 +1,27 @@
-from typing import Any, Optional, Sequence, Union
-
 import numpy as np
+from typing import Any, Optional, Sequence, Union

 # @manual=//deeplearning/trt/python:py_tensorrt
 import tensorrt as trt
 import torch
 from torch.fx.node import Target
-from torch_tensorrt.fx.converters import acc_ops_converters
+
 from torch_tensorrt.fx.converters.converter_utils import (
     SourceIR,
     extend_attr_to_tuple,
     get_dyn_range,
-    get_trt_tensor,
-    has_dynamic_shape,
     mark_as_int8_layer,
     set_layer_name,
+    has_dynamic_shape,
     to_numpy,
+    get_trt_tensor,
+)
+from torch_tensorrt.fx.converters import acc_ops_converters
+
+from torch_tensorrt.fx.types import (
+    TRTNetwork,
+    TRTTensor,
 )
-from torch_tensorrt.fx.types import TRTNetwork, TRTTensor


 def convNd(
@@ -50,7 +54,7 @@ def convNd(
     )

     # Process bias terms
-    if isinstance(bias, (torch.Tensor, np.ndarray)):
+    if isinstance(bias, torch.Tensor):
         # Transform the bias constant into a Numpy array
         bias = to_numpy(bias)

@@ -75,7 +79,7 @@ def convNd(
             network, target, tuple(), kwargs, name + "_unsqueeze_weight"
         )

-    elif isinstance(weight, (torch.Tensor, np.ndarray)):
+    elif isinstance(weight, torch.Tensor):
         # Transform the weight constant into a Numpy array
         weight = to_numpy(weight)

tests/py/dynamo/backend/test_specialized_models.py

Lines changed: 80 additions & 1 deletion
@@ -2,7 +2,7 @@
 import torch_tensorrt
 from torch.testing._internal.common_utils import TestCase, run_tests

-from ..testing_utilities import lower_graph_testing
+from ..testing_utilities import DECIMALS_OF_AGREEMENT, lower_graph_testing


 class TestFakeTensors(TestCase):
@@ -157,5 +157,84 @@ def forward(self, x):
         torch._dynamo.reset()


+class TestTensorFreezing(TestCase):
+    def test_tensor_freeze_attr(self):
+        class TensorFreeze(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.const = torch.ones((8, 2), device="cuda")
+
+            def forward(self, x):
+                return x @ self.const
+
+        inputs = [
+            torch.ones(
+                7,
+                8,
+            ).cuda()
+        ]
+
+        fx_graph = torch.fx.symbolic_trace(TensorFreeze())
+
+        # Validate that the results between Torch and Torch-TRT are similar
+        optimized_model = torch_tensorrt.compile(
+            fx_graph,
+            "torch_compile",
+            inputs,
+            min_block_size=1,
+            pass_through_build_failures=True,
+        )
+        optimized_model_results = optimized_model(*inputs).detach().cpu()
+        torch_model_results = fx_graph(*inputs).detach().cpu()
+
+        max_diff = float(
+            torch.max(torch.abs(optimized_model_results - torch_model_results))
+        )
+        self.assertAlmostEqual(
+            max_diff,
+            0,
+            DECIMALS_OF_AGREEMENT,
+            msg=f"Frozen-Tensor TRT outputs don't match with the original model.",
+        )
+        torch._dynamo.reset()
+
+    def test_constant_fold(self):
+        class Arange(torch.nn.Module):
+            def forward(self, x):
+                y = torch.arange(10, device="cuda")
+                return x + y
+
+        inputs = [
+            torch.rand(
+                10,
+                10,
+            ).cuda()
+        ]
+
+        fx_graph = torch.fx.symbolic_trace(Arange())
+
+        # Validate that the results between Torch and Torch-TRT are similar
+        optimized_model = torch_tensorrt.compile(
+            fx_graph,
+            "torch_compile",
+            inputs,
+            min_block_size=1,
+            pass_through_build_failures=True,
+        )
+        optimized_model_results = optimized_model(*inputs).detach().cpu()
+        torch_model_results = fx_graph(*inputs).detach().cpu()
+
+        max_diff = float(
+            torch.max(torch.abs(optimized_model_results - torch_model_results))
+        )
+        self.assertAlmostEqual(
+            max_diff,
+            0,
+            DECIMALS_OF_AGREEMENT,
+            msg=f"Constant Folded TRT outputs don't match with the original model.",
+        )
+        torch._dynamo.reset()
+
+
 if __name__ == "__main__":
     run_tests()
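For quick reference, here is a minimal standalone sketch of the pattern the new tests exercise: compiling a module whose tensor attribute is frozen into the graph as a constant. It is adapted directly from `test_tensor_freeze_attr` above and assumes a CUDA device plus a Torch-TensorRT build that exposes the `torch_compile` frontend used in the diff.

```python
import torch
import torch_tensorrt


class TensorFreeze(torch.nn.Module):
    """Module whose tensor attribute is treated as a trace-time constant."""

    def __init__(self):
        super().__init__()
        self.const = torch.ones((8, 2), device="cuda")

    def forward(self, x):
        return x @ self.const


inputs = [torch.ones(7, 8).cuda()]
fx_graph = torch.fx.symbolic_trace(TensorFreeze())

# Compile through the torch_compile frontend, mirroring the new tests
optimized_model = torch_tensorrt.compile(
    fx_graph,
    "torch_compile",
    inputs,
    min_block_size=1,
    pass_through_build_failures=True,
)

# Compare the TRT output against the original FX graph
trt_results = optimized_model(*inputs).detach().cpu()
torch_results = fx_graph(*inputs).detach().cpu()
print(float(torch.max(torch.abs(trt_results - torch_results))))
```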
