feat: Add preliminary support for freezing tensors in Dynamo

gs-olive · gs-olive · commit 51266dbdc6fc · 2023-09-05T11:43:40.000-07:00
fix: Refactor tensor freezing in Dynamo

Fix key operator converters to resolve failing tests
diff --git a/py/torch_tensorrt/dynamo/backend/backends.py b/py/torch_tensorrt/dynamo/backend/backends.py
@@ -1,12 +1,13 @@
 from __future__ import annotations
 
 import logging
-from functools import partial
+import unittest
 from typing import Any, Callable, Sequence
 
 import torch
 import torch._dynamo as td
-from torch._functorch.aot_autograd import aot_module_simplified, make_boxed_compiler
+from torch._dynamo.utils import detect_fake_mode
+from torch._functorch.aot_autograd import aot_export_joint_simple
 from torch_tensorrt.dynamo import CompilationSettings
 from torch_tensorrt.dynamo.compile import compile_module
 from torch_tensorrt.dynamo.lowering._decompositions import get_decompositions
@@ -33,8 +34,7 @@ def torch_tensorrt_backend(
 
     DEFAULT_BACKEND = aot_torch_tensorrt_aten_backend
 
-    compiled_mod: torch.nn.Module = DEFAULT_BACKEND(gm, sample_inputs, **kwargs)
-    return compiled_mod
+    return DEFAULT_BACKEND(gm, sample_inputs, **kwargs)
 
 
 @td.register_backend(name="aot_torch_tensorrt_aten")  # type: ignore[misc]
@@ -43,21 +43,26 @@ def aot_torch_tensorrt_aten_backend(
 ) -> torch.nn.Module:
     settings = parse_dynamo_kwargs(kwargs)
 
-    custom_backend = partial(
-        _pretraced_backend,
-        settings=settings,
-    )
-
     # Perform Pre-AOT Lowering for Module-Level Replacement
     gm = pre_aot_substitutions(gm)
 
-    # Invoke AOTAutograd to translate operators to aten
-    return aot_module_simplified(
-        gm,
-        sample_inputs,
-        fw_compiler=make_boxed_compiler(custom_backend),
-        decompositions=get_decompositions(settings.enable_experimental_decompositions),
-    )
+    fake_mode = detect_fake_mode(sample_inputs)
+
+    # Place backend tracing within FakeTensor context allowing nonfake Tensors
+    with unittest.mock.patch.object(
+        fake_mode, "allow_non_fake_inputs", True
+    ), fake_mode:
+        # Invoke AOTAutograd to translate operators to aten
+        graph_module = aot_export_joint_simple(
+            gm,
+            sample_inputs,
+            trace_joint=False,
+            decompositions=get_decompositions(
+                settings.enable_experimental_decompositions
+            ),
+        )
+
+        return _pretraced_backend(graph_module, sample_inputs, settings)
 
 
 def _pretraced_backend(
diff --git a/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py b/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py
@@ -3,14 +3,15 @@
 from datetime import datetime
 from typing import Any, Callable, Dict, List, NamedTuple, Optional, Sequence, Set
 
-import numpy
+import numpy as np
 
 # @manual=//deeplearning/trt/python:py_tensorrt
 import tensorrt as trt
 import torch
 import torch.fx
 from torch.fx.node import _get_qualified_name
 from torch.fx.passes.shape_prop import TensorMetadata
+from torch.utils._python_dispatch import _disable_current_modes
 from torch_tensorrt._Input import Input
 from torch_tensorrt.dynamo.conversion.converter_utils import get_node_name
 from torch_tensorrt.fx.observer import Observer
@@ -169,7 +170,7 @@ def run(
 
         cache = None
         if timing_cache:
-            cache_file = numpy.array(timing_cache)
+            cache_file = np.array(timing_cache)
             cache = builder_config.create_timing_cache(cache_file.tobytes())
         else:
             cache = builder_config.create_timing_cache(b"")
@@ -323,6 +324,21 @@ def call_function(self, target: str, args: Any, kwargs: Any) -> Any:
         assert self._cur_node_name is not None
         return converter(self.network, target, args, kwargs, self._cur_node_name)
 
+    def get_attr(self, target: str, args: Any, kwargs: Any) -> np.ndarray:
+        with _disable_current_modes():
+            from torch_tensorrt.fx.converters import to_numpy
+
+            frozen_attr = self.fetch_attr(target)
+
+            if isinstance(frozen_attr, torch.nn.Parameter):
+                constant_tensor = frozen_attr.data
+            else:
+                constant_tensor = frozen_attr
+
+            network_constant = to_numpy(constant_tensor)
+
+        return network_constant
+
     def call_method(self, target: str, args: Any, kwargs: Any) -> Any:
         assert isinstance(target, str)
         converter = CONVERTERS.get(self._cur_node)
@@ -344,6 +360,17 @@ def output(self, target: str, args: Any, kwargs: Any) -> List[Any]:
         else:
             outputs = (args[0],)
 
+        for output_idx in range(len(outputs)):
+            from torch_tensorrt.fx.converters import get_trt_tensor
+
+            output = outputs[output_idx]
+
+            if not isinstance(output, trt.tensorrt.ITensor):
+                new_output = get_trt_tensor(self.network, output, target)
+                outputs = (
+                    outputs[:output_idx] + (new_output,) + outputs[output_idx + 1 :]
+                )
+
         if not all(isinstance(output, trt.tensorrt.ITensor) for output in outputs):
             raise RuntimeError("TensorRT requires all outputs to be Tensor!")
 
diff --git a/py/torch_tensorrt/dynamo/conversion/impl/normalization/ops.py b/py/torch_tensorrt/dynamo/conversion/impl/normalization/ops.py
@@ -2,6 +2,7 @@
 from typing import Any, List, Optional, Sequence, Union, cast
 
 import numpy as np
+import tensorrt as trt
 import torch
 from torch.fx.node import Target
 from torch_tensorrt.dynamo._SourceIR import SourceIR
@@ -19,8 +20,6 @@
 from torch_tensorrt.fx.types import TRTNetwork, TRTTensor
 from torch_tensorrt.fx.utils import get_dynamic_dims
 
-import tensorrt as trt
-
 _LOGGER: logging.Logger = logging.getLogger(__name__)
 
 
@@ -101,9 +100,15 @@ def layer_norm(
             "of the TensorRT region!"
         )
 
-    gamma = weight.detach().cpu().float().numpy()
+    gamma = (
+        weight.detach().cpu().float().numpy()
+        if isinstance(weight, torch.Tensor)
+        else weight
+    )
     gamma_field = trt.PluginField("gamma", gamma, trt.PluginFieldType.FLOAT32)
-    beta = bias.detach().cpu().float().numpy()
+    beta = (
+        bias.detach().cpu().float().numpy() if isinstance(bias, torch.Tensor) else bias
+    )
     beta_field = trt.PluginField("beta", beta, trt.PluginFieldType.FLOAT32)
     eps_field = trt.PluginField(
         "eps", np.array(eps, dtype=np.float32), trt.PluginFieldType.FLOAT32
diff --git a/py/torch_tensorrt/dynamo/lowering/__init__.py b/py/torch_tensorrt/dynamo/lowering/__init__.py
@@ -1,4 +1,5 @@
 from ._decompositions import get_decompositions  # noqa: F401
+from ._freeze_aot_graph import *  # noqa: F401
 from ._fusers import *  # noqa: F401
 from ._pre_aot_lowering import SUBSTITUTION_REGISTRY  # noqa: F401
 from ._pre_aot_lowering import register_substitution  # noqa: F401
diff --git a/py/torch_tensorrt/dynamo/partitioning/_global_partitioner.py b/py/torch_tensorrt/dynamo/partitioning/_global_partitioner.py
@@ -153,7 +153,10 @@ def is_node_supported(
     ) -> bool:
         node_name = ConverterRegistry.qualified_name_or_str(node.target)
 
-        if node in CONVERTERS and node_name not in self.torch_executed_ops:
+        if (
+            node.target in CONVERTERS.keys()
+            or (node.op == "get_attr" and "constant" in node_name)
+        ) and node_name not in self.torch_executed_ops:
             # If node is a proper, supported computational node, store the operator
             if not node.is_impure():
                 if node_name not in self.supported_operators:
diff --git a/py/torch_tensorrt/fx/converters/acc_ops_converters.py b/py/torch_tensorrt/fx/converters/acc_ops_converters.py
@@ -3,30 +3,27 @@
 import math
 import operator
 import warnings
-from typing import cast, Dict, Optional, Sequence, Tuple, Union
+from typing import Dict, Optional, Sequence, Tuple, Union, cast
 
 import numpy as np
 
 # @manual=//deeplearning/trt/python:py_tensorrt
 import tensorrt as trt
 import torch
-
-from ..converter_registry import tensorrt_converter
-
-from ..tracer.acc_tracer import acc_ops
-from ..types import *  # noqa: F403
 from torch.fx.immutable_collections import immutable_list
 from torch.fx.node import Argument, Target
-
-from ..utils import get_dynamic_dims, unified_dtype_converter, Frameworks
-
-from .converter_utils import *  # noqa: F403
+from torch_tensorrt.fx.converters.impl import activation, convolution
 from torch_tensorrt.fx.passes.lower_basic_pass import (
     trt_transposed_linear,
     trt_transposed_matmul,
 )
 from torch_tensorrt.fx.tracer.acc_tracer.acc_ops import contiguous
-from torch_tensorrt.fx.converters.impl import activation, convolution
+
+from ..converter_registry import tensorrt_converter
+from ..tracer.acc_tracer import acc_ops
+from ..types import *  # noqa: F403
+from ..utils import Frameworks, get_dynamic_dims, unified_dtype_converter
+from .converter_utils import *  # noqa: F403
 
 _LOGGER: logging.Logger = logging.getLogger(__name__)
 
@@ -2714,8 +2711,14 @@ def acc_ops_linear(
         "dim for linear and it can't be the last dim."
     )
 
-    if isinstance(kwargs["weight"], torch.Tensor):
-        weight = get_trt_tensor(network, kwargs["weight"].t(), f"{name}_weight")
+    if isinstance(kwargs["weight"], (torch.Tensor, np.ndarray)):
+        weight = get_trt_tensor(
+            network,
+            kwargs["weight"].t()
+            if isinstance(kwargs["weight"], torch.Tensor)
+            else kwargs["weight"].T,
+            f"{name}_weight",
+        )
         if target not in (acc_ops.linear, torch.ops.aten.linear):
             weight_op = trt.MatrixOperation.TRANSPOSE
         else:
diff --git a/py/torch_tensorrt/fx/converters/converter_utils.py b/py/torch_tensorrt/fx/converters/converter_utils.py
@@ -1,8 +1,8 @@
 import operator
 import warnings
+from enum import Enum, auto
 from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
 
-from enum import Enum, auto
 import numpy as np
 
 # @manual=//deeplearning/trt/python:py_tensorrt
@@ -20,7 +20,7 @@
     TRTPluginFieldCollection,
     TRTTensor,
 )
-from ..utils import unified_dtype_converter, Frameworks
+from ..utils import Frameworks, unified_dtype_converter
 
 
 class SourceIR(Enum):
@@ -271,7 +271,7 @@ def create_constant(
     """
     constant = network.add_constant(
         (1,) if isinstance(value, (int, float)) else value.shape,
-        to_numpy(value, dtype),
+        to_numpy(value, dtype).copy(),
     )
     constant.name = name
     return constant.get_output(0)
@@ -311,7 +311,7 @@ def get_trt_tensor(
     elif isinstance(input_val, np.ndarray) and (
         input_val.dtype == np.bool_ or input_val.dtype == np.int64
     ):
-        input_val = input_val.to(np.int32)
+        input_val = input_val.astype(np.int32)
 
     if isinstance(input_val, (torch.Tensor, np.ndarray, int, float)):
         return create_constant(network, input_val, name, dtype)
diff --git a/py/torch_tensorrt/fx/converters/impl/convolution.py b/py/torch_tensorrt/fx/converters/impl/convolution.py
@@ -1,27 +1,23 @@
-import numpy as np
 from typing import Any, Optional, Sequence, Union
 
+import numpy as np
+
 # @manual=//deeplearning/trt/python:py_tensorrt
 import tensorrt as trt
 import torch
 from torch.fx.node import Target
-
+from torch_tensorrt.fx.converters import acc_ops_converters
 from torch_tensorrt.fx.converters.converter_utils import (
     SourceIR,
     extend_attr_to_tuple,
     get_dyn_range,
+    get_trt_tensor,
+    has_dynamic_shape,
     mark_as_int8_layer,
     set_layer_name,
-    has_dynamic_shape,
     to_numpy,
-    get_trt_tensor,
-)
-from torch_tensorrt.fx.converters import acc_ops_converters
-
-from torch_tensorrt.fx.types import (
-    TRTNetwork,
-    TRTTensor,
 )
+from torch_tensorrt.fx.types import TRTNetwork, TRTTensor
 
 
 def convNd(
@@ -54,7 +50,7 @@ def convNd(
         )
 
     # Process bias terms
-    if isinstance(bias, torch.Tensor):
+    if isinstance(bias, (torch.Tensor, np.ndarray)):
         # Transform the bias constant into a Numpy array
         bias = to_numpy(bias)
 
@@ -79,7 +75,7 @@ def convNd(
                 network, target, tuple(), kwargs, name + "_unsqueeze_weight"
             )
 
-    elif isinstance(weight, torch.Tensor):
+    elif isinstance(weight, (torch.Tensor, np.ndarray)):
         # Transform the weight constant into a Numpy array
         weight = to_numpy(weight)
 

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,5 @@`
`1`	`1`	`from ._decompositions import get_decompositions # noqa: F401`
	`2`	`+from ._freeze_aot_graph import * # noqa: F401`
`2`	`3`	`from ._fusers import * # noqa: F401`
`3`	`4`	`from ._pre_aot_lowering import SUBSTITUTION_REGISTRY # noqa: F401`
`4`	`5`	`from ._pre_aot_lowering import register_substitution # noqa: F401`