
Commit a57c97f

fix: Add special cases where input of graph is output
- TRT does not allow inputs of graphs to also be outputs; however, many scenarios encountered in real models produce this situation, especially when the input is cloned or copied and then returned
- The current converters register these operators as no-ops, causing TRT engine building to fail on such inputs
- Instead of requiring an identity layer for every clone or copy node, we check whether the node is the only operator on a placeholder (input) and insert the identity layer only when needed
- Coalesce the implementations of clone and to_copy, which are effectively the same operator
- Add test cases to validate the new behavior
- Add a new boilerplate converter validator utility to support this case
1 parent f8bcbdd commit a57c97f
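
To illustrate the scenario this commit targets, here is a minimal sketch (not part of the commit) of a model whose only operation on the input is a clone that is returned directly, so the traced graph has the shape placeholder -> clone -> output:

    import torch
    import torch.nn as nn


    class ReturnClonedInput(nn.Module):
        # Hypothetical module: the clone of the input is returned directly,
        # so the graph input would also become the graph output unless a
        # layer (here, an identity) is inserted during conversion.
        def forward(self, x):
            return x.clone()


    model = ReturnClonedInput()
    print(model(torch.randn(8, 2, 10)).shape)  # torch.Size([8, 2, 10])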

File tree

5 files changed: +151, -63 lines changed


py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py

Lines changed: 64 additions & 20 deletions
@@ -1,5 +1,5 @@
 import logging
-from typing import Any, Dict, Optional, Sequence, Tuple, Union
+from typing import Any, Callable, Dict, Optional, Sequence, Tuple, Union
 
 import tensorrt as trt
 import torch
@@ -9,6 +9,7 @@
 from torch_tensorrt.dynamo.conversion.converter_utils import (
     cast_int_int_div_trt_tensor,
     cast_trt_tensor,
+    is_only_operator_on_placeholder,
 )
 from torch_tensorrt.fx.converters import acc_ops_converters
 from torch_tensorrt.fx.types import TRTNetwork, TRTTensor
@@ -505,29 +506,59 @@ def aten_ops_permute(
     )
 
 
-def to_copy_dtype_validator(to_copy_node: Node) -> bool:
-    allowed_casts = {torch.float, torch.int32, torch.bool, torch.int8, torch.float16}
-
-    # Validate input node has convertible kwargs
-    if "dtype" in to_copy_node.kwargs:
-        if to_copy_node.kwargs["dtype"] in allowed_casts:
-            return True
+def to_copy_dtype_validator(placeholder_only: bool) -> Callable[[Node], bool]:
+    """Return validator for to_copy node with placeholder restrictions"""
+
+    def validate_dtype(to_copy_node: Node) -> bool:
+        """Returns true if the to_copy node can be converted to TRT
+
+        Based on data type being casted to
+        """
+        allowed_casts = {
+            torch.float,
+            torch.int32,
+            torch.bool,
+            torch.int8,
+            torch.float16,
+        }
+
+        # Validate input node has convertible kwargs
+        if "dtype" in to_copy_node.kwargs:
+            if to_copy_node.kwargs["dtype"] in allowed_casts:
+                return True
+            else:
+                _LOGGER.debug(
+                    f"_to_copy converter rejected node {to_copy_node} with dtype {to_copy_node.kwargs['dtype']}"
+                )
+                return False
         else:
             _LOGGER.debug(
-                f"_to_copy converter rejected node {to_copy_node} with dtype {to_copy_node.kwargs['dtype']}"
+                f"_to_copy converter rejected node {to_copy_node} with kwargs {to_copy_node.kwargs}"
             )
             return False
-    else:
-        _LOGGER.debug(
-            f"_to_copy converter rejected node {to_copy_node} with kwargs {to_copy_node.kwargs}"
+
+    def validator(to_copy_node: Node) -> bool:
+        """Returns true if the to_copy node can be converted to TRT
+        and the placeholder restriction is satisfied
+        """
+        # The placeholder restriction is satsfied if placeholder_only is the same
+        # truth value as is_only_operator_on_placeholder(to_copy_node)
+        return validate_dtype(to_copy_node) and (
+            (not placeholder_only) ^ is_only_operator_on_placeholder(to_copy_node)
        )
-        return False
+
+    return validator
 
 
 @dynamo_tensorrt_converter(
-    torch.ops.aten._to_copy.default, capability_validator=to_copy_dtype_validator
+    torch.ops.aten.clone.default,
+    capability_validator=lambda node: not is_only_operator_on_placeholder(node),
 )  # type: ignore[misc]
-def aten_ops_to_copy_dtype(
+@dynamo_tensorrt_converter(
+    torch.ops.aten._to_copy.default,
+    capability_validator=to_copy_dtype_validator(placeholder_only=False),
+)  # type: ignore[misc]
+def aten_ops_clone_copy_dtype(
     network: TRTNetwork,
     target: Target,
     args: Tuple[Argument, ...],
@@ -540,28 +571,41 @@ def aten_ops_to_copy_dtype(
         SourceIR.ATEN,
         name,
         args[0],
-        kwargs["dtype"],
+        kwargs.get("dtype", args[0].dtype),
+        force_layer=False,
     )
 
 
-@dynamo_tensorrt_converter(torch.ops.aten.clone.default)  # type: ignore[misc]
-def aten_ops_clone(
+@dynamo_tensorrt_converter(
+    torch.ops.aten.clone.default,
+    capability_validator=is_only_operator_on_placeholder,
+)  # type: ignore[misc]
+@dynamo_tensorrt_converter(
+    torch.ops.aten._to_copy.default,
+    capability_validator=to_copy_dtype_validator(placeholder_only=True),
+)  # type: ignore[misc]
+def aten_ops_clone_copy_placeholder(
     network: TRTNetwork,
     target: Target,
    args: Tuple[Argument, ...],
     kwargs: Dict[str, Argument],
     name: str,
 ) -> Union[TRTTensor, Sequence[TRTTensor]]:
-    return impl.cast.clone(
+    # For clone or copy nodes where the input is also the output,
+    # we need to force cast to ensure a layer is added to the TRT engine
+    # since TRT engine inputs cannot also be TRT engine outputs
+    return impl.cast.to_copy(
         network,
         target,
         SourceIR.ATEN,
         name,
         args[0],
+        kwargs.get("dtype", args[0].dtype),
+        force_layer=True,
     )
 
 
-@dynamo_tensorrt_converter(torch.ops.aten.expand.default)
+@dynamo_tensorrt_converter(torch.ops.aten.expand.default)  # type: ignore[misc]
 def aten_ops_expand(
     network: TRTNetwork,
     target: Target,
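
As a note on the registrations above, the placeholder restriction inside the validator is an XOR: with placeholder_only=False the converter accepts nodes that are not the sole operator on a placeholder, and with placeholder_only=True it accepts exactly the remaining ones. A minimal sketch (assumed names, not part of the diff) of that split:

    # Truth-table sketch of the placeholder restriction used by the validators.
    # `direct` stands in for is_only_operator_on_placeholder(node).
    def accepts(placeholder_only: bool, direct: bool) -> bool:
        return (not placeholder_only) ^ direct


    for direct in (False, True):
        dtype_path = accepts(placeholder_only=False, direct=direct)
        placeholder_path = accepts(placeholder_only=True, direct=direct)
        # Exactly one of the two registrations accepts a given node
        assert dtype_path != placeholder_path
        print(f"direct={direct}: dtype_path={dtype_path}, placeholder_path={placeholder_path}")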

py/torch_tensorrt/dynamo/conversion/converter_utils.py

Lines changed: 30 additions & 5 deletions
@@ -43,27 +43,49 @@ def get_node_name(node: torch.fx.Node) -> str:
     return node_name
 
 
+def is_only_operator_on_placeholder(node: torch.fx.Node) -> bool:
+    """Detects whether a call_function node is the only operator on a placeholder"""
+    # Returns true if the node operates on a placeholder and is a direct output
+    return (
+        node.op == "call_function"
+        and any(
+            arg.op == "placeholder"
+            for arg in node.args
+            if isinstance(arg, torch.fx.Node)
+        )
+        and any(user.op == "output" for user in list(node.users.keys()))
+    )
+
+
 def dynamic_unsupported(node: torch.fx.Node) -> bool:
     # Validate that none of the inputs to the node have Dynamic shapes
     assert isinstance(
         node, torch.fx.Node
     ), "Inputs to validator functions must be FX Nodes"
 
     # Check node value itself
-    if getattr(node.meta["val"], "_has_symbolic_sizes_strides", False):
+    if ("val" in node.meta) and getattr(
+        node.meta["val"], "_has_symbolic_sizes_strides", False
+    ):
         return False
 
     # Check node arguments individually
     if any(
-        getattr(arg.meta["val"], "_has_symbolic_sizes_strides", False)
+        (
+            ("val" in arg.meta)
+            and getattr(arg.meta["val"], "_has_symbolic_sizes_strides", False)
+        )
         for arg in node.args
         if isinstance(arg, torch.fx.Node)
     ):
         return False
 
     # Check node keyword arguments individually
     if any(
-        getattr(kwarg.meta["val"], "_has_symbolic_sizes_strides", False)
+        (
+            ("val" in kwarg.meta)
+            and getattr(kwarg.meta["val"], "_has_symbolic_sizes_strides", False)
+        )
         for kwarg in node.kwargs.values()
         if isinstance(kwarg, torch.fx.Node)
     ):
@@ -80,9 +102,12 @@ def cast_trt_tensor(
     target: Target = "",
     source_ir: Optional[SourceIR] = None,
 ) -> TRTTensor:
-    """
-    Given a TRT Tensor, convert that Tensor to the specified dtype
+    """Given a TRT Tensor, convert that Tensor to the specified dtype
+
     Adds an Identity layer to the network which performs the conversion
+    if the input's dtype is different from the cast type. Otherwise returns
+    input unchanged
+
     Args:
         network (TRTNetwork): A TensorRT network
         input_val (TRTTensor): A TRT Tensor to cast to a new data type
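
To show the pattern the new is_only_operator_on_placeholder helper is written to detect, a small hand-built FX graph (illustrative only) where a call_function node consumes a placeholder and feeds the graph output:

    import torch
    import torch.fx

    # placeholder -> clone -> output, the shape flagged by the new utility
    graph = torch.fx.Graph()
    x = graph.placeholder("x")
    cloned = graph.call_function(torch.clone, (x,))
    graph.output(cloned)

    node = cloned
    is_direct = (
        node.op == "call_function"
        and any(arg.op == "placeholder" for arg in node.args if isinstance(arg, torch.fx.Node))
        and any(user.op == "output" for user in node.users)
    )
    print(is_direct)  # True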

py/torch_tensorrt/dynamo/conversion/impl/cast.py

Lines changed: 21 additions & 19 deletions
@@ -3,7 +3,12 @@
 
 from torch.fx.node import Target
 from torch_tensorrt.dynamo._SourceIR import SourceIR
+from torch_tensorrt.dynamo.conversion.converter_registry import ConverterRegistry
 from torch_tensorrt.dynamo.conversion.converter_utils import cast_trt_tensor
+from torch_tensorrt.fx.converters.converter_utils import (
+    Frameworks,
+    unified_dtype_converter,
+)
 from torch_tensorrt.fx.types import TRTDataType, TRTNetwork, TRTTensor
 
 LOGGER: logging.Logger = logging.getLogger(__name__)
@@ -16,28 +21,25 @@ def to_copy(
     name: str,
     input: TRTTensor,
     dtype: TRTDataType,
+    force_layer: bool = False,
 ) -> TRTTensor:
     if not isinstance(input, TRTTensor):
         raise RuntimeError(
             f"to_copy received input {input} that is not a TensorRT ITensor"
         )
 
-    casted_tensor = cast_trt_tensor(network, input, dtype, name, target, source_ir)
-    return casted_tensor
-
-
-def clone(
-    network: TRTNetwork,
-    target: Target,
-    source_ir: Optional[SourceIR],
-    name: str,
-    input: TRTTensor,
-) -> TRTTensor:
-    if not isinstance(input, TRTTensor):
-        raise RuntimeError(
-            f"clone received input {input} that is not a TensorRT ITensor"
-        )
-
-    LOGGER.debug(f"Evaluating clone on object with name: {name}")
-
-    return input
+    # If cast is forced, insert identity layer regardless of whether the dtype
+    # doesn't change
+    if force_layer:
+        trt_dtype = unified_dtype_converter(dtype, Frameworks.TRT)
+        source_ir = source_ir if source_ir is not None else SourceIR.UNKNOWN
+        target_str = ConverterRegistry.qualified_name_or_str(target)
+        target_name = f"{source_ir}_ops{('.' + target_str) if target_str else ''}"
+
+        identity_layer = network.add_identity(input)
+        identity_layer.set_output_type(0, trt_dtype)
+        identity_layer.name = f"Forced Cast ITensor {input.name} from {input.dtype} to {trt_dtype} - [{target_name}]-[{name}]"
+        return identity_layer.get_output(0)
+    else:
+        casted_tensor = cast_trt_tensor(network, input, dtype, name, target, source_ir)
+        return casted_tensor
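
For context on the forced path added above, a rough standalone sketch of inserting a TensorRT identity layer so the returned tensor is produced by a layer rather than aliasing the engine input (assumes an already-constructed INetworkDefinition; names are illustrative):

    import tensorrt as trt


    def force_identity_cast(
        network: trt.INetworkDefinition, tensor: trt.ITensor, dtype: trt.DataType
    ) -> trt.ITensor:
        # Add an identity layer even when the dtype is unchanged, so that the
        # value feeding the graph output is no longer the engine input itself.
        layer = network.add_identity(tensor)
        layer.set_output_type(0, dtype)
        layer.name = f"forced_cast_{tensor.name}"
        return layer.get_output(0)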

py/torch_tensorrt/dynamo/conversion/impl/elementwise/base.py

Lines changed: 9 additions & 19 deletions
@@ -2,6 +2,7 @@
 import warnings
 from typing import Any, Callable, Optional, Union
 
+import numpy as np
 import tensorrt as trt
 import torch
 from torch.fx.node import Target
@@ -11,7 +12,6 @@
     broadcast,
     get_trt_tensor,
     set_layer_name,
-    squeeze_left,
 )
 from torch_tensorrt.fx.types import TRTElementWiseOp, TRTNetwork, TRTTensor
 from torch_tensorrt.fx.utils import Frameworks, unified_dtype_converter
@@ -75,10 +75,10 @@ def convert_binary_elementwise(
     is_rhs_trt_tensor = False
 
     if isinstance(lhs_val, TRTTensor):
-        lhs_dtype = unified_dtype_converter(lhs_val.dtype, Frameworks.TORCH)
+        lhs_dtype = lhs_val.dtype
         is_lhs_trt_tensor = True
     if isinstance(rhs_val, TRTTensor):
-        rhs_dtype = unified_dtype_converter(rhs_val.dtype, Frameworks.TORCH)
+        rhs_dtype = rhs_val.dtype
         is_rhs_trt_tensor = True
 
     if not is_lhs_trt_tensor and not is_rhs_trt_tensor:
@@ -103,23 +103,13 @@ def convert_binary_elementwise(
     # dtype but we don't have a way to detect whether it makes sense for the
     # scalar to be float or half. Hence we go with the lhs dtype.
     if is_lhs_trt_tensor and isinstance(rhs_val, (float, int)):
-        rhs_val = torch.tensor([rhs_val], dtype=lhs_dtype)
+        rhs_val = np.array(
+            [rhs_val], dtype=unified_dtype_converter(lhs_dtype, Frameworks.NUMPY)
+        )
     if is_rhs_trt_tensor and isinstance(lhs_val, (float, int)):
-        lhs_val = torch.tensor([lhs_val], dtype=rhs_dtype)
-
-    # When lhs is scalar, and rhs has shape [1,], then currently the assert
-    # will fail because lhs shape has fewer dimensions than rhs shape. This
-    # happens when using implicit batch dimension, when we removed the 1st
-    # dimension from input tensor, causing it to have shape [] - a scalar. We
-    # fix it by reducing the rhs constant with a squeeze_left, so it becomes a
-    # scalar too. More generally, we squeeze_left on input if it's a constant
-    # tensor. This is safe because broadcast will pad dimensions on the left
-    # (prepend) to make lhs and rhs shape compatible.
-    if network.has_implicit_batch_dimension:
-        if isinstance(lhs_val, torch.Tensor):
-            lhs_val = squeeze_left(lhs_val)
-        if isinstance(rhs_val, torch.Tensor):
-            rhs_val = squeeze_left(rhs_val)
+        lhs_val = np.array(
+            [lhs_val], dtype=unified_dtype_converter(rhs_dtype, Frameworks.NUMPY)
+        )
 
     lhs_val = get_trt_tensor(network, lhs_val, f"{name}_lhs", lhs_dtype)
     rhs_val = get_trt_tensor(network, rhs_val, f"{name}_rhs", rhs_dtype)
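
A small sketch of the scalar-promotion change above: the Python scalar is wrapped as a NumPy array whose dtype mirrors the TRT operand's dtype (the mapping dict below is only a stand-in for unified_dtype_converter(..., Frameworks.NUMPY)):

    import numpy as np
    import tensorrt as trt

    # Stand-in mapping from TRT dtypes to NumPy dtypes, for illustration only
    TRT_TO_NUMPY = {trt.float32: np.float32, trt.float16: np.float16, trt.int32: np.int32}

    lhs_dtype = trt.float16  # dtype of the TRT tensor operand in this sketch
    rhs_val = np.array([2], dtype=TRT_TO_NUMPY[lhs_dtype])
    print(rhs_val.dtype)  # float16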

tests/py/dynamo/conversion/test_casts.py

Lines changed: 27 additions & 0 deletions
@@ -35,6 +35,19 @@ def forward(self, x):
             disable_passes=True,
         )
 
+    def test_clone_direct(self):
+        class Clone(nn.Module):
+            def forward(self, x):
+                return x.clone()
+
+        inputs = [torch.randn((8, 2, 10))]
+        self.run_test(
+            Clone(),
+            inputs,
+            expected_ops={torch.ops.aten.clone.default},
+            disable_passes=True,
+        )
+
 
 class TestToCopyConverter(DispatchTestCase):
     def test_to_copy_half(self):
@@ -83,6 +96,20 @@ def forward(self, x):
             disable_passes=True,
         )
 
+    def test_to_copy_direct(self):
+        class ToCopyFloat(nn.Module):
+            def forward(self, x):
+                return x.to(dtype=torch.float, copy=True)
+
+        inputs = [torch.rand((1, 3, 10)).float()]
+        self.run_test(
+            ToCopyFloat(),
+            inputs,
+            expected_ops={torch.ops.aten._to_copy.default},
+            precision=torch.float,
+            disable_passes=True,
+        )
+
 
 if __name__ == "__main__":
     run_tests()
