fix: Repair input aliasing with clone insertion

gs-olive · gs-olive · commit c63e41136f18 · 2023-09-13T12:33:25.000-07:00
diff --git a/py/torch_tensorrt/dynamo/backend/backends.py b/py/torch_tensorrt/dynamo/backend/backends.py
@@ -13,7 +13,7 @@
 from torch_tensorrt.dynamo.lowering import (
     apply_lowering_passes,
     get_decompositions,
-    replace_builtin_inplace_ops,
+    repair_input_aliasing,
 )
 from torch_tensorrt.dynamo.lowering._pre_aot_lowering import pre_aot_substitutions
 from torch_tensorrt.dynamo.utils import parse_dynamo_kwargs
@@ -76,12 +76,13 @@ def _pretraced_backend(
         with unittest.mock.patch.object(
             fake_mode, "allow_non_fake_inputs", True
         ), fake_mode:
-            replace_builtin_inplace_ops(gm)
+            repair_input_aliasing(gm)
 
             # Invoke AOTAutograd to translate operators to aten
             gm = aot_export_joint_simple(
                 gm,
                 sample_inputs,
+                trace_joint=False,
                 decompositions=get_decompositions(
                     settings.enable_experimental_decompositions
                 ),
diff --git a/py/torch_tensorrt/dynamo/lowering/__init__.py b/py/torch_tensorrt/dynamo/lowering/__init__.py
@@ -2,6 +2,6 @@
 from ._fusers import *  # noqa: F401
 from ._pre_aot_lowering import SUBSTITUTION_REGISTRY  # noqa: F401
 from ._pre_aot_lowering import register_substitution  # noqa: F401
-from ._replace_inplace_ops import replace_builtin_inplace_ops
+from ._repair_input_aliasing import repair_input_aliasing
 from .passes import add_lowering_pass, apply_lowering_passes
 from .substitutions import *  # noqa: F401
diff --git a/py/torch_tensorrt/dynamo/lowering/_repair_input_aliasing.py b/py/torch_tensorrt/dynamo/lowering/_repair_input_aliasing.py
@@ -0,0 +1,38 @@
+import logging
+
+import torch
+from torch_tensorrt.dynamo.lowering.passes.pass_utils import get_tensor_placeholders
+
+logger = logging.getLogger(__name__)
+
+
+def repair_input_aliasing(gm: torch.fx.GraphModule) -> torch.fx.GraphModule:
+    """Temporarily inserts a clone operator after every Tensor placeholder
+
+    See: https://github.com/pytorch/pytorch/issues/108079
+    Undone by `remove_input_alias_fixing_clones` after tracing
+    """
+    # Extract graph placeholder Tensors
+    placeholders = get_tensor_placeholders(gm)
+
+    for node in placeholders:
+        # Insert clones for placeholder nodes to avoid
+        # input aliasing or mutation
+        with gm.graph.inserting_after(placeholders[-1]):
+            cloned_input = gm.graph.call_function(
+                torch.ops.aten.clone.default,
+                args=(node,),
+            )
+
+        # Redirect every use of the placeholder, except the clone node
+        # itself, to the cloned placeholder
+        node.replace_all_uses_with(
+            cloned_input,
+            delete_user_cb=lambda node: node != cloned_input,
+        )
+
+    gm.graph.lint()
+    gm.recompile()
+    logger.debug(f"Inserted auxiliary clone nodes for placeholders:\n{gm.graph}")
+
+    return gm
diff --git a/py/torch_tensorrt/dynamo/lowering/_replace_inplace_ops.py b/py/torch_tensorrt/dynamo/lowering/_replace_inplace_ops.py
diff --git a/py/torch_tensorrt/dynamo/lowering/passes/__init__.py b/py/torch_tensorrt/dynamo/lowering/passes/__init__.py
@@ -6,10 +6,12 @@
 # Import and order lowering passes and pass manager
 from .constant_folding import constant_fold
 from .pass_manager import DynamoPassManager
+from .remove_input_alias_fixing_clones import remove_input_alias_fixing_clones
 from .repair_input_as_output import repair_input_as_output
 
 ATEN_LOWERING_PASSES = DynamoPassManager.build_from_passlist(
     [
+        remove_input_alias_fixing_clones,
         constant_fold,
         repair_input_as_output,
     ]
diff --git a/py/torch_tensorrt/dynamo/lowering/passes/constant_folding.py b/py/torch_tensorrt/dynamo/lowering/passes/constant_folding.py
@@ -2,6 +2,9 @@
 
 import torch
 from torch_tensorrt._utils import sanitized_torch_version
+from torch_tensorrt.dynamo.lowering.passes.pass_utils import (
+    clean_up_graph_after_modifications,
+)
 
 from packaging import version
 
@@ -47,9 +50,7 @@ def constant_fold(gm: torch.fx.GraphModule) -> torch.fx.GraphModule:
     for node in erased_params:
         gm.graph.erase_node(node)
 
-    gm.graph.eliminate_dead_code()
-    gm.graph.lint()
-    gm.recompile()
+    gm = clean_up_graph_after_modifications(gm)
 
     logger.debug(f"Graph after constant folding:\n{gm.graph}")
 
diff --git a/py/torch_tensorrt/dynamo/lowering/passes/pass_utils.py b/py/torch_tensorrt/dynamo/lowering/passes/pass_utils.py
@@ -0,0 +1,31 @@
+from typing import List
+
+import torch
+
+
+def clean_up_graph_after_modifications(
+    gm: torch.fx.GraphModule,
+) -> torch.fx.GraphModule:
+    """Runs dead-code elimination, linting, and recompilation for graph, in-place"""
+    gm.graph.eliminate_dead_code()
+    gm.graph.lint()
+    gm.recompile()
+    return gm
+
+
+def get_tensor_placeholders(
+    gm: torch.fx.GraphModule,
+) -> List[torch.fx.Node]:
+    """Returns placeholder nodes of GraphModule which are torch.Tensor types"""
+    # Tensor placeholders must be subclasses of torch.Tensor
+    placeholders = [
+        node
+        for node in gm.graph.nodes
+        if (
+            node.op == "placeholder"
+            and isinstance(node.type, type)
+            and issubclass(node.type, torch.Tensor)
+        )
+    ]
+
+    return placeholders
diff --git a/py/torch_tensorrt/dynamo/lowering/passes/remove_input_alias_fixing_clones.py b/py/torch_tensorrt/dynamo/lowering/passes/remove_input_alias_fixing_clones.py
@@ -0,0 +1,39 @@
+import logging
+
+import torch
+from torch_tensorrt.dynamo.lowering.passes.pass_utils import (
+    clean_up_graph_after_modifications,
+)
+
+logger = logging.getLogger(__name__)
+
+
+# TODO: Delete this lowering pass once aot_export_joint_simple is patched
+def remove_input_alias_fixing_clones(gm: torch.fx.GraphModule) -> torch.fx.GraphModule:
+    """Remove the auxiliary clone nodes inserted to fix input aliasing
+
+    See: https://github.com/pytorch/pytorch/issues/108079
+    """
+    modified_graph = False
+
+    for node in gm.graph.nodes:
+        # If the node is a placeholder whose only user is a clone node,
+        # it was modified by the input alias-fixing pass, and that change
+        # needs to be undone
+        if (
+            node.op == "placeholder"
+            and len(node.users) == 1
+            and list(node.users)[0].target == torch.ops.aten.clone.default
+        ):
+            modified_graph = True
+
+            # Replace all uses of the clone with the placeholder, delete the clone
+            clone_node = list(node.users)[0]
+            clone_node.replace_all_uses_with(node)
+            gm.graph.erase_node(clone_node)
+
+    if modified_graph:
+        gm = clean_up_graph_after_modifications(gm)
+        logger.debug(f"Removed auxiliary clone nodes for placeholders:\n{gm.graph}")
+
+    return gm
diff --git a/py/torch_tensorrt/dynamo/lowering/passes/repair_input_as_output.py b/py/torch_tensorrt/dynamo/lowering/passes/repair_input_as_output.py
@@ -1,6 +1,10 @@
 import logging
 
 import torch
+from torch_tensorrt.dynamo.lowering.passes.pass_utils import (
+    clean_up_graph_after_modifications,
+    get_tensor_placeholders,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -13,15 +17,7 @@ def repair_input_as_output(gm: torch.fx.GraphModule) -> torch.fx.GraphModule:
     modified_graph = False
 
     # Extract graph placeholder Tensors
-    placeholders = [
-        node
-        for node in gm.graph.nodes
-        if (
-            node.op == "placeholder"
-            and isinstance(node.type, type)
-            and issubclass(node.type, torch.Tensor)
-        )
-    ]
+    placeholders = get_tensor_placeholders(gm)
 
     for placeholder in placeholders:
         # If any placeholder has any users which are direct graph outputs
@@ -34,7 +30,7 @@ def repair_input_as_output(gm: torch.fx.GraphModule) -> torch.fx.GraphModule:
             direct_outputs = [user for user in placeholder.users if user.op == "output"]
 
             # Insert clone node for placeholder to ensure placeholder is not a direct output
-            with gm.graph.inserting_after(placeholder):
+            with gm.graph.inserting_after(placeholders[-1]):
                 cloned_placeholder = gm.graph.call_function(
                     torch.ops.aten.clone.default,
                     args=(placeholder,),
@@ -45,9 +41,7 @@ def repair_input_as_output(gm: torch.fx.GraphModule) -> torch.fx.GraphModule:
                 output.replace_input_with(placeholder, cloned_placeholder)
 
     if modified_graph:
-        gm.graph.eliminate_dead_code()
-        gm.graph.lint()
-        gm.recompile()
+        gm = clean_up_graph_after_modifications(gm)
         logger.debug(f"Graph after repair_input_as_output:\n{gm.graph}")
 
     return gm
diff --git a/tests/py/dynamo/backend/test_specialized_models.py b/tests/py/dynamo/backend/test_specialized_models.py
@@ -280,12 +280,12 @@ def forward(self, x):
 
     def test_input_modifications_mul(self):
         class InplaceMul(torch.nn.Module):
-            def forward(self, x):
+            def forward(self, x, y):
                 x *= 5.0
                 x *= 1.9
-                y = x + 1
-                y /= 1.3
-                return y
+                z = x + y
+                z /= 1.3
+                return z
 
         inputs = [
             torch.rand(
@@ -294,6 +294,12 @@ def forward(self, x):
                 5,
                 7,
             ).cuda(),
+            torch.rand(
+                1,
+                3,
+                5,
+                7,
+            ).cuda(),
         ]
 
         fx_graph = torch.fx.symbolic_trace(InplaceMul())
diff --git a/tests/py/dynamo/testing_utilities.py b/tests/py/dynamo/testing_utilities.py
@@ -10,7 +10,7 @@
 from torch_tensorrt.dynamo.lowering import (
     apply_lowering_passes,
     get_decompositions,
-    replace_builtin_inplace_ops,
+    repair_input_aliasing,
 )
 from torch_tensorrt.dynamo.lowering._pre_aot_lowering import pre_aot_substitutions
 
@@ -43,12 +43,13 @@ def fx_dynamo_testing_backend(
     with unittest.mock.patch.object(
         fake_mode, "allow_non_fake_inputs", True
     ), fake_mode:
-        replace_builtin_inplace_ops(gm)
+        repair_input_aliasing(gm)
 
         # Invoke AOTAutograd to translate operators to aten
         gm = aot_export_joint_simple(
             gm,
             sample_inputs,
+            trace_joint=False,
             decompositions=get_decompositions(),
         )
 

Original file line number	Diff line number	Diff line change
`@@ -6,10 +6,12 @@`
`6`	`6`	`# Import and order lowering passes and pass manager`
`7`	`7`	`from .constant_folding import constant_fold`
`8`	`8`	`from .pass_manager import DynamoPassManager`
	`9`	`+from .remove_input_alias_fixing_clones import remove_input_alias_fixing_clones`
`9`	`10`	`from .repair_input_as_output import repair_input_as_output`
`10`	`11`
`11`	`12`	`ATEN_LOWERING_PASSES = DynamoPassManager.build_from_passlist(`
`12`	`13`	`[`
	`14`	`+ remove_input_alias_fixing_clones,`
`13`	`15`	`constant_fold,`
`14`	`16`	`repair_input_as_output,`
`15`	`17`	`]`