Skip to content

Commit f23cbb7

Browse files
committed
fix: Repair input aliasing with clone insertion
1 parent 5a5e235 commit f23cbb7

File tree

11 files changed

+114
-65
lines changed

11 files changed

+114
-65
lines changed

py/torch_tensorrt/dynamo/backend/backends.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from torch_tensorrt.dynamo.lowering import (
1414
apply_lowering_passes,
1515
get_decompositions,
16-
replace_builtin_inplace_ops,
16+
repair_input_aliasing,
1717
)
1818
from torch_tensorrt.dynamo.lowering._pre_aot_lowering import pre_aot_substitutions
1919
from torch_tensorrt.dynamo.utils import parse_dynamo_kwargs
@@ -75,12 +75,13 @@ def _pretraced_backend(
7575
with unittest.mock.patch.object(
7676
fake_mode, "allow_non_fake_inputs", True
7777
), fake_mode:
78-
replace_builtin_inplace_ops(gm)
78+
repair_input_aliasing(gm)
7979

8080
# Invoke AOTAutograd to translate operators to aten
8181
gm = aot_export_joint_simple(
8282
gm,
8383
sample_inputs,
84+
trace_joint=False,
8485
decompositions=get_decompositions(
8586
settings.enable_experimental_decompositions
8687
),

py/torch_tensorrt/dynamo/lowering/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@
22
from ._fusers import * # noqa: F401
33
from ._pre_aot_lowering import SUBSTITUTION_REGISTRY # noqa: F401
44
from ._pre_aot_lowering import register_substitution # noqa: F401
5-
from ._replace_inplace_ops import replace_builtin_inplace_ops
5+
from ._repair_input_aliasing import repair_input_aliasing
66
from .passes import add_lowering_pass, apply_lowering_passes
77
from .substitutions import * # noqa: F401
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import logging
2+
3+
import torch
4+
5+
logger = logging.getLogger(__name__)
6+
7+
8+
def repair_input_aliasing(gm: torch.fx.GraphModule) -> torch.fx.GraphModule:
    """Insert a clone operator after every placeholder node.

    Temporary workaround for input aliasing/mutation issues in
    ``aot_export_joint_simple``.

    See: https://github.com/pytorch/pytorch/issues/108079
    Undone by `remove_input_alias_fixing_clones` after tracing

    Args:
        gm: Graph module to modify in place.

    Returns:
        The same ``gm``, with one ``aten.clone`` node per placeholder and all
        other uses of each placeholder redirected to its clone.
    """
    placeholders = [node for node in gm.graph.nodes if node.op == "placeholder"]

    # Nothing to repair if the graph takes no inputs
    if not placeholders:
        return gm

    # Hoisted loop-invariant: insert every clone after the *last* placeholder
    # so placeholders remain a contiguous prefix of the node list
    insertion_point = placeholders[-1]

    for placeholder in placeholders:
        # Insert clones for placeholder nodes to avoid
        # input aliasing or mutation
        with gm.graph.inserting_after(insertion_point):
            cloned_input = gm.graph.call_function(
                torch.ops.aten.clone.default,
                args=(placeholder,),
            )

        # Replace all uses of the placeholder except the cloned node
        # with the cloned placeholder. The clone is bound as a default
        # argument to avoid late-binding/shadowing pitfalls in the callback.
        placeholder.replace_all_uses_with(
            cloned_input,
            delete_user_cb=lambda user, clone=cloned_input: user is not clone,
        )

    gm.graph.lint()
    gm.recompile()
    # Lazy %-formatting: graph stringification is only paid at DEBUG level
    logging.getLogger(__name__).debug(
        "Inserted auxiliary clone nodes for placeholders:\n%s", gm.graph
    )

    return gm

py/torch_tensorrt/dynamo/lowering/_replace_inplace_ops.py

Lines changed: 0 additions & 50 deletions
This file was deleted.

py/torch_tensorrt/dynamo/lowering/passes/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,14 @@
44
from torch.fx.passes.pass_manager import PassManager
55

66
from .constant_folding import constant_fold
7+
8+
# Import and order lowering passes
9+
from .remove_input_alias_fixing_clones import remove_input_alias_fixing_clones
710
from .repair_input_as_output import repair_input_as_output
811

912
ATEN_LOWERING_PASSES = PassManager.build_from_passlist(
1013
[
14+
remove_input_alias_fixing_clones,
1115
constant_fold,
1216
repair_input_as_output,
1317
]

py/torch_tensorrt/dynamo/lowering/passes/constant_folding.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22

33
import torch
44
from torch._inductor.constant_folding import ConstantFolder, replace_node_with_constant
5+
from torch_tensorrt.dynamo.lowering.passes.pass_utils import (
6+
clean_up_graph_after_modifications,
7+
)
58

69
logger = logging.getLogger(__name__)
710

@@ -30,9 +33,7 @@ def constant_fold(gm: torch.fx.GraphModule) -> torch.fx.GraphModule:
3033
for node in erased_params:
3134
gm.graph.erase_node(node)
3235

33-
gm.graph.eliminate_dead_code()
34-
gm.graph.lint()
35-
gm.recompile()
36+
gm = clean_up_graph_after_modifications(gm)
3637

3738
logger.debug(f"Graph after constant folding:\n{gm.graph}")
3839

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
import torch
2+
3+
4+
def clean_up_graph_after_modifications(
    gm: torch.fx.GraphModule,
) -> torch.fx.GraphModule:
    """Standard cleanup after rewriting an FX graph.

    Eliminates dead code, lints the graph, and regenerates the module's
    forward code. All steps mutate ``gm`` in place; the same module is
    returned so callers can chain on the result.
    """
    graph = gm.graph
    graph.eliminate_dead_code()
    graph.lint()
    gm.recompile()
    return gm
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import logging
2+
3+
import torch
4+
from torch_tensorrt.dynamo.lowering.passes.pass_utils import (
5+
clean_up_graph_after_modifications,
6+
)
7+
8+
logger = logging.getLogger(__name__)
9+
10+
11+
# TODO: Delete this lowering pass once aot_export_joint_simple is patched
12+
# TODO: Delete this lowering pass once aot_export_joint_simple is patched
def remove_input_alias_fixing_clones(gm: torch.fx.GraphModule) -> torch.fx.GraphModule:
    """Undo the auxiliary clone nodes inserted to fix input aliasing.

    A placeholder whose single user is an ``aten.clone`` call is taken to
    have been rewritten by the alias-fixing pass; that clone is spliced out
    and its consumers are reconnected directly to the placeholder.

    See: https://github.com/pytorch/pytorch/issues/108079
    """
    graph_changed = False

    for node in gm.graph.nodes:
        # Only placeholders with exactly one user are candidates
        if node.op != "placeholder" or len(node.users) != 1:
            continue

        (sole_user,) = node.users
        if sole_user.target != torch.ops.aten.clone.default:
            continue

        # The clone was added by the input alias-fixing pass; undo it by
        # reconnecting its consumers to the placeholder, then erasing it
        graph_changed = True
        sole_user.replace_all_uses_with(node)
        gm.graph.erase_node(sole_user)

    if graph_changed:
        # Cleanup (inlined): dead-code elimination, lint, recompile
        gm.graph.eliminate_dead_code()
        gm.graph.lint()
        gm.recompile()
        logging.getLogger(__name__).debug(
            "Removed auxiliary clone nodes for placeholders:\n%s", gm.graph
        )

    return gm

py/torch_tensorrt/dynamo/lowering/passes/repair_input_as_output.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
import logging
22

33
import torch
4+
from torch_tensorrt.dynamo.lowering.passes.pass_utils import (
5+
clean_up_graph_after_modifications,
6+
)
47

58
logger = logging.getLogger(__name__)
69

@@ -37,9 +40,7 @@ def repair_input_as_output(gm: torch.fx.GraphModule) -> torch.fx.GraphModule:
3740
output.replace_input_with(placeholder, cloned_placeholder)
3841

3942
if modified_graph:
40-
gm.graph.eliminate_dead_code()
41-
gm.graph.lint()
42-
gm.recompile()
43+
gm = clean_up_graph_after_modifications(gm)
4344
logger.debug(f"Graph after repair_input_as_output:\n{gm.graph}")
4445

4546
return gm

tests/py/dynamo/backend/test_specialized_models.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -280,12 +280,12 @@ def forward(self, x):
280280

281281
def test_input_modifications_mul(self):
282282
class InplaceMul(torch.nn.Module):
283-
def forward(self, x):
283+
def forward(self, x, y):
284284
x *= 5.0
285285
x *= 1.9
286-
y = x + 1
287-
y /= 1.3
288-
return y
286+
z = x + y
287+
z /= 1.3
288+
return z
289289

290290
inputs = [
291291
torch.rand(
@@ -294,6 +294,12 @@ def forward(self, x):
294294
5,
295295
7,
296296
).cuda(),
297+
torch.rand(
298+
1,
299+
3,
300+
5,
301+
7,
302+
).cuda(),
297303
]
298304

299305
fx_graph = torch.fx.symbolic_trace(InplaceMul())

tests/py/dynamo/testing_utilities.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from torch_tensorrt.dynamo.lowering import (
1111
apply_lowering_passes,
1212
get_decompositions,
13-
replace_builtin_inplace_ops,
13+
repair_input_aliasing,
1414
)
1515
from torch_tensorrt.dynamo.lowering._pre_aot_lowering import pre_aot_substitutions
1616

@@ -43,7 +43,7 @@ def fx_dynamo_testing_backend(
4343
with unittest.mock.patch.object(
4444
fake_mode, "allow_non_fake_inputs", True
4545
), fake_mode:
46-
replace_builtin_inplace_ops(gm)
46+
repair_input_aliasing(gm)
4747

4848
# Invoke AOTAutograd to translate operators to aten
4949
gm = aot_export_joint_simple(

0 commit comments

Comments
 (0)