
Commit 43d21b2

fix: Repair input aliasing with clone insertion
1 parent a4056cc commit 43d21b2

10 files changed: 97 additions and 61 deletions

py/torch_tensorrt/dynamo/backend/backends.py

Lines changed: 2 additions & 2 deletions

@@ -13,7 +13,7 @@
 from torch_tensorrt.dynamo.lowering import (
     ATEN_LOWERING_PASSES,
     get_decompositions,
-    replace_builtin_inplace_ops,
+    repair_input_aliasing,
 )
 from torch_tensorrt.dynamo.lowering._pre_aot_lowering import pre_aot_substitutions
 from torch_tensorrt.dynamo.utils import parse_dynamo_kwargs
@@ -75,7 +75,7 @@ def _pretraced_backend(
         with unittest.mock.patch.object(
             fake_mode, "allow_non_fake_inputs", True
         ), fake_mode:
-            replace_builtin_inplace_ops(gm)
+            repair_input_aliasing(gm)

             # Invoke AOTAutograd to translate operators to aten
             gm = aot_export_joint_simple(

py/torch_tensorrt/dynamo/lowering/__init__.py

Lines changed: 1 addition & 1 deletion

@@ -2,6 +2,6 @@
 from ._fusers import *  # noqa: F401
 from ._pre_aot_lowering import SUBSTITUTION_REGISTRY  # noqa: F401
 from ._pre_aot_lowering import register_substitution  # noqa: F401
-from ._replace_inplace_ops import replace_builtin_inplace_ops
+from ._repair_input_aliasing import repair_input_aliasing
 from .passes import ATEN_LOWERING_PASSES
 from .substitutions import *  # noqa: F401
py/torch_tensorrt/dynamo/lowering/_repair_input_aliasing.py

Lines changed: 35 additions & 0 deletions

@@ -0,0 +1,35 @@
+import logging
+
+import torch
+
+logger = logging.getLogger(__name__)
+
+
+def repair_input_aliasing(gm: torch.fx.GraphModule) -> torch.fx.GraphModule:
+    """Inserts clone operators temporarily ahead of every placeholder
+
+    See: https://github.com/pytorch/pytorch/issues/108079
+    Undone by `remove_input_alias_fixing_clones` after tracing
+    """
+    for node in gm.graph.nodes:
+        if node.op == "placeholder":
+            # Insert clone for placeholder node to avoid
+            # input aliasing or mutation
+            with gm.graph.inserting_after(node):
+                cloned_input = gm.graph.call_function(
+                    torch.ops.aten.clone.default,
+                    args=(node,),
+                )
+
+            # Replace all uses of the placeholder except the cloned node
+            # with the cloned placeholder
+            node.replace_all_uses_with(
+                cloned_input,
+                delete_user_cb=lambda node: node != cloned_input,
+            )
+
+    gm.graph.lint()
+    gm.recompile()
+    logger.debug(f"Inserted auxiliary clone nodes for placeholders:\n{gm.graph}")
+
+    return gm
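
For context, a minimal sketch (not part of this commit) of what this pass does to a toy FX graph; the ToyAdd module and the graph comments are illustrative only, and exact node names may differ:

import torch
from torch_tensorrt.dynamo.lowering import repair_input_aliasing


class ToyAdd(torch.nn.Module):
    def forward(self, x, y):
        return x + y


gm = torch.fx.symbolic_trace(ToyAdd())
# Before: the add consumes the placeholders directly
#   %x, %y (placeholders) -> %add

repair_input_aliasing(gm)
# After: each placeholder feeds an aten.clone.default node and the add
# consumes the clones, so downstream tracing never aliases the raw inputs
#   %x -> %clone, %y -> %clone_1 -> %add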

py/torch_tensorrt/dynamo/lowering/_replace_inplace_ops.py

Lines changed: 0 additions & 50 deletions
This file was deleted.

py/torch_tensorrt/dynamo/lowering/passes/__init__.py

Lines changed: 5 additions & 0 deletions

@@ -1,10 +1,15 @@
 from torch.fx.passes.pass_manager import PassManager

 from .constant_folding import constant_fold
+from .pass_utils import clean_up_graph_after_modifications
+
+# Import and order lowering passes
+from .remove_input_alias_fixing_clones import remove_input_alias_fixing_clones
 from .repair_input_as_output import repair_input_as_output

 ATEN_LOWERING_PASSES = PassManager.build_from_passlist(
     [
+        remove_input_alias_fixing_clones,
         constant_fold,
         repair_input_as_output,
     ]
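
For reference, a brief sketch (not from this commit) of how the assembled pass list is applied: ATEN_LOWERING_PASSES is a torch.fx PassManager, so calling it on an aten-level GraphModule runs the passes in the listed order, with the clone-removal pass first so later passes see a clone-free graph.

from torch_tensorrt.dynamo.lowering import ATEN_LOWERING_PASSES

# gm: a torch.fx.GraphModule produced by AOT export (assumed to exist here)
gm = ATEN_LOWERING_PASSES(gm)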

py/torch_tensorrt/dynamo/lowering/passes/constant_folding.py

Lines changed: 2 additions & 3 deletions

@@ -2,6 +2,7 @@

 import torch
 from torch._inductor.freezing import ConstantFolder, replace_node_with_constant
+from torch_tensorrt.dynamo.lowering.passes import clean_up_graph_after_modifications

 logger = logging.getLogger(__name__)

@@ -30,9 +31,7 @@ def constant_fold(gm: torch.fx.GraphModule) -> torch.fx.GraphModule:
     for node in erased_params:
         gm.graph.erase_node(node)

-    gm.graph.eliminate_dead_code()
-    gm.graph.lint()
-    gm.recompile()
+    gm = clean_up_graph_after_modifications(gm)

     logger.debug(f"Graph after constant folding:\n{gm.graph}")

py/torch_tensorrt/dynamo/lowering/passes/pass_utils.py

Lines changed: 11 additions & 0 deletions

@@ -0,0 +1,11 @@
+import torch
+
+
+def clean_up_graph_after_modifications(
+    gm: torch.fx.GraphModule,
+) -> torch.fx.GraphModule:
+    """Runs dead-code elimination, linting, and recompilation for graph, in-place"""
+    gm.graph.eliminate_dead_code()
+    gm.graph.lint()
+    gm.recompile()
+    return gm
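
A short sketch (not part of the commit) of the intended usage pattern: a graph-mutating pass performs its edits, then delegates cleanup to the shared helper. The remove_detach pass below is hypothetical and only illustrates the pattern:

import torch
from torch_tensorrt.dynamo.lowering.passes import clean_up_graph_after_modifications


def remove_detach(gm: torch.fx.GraphModule) -> torch.fx.GraphModule:
    """Hypothetical pass: reroute aten.detach nodes to their inputs"""
    detach_nodes = [
        node
        for node in gm.graph.nodes
        if node.op == "call_function" and node.target == torch.ops.aten.detach.default
    ]
    for node in detach_nodes:
        node.replace_all_uses_with(node.args[0])
        gm.graph.erase_node(node)

    # One call handles dead-code elimination, lint, and recompile
    return clean_up_graph_after_modifications(gm)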
py/torch_tensorrt/dynamo/lowering/passes/remove_input_alias_fixing_clones.py

Lines changed: 37 additions & 0 deletions

@@ -0,0 +1,37 @@
+import logging
+
+import torch
+from torch_tensorrt.dynamo.lowering.passes import clean_up_graph_after_modifications
+
+logger = logging.getLogger(__name__)
+
+
+# TODO: Delete this lowering pass once aot_export_joint_simple is patched
+def remove_input_alias_fixing_clones(gm: torch.fx.GraphModule) -> torch.fx.GraphModule:
+    """Remove the auxiliary clone nodes inserted to fix input aliasing
+
+    See: https://github.com/pytorch/pytorch/issues/108079
+    """
+    modified_graph = False
+
+    for node in gm.graph.nodes:
+        # If the node is a placeholder and its only user is a clone node,
+        # it was modified by the input alias-fixing pass, and the change
+        # needs to be undone
+        if (
+            node.op == "placeholder"
+            and len(node.users) == 1
+            and list(node.users)[0].target == torch.ops.aten.clone.default
+        ):
+            modified_graph = True
+
+            # Replace all uses of the clone with the placeholder, then delete the clone
+            clone_node = list(node.users)[0]
+            clone_node.replace_all_uses_with(node)
+            gm.graph.erase_node(clone_node)
+
+    if modified_graph:
+        gm = clean_up_graph_after_modifications(gm)
+        logger.debug(f"Removed auxiliary clone nodes for placeholders:\n{gm.graph}")
+
+    return gm
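
Taken together with repair_input_aliasing, the two passes are meant to round-trip: clones are added before aot_export_joint_simple traces the graph and stripped out again by this pass. A minimal sketch (not from the commit; ToyMul is illustrative only):

import torch
from torch_tensorrt.dynamo.lowering import repair_input_aliasing
from torch_tensorrt.dynamo.lowering.passes import remove_input_alias_fixing_clones


class ToyMul(torch.nn.Module):
    def forward(self, x):
        return x * 2


gm = torch.fx.symbolic_trace(ToyMul())

repair_input_aliasing(gm)                  # placeholder -> aten.clone -> mul
gm = remove_input_alias_fixing_clones(gm)  # clone removed again: placeholder -> mul

# No clone nodes should remain after the round trip
assert all(
    node.target != torch.ops.aten.clone.default for node in gm.graph.nodes
)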

py/torch_tensorrt/dynamo/lowering/passes/repair_input_as_output.py

Lines changed: 2 additions & 3 deletions

@@ -1,6 +1,7 @@
 import logging

 import torch
+from torch_tensorrt.dynamo.lowering.passes import clean_up_graph_after_modifications

 logger = logging.getLogger(__name__)

@@ -37,9 +38,7 @@ def repair_input_as_output(gm: torch.fx.GraphModule) -> torch.fx.GraphModule:
             output.replace_input_with(placeholder, cloned_placeholder)

     if modified_graph:
-        gm.graph.eliminate_dead_code()
-        gm.graph.lint()
-        gm.recompile()
+        gm = clean_up_graph_after_modifications(gm)
         logger.debug(f"Graph after repair_input_as_output:\n{gm.graph}")

     return gm

tests/py/dynamo/testing_utilities.py

Lines changed: 2 additions & 2 deletions

@@ -10,7 +10,7 @@
 from torch_tensorrt.dynamo.lowering import (
     ATEN_LOWERING_PASSES,
     get_decompositions,
-    replace_builtin_inplace_ops,
+    repair_input_aliasing,
 )
 from torch_tensorrt.dynamo.lowering._pre_aot_lowering import pre_aot_substitutions

@@ -43,7 +43,7 @@ def fx_dynamo_testing_backend(
     with unittest.mock.patch.object(
         fake_mode, "allow_non_fake_inputs", True
     ), fake_mode:
-        replace_builtin_inplace_ops(gm)
+        repair_input_aliasing(gm)

         # Invoke AOTAutograd to translate operators to aten
         gm = aot_export_joint_simple(
