Inject Inplace Copies into Graph for Mutable Buffers (#1995)

JacobSzwejbka · facebook-github-bot · commit a704dd649ad8 · 2024-02-20T15:49:52.000-08:00
Summary: Pull Request resolved: #1995 Injects copy nodes into the graph Reviewed By: larryliu0820 Differential Revision: D53713415 fbshipit-source-id: 78381f9df5356a50c126ad8eee7955e2d8e0be10
diff --git a/exir/passes/TARGETS b/exir/passes/TARGETS
@@ -10,6 +10,7 @@ python_library(
     deps = [
         ":const_prop_pass",
         ":debug_handle_generator_pass",
+        ":insert_write_back_for_buffers_pass",
         ":memory_format_ops_pass",
         ":memory_planning_pass",
         ":normalize_transpose_pass",
@@ -51,6 +52,16 @@ python_library(
     ],
 )
 
+python_library(
+    name = "insert_write_back_for_buffers_pass",  # library target for insert_write_back_for_buffers_pass.py
+    srcs = [
+        "insert_write_back_for_buffers_pass.py",
+    ],
+    deps = [
+        "//caffe2:torch",
+    ],
+)
+
 python_library(
     name = "const_prop_pass",
     srcs = [
diff --git a/exir/passes/__init__.py b/exir/passes/__init__.py
@@ -36,6 +36,9 @@
 from executorch.exir.passes.debug_handle_generator_pass import DebugHandleGeneratorPass
 
 from executorch.exir.passes.executorch_prim_ops_registry import _EXECUTORCH_SYM_OPS
+from executorch.exir.passes.insert_write_back_for_buffers_pass import (
+    insert_write_back_for_buffers_pass,
+)
 from executorch.exir.passes.memory_format_ops_pass import MemoryFormatOpsPass
 from executorch.exir.passes.memory_planning_pass import MemoryPlanningPass
 from executorch.exir.passes.normalize_transpose_pass import NormalizeTransposePass
@@ -65,6 +68,7 @@
     "MemoryFormatOpsPass",
     "MemoryPlanningPass",
     "HintBasedSymShapeEvalPass",
+    "insert_write_back_for_buffers_pass",
 ]
 
 Argument = Optional[
diff --git a/exir/passes/insert_write_back_for_buffers_pass.py b/exir/passes/insert_write_back_for_buffers_pass.py
@@ -0,0 +1,122 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Dict, List, Optional
+
+import torch
+
+from torch.export.exported_program import (
+    ExportedProgram,
+    ExportGraphSignature,
+    InputKind,
+    OutputKind,
+    OutputSpec,
+)
+from torch.utils import _pytree as pytree
+
+
+def _insert_copy(
+    gm: torch.fx.GraphModule,
+    mutated_outputs: List[Optional[str]],
+    input_name_to_node: Dict[str, torch.fx.Node],
+):
+    """
+    Insert an aten.copy_ before the output node for every graph output that has a name in mutated_outputs (None marks a user output),
+    writing it back into the matching node of input_name_to_node, then rebuild the output as (copy_ results..., user outputs...).
+    """
+    output_node = None
+    for node in gm.graph.nodes:  # locate the graph's output node
+        if node.op == "output":
+            output_node = node
+            break
+    assert output_node is not None
+    outputs = pytree.tree_flatten(output_node.args)[0]  # flat list of the returned values
+    assert len(outputs) == len(mutated_outputs)  # one mutated_outputs entry per flattened output
+
+    user_output_nodes = []
+    buffer_output_nodes = []
+    for return_node, mutated_node_name in zip(outputs, mutated_outputs):
+        # No mutation target: this is a user output, passed through unchanged
+        if mutated_node_name is None:
+            user_output_nodes.append(return_node)
+            continue
+
+        # Mutated input: look up the placeholder node that will receive the write-back
+        if mutated_node_name in input_name_to_node:
+            mutated_node = input_name_to_node[mutated_node_name]
+        else:
+            raise RuntimeError(
+                f"Could not find {mutated_node_name} in either buffer or input nodes"
+            )
+
+        # Insert copy_ with args (mutated placeholder, returned value) just before the output node
+        with gm.graph.inserting_before(output_node):
+            buffer_output = gm.graph.call_function(
+                torch.ops.aten.copy_.default, (mutated_node, return_node)
+            )
+            # The copy_ result takes the place of the returned value in the new outputs
+            buffer_output_nodes.append(buffer_output)
+
+    with gm.graph.inserting_before(output_node):
+        buffer_output_nodes.extend(user_output_nodes)  # new order: all copy_ results first, then user outputs
+        # Emit the new output node, then erase the old one
+        new_output = gm.graph.output(tuple(buffer_output_nodes))
+        output_node.replace_all_uses_with(new_output)
+        gm.graph.erase_node(output_node)
+    return buffer_output_nodes  # note: also contains the user outputs appended above
+
+
+def insert_write_back_for_buffers_pass(ep: ExportedProgram):  # Adds copy_ write-backs for BUFFER_MUTATION outputs; edits ep.graph_module in place, returns (gm, signature)
+    gm: torch.fx.GraphModule = ep.graph_module
+    lifted_inputs: List[Optional[str]] = [  # per input spec: its target for the kinds listed below, None for any other kind
+        in_spec.target
+        if in_spec.kind
+        in (
+            InputKind.BUFFER,
+            InputKind.CONSTANT_TENSOR,
+            InputKind.PARAMETER,
+            InputKind.CUSTOM_OBJ,
+        )
+        else None
+        for in_spec in ep.graph_signature.input_specs
+    ]
+
+    # Per output spec: the spec's target for BUFFER_MUTATION outputs, None for every other kind
+    mutated_outputs: List[Optional[str]] = [
+        out_spec.target if out_spec.kind in (OutputKind.BUFFER_MUTATION,) else None
+        for out_spec in ep.graph_signature.output_specs
+    ]
+
+    input_name_to_node: Dict[str, torch.fx.Node] = {}
+
+    placeholder_nodes = [node for node in gm.graph.nodes if node.op == "placeholder"]
+    assert len(lifted_inputs) == len(placeholder_nodes)  # input specs and placeholders are paired positionally below
+    # Map each non-None lifted input target to its placeholder node
+    for input_node, lifted_node in zip(placeholder_nodes, lifted_inputs):
+        if lifted_node is not None:
+            input_name_to_node[lifted_node] = input_node
+
+    # Insert the copy_ ops and rebuild the graph outputs, then validate and recompile the module
+    buffer_output_nodes = _insert_copy(gm, mutated_outputs, input_name_to_node)
+    gm.graph.lint()
+    gm.graph.eliminate_dead_code()
+    gm.recompile()
+
+    # Point each BUFFER_MUTATION spec at its copy_ node; this edits the existing OutputSpec objects in place
+    new_output_specs: List[OutputSpec] = []
+    i = 0
+    for output_spec in ep.graph_signature.output_specs:
+        if output_spec.kind == OutputKind.BUFFER_MUTATION:
+            output_spec.arg.name = buffer_output_nodes[i].name  # i-th BUFFER_MUTATION spec <-> i-th copy_ node
+            i += 1
+        new_output_specs.append(output_spec)
+
+    signature = ExportGraphSignature(
+        input_specs=ep.graph_signature.input_specs,
+        output_specs=new_output_specs,
+    )
+
+    return gm, signature
diff --git a/exir/program/TARGETS b/exir/program/TARGETS
@@ -30,6 +30,7 @@ python_library(
         "//executorch/exir/capture:config",
         "//executorch/exir/emit:emit",
         "//executorch/exir/emit:lib",
+        "//executorch/exir/passes:insert_write_back_for_buffers_pass",
         "//executorch/exir/passes:lib",
         "//executorch/exir/passes:remove_graph_asserts_pass",
         "//executorch/exir/passes:remove_mixed_type_operators",
diff --git a/exir/program/_program.py b/exir/program/_program.py
@@ -26,6 +26,9 @@
     MemoryFormatOpsPass,
     OpReplacePass,
 )
+from executorch.exir.passes.insert_write_back_for_buffers_pass import (
+    insert_write_back_for_buffers_pass,
+)
 from executorch.exir.passes.remove_graph_asserts_pass import RemoveGraphAssertsPass
 from executorch.exir.passes.remove_mixed_type_operators import RemoveMixedTypeOperators
 from executorch.exir.passes.spec_prop_pass import SpecPropPass
@@ -45,6 +48,7 @@
     ExportGraphSignature,
     InputKind,
     InputSpec,
+    OutputKind,
     OutputSpec,
     TensorArgument,
 )
@@ -1034,6 +1038,7 @@ def to_executorch(
 
         execution_programs: Dict[str, ExportedProgram] = {}
         for name, program in self._edge_programs.items():
+            gm, _ = insert_write_back_for_buffers_pass(program)
             new_gm = program.graph_module
             for p in edge_to_executorch_passes(config):
                 new_gm_res = p(new_gm)
diff --git a/exir/tests/TARGETS b/exir/tests/TARGETS
@@ -213,6 +213,7 @@ python_unittest(
         "//executorch/exir/emit:lib",
         "//executorch/exir/passes:constant_prop_pass",
         "//executorch/exir/passes:debug_handle_generator_pass",
+        "//executorch/exir/passes:insert_write_back_for_buffers_pass",
         "//executorch/exir/passes:lib",
         "//executorch/exir/passes:remove_graph_asserts_pass",
         "//executorch/exir/passes:remove_mixed_type_operators",
diff --git a/exir/tests/test_passes.py b/exir/tests/test_passes.py
@@ -33,6 +33,9 @@
 )
 from executorch.exir.passes.constant_prop_pass import constant_prop_pass
 from executorch.exir.passes.debug_handle_generator_pass import DebugHandleGeneratorPass
+from executorch.exir.passes.insert_write_back_for_buffers_pass import (
+    insert_write_back_for_buffers_pass,
+)
 from executorch.exir.passes.remove_graph_asserts_pass import RemoveGraphAssertsPass
 from executorch.exir.passes.remove_mixed_type_operators import RemoveMixedTypeOperators
 from executorch.exir.passes.replace_edge_with_backend_pass import EdgeToBackendOpsPass
@@ -1195,3 +1198,49 @@ def forward(self, pred, x):
             error_msg,
         ):
             _ = constant_prop_pass(edge.exported_program())
+
+    def test_mutable_buffers(self) -> None:
+        def count_copies(gm: torch.fx.GraphModule) -> int:  # number of aten.copy_.default nodes in the graph
+            return sum(
+                (node.target == torch.ops.aten.copy_.default) for node in gm.graph.nodes
+            )
+
+        class MutableStateModule(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.register_buffer("state", torch.zeros(1))
+
+            def forward(self, x):
+                y = x + self.state
+                self.state.add_(1)
+                return y
+
+        model = to_edge(
+            export(
+                MutableStateModule(),
+                (torch.zeros(1),),
+            )
+        )
+        self.assertEqual(count_copies(model.exported_program().graph_module), 0)  # no copy_ before the pass runs
+        # Before the pass: the updated value (aten_add_tensor_1) is only returned as an output; nothing writes it back
+        # graph():
+        #     %arg0_1 : [num_users=2] = placeholder[target=arg0_1]
+        #     %_lifted_tensor_constant1 : [num_users=1] = placeholder[target=_lifted_tensor_constant1]
+        #     %arg1_1 : [num_users=1] = placeholder[target=arg1_1]
+        #     %aten_add_tensor : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.add.Tensor](args = (%arg1_1, %arg0_1), kwargs = {})
+        #     %aten__to_copy_default : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten._to_copy.default](args = (%_lifted_tensor_constant1,), kwargs = {dtype: torch.float32})
+        #     %aten_add_tensor_1 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.add.Tensor](args = (%arg0_1, %aten__to_copy_default), kwargs = {})
+        #     return (aten_add_tensor_1, aten_add_tensor)
+        gm, _ = insert_write_back_for_buffers_pass(model.exported_program())
+
+        # After the pass: copy__default writes aten_add_tensor_1 into arg0_1 and replaces it in the returned outputs
+        # graph():
+        #     %arg0_1 : [num_users=3] = placeholder[target=arg0_1]
+        #     %_lifted_tensor_constant1 : [num_users=1] = placeholder[target=_lifted_tensor_constant1]
+        #     %arg1_1 : [num_users=1] = placeholder[target=arg1_1]
+        #     %aten_add_tensor : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.add.Tensor](args = (%arg1_1, %arg0_1), kwargs = {})
+        #     %aten__to_copy_default : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten._to_copy.default](args = (%_lifted_tensor_constant1,), kwargs = {dtype: torch.float32})
+        #     %aten_add_tensor_1 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.add.Tensor](args = (%arg0_1, %aten__to_copy_default), kwargs = {})
+        #     %copy__default : [num_users=1] = call_function[target=torch.ops.aten.copy_.default](args = (%arg0_1, %aten_add_tensor_1), kwargs = {})
+        #     return (copy__default, aten_add_tensor)
+        self.assertEqual(count_copies(gm), 1)  # exactly one write-back for the single mutated buffer