add duplicate constant node pass

cccclai · facebook-github-bot · commit abeff359da23 · 2024-03-21T13:34:10.000-07:00
Summary: This diff is a follow-up to #2424. In a case like ``` constant_0 (tag_10) ----> op_b (tag_10) |-------------> op_a (tag_11) ``` `op_b` and `op_a` are in two different delegated payloads, and `constant_0` has two options. In this diff, we make copying the default behavior, meaning the graph becomes ``` constant_0 (tag_10)------------------> op_b (tag_10) constant_0_copy (tag_11) -------------> op_a (tag_11) ``` The backend can tag the node with `no_copy` to alert users instead, for example when the constant is too large to duplicate. In that case, a better approach can be ``` constant_0 (tag_10) ----> op_b (tag_10) |-----(output constant_0) --------> op_a (tag_11) ``` Differential Revision: D55113232
diff --git a/exir/backend/TARGETS b/exir/backend/TARGETS
@@ -26,6 +26,7 @@ runtime.python_library(
         ":compile_spec_schema",
         "//caffe2:torch",
         "//executorch/exir/backend:utils",
+        "//executorch/exir/backend/canonical_partitioners:duplicate_constant_node_pass",
     ],
 )
 
diff --git a/exir/backend/backend_api.py b/exir/backend/backend_api.py
@@ -16,7 +16,10 @@
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 
 from executorch.exir.backend.partitioner import Partitioner, PartitionResult
-from executorch.exir.backend.utils import is_identical_graph
+from executorch.exir.backend.utils import (
+    _maybe_duplicate_constant_nodes,
+    is_identical_graph,
+)
 
 from executorch.exir.delegate import executorch_call_delegate, get_lowered_module_name
 
@@ -160,6 +163,7 @@ def _get_node_list_with_same_tag(
     Return a list of nodes with the same tag.
     """
     node_list = []
+
     for node in tagged_graph_module.graph.nodes:
         if node.meta.get("delegation_tag", "") == tag:
             if node.op == "output":
@@ -371,6 +375,10 @@ def to_backend(
     ), f"Partitioner {partitioner_instance} needs a `partition_tags` field containing a mapping of tags to delegate spec"
 
     update_to_real_program(tagged_exported_program, edge_program)
+
+    for tag, _ in partitioner_result.partition_tags.items():
+        _maybe_duplicate_constant_nodes(tagged_exported_program, tag, edge_program)
+
     tagged_graph_module = _partition_and_lower(
         tagged_exported_program.graph_module, partitioner_result, edge_program
     )
diff --git a/exir/backend/canonical_partitioners/TARGETS b/exir/backend/canonical_partitioners/TARGETS
@@ -19,3 +19,20 @@ runtime.python_library(
         "//executorch/exir/backend:partitioner",
     ],
 )
+
+runtime.python_library(
+    name = "duplicate_constant_node_pass",  # library target for duplicate_constant_node_pass.py
+    srcs = [
+        "duplicate_constant_node_pass.py",
+    ],
+    visibility = [
+        "//executorch/...",
+        "//executorch/exir/backend/...",  # NOTE(review): looks already covered by "//executorch/..." — confirm
+        "//executorch/test/...",  # NOTE(review): looks already covered by "//executorch/..." — confirm
+        "@EXECUTORCH_CLIENTS",
+    ],
+    deps = [
+        "//caffe2:torch",
+        "//executorch/exir/backend:partitioner",  # NOTE(review): the source file imports only typing and torch.export — confirm this dep is needed
+    ],
+)
diff --git a/exir/backend/canonical_partitioners/duplicate_constant_node_pass.py b/exir/backend/canonical_partitioners/duplicate_constant_node_pass.py
@@ -0,0 +1,83 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Set
+
+from torch.export import ExportedProgram
+from torch.export.exported_program import InputSpec, TensorArgument
+
+
+def duplicate_constant_node(
+    exported_program: ExportedProgram, candidate_node: str
+) -> Set[str]:
+    """
+    Give every user of a shared constant placeholder its own placeholder.
+
+    The first user keeps `candidate_node`; each other user is rewired to a new
+    placeholder named `<candidate_node>_copy_<i>` that carries the same meta and
+    an input spec with the same kind, target and persistence as the original.
+    The signature's `input_specs` are rebuilt in placeholder order and the graph
+    module is recompiled before returning.
+
+    Args:
+        exported_program: program whose graph and signature are edited in place.
+        candidate_node: name of the placeholder node to duplicate.
+
+    Returns:
+        Names of the placeholders that were created; empty if `candidate_node`
+        does not name a placeholder in the graph.
+    """
+    to_be_copied = [
+        node
+        for node in exported_program.graph.nodes
+        if node.op == "placeholder" and node.name == candidate_node
+    ]
+    copied_nodes = set()
+    if not to_be_copied:
+        # Nothing to duplicate; indexing the empty list would raise IndexError.
+        return copied_nodes
+    constant_tensor = to_be_copied[0]
+    graph = exported_program.graph
+    state_dict = exported_program.state_dict
+    old_signature = exported_program.graph_signature
+    new_input_specs = []
+    for idx, node in enumerate(graph.nodes):
+        if node.op != "placeholder":
+            continue
+        # Specs are looked up by position, so the rebuilt list must follow the
+        # placeholder order; copies are inserted before `node` and not visited.
+        spec = old_signature.input_specs[idx]
+        if node == constant_tensor:
+            # The first user keeps the original node; the rest get a copy each.
+            for ith, user in enumerate(list(node.users)[1:]):
+                copy_name = f"{node.name}_copy_{ith}"
+                with graph.inserting_before(constant_tensor):
+                    copy_node = graph.placeholder(copy_name)
+                copied_nodes.add(copy_name)
+                copy_node.meta.update(constant_tensor.meta)
+                # Rewrites args and kwargs, including nested containers.
+                user.replace_input_with(constant_tensor, copy_node)
+                # Store the underlying tensor in state_dict, never the fx node.
+                if spec.target in state_dict:
+                    state_dict[copy_name] = state_dict[spec.target]
+                # Each copy sits just before the original in the graph, so its
+                # spec is emitted before the original's spec as well.
+                new_input_specs.append(
+                    InputSpec(
+                        kind=spec.kind,
+                        arg=TensorArgument(name=copy_name),
+                        target=spec.target,
+                        persistent=spec.persistent,
+                    )
+                )
+        new_input_specs.append(
+            InputSpec(spec.kind, spec.arg, spec.target, persistent=spec.persistent)
+        )
+
+    exported_program.graph_signature.input_specs = new_input_specs
+    exported_program.graph_module.recompile()
+
+    return copied_nodes
diff --git a/exir/backend/test/TARGETS b/exir/backend/test/TARGETS
@@ -67,6 +67,7 @@ python_library(
         "//caffe2:torch",
         "//executorch/exir:lib",
         "//executorch/exir/backend:partitioner",
+        "//executorch/exir/backend:utils",
         "//executorch/exir/backend/canonical_partitioners:canonical_partitioner_lib",
     ],
 )
@@ -283,6 +284,9 @@ python_unittest(
     srcs = [
         "test_partitioner.py",
     ],
+    preload_deps = [
+        "//executorch/exir/backend/test/demos/rpc:executor_backend_register",
+    ],
     deps = [
         "//caffe2:torch",
         "//executorch/exir:lib",
@@ -295,6 +299,19 @@ python_unittest(
         "//executorch/exir/dialects:lib",
         "//executorch/exir/tests:models",
         "//executorch/extension/pybindings:portable_lib",  # @manual
+        "//executorch/extension/pytree:pylib",
         "//executorch/runtime/executor/test:test_backend_compiler_lib",
     ],
 )
+
+python_unittest(
+    name = "test_passes",  # unit test target for test_passes.py
+    srcs = [
+        "test_passes.py",
+    ],
+    deps = [
+        "//caffe2:torch",
+        "//executorch/exir:lib",
+        "//executorch/exir/backend/canonical_partitioners:duplicate_constant_node_pass",
+    ],
+)
diff --git a/exir/backend/test/demos/rpc/targets.bzl b/exir/backend/test/demos/rpc/targets.bzl
@@ -39,6 +39,9 @@ def define_common_targets():
         srcs = [
             "ExecutorBackendRegister.cpp",
         ],
+        visibility = [
+            "//executorch/exir/backend/test/...",
+        ],
         deps = [
             ":executor_backend",
             "//executorch/runtime/backend:interface",
diff --git a/exir/backend/test/test_partitioner.py b/exir/backend/test/test_partitioner.py
@@ -31,6 +31,10 @@
 from executorch.exir.dialects._ops import ops as exir_ops
 
 from executorch.exir.tests.models import MLP
+from executorch.extension.pybindings.portable_lib import (  # @manual=//executorch/extension/pybindings:portable_lib
+    _load_for_executorch_from_buffer,
+)
+from executorch.extension.pytree import tree_flatten
 from torch._export import capture_pre_autograd_graph
 from torch._export.utils import is_buffer, is_param
 from torch.export import export
@@ -446,6 +450,66 @@ def partition(
                     partition_tags=partition_tags,
                 )
 
+        inputs = (torch.ones(2, 2),)
+        model = capture_pre_autograd_graph(ReuseConstData(), (torch.ones(2, 2),))
+        edge = exir.to_edge(export(model, (torch.ones(2, 2),)))
+        exec_prog = edge.to_backend(PartitionerTagData()).to_executorch()
+        executorch_module = _load_for_executorch_from_buffer(exec_prog.buffer)
+        inputs_flattened, _ = tree_flatten(inputs)
+
+        # Send the input from server executor to client executor, and receive the result from client executor
+        _ = executorch_module.run_method("forward", inputs)
+
+    def test_partitioner_alert_split_constant_data(self):
+        """
+        We test that we throw an error when constant data users are split
+        between different delegated payloads or owning program.
+        """
+
+        class ReuseConstData(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.const = torch.ones(2, 2)
+
+            def forward(self, x):
+                y = x + self.const
+                z = x - self.const
+                return y, z
+
+        class PartitionerTagData(Partitioner):
+            def __init__(self):
+                super().__init__()
+                self.delegation_spec = DelegationSpec(
+                    ExecutorBackend.__name__,
+                    [CompileSpec(key, value) for key, value in self.spec.items()],
+                )
+
+            def partition(
+                self, edge_exported_program: ExportedProgram
+            ) -> PartitionResult:
+                partition_tags = {}
+                for node in edge_exported_program.graph.nodes:
+                    if node.op == "call_function" and node.target in [
+                        exir_ops.edge.aten.add.Tensor
+                    ]:
+                        delegation_tag = "tag0"
+                        node.meta["delegation_tag"] = delegation_tag
+                        partition_tags[delegation_tag] = self.delegation_spec
+
+                    if node.op == "placeholder" and (
+                        is_param(edge_exported_program, node)
+                        or is_buffer(edge_exported_program, node)
+                    ):
+                        delegation_tag = "tag0"
+                        node.meta["delegation_tag"] = delegation_tag
+                        node.meta["no_copy"] = True
+                        partition_tags[delegation_tag] = self.delegation_spec
+
+                return PartitionResult(
+                    tagged_exported_program=edge_exported_program,
+                    partition_tags=partition_tags,
+                )
+
         model = capture_pre_autograd_graph(ReuseConstData(), (torch.ones(2, 2),))
         edge = exir.to_edge(export(model, (torch.ones(2, 2),)))
         with self.assertRaises(RuntimeError) as error:
diff --git a/exir/backend/test/test_passes.py b/exir/backend/test/test_passes.py
@@ -0,0 +1,41 @@
+import unittest
+
+import torch
+from executorch import exir
+from executorch.exir.backend.canonical_partitioners.duplicate_constant_node_pass import (
+    duplicate_constant_node,
+)
+from torch._export import capture_pre_autograd_graph
+from torch._export.utils import is_buffer
+from torch.testing import FileCheck
+
+
+class TestPaases(unittest.TestCase):
+    def test_duplicate_constant_node_pass(self):
+        """Duplicating a buffer that feeds two ops yields exactly one copy."""
+        class ReuseConstData(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.register_buffer("const", torch.ones(2, 2))
+
+            def forward(self, x):
+                y = x + self.const
+                z = x - self.const
+                return y, z
+
+        model = capture_pre_autograd_graph(ReuseConstData(), (torch.ones(2, 2),))
+        edge = exir.to_edge(torch.export.export(model, (torch.ones(2, 2),)))
+        # Names of the placeholders that `is_buffer` reports as buffers.
+        const_nodes = [
+            node.name
+            for node in edge.exported_program().graph.nodes
+            if node.op == "placeholder" and is_buffer(edge.exported_program(), node)
+        ]
+        # `const` is used by both the add and the sub, so one user gets a copy.
+        copied_nodes = duplicate_constant_node(edge.exported_program(), const_nodes[0])
+        self.assertEqual(len(copied_nodes), 1)
+        # Check that the new constant node appears in the regenerated code;
+        # assumes the buffer placeholder is named `arg0_1` — TODO confirm.
+        FileCheck().check("arg0_1_copy_0").run(
+            edge.exported_program().graph_module.code
+        )
diff --git a/exir/backend/utils.py b/exir/backend/utils.py
@@ -12,6 +12,9 @@
 
 import torch
 from executorch.exir.backend.backend_details import ExportedProgram
+from executorch.exir.backend.canonical_partitioners.duplicate_constant_node_pass import (
+    duplicate_constant_node,
+)
 from executorch.exir.common import setting_python_recursive_limit
 from executorch.exir.delegate import executorch_call_delegate
 from executorch.exir.dialects._ops import ops as exir_ops
@@ -174,6 +177,72 @@ def replace_quantized_partition_with_op(
     return (replaced_op, dequant_nodes, quant_nodes)
 
 
+def _assign_new_tag(
+    tagged_exported_program: ExportedProgram,
+    copied_nodes: Set[str],
+):
+    """
+    Re-tag each copied constant placeholder with the tag of its users.
+
+    Before the pass
+    constant_0 (tag_10) ------------------> op_b (tag_10)
+    constant_0_copy (tag_10) -------------> op_a (tag_11)
+
+    After the pass
+    constant_0 (tag_10) ------------------> op_b (tag_10)
+    constant_0_copy (tag_11) -------------> op_a (tag_11)
+
+    A copy is re-tagged only when its users share one tag (untagged is None).
+    """
+    for placeholder in tagged_exported_program.graph.nodes:
+        if placeholder.op != "placeholder" or placeholder.name not in copied_nodes:
+            continue
+        distinct_tags = {
+            consumer.meta.get("delegation_tag", None) for consumer in placeholder.users
+        }
+        if len(distinct_tags) == 1:
+            placeholder.meta["delegation_tag"] = next(iter(distinct_tags))
+
+
+def _maybe_duplicate_constant_nodes(
+    tagged_exported_program: ExportedProgram,
+    tag: str,
+    owning_program: ExportedProgram,
+) -> None:
+    """
+    If a constant node tagged with `tag` is shared with differently tagged users,
+    constant_0 ----> op_b (tag_10)
+    |-------------> op_a (tag_11)
+
+    it is duplicated by default (constant_0_copy_0, ...) unless tagged "no_copy":
+    constant_0 ------------------> op_b (tag_10)
+    constant_0_copy_0 -----------> op_a (tag_11)
+
+    A backend can tag the node "no_copy" to get a RuntimeError instead of a copy.
+    """
+    candidate_nodes = set()
+    for node in tagged_exported_program.graph.nodes:
+        if node.op != "placeholder" or node.meta.get("delegation_tag", "") != tag:
+            continue
+        for user in node.users:
+            users_tag = user.meta.get("delegation_tag", None)
+            if users_tag == tag:
+                continue
+            # A "no_copy" node must not be duplicated, so a split user is an error.
+            if node.meta.get("no_copy", False):
+                raise RuntimeError(
+                    f"constant data node ({node}) is tagged with ({tag}) but has user ({user}) which has tag ({users_tag})"
+                )
+            candidate_nodes.add(node.name)
+    copied_nodes = set()
+    for candidate_node in candidate_nodes:
+        # Duplicate in both programs; accumulate copies so every one is re-tagged.
+        copied_nodes |= duplicate_constant_node(tagged_exported_program, candidate_node)
+        duplicate_constant_node(owning_program, candidate_node)
+
+    _assign_new_tag(tagged_exported_program, copied_nodes)
+
+
 def _get_item_from_executorch_call_delegate(node: torch.fx.Node) -> bool:
     """
     Check if the node is the getitem followed by executorch_call_delegate node. These getitems node

Original file line number	Diff line number	Diff line change
`@@ -26,6 +26,7 @@ runtime.python_library(`
`26`	`26`	`":compile_spec_schema",`
`27`	`27`	`"//caffe2:torch",`
`28`	`28`	`"//executorch/exir/backend:utils",`
	`29`	`+ "//executorch/exir/backend/canonical_partitioners:duplicate_constant_node_pass",`
`29`	`30`	`],`
`30`	`31`	`)`
`31`	`32`