
Commit 2f21fe6

cccclai authored and facebook-github-bot committed
example quantizer and delegate backend
Summary: An example to show how to add a new backend. We've demoed this, and it may be easier to put up a generic implementation so it's easier to get started on adding a quantizer and delegate. This demo includes: an example quantizer, an example partitioner, an example backend, example operators available on the backend, and passes for memory format permutation.

Reviewed By: tarun292

Differential Revision: D49120351

fbshipit-source-id: 1118820b365af9b2cb1eec62477a915e6bb90f6d
1 parent 89f3e39 commit 2f21fe6

23 files changed: +1094 −0 lines changed

backends/example/README.md

Lines changed: 32 additions & 0 deletions
@@ -0,0 +1,32 @@
This folder is an example backend to lower MobileNetV2. It covers the AOT side and showcases how to quantize and lower a MobileNetV2 model to the example backend. The serialization format is simply a string for demo purposes, since the choice of serialization format is up to the backend.

The folder structure includes:

- example_quantizer
- example_partitioner
- example_backend
- example_operators. All of them are assumed to be runnable on the example backend.
  - The OpBase defined in op_base.py is just a draft idea; it can be defined more comprehensively depending on the TOSA operator definitions.
- example_backend_delegate_passes. It includes passes that might be helpful in the backend. Right now there are two passes: merge_to_dim_pass.py and permute_memory_formats_pass.py. They are examples that show how to represent memory format permutation and how to represent operators with a different memory format (like channels last).
  - merge_to_dim_pass.py only handles one merging case. More cases need to be covered but should be straightforward.

## High Level Flow

In the following diagrams, we show how to quantize a MobileNetV2 model and lower it to ExampleBackend.

### Quantize and Delegate

We can define patterns based on the operators supported by the backend; these patterns are then used by both the quantizer and the delegate.

![](./diagrams/quantize_delegate.png)
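
A rough sketch of the quantization step is shown below. The class name `ExampleQuantizer` and the exact capture/export call are assumptions (they are not spelled out in this commit), so treat this as illustrative rather than the exact API.

```python
# Hedged sketch: quantize MobileNetV2 with the example quantizer via the PT2E flow.
# `ExampleQuantizer` and the capture call below are assumptions; the exact entry
# points depend on the PyTorch/ExecuTorch version in use.
import torch
import torchvision
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e

from executorch.backends.example.example_quantizer import ExampleQuantizer

model = torchvision.models.mobilenet_v2(weights="DEFAULT").eval()
example_inputs = (torch.randn(1, 3, 224, 224),)

# Capture the eager model into an FX graph before quantization.
captured = torch.export.export(model, example_inputs).module()

prepared = prepare_pt2e(captured, ExampleQuantizer())  # annotate per backend patterns
prepared(*example_inputs)                               # calibrate
quantized = convert_pt2e(prepared)                      # fold observers into q/dq ops
```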

### Partitioner and Backend

The partitioner tags the nodes to be lowered to the backend; the backend then receives all tagged nodes and preprocesses them as a delegate.

![](./diagrams/delegate.png)
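
Continuing from the quantization sketch above, a minimal sketch of the lowering step might look like the following. `ExamplePartitioner` is an assumed class name, and the `to_edge`/`to_backend` signatures have changed across ExecuTorch versions, so this is illustrative only.

```python
# Hedged sketch: convert the quantized graph to the edge dialect, let the
# partitioner tag supported subgraphs, and replace them with lowered delegates.
import torch
from executorch import exir
from executorch.backends.example.example_partitioner import ExamplePartitioner

edge = exir.to_edge(torch.export.export(quantized, example_inputs))

# to_backend replaces each tagged subgraph with a call to the delegate produced
# by the backend's preprocess().
edge = edge.to_backend(ExamplePartitioner())
executorch_program = edge.to_executorch()
```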

### Memory format permute

Some operators may perform better in a memory format other than contiguous. One way to express this is to insert `to_dim_op` nodes to describe the memory format permutation, and to merge a pair of adjacent `to_dim_op` nodes when they are opposites of each other.

![](./diagrams/memory_permute.png)
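
The two passes under example_backend_delegate_passes implement exactly this, and example_backend.py runs them inside `preprocess`. A minimal sketch of applying them directly, assuming `edge_program` is an edge-dialect ExportedProgram:

```python
# Sketch mirroring TosaBackend.preprocess in example_backend.py: insert to_dim
# ops around supported patterns, then merge adjacent opposite to_dim pairs.
import copy

from executorch.backends.example.example_backend_delegate_passes.merge_to_dim_pass import (
    MergeToDimPass,
)
from executorch.backends.example.example_backend_delegate_passes.permute_memory_formats_pass import (
    PermuteMemoryFormatsPass,
)

transformed = copy.deepcopy(edge_program)  # edge_program: assumed edge-dialect ExportedProgram
transformed._transform(
    PermuteMemoryFormatsPass(),  # insert to_dim(channels_last) / to_dim(contiguous)
    MergeToDimPass(),            # cancel adjacent, opposite to_dim pairs
)
print(transformed.graph)
```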

backends/example/TARGETS

Lines changed: 58 additions & 0 deletions
@@ -0,0 +1,58 @@
load("@fbcode_macros//build_defs:python_library.bzl", "python_library")
load("@fbcode_macros//build_defs:python_unittest.bzl", "python_unittest")

python_library(
    name = "example_quantizer",
    srcs = [
        "example_quantizer.py",
    ],
    deps = [
        "//caffe2:torch",
        "//executorch/backends/example/example_operators:example_operators_lib",
    ],
)

python_library(
    name = "example_backend",
    srcs = [
        "example_backend.py",
    ],
    deps = [
        "//executorch/backends/example/example_backend_delegate_passes:lib",
        "//executorch/exir/backend:backend_details",
        "//executorch/exir/backend:compile_spec_schema",
    ],
)

python_library(
    name = "example_partitioner",
    srcs = [
        "example_partitioner.py",
    ],
    deps = [
        ":example_backend",
        "//caffe2:torch",
        "//executorch/backends/example/example_operators:example_operators_lib",
        "//executorch/exir:graph_module",
        "//executorch/exir/backend:partitioner",
        "//executorch/exir/backend/canonical_partitioners:canonical_partitioner_lib",
        "//executorch/exir/dialects:lib",
    ],
)

python_unittest(
    name = "test_example_delegate",
    srcs = [
        "test_example_delegate.py",
    ],
    deps = [
        ":example_partitioner",
        ":example_quantizer",
        "//caffe2:torch",
        "//executorch/exir:delegate",
        "//executorch/exir:lib",
        "//executorch/exir/backend:backend_api",
        "//executorch/exir/backend/canonical_partitioners:canonical_partitioner_lib",
        "//pytorch/vision:torchvision",
    ],
)
Binary files: PNG diagram images added (248 KB and 309 KB shown); content not displayed.

backends/example/example_backend.py

Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import copy
from typing import final, List

from executorch.backends.example.example_backend_delegate_passes.merge_to_dim_pass import (
    MergeToDimPass,
)
from executorch.backends.example.example_backend_delegate_passes.permute_memory_formats_pass import (
    PermuteMemoryFormatsPass,
)

from executorch.exir.backend.backend_details import (
    BackendDetails,
    ExportedProgram,
    PreprocessResult,
)
from executorch.exir.backend.compile_spec_schema import CompileSpec


@final
class TosaBackend(BackendDetails):
    @staticmethod
    def preprocess(
        edge_program: ExportedProgram,
        compile_specs: List[CompileSpec],
    ) -> PreprocessResult:
        print("entering the lowerable parts in TosaBackend.preprocess....")

        copy_edge_program = copy.deepcopy(edge_program)
        copy_edge_program._transform(
            PermuteMemoryFormatsPass(),
            MergeToDimPass(),
        )
        processed_bytes = str(copy_edge_program.graph)
        return PreprocessResult(bytes(processed_bytes, encoding="utf8"))
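
For context, a hedged sketch of how this backend could be invoked directly by its registered name; the `to_backend(backend_id, ...)` overload used here and the availability of `edge_exported_program` are assumptions, not part of this commit.

```python
# Hedged usage sketch: lower an edge program to the example backend by name.
# The backend id is the BackendDetails subclass name ("TosaBackend" here);
# `edge_exported_program` is assumed to be an edge-dialect ExportedProgram.
from executorch.exir.backend.backend_api import to_backend

lowered = to_backend("TosaBackend", edge_exported_program, [])  # no compile specs
print(lowered.processed_bytes)  # the stringified graph produced by preprocess()
```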
backends/example/example_backend_delegate_passes/TARGETS

Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
load("@fbcode_macros//build_defs:python_library.bzl", "python_library")

python_library(
    name = "lib",
    srcs = [
        "merge_to_dim_pass.py",
        "permute_memory_formats_pass.py",
    ],
    deps = [
        "//caffe2:torch",
        "//executorch/backends/example/example_operators:example_operators_lib",
        "//executorch/exir:dim_order_utils",
        "//executorch/exir:pass_base",
        "//executorch/exir/dialects:lib",
    ],
)
backends/example/example_backend_delegate_passes/merge_to_dim_pass.py

Lines changed: 79 additions & 0 deletions
@@ -0,0 +1,79 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.


import torch
from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.dim_order_utils import get_dim_order
from executorch.exir.pass_base import ExportPass, PassResult


class MergeToDimPass(ExportPass):
    """
    This pass merges a pair of adjacent to_dim ops when they cancel each other out,
    e.g. after PermuteMemoryFormatsPass has inserted to_dim ops around patterns whose
    pattern_op.permuate_memory_format is set to True.
    Example:
    # Done for 1 to 1
    before pass: x -> to_dim(channel_last) -> conv -> to_dim(contiguous) -> to_dim(channel_last) -> conv -> to_dim(contiguous) -> out
    after pass:  x -> to_dim(channel_last) -> conv -> conv -> to_dim(contiguous) -> out

    # Not Done for 1 to N
    before pass: x -> to_dim(channel_last) -> conv -> to_dim(contiguous) -> to_dim(channel_last) -> conv -> to_dim(contiguous) -> out
                                                            |-------------> to_dim(channel_last) -> conv -> to_dim(contiguous) -> out
    after pass:  x -> to_dim(channel_last) -> conv -> to_dim(contiguous) -> to_dim(channel_last) -> conv -> to_dim(contiguous) -> out
                                                            |-------------> to_dim(channel_last) -> conv -> to_dim(contiguous) -> out

    # Not Done for N to 1
    before pass: x -> to_dim(channel_last) -> conv -> to_dim(contiguous) -> to_dim(channel_last) -> conv -> to_dim(contiguous) -> out
                 y -> to_dim(channel_last) -> conv -> to_dim(contiguous) ---------|
    after pass:  x -> to_dim(channel_last) -> conv -> conv -> to_dim(contiguous) -> out
                 y -> to_dim(channel_last) -> conv -----|

    # Not Done for N to N
    """

    def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
        for node in graph_module.graph.nodes:
            if node.target == exir_ops.edge.dim_order_ops._to_dim_order_copy.default:
                # print(node, node.args, list(node.users), list(list(node.users)[0].args))
                if len(node.users) == 1 and len(list(node.users)[0].args) == 2:
                    args_map = {}
                    node_kwargs = node.args[-1]
                    node_users = list(node.users)

                    in_to_dim_node_dim_order = node_kwargs["dim_order"]
                    in_to_dim_node_dtype = node_kwargs["dtype"]
                    out_to_dim_node = node_users[0]
                    out_to_dim_node_kwargs = out_to_dim_node.args[-1]
                    out_to_dim_node_dim_order = out_to_dim_node_kwargs["dim_order"]
                    out_to_dim_node_dtype = out_to_dim_node_kwargs["dtype"]

                    if (
                        in_to_dim_node_dtype == out_to_dim_node_dtype
                        and in_to_dim_node_dim_order
                        == get_dim_order(torch.channels_last, 4)
                        and out_to_dim_node_dim_order
                        == get_dim_order(torch.contiguous_format, 4)
                    ):
                        out_to_dim_node_users = list(out_to_dim_node.users)
                        assert len(out_to_dim_node_users) == 1
                        out_to_dim_node_user = out_to_dim_node_users[0]
                        args_map[out_to_dim_node] = node.args[0]
                        out_to_dim_node_user_new_args = [
                            args_map[out_to_dim_node] if arg in args_map else arg
                            for arg in out_to_dim_node_user.args
                        ]
                        print("out_to_dim_node_user.args: ", out_to_dim_node_user.args)
                        print(
                            "out_to_dim_node_user_new_args: ",
                            out_to_dim_node_user_new_args,
                        )
                        out_to_dim_node_user.args = tuple(out_to_dim_node_user_new_args)

                        graph_module.graph.erase_node(out_to_dim_node)
                        graph_module.graph.erase_node(node)
        # TODO: Handle other merging rules, including 1->N, N->1, N->N
        return PassResult(graph_module, True)
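
The merge only fires for 4-D tensors with matching dtype where the first to_dim is channels_last and the second is contiguous. As a quick sanity check, the dim orders being compared come from get_dim_order (a sketch; exact return container may vary):

```python
# Sketch: the two dim orders compared in the condition above.
import torch
from executorch.exir.dim_order_utils import get_dim_order

print(get_dim_order(torch.contiguous_format, 4))  # expected: [0, 1, 2, 3]
print(get_dim_order(torch.channels_last, 4))      # expected: [0, 2, 3, 1]
```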
backends/example/example_backend_delegate_passes/permute_memory_formats_pass.py

Lines changed: 142 additions & 0 deletions
@@ -0,0 +1,142 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from itertools import chain

import torch
from executorch.backends.example.example_operators.ops import module_to_annotator
from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.dim_order_utils import get_dim_order
from executorch.exir.pass_base import ExportPass, PassResult
from torch.ao.quantization.pt2e.graph_utils import find_sequential_partitions


class PermuteMemoryFormatsPass(ExportPass):
    """
    This pass will insert to_dim ops around a pattern if it satisfies the requirement,
    i.e. pattern_op.permuate_memory_format is set to True.
    Example 1:
    before pass: x -> conv -> out
    after pass:  x -> to_dim(channel_last) -> conv -> to_dim(contiguous) -> out

    before pass: x -> conv -> conv -> out
    after pass:  x -> to_dim(channel_last) -> conv -> to_dim(contiguous) -> to_dim(channel_last) -> conv -> to_dim(contiguous) -> out

    before pass: x -> conv -> linear -> out
    after pass:  x -> to_dim(channel_last) -> conv -> to_dim(contiguous) -> to_dim(channel_last) -> linear -> to_dim(contiguous) -> out
    """

    def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
        for pattern in list(module_to_annotator.keys()):
            pattern_op = module_to_annotator[pattern]
            if pattern_op.permuate_memory_format:
                partitions = find_sequential_partitions(
                    graph_module,
                    pattern,
                )
                for partition in partitions:
                    # Some unpacking logic to get a flattened exit-nodes list
                    output_nodes = [
                        node
                        for node in partition[0].output_nodes
                        if node.op != "placeholder"
                    ]
                    exit_nodes = [output_node.users for output_node in output_nodes]
                    exit_nodes = list(chain.from_iterable(exit_nodes))

                    """
                    # Step 1. Insert a to_dim op when exiting the pattern.
                    # For example, if the pattern is conv, x -> conv -> out will become x -> conv -> to_dim(contiguous) -> out when permuting the memory format.
                    # For x -> conv -> conv -> out, it will become x -> conv -> to_dim(contiguous) -> conv -> to_dim(contiguous) -> out.
                    """
                    for exit_node in exit_nodes:
                        with graph_module.graph.inserting_before(exit_node):
                            # Handle the case when the pattern output is also the graph output,
                            # e.g. x -> conv -> out will become x -> conv -> to_dim(contiguous) -> out
                            if exit_node.op == "output":
                                exit_to_dim_op = graph_module.graph.call_function(
                                    exir_ops.edge.dim_order_ops._to_dim_order_copy.default,
                                    exit_node.args,
                                    {
                                        "dtype": torch.float64,
                                        "dim_order": get_dim_order(
                                            torch.contiguous_format, 4
                                        ),
                                    },
                                )
                                # Insert the to_dim op; it will be the return op
                                _ = graph_module.graph.output(exit_to_dim_op)
                                # Remove the old return op.
                                graph_module.graph.erase_node(exit_node)
                            # Handle the case when the pattern output is an intermediate output,
                            # e.g. x -> conv -> conv -> out will become x -> conv -> to_dim(contiguous) -> conv -> out
                            elif exit_node.op == "call_function":
                                exit_node_args = []
                                for exit_node_arg in exit_node.args:
                                    if (
                                        isinstance(exit_node_arg, torch.fx.Node)
                                        and exit_node_arg.op != "placeholder"
                                    ):
                                        exit_to_dim_op = graph_module.graph.call_function(
                                            exir_ops.edge.dim_order_ops._to_dim_order_copy.default,
                                            (exit_node_arg,),
                                            {
                                                "dtype": torch.float64,
                                                "dim_order": get_dim_order(
                                                    torch.contiguous_format, 4
                                                ),
                                            },
                                        )
                                        exit_node_args.append(exit_to_dim_op)
                                    else:
                                        exit_node_args.append(exit_node_arg)
                                exit_node.args = list(exit_node_args)

                    """
                    # Step 2. Insert a to_dim op when entering the pattern. After the first step, we already have to_dim(contiguous) when exiting the pattern.
                    # Now we need to insert to_dim(channel_last) when entering the pattern.
                    # For example, if the pattern is conv, x -> conv -> to_dim(contiguous) -> out will become x -> to_dim(channel_last) -> conv -> to_dim(contiguous) -> out.
                    # For x -> conv -> to_dim(contiguous) -> conv -> to_dim(contiguous) -> out, it will become x -> to_dim(channel_last) -> conv -> to_dim(contiguous) -> to_dim(channel_last) -> conv -> to_dim(contiguous) -> out.
                    """
                    # Create the input_node -> to_dim_op map (e.g. for the conv pattern
                    # in x -> conv -> out, the key is x).
                    input_node_map = {}  # key: input_node, value: to_dim_op
                    to_dim_op_set = set()
                    for input_node in partition[0].input_nodes:
                        with graph_module.graph.inserting_after(input_node):
                            to_dim_op = graph_module.graph.call_function(
                                # Insert the to_dim op and update input_node_map
                                exir_ops.edge.dim_order_ops._to_dim_order_copy.default,
                                (
                                    input_node,
                                    {
                                        "dtype": torch.float64,
                                        "dim_order": get_dim_order(
                                            torch.channels_last, 4
                                        ),
                                    },
                                ),
                            )
                            input_node_map[input_node] = to_dim_op
                            to_dim_op_set.add(to_dim_op)

                    # Update the args to the new to_dim op; skip if it's already set
                    for input_node in partition[0].input_nodes:
                        for user in list(input_node.users):
                            # If user is in to_dim_op_set, its arg is already set to the to_dim op
                            if user not in to_dim_op_set:
                                user_new_arg = [
                                    input_node_map[user_arg]
                                    if user_arg in input_node_map
                                    else user_arg
                                    for user_arg in user.args
                                ]
                                # Update the input node's users' args
                                user.args = tuple(user_new_arg)

        # Ensure the graph is still valid
        graph_module.graph.lint()
        graph_module.recompile()
        return PassResult(graph_module, True)
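
The pattern lookup that drives this pass comes from find_sequential_partitions. A rough sketch of how such a call behaves is below; the concrete pattern tuples live in example_operators/ops.py, so the pattern used here is an assumption.

```python
# Sketch: find partitions for an assumed conv -> relu pattern in an FX graph.
# `graph_module` is assumed to be a torch.fx.GraphModule obtained from export.
import torch
from torch.ao.quantization.pt2e.graph_utils import find_sequential_partitions

partitions = find_sequential_partitions(graph_module, [torch.nn.Conv2d, torch.nn.ReLU])
for partition in partitions:
    # Each entry is a sequence of SourcePartitions; the pass above uses the first
    # one's input_nodes/output_nodes to decide where to insert to_dim ops.
    print(partition[0].input_nodes, partition[0].output_nodes)
```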
backends/example/example_operators/TARGETS

Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
load("@fbcode_macros//build_defs:python_library.bzl", "python_library")

python_library(
    name = "example_operators_lib",
    srcs = [
        "adaptive_avg_pool2d.py",
        "add.py",
        "conv2d.py",
        "conv_relu.py",
        "dropout.py",
        "flatten.py",
        "linear.py",
        "op_base.py",
        "ops.py",
        "utils.py",
    ],
    deps = [
        "//caffe2:torch",
    ],
)
