pytorch
diff --git a/.github/scripts/check_labels.py b/.github/scripts/check_labels.py
Lines changed: 3 additions & 3 deletions
diff --git a/.github/scripts/github_utils.py b/.github/scripts/github_utils.py
Lines changed: 3 additions & 3 deletions
diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml
Lines changed: 1 addition & 1 deletion
diff --git a/backends/arm/operators/op_add.py b/backends/arm/operators/op_add.py
Lines changed: 1 addition & 0 deletions
diff --git a/backends/cadence/aot/TARGETS b/backends/cadence/aot/TARGETS
Lines changed: 46 additions & 0 deletions
diff --git a/backends/cadence/aot/compiler.py b/backends/cadence/aot/compiler.py
Lines changed: 21 additions & 1 deletion
diff --git a/backends/cadence/aot/export_example.py b/backends/cadence/aot/export_example.py
Lines changed: 3 additions & 3 deletions
diff --git a/backends/cadence/aot/fuse_ops.py b/backends/cadence/aot/fuse_ops.py
Lines changed: 1 addition & 1 deletion
diff --git a/backends/cadence/aot/graph_builder.py b/backends/cadence/aot/graph_builder.py
Lines changed: 107 additions & 0 deletions
diff --git a/backends/cadence/aot/pass_utils.py b/backends/cadence/aot/pass_utils.py
Lines changed: 9 additions & 0 deletions
diff --git a/backends/cadence/aot/passes.py b/backends/cadence/aot/passes.py
Lines changed: 15 additions & 0 deletions
@@ -45,15 +45,15 @@ def main() -> None:
 
     try:
         if not has_required_labels(pr):
-            print(LABEL_ERR_MSG)
+            print(LABEL_ERR_MSG, flush=True)
             add_label_err_comment(pr)
             if args.exit_non_zero:
-                sys.exit(1)
+                raise RuntimeError("PR does not have required labels")
         else:
             delete_all_label_err_comments(pr)
     except Exception as e:
         if args.exit_non_zero:
-            sys.exit(1)
+            raise RuntimeError(f"Error checking labels: {e}") from e
 
     sys.exit(0)
 
 
@@ -72,10 +72,10 @@ def gh_fetch_url(
     headers: Optional[Dict[str, str]] = None,
     data: Union[Optional[Dict[str, Any]], str] = None,
     method: Optional[str] = None,
-    reader: Callable[[Any], Any] = lambda x: x.read(),
+    reader: Callable[[Any], Any] = json.load,
 ) -> Any:
     return gh_fetch_url_and_headers(
-        url, headers=headers, data=data, reader=json.load, method=method
+        url, headers=headers, data=data, reader=reader, method=method
     )[1]
 
 
@@ -169,7 +169,7 @@ def gh_post_commit_comment(
 
 def gh_delete_comment(org: str, repo: str, comment_id: int) -> None:
+    """Send a DELETE request for issue comment `comment_id` of `org`/`repo`."""
     url = f"{GITHUB_API_URL}/repos/{org}/{repo}/issues/comments/{comment_id}"
-    gh_fetch_url(url, method="DELETE")
+    # gh_fetch_url now defaults to reader=json.load; pass a raw-bytes reader
+    # instead -- presumably the DELETE response carries no JSON body to parse
+    # (TODO confirm against the GitHub REST API).
+    gh_fetch_url(url, method="DELETE", reader=lambda x: x.read())
 
 
 def gh_fetch_merge_base(org: str, repo: str, base: str, head: str) -> str:
 
@@ -136,7 +136,7 @@ jobs:
       fail-fast: false
     with:
       runner: linux.4xlarge
-      docker-image: executorch-ubuntu-22.04-clang12-android
+      docker-image: executorch-ubuntu-22.04-qnn-sdk
       submodules: 'true'
       timeout: 60
       upload-artifact: android-models
 
@@ -302,7 +302,7 @@ jobs:
       fail-fast: false
     with:
       runner: linux.2xlarge
-      docker-image: executorch-ubuntu-22.04-clang12-android
+      docker-image: executorch-ubuntu-22.04-qnn-sdk
       submodules: 'true'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       timeout: 900
 
@@ -82,6 +82,7 @@ def define_node(
 
         if needs_rescale:
             # Scale output back to 8 bit
+            # pyre-ignore
             tqutils.rescale_node_back_to_int8(node, add_output, scale, tosa_graph)
 
 
 
@@ -38,6 +38,7 @@ python_library(
     deps = [
         ":passes",
         ":utils",
+        ":ops_registrations",
         "//caffe2:torch",
         "//executorch/backends/cadence/aot/quantizer:fusion_pass",
         "//executorch/backends/cadence/aot/quantizer:quantizer",
@@ -71,6 +72,8 @@ python_library(
     ],
     deps = [
         ":utils",
+        ":fuse_ops",
+        ":simplify_ops",
         "//caffe2:torch",
         "//executorch/exir:pass_base",
         "//executorch/exir/dialects:lib",
@@ -132,6 +135,18 @@ python_library(
     ],
 )
 
+# Library target for graph_builder.py.
+# NOTE(review): the deps below use explicit "fbcode//" cell labels while the
+# other targets in this file use bare "//" labels -- confirm this is intended.
+python_library(
+    name = "graph_builder",
+    srcs = [
+        "graph_builder.py",
+    ],
+    typing = True,
+    deps = [
+        "fbcode//caffe2:torch",
+        "fbcode//executorch/exir:pass_base",
+    ],
+)
+
 python_library(
     name = "fuse_ops",
     srcs = [
@@ -150,3 +165,34 @@ python_library(
         "//executorch/exir/passes:spec_prop_pass",
     ],
 )
+
+# Library target for simplify_ops.py; pass_utils is a same-package dependency.
+python_library(
+    name = "simplify_ops",
+    srcs = [
+        "simplify_ops.py",
+    ],
+    typing = True,
+    deps = [
+        ":pass_utils",
+        "//executorch/exir:pass_base",
+        "//executorch/exir/dialects:lib",
+    ],
+)
+
+# Unit-test target for tests/test_graph_builder.py.
+python_unittest(
+    name = "test_graph_builder",
+    srcs = [
+        "tests/test_graph_builder.py",
+    ],
+    typing = True,
+    deps = [
+        ":ops_registrations",
+        "//caffe2:torch",
+        "//executorch/backends/cadence/aot:graph_builder",
+        "//executorch/backends/cadence/aot:pass_utils",
+        "//executorch/exir:pass_base",
+        "//executorch/exir/dialects:lib",
+        "//later:lib",
+    ],
+)
@@ -10,6 +10,7 @@
 from pathlib import Path
 from typing import Callable, cast, Optional
 
+import executorch.backends.cadence.aot.ops_registrations  # noqa
 import torch
 
 from executorch.backends.cadence.aot.passes import ReplaceSafeSoftmaxWithSoftmax
@@ -196,7 +197,26 @@ def export_to_edge(
 # Export the model and lower it to an EdgeProgramManager (in edge IR), and
 # apply passes specific to Cadence DSP execution. Return both to print the
 # differences.
-def export_to_cadence_edge_executorch(
+def export_to_cadence(
+    model: torch.nn.Module,
+    inputs: tuple[object, ...],
+    dump_graphs: bool = False,
+    output_dir: Optional[str] = None,
+    opt_level: int = 1,
+) -> EdgeProgramManager:
+    """Lower `model` to edge IR, then apply the Cadence passes for `opt_level`.
+
+    `dump_graphs` and `output_dir` are part of the signature but are not read
+    by this function.
+    """
+    edge_manager = export_to_edge(model, inputs)
+    # typing.cast is a no-op at runtime; it only informs the type checker.
+    pass_list = cast(
+        list[Callable[[torch.fx.GraphModule], Optional[PassResult]]],
+        get_cadence_passes(opt_level),
+    )
+    return edge_manager.transform(pass_list)
+
+
+def export_to_executorch_gen_etrecord(
     model: torch.nn.Module,
     inputs: tuple[object, ...],
     dump_graphs: bool = False,
 
@@ -16,7 +16,7 @@
 
 from executorch.backends.cadence.aot.compiler import (
     convert_pt2,
-    export_to_cadence_edge_executorch,
+    export_to_executorch_gen_etrecord,
     fuse_pt2,
 )
 
@@ -86,8 +86,8 @@ def export_model(
     quantized_model = fuse_pt2(converted_model, quantizer)
 
     # Get edge program after Cadence specific passes
-    exec_prog: ExecutorchProgramManager = export_to_cadence_edge_executorch(
-        quantized_model, example_inputs, working_dir
+    exec_prog: ExecutorchProgramManager = export_to_executorch_gen_etrecord(
+        quantized_model, example_inputs, output_dir=working_dir
     )
 
     logging.info("Final exported graph:\n")
 
@@ -1022,7 +1022,7 @@ def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
         return PassResult(graph_module, True)
 
 
-class FuseOpsInGraph:
+class CadenceFuseOpsInGraph:
     passes = [
         FuseMMWithAdd,
         FuseBatchNormWithConv,
 
@@ -0,0 +1,107 @@
+# (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.
+
+# pyre-strict
+
+import logging
+from typing import Optional, Sequence, Union
+
+import torch
+from executorch.exir.pass_base import ExportPass, NodeMetadata, ProxyValue
+from torch._subclasses import FakeTensor, FakeTensorMode
+from torch.fx.node import Argument, Target
+from torch.utils import _pytree as pytree
+
+
+class GraphBuilder(ExportPass):
+    """Utility class for creating a graph module with user-specified ops.
+
+    This class allows us to create test graph modules with any ops we want
+    directly, rather than relying on decomposition or passes.
+
+    Usage:
+        builder = GraphBuilder()
+        # To insert placeholders, use builder.placeholder.
+        x = builder.placeholder("x", torch.randn(1, 3, 224, 224))
+        # To insert an op, use builder.call_operator.
+        op = builder.call_operator(
+            some_op,
+            (x, other_args, ...),
+        )
+        # Insert outputs as a list of ProxyValues using builder.output.
+        builder.output([op])
+        # Get GraphModule from builder.
+        gm = builder.get_graph_module()
+    """
+
+    def __init__(self) -> None:
+        """Set up the tracer, fake-tensor mode and interpreter used to build nodes."""
+        # NOTE(review): super().__init__() is not called here, and `exporter` is
+        # not read anywhere else in this class -- confirm both are intentional.
+        self.exporter = ExportPass()
+        self.tracer: ExportPass.ExportTracer = self.ExportTracer(
+            self, torch.fx.graph.CodeGen()
+        )
+        # The same FakeTensorMode instance is used by `placeholder` to convert
+        # real tensors and is handed to the tracer.
+        self.fake_tensor_mode = FakeTensorMode(allow_fallback_kernels=False)
+        self.tracer.fake_tensor_mode = self.fake_tensor_mode
+
+        # This will be called to create nodes in tracer.
+        self.interpreter = torch.fx.Interpreter(
+            torch.fx.GraphModule(torch.nn.Module(), torch.fx.Graph())
+        )
+
+    # pyre-ignore[14]: Inconsistent override.
+    def placeholder(
+        self, target: str, fake_tensor: Union[FakeTensor, torch.Tensor]
+    ) -> ProxyValue:
+        """Insert a placeholder named `target` and return its ProxyValue.
+
+        A tensor that is not already a FakeTensor is first converted with this
+        builder's FakeTensorMode. The node is created with empty metadata.
+        """
+        if not isinstance(fake_tensor, FakeTensor):
+            fake_tensor = self.fake_tensor_mode.from_tensor(fake_tensor)
+        logging.info(f"Creating placeholder {target} => {fake_tensor.shape}")
+        placeholder = super().placeholder(target, fake_tensor, NodeMetadata({}))
+        return placeholder
+
+    # pyre-ignore[14]: Inconsistent override.
+    def output(self, results: list[ProxyValue]) -> ProxyValue:
+        """Insert the output node for `results`, using empty metadata."""
+        logging.info(f"Creating outputs {results}")
+        return super().output(results, NodeMetadata({}))
+
+    def get_graph_module(self) -> torch.fx.GraphModule:
+        """Build a GraphModule from the tracer's root and graph."""
+        return torch.fx.GraphModule(self.tracer.root, self.tracer.graph)
+
+    def call_operator(
+        self,
+        op,  # pyre-ignore
+        args: tuple[Argument, ...],
+        kwargs: Optional[dict[str, Argument]] = None,
+        meta: Optional[NodeMetadata] = None,
+    ) -> ProxyValue:
+        """Insert a call to `op` and return its ProxyValue.
+
+        `kwargs` defaults to an empty dict and `meta` to empty NodeMetadata
+        before the call is delegated to the parent implementation.
+        """
+        if meta is None:
+            meta = NodeMetadata({})
+        if kwargs is None:
+            kwargs = {}
+        return super().call_operator(op, args, kwargs, meta)
+
+
+def single_op_builder(
+    placeholders: Sequence[Union[torch.Tensor, FakeTensor]],
+    op: Target,
+    args: Sequence[Argument],
+    kwargs: Optional[dict[str, Argument]] = None,
+) -> torch.fx.GraphModule:
+    """Create a graph module with a single op.
+
+    Each tensor found in `args`/`kwargs` is replaced by the placeholder that
+    was created for it, so it must be one of the objects in `placeholders`;
+    the replacement is a plain dict lookup and raises KeyError otherwise.
+
+    Args:
+        placeholders: Placeholders to be used as inputs to the GraphModule.
+        op: The op to be inserted.
+        args: The args to be passed to the op.
+        kwargs: The kwargs to be passed to the op.
+
+    Returns:
+        A graph module with a single op
+    """
+    builder = GraphBuilder()
+    op_to_placeholder_dict = {
+        p: builder.placeholder(f"p_{i}", p) for i, p in enumerate(placeholders)
+    }
+    proxy_args, proxy_kwargs = pytree.tree_map_only(
+        (torch.Tensor, FakeTensor), lambda x: op_to_placeholder_dict[x], (args, kwargs)
+    )
+    # `args` may be any Sequence (e.g. a list), but GraphBuilder.call_operator
+    # is annotated to take a tuple, so normalize before forwarding.
+    node = builder.call_operator(op, tuple(proxy_args), proxy_kwargs)
+    builder.output([node])
+    return builder.get_graph_module()
@@ -89,3 +89,12 @@ def get_node_names_list_from_gm(
             continue
         graph_nodes.append(node.name)
     return graph_nodes
+
+
+def count_node(graph_module: torch.fx.GraphModule, target: torch.fx.node.Target) -> int:
+    """Return how many `call_function` nodes in the graph have target `target`."""
+    return sum(
+        1
+        for candidate in graph_module.graph.nodes
+        if candidate.op == "call_function" and candidate.target == target
+    )
@@ -11,11 +11,13 @@
 import torch
 import torch.fx
 import torch.utils._pytree as pytree
+from executorch.backends.cadence.aot.fuse_ops import CadenceFuseOpsInGraph
 from executorch.backends.cadence.aot.pass_utils import (
     CadencePassAttribute,
     create_cadence_pass_filter,
     register_cadence_pass,
 )
+from executorch.backends.cadence.aot.simplify_ops import CadenceSimplifyOpsInGraph
 from executorch.backends.cadence.aot.utils import get_edge_overload_packet
 from executorch.backends.transforms.remove_clone_ops import RemoveCloneOpsTransform
 from executorch.exir.dialects._ops import ops as exir_ops
@@ -346,10 +348,23 @@ def get_passes_in_default_order() -> List[Type[PassType]]:
         ReplaceScalarTensorWithFullPass,
         RemoveCloneOpsTransformImported,
         RemoveNopExpandOpPass,
+        CadenceFuseOpsInGraph.passes,
         ReplaceSqueezeAndUnsqueezeWithViewPass,
         ReplacePT2QuantWithCadenceQuantPass,
         ReplacePT2DequantWithCadenceDequantPass,
+        CadenceSimplifyOpsInGraph.passes,
         # TODO: add the rest of the passes here.
+        # InitializePipeline,
+        # RemoveRedundantOps.passes,
+        # ReorderOpsInGraph.passes,
+        # RemoveJarvisNops.passes,
+        # CadenceFuseOpsInGraph.passes,
+        # ReplaceOpsInGraph.passes,
+        # SimplifyOpsInGraph.passes,
+        # FinalizePipeline,
+        # FuseFullThenReshapePass,
+        # FuseTransposeOpPairsPass,
+        # RemoveNopSliceOrViewOpPass,
     ]
     return pytree.tree_flatten(passes)[0]