
Commit 4f40c44

Merge branch 'main' into matmul_unmark_flaky
2 parents 8eb6f7b + 9aedbeb

23 files changed: +203 additions, -146 deletions

.github/workflows/pull.yml
Lines changed: 0 additions & 28 deletions

@@ -343,34 +343,6 @@ jobs:
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
         PYTHON_EXECUTABLE=python bash examples/xnnpack/quantization/test_quantize.sh "${BUILD_TOOL}" mv2
 
-  test-pybind-build-linux:
-    name: test-pybind-build-linux
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      fail-fast: false
-    with:
-      runner: linux.2xlarge
-      docker-image: executorch-ubuntu-22.04-clang12
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        # build module for executorch.extension.pybindings.portable_lib
-        BUILD_TOOL="cmake"
-        PYTHON_EXECUTABLE=python \
-        CMAKE_ARGS="-DEXECUTORCH_BUILD_PYBIND=ON -DEXECUTORCH_BUILD_XNNPACK=ON" \
-        bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
-
-        # see if we can import the module successfully
-        python -c "from executorch.extension.pybindings import portable_lib; print('success!')"
-
   test-binary-size-linux-gcc:
     name: test-binary-size-linux-gcc
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main

.github/workflows/trunk.yml
Lines changed: 0 additions & 24 deletions

@@ -289,30 +289,6 @@ jobs:
         # Build and test coreml delegate
         PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/build_all.sh
 
-  test-pybind-build-macos:
-    name: test-pybind-build-macos
-    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
-    strategy:
-      matrix:
-        include:
-          - build-tool: cmake
-      fail-fast: false
-    with:
-      runner: macos-m1-stable
-      python-version: '3.11'
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 180
-      script: |
-        bash .ci/scripts/setup-conda.sh
-
-        # build module for executorch.extension.pybindings.portable_lib
-        BUILD_TOOL=${{ matrix.build-tool }}
-        CMAKE_ARGS="-DEXECUTORCH_BUILD_PYBIND=ON" PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}"
-
-        # see if we can import the module successfully
-        ${CONDA_RUN} python -c "from executorch.extension.pybindings import portable_lib; print('success!')"
-
   test-static-llama-ane:
     name: test-static-llama-ane
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main

backends/arm/scripts/build_executor_runner.sh
Lines changed: 1 addition & 1 deletion

@@ -103,7 +103,7 @@ then
     memory_mode="Shared_Sram"
     if [[ ${target} =~ "ethos-u85" ]]
     then
-        memory_mode="Dedicated_Sram_384KB"
+        memory_mode="Sram_Only"
     fi
 fi
 

backends/arm/test/test_arm_baremetal.sh
Lines changed: 4 additions & 1 deletion

@@ -210,7 +210,10 @@ test_models_ethos-u85() { # End to End model tests using model_test.py
     python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-512 --model=mv3 --extra_flags="-DET_ATOL=5.00 -DET_RTOL=5.00"
     python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-128 --model=lstm --extra_flags="-DET_ATOL=0.03 -DET_RTOL=0.03"
     python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-128 --model=w2l --extra_flags="-DET_ATOL=0.01 -DET_RTOL=0.01"
-    python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-256 --model=ic4 --extra_flags="-DET_ATOL=0.8 -DET_RTOL=0.8" --timeout=2400
+    # Temporarily do not test inception_v4 on Ethos-U85. To support inception_v4 properly on Ethos-U85, we need to run the model in Dedicated_Sram memory mode with
+    # 384KB (or another amount below 2MB) of SRAM passed as the fast scratch area. The PR adding support for Dedicated_Sram (https://github.com/pytorch/executorch/pull/10714)
+    # was reverted due to a change required in an internal variant of examples/arm/executor_runner/arm_executor_runner.cpp.
+    # python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-256 --model=ic4 --extra_flags="-DET_ATOL=0.8 -DET_RTOL=0.8" --timeout=2400
 
     echo "${TEST_SUITE_NAME}: PASS"
 }

backends/arm/test/test_model.py
Lines changed: 1 addition & 1 deletion

@@ -81,7 +81,7 @@ def get_args():
     if "u55" in args.target:
         args.memory_mode = "Shared_Sram"
     elif "u85" in args.target:
-        args.memory_mode = "Dedicated_Sram_384KB"
+        args.memory_mode = "Sram_Only"
     else:
         raise RuntimeError(f"Invalid target name {args.target}")
 

backends/cadence/aot/replace_ops.py
Lines changed: 0 additions & 26 deletions

@@ -283,31 +283,6 @@ def call_operator(self, op, args, kwargs, meta):
         return super().call_operator(op, args, kwargs, meta)
 
 
-@register_cadence_pass(CadencePassAttribute(opt_level=0))
-class ReplaceTCopyWithTransposePass(ExportPass):
-    """
-    Replace t_copy with transpose_copy.int. If the input is 1D, the t_copy is
-    a nop. t_copy is not supported, so this is an opt_level=0 pass.
-    """
-
-    def call_operator(self, op, args, kwargs, meta):
-        if get_edge_overload_packet(op) != exir_ops.edge.aten.t_copy:
-            return super().call_operator(op, args, kwargs, meta)
-
-        # Get the input tensor shape
-        in_tensor = args[0].to_tensor() if isinstance(args[0], ProxyValue) else args[0]
-
-        # If the input is a 1D tensor, this t_copy is a nop, so return the input
-        if in_tensor.dim() <= 1:
-            return args[0]
-
-        assert in_tensor.dim() == 2, "t_copy expects a tensor with <= 2 dimensions"
-        transpose_args = (args[0], 0, 1)
-        return super().call_operator(
-            exir_ops.edge.aten.transpose_copy.int, transpose_args, kwargs, meta
-        )
-
-
 @register_cadence_pass(CadencePassAttribute(opt_level=0))
 class ReplaceMMWithAddMMPass(ExportPass):
     """
@@ -2407,7 +2382,6 @@ class CadenceReplaceOpsInGraph:
     passes = [
         ReplaceEmptyTensorsWithFullPass,
         ReplaceFunctionallyEquivalentOpTargets,
-        ReplaceTCopyWithTransposePass,
         ReplacePermuteWithTransposePass,
         ReplaceScalarWithTensorArgPass,
         ReplaceConvolutionOptionalArgsWithConcreteArgsPass,

backends/cadence/aot/tests/test_fusion_ops_passes.py
Lines changed: 24 additions & 25 deletions

@@ -295,11 +295,12 @@ def test_no_replace_quant_permute_dequant_with_requantize(self):
             args=(permute, 4.5, 6, 0, 127, torch.int8),
         )
         builder.output(dequant)
-        graph_module = FuseQuantDequantToRequantizePass(
+        original_graph = builder.get_graph_module()
+        converted_graph = FuseQuantDequantToRequantizePass(
             force_quant_dequant_fusion=False
-        )(builder.get_graph_module()).graph_module
+        )(original_graph).graph_module
         self.check_op_counts(
-            graph_module,
+            converted_graph,
             expected_op_counts={
                 # Verify that no dequant/quant pair was replaced with requantize.
                 # quantize -> permute -> dequantize should not be replaced with requantize.
@@ -310,30 +311,28 @@ def test_no_replace_quant_permute_dequant_with_requantize(self):
         )
 
     def test_replace_quant_view_dequant_with_requantize(self):
-        class M(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
-            def forward(self, x):
-                x = torch.ops.quantized_decomposed.quantize_per_tensor(
-                    x, 1.2, 3, 0, 127, torch.int8
-                )
-                x = x.view(-1)
-                x = torch.ops.quantized_decomposed.dequantize_per_tensor(
-                    x, 4.5, 6, 0, 127, torch.int8
-                )
-                return x
-
-        inputs = torch.randn(2, 12, 1, 6)
-        model = M()
-        graph_module = export_to_edge(model, (inputs,)).exported_program().graph_module
-        graph_module = FuseQuantDequantToRequantizePass()(graph_module).graph_module
-
+        builder = GraphBuilder()
+        x = builder.placeholder("x", torch.randn(2, 12, 1, 6, dtype=torch.float32))
+        quant = builder.call_operator(
+            op=exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
+            args=(x, 1.2, 3, 0, 127, torch.int8),
+        )
+        view = builder.call_operator(
+            op=exir_ops.edge.aten.view_copy.default, args=(quant, [-1])
+        )
+        dequant = builder.call_operator(
+            op=exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default,
+            args=(view, 4.5, 6, 0, 127, torch.int8),
+        )
+        builder.output(dequant)
+        original_graph = builder.get_graph_module()
+        converted_graph = FuseQuantDequantToRequantizePass()(
+            original_graph
+        ).graph_module
         self.check_op_counts(
-            graph_module,
+            converted_graph,
             expected_op_counts={
-                # Verify that no dequant/quant pair was replaced with requantize.
-                # quantize -> permute -> dequantize should not be replaced with requantize.
+                # Verify that dequant/quant pair was replaced with requantize.
                 exir_ops.edge.quantized_decomposed.quantize_per_tensor.default: 0,
                 exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default: 0,
                 exir_ops.edge.cadence.requantize.default: 1,
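
For context, check_op_counts comes from the test base class and is not shown in this diff; it walks the graph and compares per-operator node counts against the expected_op_counts map. A minimal sketch of that kind of counting, using only standard torch.fx traversal (the helper name count_ops here is hypothetical, not the repo's implementation):

    import torch.fx

    def count_ops(graph_module: torch.fx.GraphModule, target) -> int:
        # Count call_function nodes whose target is the given edge op.
        return sum(
            1
            for node in graph_module.graph.nodes
            if node.op == "call_function" and node.target == target
        )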

backends/cadence/aot/tests/test_replace_ops_passes.py
Lines changed: 0 additions & 32 deletions

@@ -48,7 +48,6 @@
     ReplaceSingleElementTensorArgumentsFromFullOpWithScalarPass,
     ReplaceSplitWithSlicePass,
     ReplaceSqueezeAndUnsqueezeWithViewPass,
-    ReplaceTCopyWithTransposePass,
     ReplaceTransposedConvWithLinearPass,
     ReplaceTrivialConvWithLinear,
     ReplaceWhereWithFullArgsWithWhereScalar,
@@ -368,37 +367,6 @@ def forward(self, x: torch.Tensor):
             0,
         )
 
-    @parameterized.expand(
-        [
-            [(16, 32)],
-            [(1, 240)],
-            [(4, 16)],
-        ]
-    )
-    @torch.no_grad()
-    def test_replace_t_copy_with_transpose(self, shape: Tuple[int]):
-        class TCopy(torch.nn.Module):
-            def forward(self, x: torch.Tensor):
-                return exir_ops.edge.aten.t_copy(x)
-
-        w = torch.randn(shape)
-        inputs = (w,)
-        p1 = ReplaceTCopyWithTransposePass()
-        p2 = ReplacePermuteWithTransposePass()
-        model = TCopy()
-        graph_module = export_to_edge(model, inputs).exported_program().graph_module
-        graph_after_passes = cast(
-            PassResult, p2(cast(PassResult, p1(graph_module)).graph_module)
-        ).graph_module
-        self.assertEqual(
-            count_node(graph_after_passes, exir_ops.edge.aten.transpose_copy.int),
-            1,
-        )
-        self.assertEqual(
-            count_node(graph_after_passes, exir_ops.edge.aten.t_copy),
-            0,
-        )
-
     @parameterized.expand(
         [
             [(1, 8, 33), 8, 16, 3],

backends/xnnpack/operators/__init__.py
Lines changed: 1 addition & 0 deletions

@@ -24,6 +24,7 @@
     op_hardtanh,
     op_leaky_relu,
     op_linear,
+    op_log,
     op_matrix_multiplication,
     op_max_dim,
     op_max_pool2d,

backends/xnnpack/operators/op_log.py
Lines changed: 52 additions & 0 deletions

@@ -0,0 +1,52 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Dict
+
+import torch
+from executorch.backends.xnnpack.operators.node_visitor import (
+    NodeVisitor,
+    register_node_visitor,
+)
+from executorch.backends.xnnpack.serialization.xnnpack_graph_schema import (
+    XNNGraph,
+    XNNLog,
+    XNode,
+)
+from executorch.backends.xnnpack.utils.utils import get_input_node
+
+
+@register_node_visitor
+class LogVisitor(NodeVisitor):
+    target = "aten.log.default"
+
+    def __init__(self, *args) -> None:
+        super().__init__(*args)
+
+    def define_node(
+        self,
+        node: torch.fx.Node,
+        xnn_graph: XNNGraph,
+        vals_to_ids: Dict[torch.fx.Node, int],
+        debug_handle: int,
+    ) -> None:
+        self.define_nodes_tensor_inputs_outputs(node, xnn_graph, vals_to_ids)
+
+        # input
+        input_id = vals_to_ids[get_input_node(node, 0)]
+
+        # output
+        output_id = vals_to_ids[node]
+
+        ser_node = XNode(
+            xnode_union=XNNLog(
+                input_id=input_id,
+                output_id=output_id,
+                flags=0,
+            ),
+            debug_handle=debug_handle,
+        )
+        xnn_graph.xnodes.append(ser_node)
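
Taken together with the partitioner changes below, this visitor runs whenever a delegated graph contains aten.log.default. A minimal sketch of the standard ExecuTorch export flow that would reach it — not part of this commit, and assuming the usual top-level export APIs:

    import torch
    from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
    from executorch.exir import to_edge_transform_and_lower

    class LogModule(torch.nn.Module):
        def forward(self, x):
            return torch.log(x)

    # Export, then delegate the graph to XNNPACK; LogVisitor serializes the
    # log node as XNNLog, and the runtime defines it via xnn_define_log.
    exported = torch.export.export(LogModule(), (torch.rand(2, 3) + 0.1,))
    program = to_edge_transform_and_lower(
        exported, partitioner=[XnnpackPartitioner()]
    ).to_executorch()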

backends/xnnpack/partition/config/__init__.py
Lines changed: 2 additions & 0 deletions

@@ -30,6 +30,7 @@
     # EluConfig,
     HardtanhConfig,
     LeakyReLUConfig,
+    LogConfig,
     MaximumConfig,
     MaxPool2dConfig,
     MeanDimConfig,
@@ -82,6 +83,7 @@
     HardswishConfig,
     LeakyReLUConfig,
     LinearConfig,
+    LogConfig,
     MaxDimConfig,
     MaximumConfig,
     MaxPool2dConfig,

backends/xnnpack/partition/config/generic_node_configs.py
Lines changed: 7 additions & 0 deletions

@@ -357,6 +357,13 @@ def supported_precision_types(self) -> List[ConfigPrecisionType]:
         return [ConfigPrecisionType.FP32]
 
 
+class LogConfig(GenericNodePartitionerConfig):
+    target_name = "log.default"
+
+    def supported_precision_types(self) -> List[ConfigPrecisionType]:
+        return [ConfigPrecisionType.FP32]
+
+
 class MeanDimConfig(GenericNodePartitionerConfig):
     target_name = "mean.dim"
 

backends/xnnpack/partition/configs.py
Lines changed: 1 addition & 0 deletions

@@ -64,6 +64,7 @@
     exir_ops.edge.aten.leaky_relu.default,
     exir_ops.edge.aten.addmm.default,  # TODO(T163877189) add constraint for addmm
     exir_ops.edge.aten.rsqrt.default,
+    exir_ops.edge.aten.log.default,
 ]
 
 SUPPORTED_MODULES = [
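
A quick sanity check that the op now sits in the legacy partitioner's allow-list — a hypothetical snippet, with names taken from this diff and the exir_ops import used elsewhere in the file:

    from executorch.exir.dialects._ops import ops as exir_ops
    from executorch.backends.xnnpack.partition.configs import SUPPORTED_OPS

    # aten.log should now be accepted by the XNNPACK partitioner.
    assert exir_ops.edge.aten.log.default in SUPPORTED_OPS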

backends/xnnpack/runtime/XNNCompiler.cpp
Lines changed: 31 additions & 0 deletions

@@ -1418,6 +1418,36 @@ Error defineReciprocalSquareRootNode(
   return Error::Ok;
 }
 
+/*
+Define serialized log node into the subgraph, using the remapped ids
+to map the serialized ids, to the new ids generated when defining the
+tensor value
+*/
+Error defineLogNode(
+    xnn_subgraph_t subgraph_ptr,
+    const std::unordered_map<uint32_t, uint32_t>& remapped_ids,
+    const NodePtr node,
+    const fb_xnnpack::XNNGraph* graph) noexcept {
+  MAYBE_UNUSED(graph);
+
+  auto graph_node = node->xnode_union_as_XNNLog();
+
+  xnn_status status = xnn_define_log(
+      subgraph_ptr,
+      remapped_ids.at(graph_node->input_id()),
+      remapped_ids.at(graph_node->output_id()),
+      graph_node->flags());
+
+  ET_CHECK_OR_RETURN_ERROR(
+      status == xnn_status_success,
+      Internal,
+      "Failed to create log node %i with code: %s",
+      node->debug_handle(),
+      xnn_status_to_string(status));
+
+  return Error::Ok;
+}
+
 /*
 Define serialized ceiling node into the subgraph, using the remapped ids
 to map the serialized ids, to the new ids generated when defining the
@@ -1981,6 +2011,7 @@ DefineNodeFunc getDefineNodeFunc(fb_xnnpack::XNodeUnion nodeType) {
     _DEFINE(Ceiling)
     _DEFINE(Hardswish)
    _DEFINE(LeakyReLU)
+    _DEFINE(Log)
     _DEFINE(Maximum)
     _DEFINE(Negate)
     _DEFINE(Square)
