
Commit 6dd31a8

Update on "Refactor attention v2"
Pull attention creation out of Transformer/TransformerBlock. Instead, pass the layers into Transformer. The motivation is to let us customize the linear layers in attention for LoRA (e.g. make wq a LoraLinear instead of a regular linear). In the next diff (D73517350), we pull wq, wk, wv, wo out of the attention and pass those in as well. This allows us to customize attention parameters without passing in ModelArgs and doing the customization deep inside attention.py.

I think this modularizes our attention/transformer components, though it also means users have to do some more work to construct the attention layers and pass them to the transformer. It follows the torchtune structure more closely, e.g. https://github.com/pytorch/torchtune/blob/main/torchtune/models/llama3_2/_component_builders.py#L221

Differential Revision: [D73538697](https://our.internmc.facebook.com/intern/diff/D73538697/)

[ghstack-poisoned]
2 parents 6375fc2 + d4c9f8b commit 6dd31a8
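
Below is a minimal sketch of the construction pattern described in the commit message. The class names (LoRALinear, SimpleAttention, TransformerBlock, Transformer) are illustrative stand-ins, not the actual ExecuTorch llama modules touched by this commit: the caller builds the attention (and its projections) and passes the finished layers into the transformer, so wq can be swapped for a LoRA projection without attention.py ever seeing ModelArgs.

```python
# Illustrative sketch only: hypothetical stand-in classes, not the ExecuTorch
# llama Transformer/Attention modules changed in this commit.
import torch
import torch.nn as nn
import torch.nn.functional as F


class LoRALinear(nn.Module):
    """Base linear plus a low-rank update; shown only so a customized wq can
    be injected from outside the attention module."""

    def __init__(self, in_features: int, out_features: int, rank: int = 8):
        super().__init__()
        self.base = nn.Linear(in_features, out_features, bias=False)
        self.lora_a = nn.Linear(in_features, rank, bias=False)
        self.lora_b = nn.Linear(rank, out_features, bias=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.base(x) + self.lora_b(self.lora_a(x))


class SimpleAttention(nn.Module):
    """Attention that receives its projections instead of constructing them
    internally from a ModelArgs-style config."""

    def __init__(self, wq, wk, wv, wo, n_heads: int):
        super().__init__()
        self.wq, self.wk, self.wv, self.wo = wq, wk, wv, wo
        self.n_heads = n_heads

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        b, s, d = x.shape
        q = self.wq(x).view(b, s, self.n_heads, -1).transpose(1, 2)
        k = self.wk(x).view(b, s, self.n_heads, -1).transpose(1, 2)
        v = self.wv(x).view(b, s, self.n_heads, -1).transpose(1, 2)
        out = F.scaled_dot_product_attention(q, k, v, is_causal=True)
        return self.wo(out.transpose(1, 2).reshape(b, s, d))


class TransformerBlock(nn.Module):
    def __init__(self, attention: nn.Module, dim: int):
        super().__init__()
        self.attention = attention  # injected by the caller, not built here
        self.attn_norm = nn.LayerNorm(dim)
        self.mlp_norm = nn.LayerNorm(dim)
        self.mlp = nn.Sequential(
            nn.Linear(dim, 4 * dim), nn.SiLU(), nn.Linear(4 * dim, dim)
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = x + self.attention(self.attn_norm(x))
        return x + self.mlp(self.mlp_norm(x))


class Transformer(nn.Module):
    def __init__(self, layers: nn.ModuleList):
        super().__init__()
        self.layers = layers  # fully constructed blocks are passed in

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        for layer in self.layers:
            x = layer(x)
        return x


# Caller-side construction: wq becomes a LoRA projection while wk/wv/wo stay
# plain linears; the attention and transformer code need no LoRA knowledge.
dim, n_heads, n_layers = 64, 4, 2
layers = nn.ModuleList(
    TransformerBlock(
        SimpleAttention(
            wq=LoRALinear(dim, dim),
            wk=nn.Linear(dim, dim, bias=False),
            wv=nn.Linear(dim, dim, bias=False),
            wo=nn.Linear(dim, dim, bias=False),
            n_heads=n_heads,
        ),
        dim,
    )
    for _ in range(n_layers)
)
model = Transformer(layers)
print(model(torch.randn(1, 8, dim)).shape)  # torch.Size([1, 8, 64])
```

The trade-off matches the one noted in the message: construction moves to the caller (as in torchtune's component builders), in exchange for attention internals that no longer depend on model-wide config.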

File tree

69 files changed: +3764 -449 lines changed


Package.swift

Lines changed: 3 additions & 1 deletion
@@ -96,8 +96,10 @@ let package = Package(
         .copy("resources/add.pte")
       ],
       linkerSettings: [
+        .linkedLibrary("c++"),
         .unsafeFlags([
-          "-Xlinker", "-all_load",
+          "-Xlinker", "-force_load",
+          "-Xlinker", "cmake-out/kernels_portable.xcframework/macos-arm64/libkernels_portable_macos.a",
         ])
       ]
     )

backends/apple/mps/setup.md

Lines changed: 3 additions & 3 deletions
@@ -76,12 +76,12 @@ cd executorch
 ## Run the mv3 generated model using the mps_executor_runner

 ```bash
-./cmake-out/examples/apple/mps/mps_executor_runner --model_path mv3_mps_bundled_fp16.pte --bundled_program
+./cmake-out/examples/apple/mps/mps_executor_runner --model_path mv3_mps_float16_bundled.pte --bundled_program
 ```

 - You should see the following results. Note that no output file will be generated in this example:
 ```
-I 00:00:00.003290 executorch:mps_executor_runner.mm:286] Model file mv3_mps_bundled_fp16.pte is loaded.
+I 00:00:00.003290 executorch:mps_executor_runner.mm:286] Model file mv3_mps_float16_bundled.pte is loaded.
 I 00:00:00.003306 executorch:mps_executor_runner.mm:292] Program methods: 1
 I 00:00:00.003308 executorch:mps_executor_runner.mm:294] Running method forward
 I 00:00:00.003311 executorch:mps_executor_runner.mm:349] Setting up non-const buffer 1, size 606112.
@@ -118,7 +118,7 @@ python3 -m examples.apple.mps.scripts.mps_example --model_name="mv3" --generate_
 ```
 2. Run your Program on the ExecuTorch runtime and generate an [ETDump](../../../docs/source/etdump.md).
 ```
-./cmake-out/examples/apple/mps/mps_executor_runner --model_path mv3_mps_bundled_fp16.pte --bundled_program --dump-outputs
+./cmake-out/examples/apple/mps/mps_executor_runner --model_path mv3_mps_float16_bundled.pte --bundled_program --dump-outputs
 ```
 3. Create an instance of the Inspector API by passing in the ETDump you have sourced from the runtime along with the optionally generated ETRecord from step 1.
 ```bash

backends/arm/operator_support/convolution_support.py

Lines changed: 8 additions & 1 deletion
@@ -11,7 +11,11 @@
     register_tosa_support_check,
     SupportedTOSAOperatorCheck,
 )
-from executorch.backends.arm.tosa_specification import Tosa_0_80, TosaSpecification
+from executorch.backends.arm.tosa_specification import (
+    Tosa_0_80,
+    Tosa_1_00,
+    TosaSpecification,
+)
 from executorch.exir.dialects._ops import ops as exir_ops


@@ -43,6 +47,9 @@ def is_node_tosa_supported(self, node: fx.Node, tosa_spec: TosaSpecification):

         # Hardware specific constraints
         if not (isinstance(tosa_spec, Tosa_0_80) and tosa_spec.is_U55_subset):
+            # TODO remove this once TOSA 1.0 support for u55 is added.
+            if isinstance(tosa_spec, Tosa_1_00) and "u55" in tosa_spec.extensions:
+                return False
             return True
         else:
             return self._is_node_supported_u55(node)

backends/arm/operators/op_abs.py

Lines changed: 129 additions & 5 deletions
@@ -4,12 +4,11 @@
 # LICENSE file in the root directory of this source tree.

 # pyre-unsafe
-from typing import List
+from typing import Any, List

 import executorch.backends.arm.tosa_quant_utils as tqutils
 import executorch.backends.arm.tosa_utils as tutils

-import tosa_tools.v0_80.serializer.tosa_serializer as ts  # type: ignore
 from executorch.backends.arm.operators.node_visitor import (
     NodeVisitor,
     register_node_visitor,
@@ -33,10 +32,13 @@ def __init__(self, *args):
     def define_node(
         self,
         node: Node,
-        tosa_graph: ts.TosaSerializer,
+        tosa_graph: Any,
         inputs: List[TosaArg],
         output: TosaArg,
     ) -> None:
+
+        import tosa_tools.v0_80.serializer.tosa_serializer as ts  # type: ignore
+
         # Specification (0.80) states that input and output types
         # should all be the same
         if not (inputs[0].dtype == output.dtype):
@@ -53,7 +55,7 @@ def define_node(
         if inputs[0].dtype == ts.DType.INT8:
             rescaled_inputs, scale_back = tqutils.insert_rescale_ops_to_int32(
                 tosa_graph, inputs, node
-            )
+            )  # type: ignore[possibly-undefined]
         else:
             # input[0].dtype == ts.DType.INT32
             # Non quantized input, natively support by TOSA.abs
@@ -96,10 +98,13 @@ def __init__(self, *args):
     def define_node(
         self,
         node: Node,
-        tosa_graph: ts.TosaSerializer,
+        tosa_graph: Any,
         inputs: List[TosaArg],
         output: TosaArg,
     ) -> None:
+
+        import tosa_tools.v0_80.serializer.tosa_serializer as ts  # type: ignore
+
         # Specification (0.80) states that input and output types
         # should all be the same
         if not (inputs[0].dtype == output.dtype):
@@ -129,3 +134,122 @@ def define_node(
             [output.name],
             None,
         )
+
+
+@register_node_visitor
+class AbsVisitor_INT(NodeVisitor):
+    target = "aten.abs.default"
+
+    tosa_specs = [
+        TosaSpecification.create_from_string("TOSA-1.0+INT"),
+    ]
+
+    def __init__(self, *args):
+        super().__init__(*args)
+
+    def define_node(
+        self,
+        node: Node,
+        tosa_graph: Any,
+        inputs: List[TosaArg],
+        output: TosaArg,
+    ) -> None:
+
+        import serializer.tosa_serializer as ts  # type: ignore
+
+        # Specification (1.0) states that input and output types
+        # should all be the same
+        if not (inputs[0].dtype == output.dtype):
+            raise ValueError(
+                "All inputs and outputs need same dtype."
+                f"Got {inputs[0].dtype=}, {output.dtype=}"
+            )
+        # Handle int8 (quantized) and int32
+        if not (inputs[0].dtype in [ts.DType.INT8, ts.DType.INT32]):
+            raise ValueError(
+                "All inputs need to be INT8 or INT32." f"Got {inputs[0].dtype=}"
+            )
+
+        scale_back = 1.0
+        if inputs[0].dtype == ts.DType.INT8:
+            rescaled_inputs, scale_back = tqutils.insert_rescale_ops_to_int32(
+                tosa_graph, inputs, node, self.tosa_specs
+            )  # type: ignore[possibly-undefined]
+        else:
+            # input[0].dtype == ts.DType.INT32
+            # Non quantized input, natively support by TOSA.abs
+            rescaled_inputs = inputs
+
+        if output.dtype == ts.DType.INT8:
+            broadcasted_shape = tutils.tosa_shape(output.shape, output.dim_order)
+            abs_output = tosa_graph.addIntermediate(broadcasted_shape, ts.DType.INT32)
+        else:
+            # output.dtype == ts.DType.INT32
+            abs_output = output
+
+        # Do the INT32 Abs
+        tosa_graph.addOperator(
+            ts.TosaOp.Op().ABS,
+            [
+                rescaled_inputs[0].name,
+            ],
+            [abs_output.name],
+            None,
+        )
+
+        if output.dtype == ts.DType.INT8:
+            # Scale output back to 8 bit
+            # pyre-ignore
+            tqutils.insert_rescale_op_to_int8(
+                tosa_graph, abs_output, scale_back, node, self.tosa_specs
+            )  # type: ignore[possibly-undefined]
+
+
+@register_node_visitor
+class AbsVisitor_FP(AbsVisitor_INT):
+    # inheriting 'target' from BI class
+
+    tosa_specs = [TosaSpecification.create_from_string("TOSA-1.0+FP")]
+
+    def __init__(self, *args):
+        super().__init__(*args)
+
+    def define_node(
+        self,
+        node: Node,
+        tosa_graph: Any,
+        inputs: List[TosaArg],
+        output: TosaArg,
+    ) -> None:
+
+        import serializer.tosa_serializer as ts  # type: ignore
+
+        # Specification (1.0) states that input and output types
+        # should all be the same
+        if not (inputs[0].dtype == output.dtype):
+            raise ValueError(
+                "All inputs and output need same dtype."
+                f"Got {inputs[0].dtype=}, {output.dtype=}"
+            )
+
+        if inputs[0].dtype in [ts.DType.INT8, ts.DType.INT32]:
+            # Call the inherited define_node for handling integers
+            super().define_node(node, tosa_graph, inputs, output)
+        else:
+            # FP32 Abs lowering
+
+            if not (inputs[0].dtype == ts.DType.FP32):
+                raise ValueError(
+                    "All inputs need to be FP32." f"Got {inputs[0].dtype=}"
+                )
+
+            if not (output.dtype == ts.DType.FP32):
+                raise ValueError("All outputs need to be FP32." f"Got {output.dtype=}")
+
+            # MI lowering
+            tosa_graph.addOperator(
+                ts.TosaOp.Op().ABS,
+                [inputs[0].name],
+                [output.name],
+                None,
+            )
