
Commit 96adfcf

Update on "[kernel] Add template based unboxing"

Adds a new feature that allows users to bypass codegen and register their kernels directly. This is particularly useful for custom kernels implementing custom ops. Example usage:

```
Tensor& my_op(RuntimeContext& ctx, const Tensor& self, const Tensor& other, Tensor& out) {
  // ...
  return out;
}

Kernel my_kernel = Kernel::make_boxed_kernel("my_ns::my_op", EXECUTORCH_FN(my_op));
register_kernels({my_kernel});
```

Differential Revision: [D51553099](https://our.internmc.facebook.com/intern/diff/D51553099)

[ghstack-poisoned]
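To make the intended flow concrete, here is a slightly fuller sketch of how the pieces might fit together in one translation unit. `Tensor`, `RuntimeContext`, `Kernel`, `EXECUTORCH_FN`, and `register_kernels` are taken from the example above; the include paths, namespace qualifications, and the `register_my_kernels()` wrapper are illustrative assumptions, not part of this diff:

```
// Sketch only: the include paths below are assumptions and may differ
// from the actual tree layout.
#include <executorch/runtime/core/exec_aten/exec_aten.h>   // Tensor (assumed)
#include <executorch/runtime/kernel/operator_registry.h>   // Kernel, register_kernels (assumed)

using exec_aten::Tensor;                 // assumed alias location
using torch::executor::Kernel;           // assumed namespace
using torch::executor::RuntimeContext;   // assumed namespace
using torch::executor::register_kernels; // assumed namespace

// An unboxed kernel: a plain typed C++ function. Template-based unboxing
// generates the boxed-to-unboxed glue at compile time, so no codegen step
// is needed to make it callable by the runtime.
Tensor& my_op(RuntimeContext& ctx, const Tensor& self, const Tensor& other, Tensor& out) {
  (void)ctx;  // ctx would be used for error reporting / temp allocation
  // ... compute `out` from `self` and `other` ...
  return out;
}

// Hypothetical init helper: wrap the typed function into a boxed Kernel
// and register it under its fully qualified operator name, once at startup.
void register_my_kernels() {
  Kernel my_kernel =
      Kernel::make_boxed_kernel("my_ns::my_op", EXECUTORCH_FN(my_op));
  register_kernels({my_kernel});
}
```

The name string passed to `make_boxed_kernel` is what the runtime matches against the operators referenced by an exported program, so it has to line up with the schema the model was exported against.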
2 parents: e927f11 + 570c0aa

324 files changed: +11141 additions, -4510 deletions


.github/workflows/update-viablestrict.yml

Lines changed: 1 addition & 1 deletion

@@ -20,6 +20,6 @@ jobs:
       with:
         repository: pytorch/executorch
         stable-branch: viable/strict
-        requires: '[\"pull\", \"lint\", \"Build documentation\"]'
+        requires: '[\"pull\", \"lint\", \"trunk\", \"Build documentation\"]'
        secret-bot-token: ${{ secrets.UPDATEBOT_TOKEN }}
        rockset-api-key: ${{ secrets.ROCKSET_API_KEY }}

CMakeLists.txt

Lines changed: 37 additions & 22 deletions

@@ -98,7 +98,7 @@ endif()
 # data into sections so they can be properly gc'd. -s: strip symbol.
 # -fno-exceptions -fno-rtti: disables exceptions and runtime type.
 set(CMAKE_CXX_FLAGS_RELEASE
-    "-O2 -ffunction-sections -fdata-sections -fno-exceptions -fno-rtti")
+    "-ffunction-sections -fdata-sections -fno-exceptions -fno-rtti")
 if(NOT APPLE)
   set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -s")
 endif()
@@ -125,6 +125,10 @@ option(EXECUTORCH_BUILD_SIZE_TEST "Whether to build size test" OFF)
 option(EXECUTORCH_BUILD_XNNPACK
        "Build xnn_executor_runner which depends on XNNPACK" OFF)

+# Build the vulkan delegate along with the vulkan executor_runner
+option(EXECUTORCH_BUILD_VULKAN
+       "Build the Vulkan delegate and the Vulkan executor_runner" OFF)
+
 option(EXECUTORCH_BUILD_SDK
        "Build the ExecuTorch SDK library and the SDK example runner.")

@@ -144,6 +148,10 @@ option(EXECUTORCH_BUILD_EXTENSION_MODULE
 option(EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL
        "Build the extension/runner_util directory" OFF)

+# Build test binaries that rely on googletest
+option(EXECUTORCH_BUILD_GTESTS
+       "Build googletest based test binaries" OFF)
+
 if(NOT BUCK2)
   set(BUCK2 buck2)
 endif()
@@ -315,7 +323,12 @@ if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
   target_compile_options(executor_runner PUBLIC ${_common_compile_options})
 endif()

-# Add Android JNI subdirectory
+# Add googletest if any test targets should be built
+if(EXECUTORCH_BUILD_GTESTS)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/third-party/googletest)
+endif()
+
+option(EXECUTORCH_BUILD_ANDROID_JNI "Build Android JNI" OFF)
 if(EXECUTORCH_BUILD_ANDROID_JNI)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/android)
 endif()
@@ -327,6 +340,10 @@ if(EXECUTORCH_BUILD_SDK)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/sdk)
 endif()

+if(EXECUTORCH_BUILD_EXTENSION_APPLE)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/apple)
+endif()
+
 if(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/data_loader)
 endif()
@@ -344,56 +361,53 @@ if(EXECUTORCH_BUILD_XNNPACK)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/xnnpack)
 endif()

+if(EXECUTORCH_BUILD_VULKAN)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/vulkan)
+endif()
+
 option(EXECUTORCH_BUILD_QNN "Build the backends/qualcomm directory" OFF)
 if(EXECUTORCH_BUILD_QNN)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/qualcomm)
 endif()

-# Build Arm Baremetal backend
 option(EXECUTORCH_BUILD_ARM_BAREMETAL
        "Build the Arm Baremetal flow for Cortex-M and Ethos-U" OFF)
 if(EXECUTORCH_BUILD_ARM_BAREMETAL)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/arm)
 endif()

+option(EXECUTORCH_BUILD_MPS "Build the MPS Backend" OFF)
 if(EXECUTORCH_BUILD_MPS)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/apple/mps)
 endif()

-# Build CoreML backend
-option(EXECUTORCH_BUILD_COREML "Build the backends/apple/coreml directory" OFF)
+option(EXECUTORCH_BUILD_COREML "Build the Core ML Backend" OFF)
 if(EXECUTORCH_BUILD_COREML)
-  # CoreML delegate library can only be built with iOS toolchain
-  if(CMAKE_TOOLCHAIN_FILE MATCHES ".*(iOS\.)|(ios\.toolchain\.)cmake$")
-    add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/apple/coreml)
-  else()
-    message(
-      FATAL_ERROR "executorch: Building CoreML delegate requires iOS toolchain")
-  endif()
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/apple/coreml)
 endif()

-# Build pybind
-option(EXECUTORCH_BUILD_PYBIND "Build pybindings" OFF)
+option(EXECUTORCH_BUILD_PYBIND "Build the Python Bindings" OFF)
 if(EXECUTORCH_BUILD_PYBIND)
+
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/third-party/pybind11)
+
   if(NOT EXECUTORCH_BUILD_EXTENSION_DATA_LOADER)
-    # This has already been added if above flag is on
     add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/data_loader)
   endif()
-  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/sdk)
-  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/third-party/pybind11)

-  if(PYBIND_LINK_COREML)
-    add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/apple/coreml)
+  if(NOT EXECUTORCH_BUILD_SDK)
+    add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/sdk)
+  endif()
+
+  if(EXECUTORCH_BUILD_COREML)
     set(PYBIND_LINK_COREML "coremldelegate")
   endif()

-  if(PYBIND_LINK_MPS)
-    add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/apple/mps)
+  if(EXECUTORCH_BUILD_MPS)
     set(PYBIND_LINK_MPS "mpsdelegate")
   endif()

   if(EXECUTORCH_BUILD_XNNPACK)
-    # set PYBIND_LINK_XNNPACK variable to link with portable lib library
     set(PYBIND_LINK_XNNPACK "xnnpack_backend")
   endif()
@@ -443,5 +457,6 @@ if(EXECUTORCH_BUILD_PYBIND)
   install(TARGETS portable_lib
           LIBRARY DESTINATION executorch/extension/pybindings)
 endif()
+
 # Print all summary
 executorch_print_configuration_summary()

backends/apple/mps/operators/node_visitor.py

Lines changed: 0 additions & 1 deletion

@@ -235,7 +235,6 @@ def get_serialized_data(
     def get_serialized_id(
         self, node: Union[torch.fx.Node, float, int], mps_graph: MPSGraph
     ) -> int:
-
        """
        Map a tensor to a unique id. If the tensor was already mapped, return
        the existent id.

backends/arm/arm_backend.py

Lines changed: 5 additions & 3 deletions

@@ -128,9 +128,11 @@ def preprocess(  # noqa: C901
         # Add output to TOSA graph
         tosa_graph.currRegion.currBasicBlock.addTensor(
             output.name,
-            inputs[0].shape
-            if is_permute_node_before_addmm(node)
-            else output.shape,
+            (
+                inputs[0].shape
+                if is_permute_node_before_addmm(node)
+                else output.shape
+            ),
            ts.DType.INT8 if is_quant_node(node) else output.dtype,
        )

backends/arm/arm_vela.py

Lines changed: 1 addition & 0 deletions

@@ -12,6 +12,7 @@

 import numpy as np

+
 # Pack either input or output tensor block, compose the related arrays into
 # per-io structs to simplify runtime use.
 def vela_bin_pack_io(prefix, data):

backends/arm/test/ops/test_add.py

Lines changed: 1 addition & 3 deletions

@@ -88,7 +88,7 @@ def _test_add_tosa_BI_pipeline(
             .to_executorch()
         )
         if TOSA_REF_MODEL_INSTALLED:
-            tester.run_method().compare_outputs()
+            tester.run_method().compare_outputs(qtol=1)
         else:
             logger.warning(
                 "TOSA ref model tool not installed, skip numerical correctness tests"
@@ -118,8 +118,6 @@ def test_add_tosa_MI(self):
         test_data = (torch.randn(4, 4, 4),)
         self._test_add_tosa_MI_pipeline(self.Add(), test_data)

-    # TODO: Will this type of parametrization be supported? pytest seem
-    # have issue with it.
     @parameterized.expand(
         [
             (torch.ones(5),),  # test_data

backends/arm/test/tester/arm_tester.py

Lines changed: 111 additions & 41 deletions

@@ -4,7 +4,7 @@
 # LICENSE file in the root directory of this source tree.

 from enum import Enum
-from typing import Optional, Tuple
+from typing import List, Optional, Tuple, Union

 import torch
 from executorch.backends.arm.arm_backend import (
@@ -15,6 +15,7 @@
 from executorch.backends.arm.arm_partitioner import ArmPartitioner

 from executorch.backends.arm.test.tosautil.tosa_test_utils import (
+    QuantizationParams,
     TosaProfile,
     TosaTestUtils,
 )
@@ -32,6 +33,7 @@
     get_symmetric_quantization_config,
     XNNPACKQuantizer,
 )
+from torch.export import ExportedProgram


 class ArmBackendSelector(Enum):
@@ -61,6 +63,7 @@ def __init__(
             TosaProfile.BI or TosaProfile.MI
         """
         self.tosa_test_util = None
+        self.is_quantized = profile == TosaProfile.BI
         if backend == ArmBackendSelector.TOSA:
             self.tosa_test_util = TosaTestUtils(profile=profile)
             # The spec below tiggers arm_backend.py to output two files:
@@ -119,54 +122,121 @@ def run_method(
         ), "self.tosa_test_util is not initialized, cannot use run_method()"
         inputs_to_run = inputs or self.inputs

-        # TODO: we can't possible need to use all these stages??
-        export_stage = self.stages[
-            self.stage_name(Export)
-        ]  # this is what XNNpack use to get quant params
-        toedge_stage = self.stages[
-            self.stage_name(ToEdge)
-        ]  # this is what get_input_quantization_params use to get quant params
-        partition_stage = self.stages[
-            self.stage_name(Partition)
-        ]  # this is what tosa_ref_dump_inputs use....
-
-        # TODO: I'd prefer to use this TOSA buffer instead of output.tosa,
-        # generated by arm_backend.py. The issue is that we're still depending
-        # on desc.json, which is created from TosaSerializer class, not from
-        # the serialized TOSA buffer. Leave this here for review purposes.
-        # ts_serialized = self._get_serialized_tosa_buffer(  # unused
-        #     partition_stage.artifact
-        # )
-
-        # This is where the torch reference output is calculated and set
-        # TODO: This sets self.quantization_scale, which is duplicates
-        # self.tosa_test_util.quantization.output.scales (?). Fixme.
-        (
-            self.reference_output,
-            self.quantization_scale,
-        ) = self._calculate_reference_output(export_stage.artifact, inputs_to_run)
-
-        # Convert the torch inputs to something TOSA ref model can use
-        tensor_names_and_inputs_np = self.tosa_test_util.convert_inputs_to_tosa(
-            partition_stage.artifact, toedge_stage.artifact, inputs_to_run
+        export_stage = self.stages[self.stage_name(Export)]
+
+        (input_names, qp_input) = self._get_input_params(export_stage.artifact)
+        (output_name, qp_output) = self._get_output_param(export_stage.artifact)
+
+        # Calculate the reference output using the original module or the quant
+        # module. self.quantization_scale is used by compare_outputs() to
+        # calculate the tolerance
+        self.quantization_scale = None if qp_output is None else qp_output.scale
+        if self.is_quantized:
+            module_for_ref = self.stages[self.stage_name(Quantize)].artifact
+        else:
+            module_for_ref = self.original_module
+        self.reference_output = self._calculate_reference_output(
+            module_for_ref, inputs_to_run
         )

         # Run the TOSA ref model to get the output tensor, which will be
         # compared to the torch output in compare_outputs()
         self.stage_output = self.tosa_test_util.run_tosa_ref_model(
-            tensor_names_and_inputs_np
+            params_input=(input_names, qp_input),
+            param_output=(output_name, qp_output),
+            inputs=inputs_to_run,
         )

         return self

-    def _get_serialized_tosa_buffer(self, partition_stage: Partition) -> bytes:
+    def _get_input_params(
+        self, program: ExportedProgram
+    ) -> Tuple[str, Union[List[QuantizationParams], List[None]]]:
         """
-        This is just a prototype...
-        Todo:
-        * The "_0" indicates that there are many lowered modules. Loop it!
-        * There's probably a better way to get this buffer. An API? Yes,
-          it seems the serialize stage does this for you...
+        Get names and optionally quantization parameters for the inputs to
+        this model.
+
+        Args:
+            program (ExportedProgram): The program to get input parameters from
+        Returns:
+            Tuple[str, Optional[QuantizationParams]]: A tuple containing the
+            input node names and their quantization parameters.
+        """
+        input_names = []
+        # E.g. bias and weights are 'placeholders' as well. This is used to
+        # get only the user inputs.
+        usr_inputs = program.graph_signature.user_inputs
+        for node in program.graph.nodes:
+            if node.op == "placeholder" and node.name in usr_inputs:
+                input_names.append(node.name)
+                continue
+
+        if self.is_quantized:
+            quant_params = []
+            for node in program.graph.nodes:
+                if (
+                    node.target
+                    == torch.ops.quantized_decomposed.quantize_per_tensor.default
+                    and node.args[0].name in input_names
+                ):
+                    qp = QuantizationParams(
+                        node_name=node.args[0].name, scale=node.args[1], zp=node.args[2]
+                    )
+                    quant_params.append(qp)
+                    if len(quant_params) == len(
+                        input_names
+                    ):  # break early if we have all the inputs' quantization parameters
+                        break
+            assert len(quant_params) != 0, "Quantization parameters not found"
+            return (input_names, quant_params)
+        else:
+            return (input_names, len(input_names) * [None])  # return a list of None's
+
+    def _get_output_param(
+        self, program: ExportedProgram
+    ) -> Tuple[str, Union[QuantizationParams, None]]:
         """
-        return partition_stage._edge_programs[
-            "forward"
-        ]._graph_module.lowered_module_0.processed_bytes
+        Get the name and optionally quantization parameters for the output of
+        this model.
+
+        Args:
+            program (ExportedProgram): The program to get output parameters from.
+        Returns:
+            Tuple[str, Optional[QuantizationParams]]: A tuple containing the
+            output node name and its quantization parameters.
+        """
+        output_node = None
+        for node in program.graph.nodes:
+            if node.op == "output":
+                output_node = node
+                break
+
+        if self.is_quantized:
+            quant_params = None
+            for node in program.graph.nodes:
+                if (
+                    node.target
+                    == torch.ops.quantized_decomposed.dequantize_per_tensor.default
+                    and node == output_node.args[0][0]
+                ):
+                    quant_params = QuantizationParams(
+                        node_name=node.args[0].name, scale=node.args[1], zp=node.args[2]
+                    )
+                    break  # break early, there's only one output node
+            assert quant_params is not None, "Quantization parameters not found"
+            return (output_node.name, quant_params)
+        else:
+            return (output_node.name, None)
+
+    @staticmethod
+    def _calculate_reference_output(
+        module: Union[torch.fx.GraphModule, torch.nn.Module], inputs
+    ) -> torch.Tensor:
+        """
+        Note: I'd prefer to use the base class method here, but since it uses
+        the exported program, I can't. The partitioner stage clears the
+        state_dict of the exported program, which causes an issue when
+        evaluating the module.
+        """
+
+        return module.forward(*inputs)