pytorch
diff --git a/‎.github/scripts/extract_benchmark_results.py
Lines changed: 104 additions & 49 deletions b/‎.github/scripts/extract_benchmark_results.py
Lines changed: 104 additions & 49 deletions
diff --git a/‎.github/workflows/android-perf.yml
Lines changed: 17 additions & 7 deletions b/‎.github/workflows/android-perf.yml
Lines changed: 17 additions & 7 deletions
diff --git a/‎.github/workflows/apple-perf.yml
Lines changed: 17 additions & 7 deletions b/‎.github/workflows/apple-perf.yml
Lines changed: 17 additions & 7 deletions
diff --git a/‎CMakeLists.txt
Lines changed: 15 additions & 16 deletions b/‎CMakeLists.txt
Lines changed: 15 additions & 16 deletions
diff --git a/‎backends/cadence/aot/compiler.py
Lines changed: 22 additions & 4 deletions b/‎backends/cadence/aot/compiler.py
Lines changed: 22 additions & 4 deletions
diff --git a/‎backends/cadence/hifi/kernels/kernels.h
Lines changed: 3 additions & 3 deletions b/‎backends/cadence/hifi/kernels/kernels.h
Lines changed: 3 additions & 3 deletions
diff --git a/‎backends/cadence/hifi/third-party/nnlib/xa_nn_elm_minimum_maximum_f32.c
Lines changed: 7 additions & 7 deletions b/‎backends/cadence/hifi/third-party/nnlib/xa_nn_elm_minimum_maximum_f32.c
Lines changed: 7 additions & 7 deletions
diff --git a/‎backends/test/README.md b/‎backends/test/README.md
diff --git a/‎backends/test/TARGETS
Lines changed: 8 additions & 0 deletions b/‎backends/test/TARGETS
Lines changed: 8 additions & 0 deletions
@@ -310,6 +310,7 @@ def transform(
     workflow_run_attempt: int,
     job_name: str,
     job_id: int,
+    schema_version: str,
 ) -> List:
     """
     Transform the benchmark results into the format writable into the benchmark database
@@ -319,45 +320,91 @@ def transform(
     for r in benchmark_results:
         r["deviceInfo"]["device"] = job_name
 
-    # TODO (huydhn): This is the current schema of the database oss_ci_benchmark_v2,
-    # and I'm trying to fit ET benchmark results into it, which is kind of awkward.
-    # However, the schema is going to be updated soon
-    return [
-        {
-            # GH-info to identify where the benchmark is run
-            "repo": repo,
-            "head_branch": head_branch,
-            "workflow_id": workflow_run_id,
-            "run_attempt": workflow_run_attempt,
-            "job_id": job_id,
-            # The model
-            "name": f"{r['benchmarkModel']['name']} {r['benchmarkModel'].get('backend', '')}".strip(),
-            "dtype": (
-                r["benchmarkModel"]["quantization"]
-                if r["benchmarkModel"]["quantization"]
-                else "unknown"
-            ),
-            # The metric value
-            "metric": r["metric"],
-            "actual": r["actualValue"],
-            "target": r["targetValue"],
-            # The device
-            "device": r["deviceInfo"]["device"],
-            "arch": r["deviceInfo"].get("os", ""),
-            # Not used here, just set it to something unique here
-            "filename": workflow_name,
-            "test_name": app_type,
-            "runner": job_name,
-        }
-        for r in benchmark_results
-    ]
+    if schema_version == "v2":
+        # TODO (huydhn): Clean up this branch after ExecuTorch dashboard migrates to v3
+        return [
+            {
+                # GH-info to identify where the benchmark is run
+                "repo": repo,
+                "head_branch": head_branch,
+                "workflow_id": workflow_run_id,
+                "run_attempt": workflow_run_attempt,
+                "job_id": job_id,
+                # The model
+                "name": f"{r['benchmarkModel']['name']} {r['benchmarkModel'].get('backend', '')}".strip(),
+                "dtype": (
+                    r["benchmarkModel"]["quantization"]
+                    if r["benchmarkModel"]["quantization"]
+                    else "unknown"
+                ),
+                # The metric value
+                "metric": r["metric"],
+                "actual": r["actualValue"],
+                "target": r["targetValue"],
+                # The device
+                "device": r["deviceInfo"]["device"],
+                "arch": r["deviceInfo"].get("os", ""),
+                # Not used here, just set it to something unique here
+                "filename": workflow_name,
+                "test_name": app_type,
+                "runner": job_name,
+            }
+            for r in benchmark_results
+        ]
+    elif schema_version == "v3":
+        # From https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database
+        #
+        # NB: Everything derived from a benchmark record must be computed per
+        # record, inside the loop. Reading r outside of it would pick up the
+        # variable leaked by the device renaming loop above (the last record)
+        # and stamp that single quantization value onto every row
+        v3_records = []
+        for r in benchmark_results:
+            benchmark_model = r["benchmarkModel"]
+            device_info = r["deviceInfo"]
+            # Fall back to "unknown" when the quantization is empty
+            quantization = benchmark_model["quantization"] or "unknown"
+
+            v3_records.append(
+                {
+                    "benchmark": {
+                        "name": "ExecuTorch",
+                        "mode": "inference",
+                        "dtype": quantization,
+                        "extra_info": {
+                            "app_type": app_type,
+                        },
+                    },
+                    "model": {
+                        "name": benchmark_model["name"],
+                        "type": "OSS model",
+                        "backend": benchmark_model.get("backend", ""),
+                        "extra_info": {
+                            "quantization": quantization,
+                        },
+                    },
+                    "metric": {
+                        "name": r["metric"],
+                        "benchmark_values": [r["actualValue"]],
+                        "target_value": r["targetValue"],
+                        "extra_info": {
+                            "method": r.get("method", ""),
+                        },
+                    },
+                    "runners": [
+                        {
+                            "name": device_info["device"],
+                            "type": device_info["os"],
+                            "avail_mem_in_gb": device_info.get("availMem", ""),
+                            "total_mem_in_gb": device_info.get("totalMem", ""),
+                        }
+                    ],
+                }
+            )
+        return v3_records
 
 
 def main() -> None:
     args = parse_args()
 
-    # Across all devices
-    all_benchmark_results = []
+    # Across all devices, keeping both schemas for now until ExecuTorch dashboard migrates to v3
+    all_benchmark_results = {
+        "v2": [],
+        "v3": [],
+    }
 
     with open(args.artifacts) as f:
         for artifact in json.load(f):
@@ -384,23 +431,31 @@ def main() -> None:
                 )
 
             if benchmark_results:
-                benchmark_results = transform(
-                    app_type,
-                    benchmark_results,
-                    args.repo,
-                    args.head_branch,
-                    args.workflow_name,
-                    args.workflow_run_id,
-                    args.workflow_run_attempt,
-                    job_name,
-                    extract_job_id(args.artifacts),
-                )
-                all_benchmark_results.extend(benchmark_results)
+                for schema in all_benchmark_results.keys():
+                    results = transform(
+                        app_type,
+                        benchmark_results,
+                        args.repo,
+                        args.head_branch,
+                        args.workflow_name,
+                        args.workflow_run_id,
+                        args.workflow_run_attempt,
+                        job_name,
+                        extract_job_id(args.artifacts),
+                        schema,
+                    )
+                    all_benchmark_results[schema].extend(results)
+
+    for schema in all_benchmark_results.keys():
+        if not all_benchmark_results.get(schema):
+            # Nothing was extracted for this schema, so don't create an empty directory
+            continue
+
+        output_dir = os.path.join(args.output_dir, schema)
+        # The workflow calls this script in a loop, so the per-schema directory
+        # may already exist from a previous invocation. os.mkdir would raise
+        # FileExistsError in that case, and would also fail when the parent
+        # output directory is missing
+        os.makedirs(output_dir, exist_ok=True)
 
-    if all_benchmark_results:
         output_file = os.path.basename(args.artifacts)
-        with open(f"{args.output_dir}/{output_file}", "w") as f:
-            json.dump(all_benchmark_results, f)
+        with open(f"{output_dir}/{output_file}", "w") as f:
+            json.dump(all_benchmark_results[schema], f)
 
 
 if __name__ == "__main__":
 
@@ -298,15 +298,25 @@ jobs:
               --workflow-run-attempt ${{ github.run_attempt }}
           done
 
-          ls -lah benchmark-results
-
-          for BENCHMARK_RESULTS in benchmark-results/*.json; do
-            cat "${BENCHMARK_RESULTS}"
-            echo
+          for SCHEMA in v2 v3; do
+            for BENCHMARK_RESULTS in benchmark-results/"${SCHEMA}"/*.json; do
+              cat "${BENCHMARK_RESULTS}"
+              echo
+            done
           done
 
-      - name: Upload the benchmark results
+      # TODO (huydhn): Remove v2 schema once the benchmark dashboard finishes the migration
+      - name: Upload the benchmark results (v2)
+        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
+        with:
+          benchmark-results-dir: benchmark-results/v2
+          dry-run: false
+          schema-version: v2
+
+      - name: Upload the benchmark results (v3)
         uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
         with:
-          benchmark-results-dir: 'benchmark-results'
+          benchmark-results-dir: benchmark-results/v3
           dry-run: false
+          schema-version: v3
+          github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -372,15 +372,25 @@ jobs:
               --workflow-run-attempt ${{ github.run_attempt }}
           done
 
-          ls -lah benchmark-results
-
-          for BENCHMARK_RESULTS in benchmark-results/*.json; do
-            cat "${BENCHMARK_RESULTS}"
-            echo
+          for SCHEMA in v2 v3; do
+            for BENCHMARK_RESULTS in benchmark-results/"${SCHEMA}"/*.json; do
+              cat "${BENCHMARK_RESULTS}"
+              echo
+            done
           done
 
-      - name: Upload the benchmark results
+      # TODO (huydhn): Remove v2 schema once the benchmark dashboard finishes the migration
+      - name: Upload the benchmark results (v2)
+        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
+        with:
+          benchmark-results-dir: benchmark-results/v2
+          dry-run: false
+          schema-version: v2
+
+      - name: Upload the benchmark results (v3)
         uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
         with:
-          benchmark-results-dir: 'benchmark-results'
+          benchmark-results-dir: benchmark-results/v3
           dry-run: false
+          schema-version: v3
+          github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -56,6 +56,21 @@ if(NOT CMAKE_BUILD_TYPE)
   set(CMAKE_BUILD_TYPE Debug)
 endif()
 
+# Setup RPATH.
+# See https://gitlab.kitware.com/cmake/community/-/wikis/doc/cmake/RPATH-handling
+# Use separate rpaths during build and install phases
+set(CMAKE_SKIP_BUILD_RPATH OFF)
+# Build with the install RPATH already applied, so binaries need no relinking at install time
+set(CMAKE_BUILD_WITH_INSTALL_RPATH ON)
+# Automatically add all linked folders that are NOT in the build directory to
+# the rpath (per library?)
+# TODO: Doesn't work for us right now because we are not installing .so's into the
+# correct locations. For example we have libcustom_ops_aot_lib.so depending on
+# _portable_lib.so, which was eventually put under <site-packages>/executorch/extension/pybindings/
+# but this rpath is not automatically added because at build time it seems `portable_lib`
+# is being built under the same directory, so no extra rpath is being added. To
+# properly fix this we need to install `portable_lib` into the correct path.
+set(CMAKE_INSTALL_RPATH_USE_LINK_PATH ON)
 # ------------------------------ OPTIONS -------------------------------------
 # WARNING: Please don't add example specific options in this CMakeLists.txt.
 # Instead please use `find_package(executorch REQUIRED)` in the example
@@ -682,22 +697,6 @@ if(EXECUTORCH_BUILD_PTHREADPOOL
 endif()
 
 if(EXECUTORCH_BUILD_PYBIND)
-  # Setup RPATH.
-  # See https://gitlab.kitware.com/cmake/community/-/wikis/doc/cmake/RPATH-handling
-  if(APPLE)
-    set(CMAKE_MACOSX_RPATH ON)
-    set(_rpath_portable_origin "@loader_path")
-  else()
-    set(_rpath_portable_origin $ORIGIN)
-  endif(APPLE)
-  # Use separate rpaths during build and install phases
-  set(CMAKE_SKIP_BUILD_RPATH  FALSE)
-  # Don't use the install-rpath during the build phase
-  set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE)
-  set(CMAKE_INSTALL_RPATH "${_rpath_portable_origin}")
-  # Automatically add all linked folders that are NOT in the build directory to
-  # the rpath (per library?)
-  set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/third-party/pybind11)
 
   if(NOT EXECUTORCH_BUILD_EXTENSION_DATA_LOADER)
 
@@ -28,6 +28,7 @@
     to_edge,
 )
 from executorch.exir.pass_base import PassResult
+from torch._inductor.decomposition import remove_decompositions
 from torch.ao.quantization.pt2e.export_utils import model_is_exported
 from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
 
@@ -58,16 +59,33 @@ def convert_pt2(
     Returns a GraphModule with the converted model.
     """
 
+    # Get default decompositions
+    decomp_table = torch.export.default_decompositions()
+    # Select ops to keep
+    ops_to_keep = [
+        torch.ops.aten.conv1d.default,
+        torch.ops.aten.conv2d.default,
+        torch.ops.aten.layer_norm.default,
+        torch.ops.aten.linear.default,
+        torch.ops.aten.matmul.default,
+    ]
+    # Remove decompositions for the ops we want to keep
+    # pyre-fixme[6]: For 1st argument expected `Dict[typing.Callable[..., typing.Any
+    remove_decompositions(decomp_table, ops_to_keep)
     # Export with dynamo
-    model_gm = torch.export.export_for_training(model, inputs).module()
+    model_gm = (
+        torch.export.export_for_training(model, inputs)
+        .run_decompositions(decomp_table)
+        .module()
+    )
 
-    if model_gm_has_SDPA(model_gm):  # pyre-fixme[6]
+    if model_gm_has_SDPA(model_gm):
         # Decompose SDPA
-        DecomposeScaledDotProductAttention(False)(model_gm)  # pyre-fixme[6]
+        DecomposeScaledDotProductAttention(False)(model_gm)
 
         # Swap _safe_softmax with _softmax (see https://github.com/pytorch/pytorch/pull/133882
         # for details).
-        result = ReplaceSafeSoftmaxWithSoftmax()(model_gm)  # pyre-fixme[6]
+        result = ReplaceSafeSoftmaxWithSoftmax()(model_gm)
         assert result is not None
         model_gm = result.graph_module
 
 
@@ -92,9 +92,9 @@ extern "C" WORD32 xa_nn_elm_mul_broadcast_4D_f32xf32_f32(
     const WORD32* const p_inp2_shape);
 
 extern "C" void xa_nn_elm_pow_f32(
-    FLOAT32* restrict z,
-    const FLOAT32* restrict x,
-    const FLOAT32* restrict y,
+    FLOAT32* __restrict__ z,
+    const FLOAT32* __restrict__ x,
+    const FLOAT32* __restrict__ y,
     WORD32 N);
 
 extern "C" WORD32 xa_nn_elm_where_f32xf32_f32(
 
@@ -19,12 +19,12 @@
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 ******************************************************************************/
-#include "nnlib-hifi4/xa_nnlib/include/xa_type_def.h"
-#include "nnlib-hifi4/xa_nnlib/algo/common/include/xa_nnlib_common_fpu.h"
-#include "nnlib-hifi4/xa_nnlib/algo/common/include/xa_nn_common.h"
-#include "nnlib-hifi4/xa_nnlib/algo/common/include/xa_nnlib_err_chk.h"
-#include "nnlib-hifi4/xa_nnlib/algo/kernels/basic/hifi4/xa_nn_basic_state.h"
-#include "nnlib-hifi4/xa_nnlib/include/nnlib/xa_nnlib_kernels_api.h"
+#include "xa_type_def.h"
+#include "xa_nnlib_common_fpu.h"
+#include "xa_nn_common.h"
+#include "xa_nnlib_err_chk.h"
+// #include "xa_nn_basic_state.h"
+#include "xa_nnlib_kernels_api.h"
 
 #if !HAVE_VFPU
 DISCARD_FUN_FOR_NONVOID_RETURN(
@@ -844,4 +844,4 @@ WORD32 xa_nn_elm_minimum_broadcast_4D_f32xf32_f32(FLOAT32 * __restrict__ p_out,
   }
   return 0;
 }
-#endif
+#endif
@@ -0,0 +1,8 @@
+# Any targets that should be shared between fbcode and xplat must be defined in
+# targets.bzl. This file can contain fbcode-only targets.
+
+load(":targets.bzl", "define_common_targets")
+
+oncall("executorch")
+
+define_common_targets(is_fbcode = True)