pytorch
diff --git a/‎.ci/docker/ci_commit_pins/pytorch.txt
Lines changed: 1 addition & 1 deletion b/‎.ci/docker/ci_commit_pins/pytorch.txt
Lines changed: 1 addition & 1 deletion
diff --git a/‎.ci/docker/requirements-ci.txt
Lines changed: 1 addition & 1 deletion b/‎.ci/docker/requirements-ci.txt
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/android-perf.yml
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/android-perf.yml
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/android.yml
Lines changed: 1 addition & 0 deletions b/‎.github/workflows/android.yml
Lines changed: 1 addition & 0 deletions
diff --git a/‎.github/workflows/apple-perf.yml
Lines changed: 3 additions & 3 deletions b/‎.github/workflows/apple-perf.yml
Lines changed: 3 additions & 3 deletions
diff --git a/‎.github/workflows/apple.yml
Lines changed: 4 additions & 3 deletions b/‎.github/workflows/apple.yml
Lines changed: 4 additions & 3 deletions
diff --git a/‎.github/workflows/lint.yml
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/lint.yml
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/upload-android-test-specs.yml
Lines changed: 4 additions & 4 deletions b/‎.github/workflows/upload-android-test-specs.yml
Lines changed: 4 additions & 4 deletions
diff --git a/‎.github/workflows/upload-apple-test-specs.yml
Lines changed: 4 additions & 4 deletions b/‎.github/workflows/upload-apple-test-specs.yml
Lines changed: 4 additions & 4 deletions
diff --git a/‎backends/arm/runtime/ArmBackendEthosU.cpp
Lines changed: 28 additions & 13 deletions b/‎backends/arm/runtime/ArmBackendEthosU.cpp
Lines changed: 28 additions & 13 deletions
diff --git a/‎backends/arm/runtime/VelaBinStream.cpp
Lines changed: 11 additions & 2 deletions b/‎backends/arm/runtime/VelaBinStream.cpp
Lines changed: 11 additions & 2 deletions
diff --git a/‎backends/arm/runtime/VelaBinStream.h
Lines changed: 8 additions & 0 deletions b/‎backends/arm/runtime/VelaBinStream.h
Lines changed: 8 additions & 0 deletions
diff --git a/‎backends/cadence/aot/TARGETS
Lines changed: 1 addition & 0 deletions b/‎backends/cadence/aot/TARGETS
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/cadence/aot/compiler.py
Lines changed: 13 additions & 4 deletions b/‎backends/cadence/aot/compiler.py
Lines changed: 13 additions & 4 deletions
@@ -1 +1 @@
-4b2970f7cd3cdd56883cacf116a8693862f89db5
+d1b87e26e5c4343f5b56bb1e6f89b479b389bfac
@@ -1,5 +1,5 @@
 mpmath==1.3.0
-numpy==1.21.3; python_version == '3.10'
+numpy==1.22.0; python_version == '3.10'
 numpy==1.23.2; python_version == '3.11'
 numpy; python_version >= '3.12'
 PyYAML==6.0.1
 
@@ -292,7 +292,7 @@ jobs:
               --output-dir benchmark-results \
               --repo ${{ github.repository }} \
               --head-branch ${{ github.head_ref || github.ref_name }} \
-              --workflow-name ${{ github.workflow }} \
+              --workflow-name "${{ github.workflow }}" \
               --workflow-run-id ${{ github.run_id }} \
               --workflow-run-attempt ${{ github.run_attempt }}
           done
 
@@ -15,6 +15,7 @@ on:
       - install_requirements.sh
       - examples/demo-apps/android/**
       - extension/android/**
+      - extension/benchmark/android/**
       - extension/module/**
   workflow_dispatch:
 
 
@@ -235,17 +235,17 @@ jobs:
         PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
           build/build_apple_frameworks.sh --coreml --custom --mps --optimized --portable --quantized --xnnpack
 
-        mkdir -p extension/apple/Benchmark/Frameworks
+        mkdir -p extension/benchmark/apple/Benchmark/Frameworks
         for FRAMEWORK in "${FRAMEWORKS[@]}"; do (
-          cp -r "cmake-out/${FRAMEWORK}.xcframework" extension/apple/Benchmark/Frameworks/
+          cp -r "cmake-out/${FRAMEWORK}.xcframework" extension/benchmark/apple/Benchmark/Frameworks/
         ) done
         echo "::endgroup::"
 
         # NB: Although exported models can be copied to this directory and bundled together with the
         # app, we don't use this in CI and rely on AWS extra data parameter to make the model and the
         # tokenizer available to the benchmark. This decouples the app and the model. We just need to
         # create the directory here to pass the build
-        mkdir -p extension/apple/Benchmark/Models
+        mkdir -p extension/benchmark/apple/Benchmark/Models
         ${CONDA_RUN} --no-capture-output \
           build/build_apple_llm_demo.sh ${ARTIFACTS_DIR_NAME}
 
 
@@ -18,6 +18,7 @@ on:
       - build/test_ios_ci.sh
       - examples/demo-apps/apple_ios/**
       - extension/apple/**
+      - extension/benchmark/apple/**
       - extension/module/**
   workflow_dispatch:
 
@@ -272,14 +273,14 @@ jobs:
         PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
           build/build_apple_frameworks.sh --coreml --custom --mps --optimized --portable --quantized --xnnpack
 
-        mkdir -p extension/apple/Benchmark/Frameworks
+        mkdir -p extension/benchmark/apple/Benchmark/Frameworks
         for FRAMEWORK in "${FRAMEWORKS[@]}"; do (
-          cp -r "cmake-out/${FRAMEWORK}.xcframework" extension/apple/Benchmark/Frameworks/
+          cp -r "cmake-out/${FRAMEWORK}.xcframework" extension/benchmark/apple/Benchmark/Frameworks/
         ) done
         echo "::endgroup::"
 
         echo "::group::Build ExecuTorch benchmark app"
-        mkdir -p extension/apple/Benchmark/Models
+        mkdir -p extension/benchmark/apple/Benchmark/Models
         ${CONDA_RUN} --no-capture-output \
           build/build_apple_llm_demo.sh ${ARTIFACTS_DIR_NAME}
         echo "::endgroup::"
@@ -66,7 +66,7 @@ jobs:
         FILES_NEEDS_FORMAT=$(/opt/google-java-format -n extension/android/src/main/java/org/pytorch/executorch/*.java \
           examples/demo-apps/android/ExecuTorchDemo/app/src/main/java/com/example/executorchdemo/*.java \
           examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/*.java \
-          extension/android/benchmark/app/src/main/java/org/pytorch/minibench/*.java)
+          extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/*.java)
         if [ -n "$FILES_NEEDS_FORMAT" ]; then
           echo "Warning: The following files need formatting. Please use google-java-format."
           echo "Use a binary from https://github.com/google/google-java-format/releases/"
 
@@ -4,13 +4,13 @@ on:
   pull_request:
     paths:
       - .github/workflows/upload-android-test-specs.yml
-      - extension/android/benchmark/android-llm-device-farm-test-spec.yml
+      - extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml
   push:
     branches:
       - main
     paths:
       - .github/workflows/upload-android-test-specs.yml
-      - extension/android/benchmark/android-llm-device-farm-test-spec.yml
+      - extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml
 
 concurrency:
   # NB: This concurency group needs to be different than the one used in android-perf, otherwise
@@ -32,7 +32,7 @@ jobs:
             ${{ github.repository }}/${{ github.run_id }}/artifacts
           retention-days: 1
           if-no-files-found: error
-          path: extension/android/benchmark/android-llm-device-farm-test-spec.yml
+          path: extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml
 
   validate-android-test-spec:
     needs: upload-android-test-spec-for-validation
@@ -77,7 +77,7 @@ jobs:
 
       - name: Upload the spec to S3 ossci-android bucket
         shell: bash
-        working-directory: extension/android/benchmark/
+        working-directory: extension/benchmark/android/benchmark/
         env:
           SPEC_FILE: android-llm-device-farm-test-spec.yml
         run: |
 
@@ -4,13 +4,13 @@ on:
   pull_request:
     paths:
       - .github/workflows/upload-apple-test-specs.yml
-      - examples/demo-apps/apple_ios/default-ios-device-farm-appium-test-spec.yml
+      - extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml
   push:
     branches:
       - main
     paths:
       - .github/workflows/upload-apple-test-specs.yml
-      - examples/demo-apps/apple_ios/default-ios-device-farm-appium-test-spec.yml
+      - extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml
 
 concurrency:
   # NB: This concurency group needs to be different than the one used in apple-perf, otherwise
@@ -32,7 +32,7 @@ jobs:
             ${{ github.repository }}/${{ github.run_id }}/artifacts
           retention-days: 1
           if-no-files-found: error
-          path: examples/demo-apps/apple_ios/default-ios-device-farm-appium-test-spec.yml
+          path: extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml
 
   validate-apple-test-spec:
     needs: upload-apple-test-spec-for-validation
@@ -78,7 +78,7 @@ jobs:
 
       - name: Upload the spec to S3 ossci-ios bucket
         shell: bash
-        working-directory: examples/demo-apps/apple_ios
+        working-directory: extension/benchmark/apple/Benchmark/
         env:
           SPEC_FILE: default-ios-device-farm-appium-test-spec.yml
         run: |
 
@@ -15,17 +15,31 @@
 
 #include <ethosu_driver.h>
 
-#include "executorch/backends/arm/runtime/VelaBinStream.h"
-#include "executorch/runtime/backend/interface.h"
-#include "executorch/runtime/core/error.h"
-#include "executorch/runtime/core/evalue.h"
-#include "executorch/runtime/core/exec_aten/util/dim_order_util.h"
-#include "executorch/runtime/core/exec_aten/util/scalar_type_util.h"
+#include <executorch/backends/arm/runtime/VelaBinStream.h>
+#include <executorch/runtime/backend/interface.h>
+#include <executorch/runtime/core/error.h>
+#include <executorch/runtime/core/evalue.h>
+#include <executorch/runtime/core/exec_aten/util/dim_order_util.h>
+#include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>
 
 using namespace std;
 
-namespace torch {
-namespace executor {
+using executorch::aten::ScalarType;
+using executorch::runtime::ArrayRef;
+using executorch::runtime::Backend;
+using executorch::runtime::BackendExecutionContext;
+using executorch::runtime::BackendInitContext;
+using executorch::runtime::CompileSpec;
+using executorch::runtime::DelegateHandle;
+using executorch::runtime::Error;
+using executorch::runtime::EValue;
+using executorch::runtime::FreeableBuffer;
+using executorch::runtime::MemoryAllocator;
+using executorch::runtime::Result;
+
+namespace executorch {
+namespace backends {
+namespace arm {
 
 typedef struct {
   FreeableBuffer* processed;
@@ -142,10 +156,10 @@ class ArmBackend final : public ::executorch::runtime::BackendInterface {
             Error,
             "Input %d expected Integer (4 byte) or Char (1 byte) integer inputs, got ScalarType id %s",
             i,
-            toString(tensor_in.scalar_type()));
+            executorch::runtime::toString(tensor_in.scalar_type()));
         return Error::InvalidProgram;
       }
-      supported = is_contiguous_dim_order(
+      supported = executorch::runtime::is_contiguous_dim_order(
           tensor_in.dim_order().data(), tensor_in.dim());
       if (!supported) {
         ET_LOG(
@@ -268,7 +282,7 @@ class ArmBackend final : public ::executorch::runtime::BackendInterface {
  private:
   Error check_requires_permute(
       int index,
-      const exec_aten::Tensor tensor,
+      const executorch::aten::Tensor tensor,
       VelaIO* io,
       bool permuted_io_flag,
       bool* is_permuted) const {
@@ -343,5 +357,6 @@ Backend backend_id{"ArmBackend", &backend};
 static auto registered = register_backend(backend_id);
 } // namespace
 
-} // namespace executor
-} // namespace torch
+} // namespace arm
+} // namespace backends
+} // namespace executorch
@@ -10,10 +10,15 @@
  *          as that function emits this format and the two need to align.
  */
 
+#include <executorch/backends/arm/runtime/VelaBinStream.h>
+
 #include <cstring>
 
-#include "executorch/backends/arm/runtime/VelaBinStream.h"
-#include "executorch/runtime/core/error.h"
+#include <executorch/runtime/core/error.h>
+
+namespace executorch {
+namespace backends {
+namespace arm {
 
 // get next mul of 16 ptr, return n if already aligned
 static uintptr_t next_mul_16(uintptr_t n) {
@@ -91,3 +96,7 @@ bool vela_bin_read(const char* data, VelaHandles* handles, int size) {
   // We've fallen off the end without finding vela_end_stream
   return false;
 }
+
+} // namespace arm
+} // namespace backends
+} // namespace executorch
@@ -18,6 +18,10 @@
 #include <cstddef>
 #include <cstdint>
 
+namespace executorch {
+namespace backends {
+namespace arm {
+
 // Standard block name size
 const uint32_t kVelaBlockNameLength = 16;
 
@@ -67,3 +71,7 @@ bool vela_bin_read(const char* data, VelaHandles* handles, int size);
  * on the Ethos-U.
  */
 bool vela_bin_validate(const char* data, int size);
+
+} // namespace arm
+} // namespace backends
+} // namespace executorch
@@ -22,6 +22,7 @@ python_library(
     deps = [
         "fbsource//third-party/pypi/tabulate:tabulate",
         "//caffe2:torch",
+        "//executorch/exir:lib",
         "//executorch/exir:memory",
         "//executorch/exir/dialects:lib",
         "//executorch/exir/dialects/edge:lib",
 
@@ -36,6 +36,8 @@
 from torch.export import export
 from torch.export.exported_program import ExportedProgram
 
+from .utils import print_ops_info
+
 
 # Note: this is not meant as a primary API since it can create inconsistencies
 # if the quantizer here is different from the quantizer used to convert. It is
@@ -193,16 +195,17 @@ def export_to_edge(
 
 
 # Export the model and lower it to an EdgeProgramManager (in edge IR), and
-# apply passes specific to Cadence DSP execution.
+# apply passes specific to Cadence DSP execution. Print ops info for both the
+# edge and Cadence graph modules, and return only the Cadence program manager.
 def export_to_cadence(
     model: torch.nn.Module,
     inputs: tuple[object, ...],
     dump_graphs: bool = False,
 ) -> EdgeProgramManager:
-    edge_program_manager = export_to_edge(model, inputs)
+    edge_prog_manager = export_to_edge(model, inputs)
 
     # Run a couple required passes for quant/dequant ops
-    cadence_program_manager = edge_program_manager.transform(
+    cadence_prog_manager = edge_prog_manager.transform(
         [
             InitializePipeline(),
             RemoveZeroSizedCatArgsPass(),
@@ -216,4 +219,10 @@ def export_to_cadence(
         ]
     )
 
-    return cadence_program_manager
+    # Print some information to terminal
+    print_ops_info(
+        edge_prog_manager.exported_program().graph_module,
+        cadence_prog_manager.exported_program().graph_module,
+    )
+
+    return cadence_prog_manager
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-4b2970f7cd3cdd56883cacf116a8693862f89db5`
	`1`	`+d1b87e26e5c4343f5b56bb1e6f89b479b389bfac`