pytorch
diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml
Lines changed: 7 additions & 62 deletions
diff --git a/.github/workflows/android.yml b/.github/workflows/android.yml
Lines changed: 1 addition & 32 deletions
diff --git a/.github/workflows/apple-perf.yml b/.github/workflows/apple-perf.yml
Lines changed: 8 additions & 34 deletions
diff --git a/.github/workflows/upload-android-test-specs.yml b/.github/workflows/upload-android-test-specs.yml
Lines changed: 2 additions & 2 deletions
diff --git a/.github/workflows/upload-apple-test-specs.yml b/.github/workflows/upload-apple-test-specs.yml
Lines changed: 2 additions & 2 deletions
diff --git a/backends/qualcomm/CMakeLists.txt b/backends/qualcomm/CMakeLists.txt
Lines changed: 2 additions & 1 deletion
diff --git a/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp b/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp
Lines changed: 4 additions & 2 deletions
diff --git a/backends/qualcomm/runtime/QnnManager.cpp b/backends/qualcomm/runtime/QnnManager.cpp
Lines changed: 19 additions & 22 deletions
diff --git a/backends/qualcomm/runtime/QnnManager.h b/backends/qualcomm/runtime/QnnManager.h
Lines changed: 3 additions & 2 deletions
@@ -140,6 +140,7 @@ jobs:
       submodules: 'true'
       timeout: 60
       upload-artifact: android-models
+      upload-artifact-to-s3: true
       script: |
         # The generic Linux job chooses to use base env, not the one setup by the image
         echo "::group::Setting up dev environment"
@@ -175,35 +176,6 @@ jobs:
         fi
         echo "::endgroup::"
 
-  # Upload models to S3. The artifacts are needed not only by the device farm but also TorchChat
-  upload-models:
-    needs: export-models
-    runs-on: linux.2xlarge
-    if: always()  # Continue this job regardless of previous job outcome
-    steps:
-      - name: Download the models from GitHub
-        uses: actions/download-artifact@v3
-        with:
-          # The name here needs to match the name of the upload-artifact parameter
-          name: android-models
-          path: ${{ runner.temp }}/artifacts/
-
-      - name: Verify the models
-        shell: bash
-        working-directory: ${{ runner.temp }}/artifacts/
-        run: |
-          ls -lah ./
-
-      - name: Upload the models to S3
-        uses: seemethere/upload-artifact-s3@v5
-        with:
-          s3-bucket: gha-artifacts
-          s3-prefix: |
-            ${{ github.repository }}/${{ github.run_id }}/artifact
-          retention-days: 1
-          if-no-files-found: ignore
-          path: ${{ runner.temp }}/artifacts/
-
   build-llm-demo:
     name: build-llm-demo
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
@@ -215,6 +187,7 @@ jobs:
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       timeout: 90
       upload-artifact: android-apps
+      upload-artifact-to-s3: true
       script: |
         set -eux
 
@@ -230,34 +203,6 @@ jobs:
         export ANDROID_ABIS="arm64-v8a"
         PYTHON_EXECUTABLE=python EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.25.0.240728 bash build/build_android_llm_demo.sh ${ARTIFACTS_DIR_NAME}
 
-  # Upload artifacts to S3. The artifacts are needed not only by the device farm but also TorchChat
-  upload-android-apps:
-    needs: build-llm-demo
-    runs-on: linux.2xlarge
-    steps:
-      - name: Download the apps from GitHub
-        uses: actions/download-artifact@v3
-        with:
-          # The name here needs to match the name of the upload-artifact parameter
-          name: android-apps
-          path: ${{ runner.temp }}/artifacts/
-
-      - name: Verify the apps
-        shell: bash
-        working-directory: ${{ runner.temp }}/artifacts/
-        run: |
-          ls -lah ./
-
-      - name: Upload the apps to S3
-        uses: seemethere/upload-artifact-s3@v5
-        with:
-          s3-bucket: gha-artifacts
-          s3-prefix: |
-            ${{ github.repository }}/${{ github.run_id }}/artifact
-          retention-days: 14
-          if-no-files-found: ignore
-          path: ${{ runner.temp }}/artifacts/
-
   # Let's see how expensive this job is, we might want to tone it down by running it periodically
   benchmark-on-device:
     permissions:
@@ -266,8 +211,8 @@ jobs:
     uses: pytorch/test-infra/.github/workflows/mobile_job.yml@main
     needs:
       - set-parameters
-      - upload-models
-      - upload-android-apps
+      - build-llm-demo
+      - export-models
     strategy:
       matrix:
         model: ${{ fromJson(needs.set-parameters.outputs.models) }}
@@ -285,9 +230,9 @@ jobs:
       # Unlike models there are limited numbers of build flavor for apps, and the model controls whether it should build with bpe/tiktoken tokenizer.
       # It's okay to build all possible apps with all possible flavors in job "build-llm-demo". However, in this job, once a model is given, there is only
       # one app+flavor that could load and run the model.
-      android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/minibench/app-debug.apk
-      android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/minibench/app-debug-androidTest.apk
+      android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/minibench/app-debug.apk
+      android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/minibench/app-debug-androidTest.apk
       # NB: Need to set the default spec here so that it works for periodic too
       test-spec: ${{ inputs.test_spec || 'https://ossci-android.s3.amazonaws.com/executorch/android-llm-device-farm-test-spec.yml' }}
       # Uploaded to S3 from the previous job
-      extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/${{ matrix.model }}_${{ matrix.delegate }}/model.zip
+      extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/model.zip
@@ -33,6 +33,7 @@ jobs:
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       timeout: 90
       upload-artifact: android-apps
+      upload-artifact-to-s3: true
       script: |
         set -eux
 
@@ -45,38 +46,6 @@ jobs:
         # Build LLM Demo for Android
         bash build/build_android_llm_demo.sh ${ARTIFACTS_DIR_NAME}
 
-  # Upload artifacts to S3. The artifacts are needed not only by the device farm but also TorchChat
-  upload-artifacts:
-    needs: build-llm-demo
-    runs-on: linux.2xlarge
-    steps:
-      - name: Download the artifacts from GitHub
-        uses: actions/download-artifact@v3
-        with:
-          # The name here needs to match the name of the upload-artifact parameter
-          name: android-apps
-          path: ${{ runner.temp }}/artifacts/
-
-      - name: Verify the artifacts
-        shell: bash
-        working-directory: ${{ runner.temp }}/artifacts/
-        run: |
-          ls -lah ./
-
-      - name: Upload the artifacts to S3
-        uses: seemethere/upload-artifact-s3@v5
-        with:
-          s3-bucket: gha-artifacts
-          s3-prefix: |
-            ${{ github.repository }}/${{ github.run_id }}/artifact
-          # NOTE: Consume stale artifacts won't make sense for benchmarking as the goal is always to
-          # benchmark models as fresh as possible. I'm okay to keep the 14 retention-days for now
-          # for TorchChat until we have a periodic job can publish it more often. Ideally I want to
-          # reduce it to <= 2 day, meaning the benchmark job will run daily.
-          retention-days: 14
-          if-no-files-found: ignore
-          path: ${{ runner.temp }}/artifacts/
-
   # Running Android emulator directly on the runner and not using Docker
   run-emulator:
     needs: build-llm-demo
 
@@ -124,11 +124,13 @@ jobs:
           delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }}
       fail-fast: false
     with:
-      runner: macos-latest-xlarge
+      # NB: Need to use our AWS MacOS runner to upload large models to S3
+      runner: macos-m1-stable
       python-version: '3.11'
       submodules: 'true'
       timeout: 60
       upload-artifact: ios-models
+      upload-artifact-to-s3: true
       script: |
         set -eux
 
@@ -176,34 +178,6 @@ jobs:
         fi
         echo "::endgroup::"
 
-  upload-models:
-    needs: export-models
-    runs-on: linux.2xlarge
-    if: always()  # Continue this job regardless of previous job outcome
-    steps:
-      - name: Download the models from GitHub
-        uses: actions/download-artifact@v3
-        with:
-          # The name here needs to match the name of the upload-artifact parameter
-          name: ios-models
-          path: ${{ runner.temp }}/artifacts/
-
-      - name: Verify the models
-        shell: bash
-        working-directory: ${{ runner.temp }}/artifacts/
-        run: |
-          ls -lah ./
-
-      - name: Upload the models to S3
-        uses: seemethere/upload-artifact-s3@v5
-        with:
-          s3-bucket: gha-artifacts
-          s3-prefix: |
-            ${{ github.repository }}/${{ github.run_id }}/artifact
-          retention-days: 1
-          if-no-files-found: ignore
-          path: ${{ runner.temp }}/artifacts/
-
   build-benchmark-app:
     name: build-benchmark-app
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
@@ -297,7 +271,7 @@ jobs:
         with:
           s3-bucket: gha-artifacts
           s3-prefix: |
-            ${{ github.repository }}/${{ github.run_id }}/artifact
+            ${{ github.repository }}/${{ github.run_id }}/artifacts
           retention-days: 14
           if-no-files-found: ignore
           path: ${{ runner.temp }}/artifacts/
@@ -306,7 +280,7 @@ jobs:
     needs:
       - set-parameters
       - upload-benchmark-app
-      - upload-models
+      - export-models
     permissions:
       id-token: write
       contents: read
@@ -326,7 +300,7 @@ jobs:
       project-arn: arn:aws:devicefarm:us-west-2:308535385114:project:02a2cf0f-6d9b-45ee-ba1a-a086587469e6
       device-pool-arn: ${{ matrix.device }}
       # Uploaded to S3 from the previous job
-      ios-ipa-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/Benchmark.ipa
-      ios-xctestrun-zip: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/Benchmark.xctestrun.zip
+      ios-ipa-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/Benchmark.ipa
+      ios-xctestrun-zip: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/Benchmark.xctestrun.zip
       test-spec: ${{ inputs.test_spec || 'https://ossci-ios.s3.amazonaws.com/executorch/default-ios-device-farm-appium-test-spec.yml' }}
-      extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/${{ matrix.model }}_${{ matrix.delegate }}/model.zip
+      extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/model.zip
@@ -29,7 +29,7 @@ jobs:
         with:
           s3-bucket: gha-artifacts
           s3-prefix: |
-            ${{ github.repository }}/${{ github.run_id }}/artifact
+            ${{ github.repository }}/${{ github.run_id }}/artifacts
           retention-days: 1
           if-no-files-found: error
           path: extension/android/benchmark/android-llm-device-farm-test-spec.yml
@@ -45,7 +45,7 @@ jobs:
       models: stories110M
       devices: samsung_galaxy_s22
       delegates: xnnpack
-      test_spec: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/android-llm-device-farm-test-spec.yml
+      test_spec: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/android-llm-device-farm-test-spec.yml
 
   upload-android-test-spec:
     needs: validate-android-test-spec
 
@@ -29,7 +29,7 @@ jobs:
         with:
           s3-bucket: gha-artifacts
           s3-prefix: |
-            ${{ github.repository }}/${{ github.run_id }}/artifact
+            ${{ github.repository }}/${{ github.run_id }}/artifacts
           retention-days: 1
           if-no-files-found: error
           path: examples/demo-apps/apple_ios/default-ios-device-farm-appium-test-spec.yml
@@ -46,7 +46,7 @@ jobs:
       models: stories110M
       devices: apple_iphone_15
       delegates: xnnpack
-      test_spec: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/default-ios-device-farm-appium-test-spec.yml
+      test_spec: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/default-ios-device-farm-appium-test-spec.yml
 
   upload-apple-test-spec:
     needs: validate-apple-test-spec
 
@@ -181,7 +181,7 @@ target_link_libraries(
 )
 target_link_libraries(
   qnn_executorch_backend PRIVATE qnn_executorch_header qnn_schema qnn_manager
-                                 executorch_no_prim_ops qcir_utils
+                                 executorch_no_prim_ops qcir_utils extension_tensor
 )
 set_target_properties(
   qnn_executorch_backend PROPERTIES LINK_FLAGS "-Wl,-rpath='$ORIGIN'"
@@ -246,6 +246,7 @@ if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "x86_64")
             qnn_executorch_header
             executorch
             qcir_utils
+            extension_tensor
   )
   target_link_libraries(
     PyQnnWrapperAdaptor PRIVATE pybind11::module pybind11::lto wrappers
 
@@ -213,8 +213,10 @@ Error QnnExecuTorchBackend::execute(
   }
 
   ET_CHECK_OR_RETURN_ERROR(
-      qnn_manager->Execute(input_tensor_structs, output_tensor_structs) ==
-          Error::Ok,
+      qnn_manager->Execute(
+          input_tensor_structs,
+          output_tensor_structs,
+          context.event_tracer()) == Error::Ok,
       Internal,
       "Fail to execute graph");
   ET_CHECK_OR_RETURN_ERROR(
 
@@ -10,6 +10,7 @@
 #include <executorch/backends/qualcomm/runtime/Utils.h>
 #include <executorch/backends/qualcomm/runtime/backends/QnnBackendCommon.h>
 #include <executorch/backends/qualcomm/runtime/backends/QnnImplementation.h>
+#include <executorch/extension/tensor/tensor.h>
 #include <algorithm>
 #include <cstdlib>
 #include <cstring>
@@ -57,9 +58,7 @@ QnnManager::QnnManager(
         "backend_type: %s", EnumNameQnnExecuTorchBackendType(backend_type));
     QNN_EXECUTORCH_LOG_INFO("graph_name: %s", options_->graph_name()->c_str());
     QNN_EXECUTORCH_LOG_INFO("library_path: %s", library_path.c_str());
-    QNN_EXECUTORCH_LOG_INFO(
-        "tensor_dump_output_path: %s",
-        options_->tensor_dump_output_path()->c_str());
+    QNN_EXECUTORCH_LOG_INFO("dump intermediate outputs: %s", IsTensorDump() ? "true" : "false");
     QNN_EXECUTORCH_LOG_INFO(
         "log_level: %s", EnumNameQnnExecuTorchLogLevel(options_->log_level()));
     QNN_EXECUTORCH_LOG_INFO(
@@ -366,7 +365,8 @@ Error QnnManager::AllocateTensor(
 
 Error QnnManager::Execute(
     const std::vector<Qnn_Tensor_t>& input_tensor_structs,
-    std::vector<Qnn_Tensor_t>& output_tensor_structs) {
+    std::vector<Qnn_Tensor_t>& output_tensor_structs,
+    EventTracer* event_tracer) {
   Qnn_ErrorHandle_t error = QNN_SUCCESS;
 
   error = backend_params_ptr_->qnn_graph_ptr_->GraphExecute(
@@ -377,30 +377,27 @@ Error QnnManager::Execute(
         "qnn_graph_execute failed. Error %d", QNN_GET_ERROR_CODE(error));
     return Error::Internal;
   }
-
   if (IsTensorDump()) {
     // TODO: Need to handle the graph which is partitioned.
     // Maybe we could use graph name.
-    std::string dir = options_->tensor_dump_output_path()->str() + "/Result/";
-    CreateDirectory(dir);
-    QNN_EXECUTORCH_LOG_INFO("Dump tensor to the path: %s", dir.c_str());
     for (std::size_t out_idx = 0; out_idx < output_tensor_structs.size();
          ++out_idx) {
       const Qnn_Tensor_t& output_tensor = output_tensor_structs[out_idx];
-
-      std::string output_path =
-          dir + QNN_VER_PTR(output_tensor)->name + "_tensor.raw";
-
-      std::ofstream fout(output_path, std::ios::binary);
-      if (fout.fail()) {
-        QNN_EXECUTORCH_LOG_ERROR(
-            "Dump tensor name: %s Failed.", QNN_VER_PTR(output_tensor)->name);
-        return Error::Internal;
-      }
-
-      fout.write(
-          static_cast<const char*>(QNN_VER_PTR(output_tensor)->clientBuf.data),
-          QNN_VER_PTR(output_tensor)->clientBuf.dataSize);
+      std::vector<exec_aten::SizesType> sizes(
+          QNN_VER_PTR(output_tensor)->dimensions,
+          QNN_VER_PTR(output_tensor)->dimensions +
+              QNN_VER_PTR(output_tensor)->rank);
+
+      auto dump_tensor = executorch::extension::from_blob(
+          QNN_VER_PTR(output_tensor)->clientBuf.data,
+          sizes,
+          qnn_dtype_to_scalar_type_[QNN_VER_PTR(output_tensor)->dataType]);
+
+      torch::executor::event_tracer_log_output_delegate<exec_aten::Tensor>(
+          event_tracer,
+          QNN_VER_PTR(output_tensor)->name,
+          /*delegate_debug_id=*/static_cast<torch::executor::DebugHandle>(-1),
+          *dump_tensor);
     }
   }
 
 
@@ -37,7 +37,8 @@ class QnnManager {
 
   Error Execute(
       const std::vector<Qnn_Tensor_t>& input_tensor_structs,
-      std::vector<Qnn_Tensor_t>& output_tensor_structs);
+      std::vector<Qnn_Tensor_t>& output_tensor_structs,
+      EventTracer* event_tracer);
 
   Error ProfileExecuteData(EventTracer* event_tracer);
 
@@ -52,7 +53,7 @@ class QnnManager {
   }
 
   bool IsTensorDump() {
-    return options_->tensor_dump_output_path()->size() > 0;
+    return options_->dump_intermediate_outputs();
   }
 
   bool IsNodeSupportedByBackend(
Original file line number	Diff line number	Diff line change
`@@ -181,7 +181,7 @@ target_link_libraries(`
`181`	`181`	`)`
`182`	`182`	`target_link_libraries(`
`183`	`183`	`qnn_executorch_backend PRIVATE qnn_executorch_header qnn_schema qnn_manager`
`184`		`- executorch_no_prim_ops qcir_utils`
	`184`	`+ executorch_no_prim_ops qcir_utils extension_tensor`
`185`	`185`	`)`
`186`	`186`	`set_target_properties(`
`187`	`187`	`qnn_executorch_backend PROPERTIES LINK_FLAGS "-Wl,-rpath='$ORIGIN'"`
`@@ -246,6 +246,7 @@ if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "x86_64")`
`246`	`246`	`qnn_executorch_header`
`247`	`247`	`executorch`
`248`	`248`	`qcir_utils`
	`249`	`+ extension_tensor`
`249`	`250`	`)`
`250`	`251`	`target_link_libraries(`
`251`	`252`	`PyQnnWrapperAdaptor PRIVATE pybind11::module pybind11::lto wrappers`
Original file line number	Diff line number	Diff line change
`@@ -37,7 +37,8 @@ class QnnManager {`
`37`	`37`
`38`	`38`	`Error Execute(`
`39`	`39`	`const std::vector<Qnn_Tensor_t>& input_tensor_structs,`
`40`		`- std::vector<Qnn_Tensor_t>& output_tensor_structs);`
	`40`	`+ std::vector<Qnn_Tensor_t>& output_tensor_structs,`
	`41`	`+ EventTracer* event_tracer);`
`41`	`42`
`42`	`43`	`Error ProfileExecuteData(EventTracer* event_tracer);`
`43`	`44`
`@@ -52,7 +53,7 @@ class QnnManager {`
`52`	`53`	`}`
`53`	`54`
`54`	`55`	`bool IsTensorDump() {`
`55`		`- return options_->tensor_dump_output_path()->size() > 0;`
	`56`	`+ return options_->dump_intermediate_outputs();`
`56`	`57`	`}`
`57`	`58`
`58`	`59`	`bool IsNodeSupportedByBackend(`