Skip to content

Commit 055d8b9

Browse files
committed
Update on "[ExecuTorch] Reapply D62466496: Build optimized kernels with bf16 support and gate usage at runtime"
Now with fewer broken tests. Differential Revision: [D62680594](https://our.internmc.facebook.com/intern/diff/D62680594/) [ghstack-poisoned]
2 parents 6479031 + f91719b commit 055d8b9

31 files changed

+538
-261
lines changed

.github/workflows/android-perf.yml

Lines changed: 7 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ jobs:
140140
submodules: 'true'
141141
timeout: 60
142142
upload-artifact: android-models
143+
upload-artifact-to-s3: true
143144
script: |
144145
# The generic Linux job chooses to use base env, not the one setup by the image
145146
echo "::group::Setting up dev environment"
@@ -175,35 +176,6 @@ jobs:
175176
fi
176177
echo "::endgroup::"
177178
178-
# Upload models to S3. The artifacts are needed not only by the device farm but also TorchChat
179-
upload-models:
180-
needs: export-models
181-
runs-on: linux.2xlarge
182-
if: always() # Continue this job regardless of previous job outcome
183-
steps:
184-
- name: Download the models from GitHub
185-
uses: actions/download-artifact@v3
186-
with:
187-
# The name here needs to match the name of the upload-artifact parameter
188-
name: android-models
189-
path: ${{ runner.temp }}/artifacts/
190-
191-
- name: Verify the models
192-
shell: bash
193-
working-directory: ${{ runner.temp }}/artifacts/
194-
run: |
195-
ls -lah ./
196-
197-
- name: Upload the models to S3
198-
uses: seemethere/upload-artifact-s3@v5
199-
with:
200-
s3-bucket: gha-artifacts
201-
s3-prefix: |
202-
${{ github.repository }}/${{ github.run_id }}/artifact
203-
retention-days: 1
204-
if-no-files-found: ignore
205-
path: ${{ runner.temp }}/artifacts/
206-
207179
build-llm-demo:
208180
name: build-llm-demo
209181
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
@@ -215,6 +187,7 @@ jobs:
215187
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
216188
timeout: 90
217189
upload-artifact: android-apps
190+
upload-artifact-to-s3: true
218191
script: |
219192
set -eux
220193
@@ -230,34 +203,6 @@ jobs:
230203
export ANDROID_ABIS="arm64-v8a"
231204
PYTHON_EXECUTABLE=python EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.25.0.240728 bash build/build_android_llm_demo.sh ${ARTIFACTS_DIR_NAME}
232205
233-
# Upload artifacts to S3. The artifacts are needed not only by the device farm but also TorchChat
234-
upload-android-apps:
235-
needs: build-llm-demo
236-
runs-on: linux.2xlarge
237-
steps:
238-
- name: Download the apps from GitHub
239-
uses: actions/download-artifact@v3
240-
with:
241-
# The name here needs to match the name of the upload-artifact parameter
242-
name: android-apps
243-
path: ${{ runner.temp }}/artifacts/
244-
245-
- name: Verify the apps
246-
shell: bash
247-
working-directory: ${{ runner.temp }}/artifacts/
248-
run: |
249-
ls -lah ./
250-
251-
- name: Upload the apps to S3
252-
uses: seemethere/upload-artifact-s3@v5
253-
with:
254-
s3-bucket: gha-artifacts
255-
s3-prefix: |
256-
${{ github.repository }}/${{ github.run_id }}/artifact
257-
retention-days: 14
258-
if-no-files-found: ignore
259-
path: ${{ runner.temp }}/artifacts/
260-
261206
# Let's see how expensive this job is, we might want to tone it down by running it periodically
262207
benchmark-on-device:
263208
permissions:
@@ -266,8 +211,8 @@ jobs:
266211
uses: pytorch/test-infra/.github/workflows/mobile_job.yml@main
267212
needs:
268213
- set-parameters
269-
- upload-models
270-
- upload-android-apps
214+
- build-llm-demo
215+
- export-models
271216
strategy:
272217
matrix:
273218
model: ${{ fromJson(needs.set-parameters.outputs.models) }}
@@ -285,9 +230,9 @@ jobs:
285230
# Unlike models, there is a limited number of build flavors for apps, and the model controls whether it should build with the bpe/tiktoken tokenizer.
286231
# It's okay to build all possible apps with all possible flavors in job "build-llm-demo". However, in this job, once a model is given, there is only
287232
# one app+flavor that could load and run the model.
288-
android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/minibench/app-debug.apk
289-
android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/minibench/app-debug-androidTest.apk
233+
android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/minibench/app-debug.apk
234+
android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/minibench/app-debug-androidTest.apk
290235
# NB: Need to set the default spec here so that it works for periodic too
291236
test-spec: ${{ inputs.test_spec || 'https://ossci-android.s3.amazonaws.com/executorch/android-llm-device-farm-test-spec.yml' }}
292237
# Uploaded to S3 from the previous job
293-
extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/${{ matrix.model }}_${{ matrix.delegate }}/model.zip
238+
extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/model.zip

.github/workflows/android.yml

Lines changed: 1 addition & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ jobs:
3333
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
3434
timeout: 90
3535
upload-artifact: android-apps
36+
upload-artifact-to-s3: true
3637
script: |
3738
set -eux
3839
@@ -45,38 +46,6 @@ jobs:
4546
# Build LLM Demo for Android
4647
bash build/build_android_llm_demo.sh ${ARTIFACTS_DIR_NAME}
4748
48-
# Upload artifacts to S3. The artifacts are needed not only by the device farm but also TorchChat
49-
upload-artifacts:
50-
needs: build-llm-demo
51-
runs-on: linux.2xlarge
52-
steps:
53-
- name: Download the artifacts from GitHub
54-
uses: actions/download-artifact@v3
55-
with:
56-
# The name here needs to match the name of the upload-artifact parameter
57-
name: android-apps
58-
path: ${{ runner.temp }}/artifacts/
59-
60-
- name: Verify the artifacts
61-
shell: bash
62-
working-directory: ${{ runner.temp }}/artifacts/
63-
run: |
64-
ls -lah ./
65-
66-
- name: Upload the artifacts to S3
67-
uses: seemethere/upload-artifact-s3@v5
68-
with:
69-
s3-bucket: gha-artifacts
70-
s3-prefix: |
71-
${{ github.repository }}/${{ github.run_id }}/artifact
72-
# NOTE: Consuming stale artifacts doesn't make sense for benchmarking, as the goal is always to
73-
# benchmark models that are as fresh as possible. I'm okay with keeping the 14 retention-days for now
74-
# for TorchChat until we have a periodic job that can publish it more often. Ideally I want to
75-
# reduce it to <= 2 days, meaning the benchmark job will run daily.
76-
retention-days: 14
77-
if-no-files-found: ignore
78-
path: ${{ runner.temp }}/artifacts/
79-
8049
# Running Android emulator directly on the runner and not using Docker
8150
run-emulator:
8251
needs: build-llm-demo

.github/workflows/apple-perf.yml

Lines changed: 8 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -124,11 +124,13 @@ jobs:
124124
delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }}
125125
fail-fast: false
126126
with:
127-
runner: macos-latest-xlarge
127+
# NB: Need to use our AWS MacOS runner to upload large models to S3
128+
runner: macos-m1-stable
128129
python-version: '3.11'
129130
submodules: 'true'
130131
timeout: 60
131132
upload-artifact: ios-models
133+
upload-artifact-to-s3: true
132134
script: |
133135
set -eux
134136
@@ -176,34 +178,6 @@ jobs:
176178
fi
177179
echo "::endgroup::"
178180
179-
upload-models:
180-
needs: export-models
181-
runs-on: linux.2xlarge
182-
if: always() # Continue this job regardless of previous job outcome
183-
steps:
184-
- name: Download the models from GitHub
185-
uses: actions/download-artifact@v3
186-
with:
187-
# The name here needs to match the name of the upload-artifact parameter
188-
name: ios-models
189-
path: ${{ runner.temp }}/artifacts/
190-
191-
- name: Verify the models
192-
shell: bash
193-
working-directory: ${{ runner.temp }}/artifacts/
194-
run: |
195-
ls -lah ./
196-
197-
- name: Upload the models to S3
198-
uses: seemethere/upload-artifact-s3@v5
199-
with:
200-
s3-bucket: gha-artifacts
201-
s3-prefix: |
202-
${{ github.repository }}/${{ github.run_id }}/artifact
203-
retention-days: 1
204-
if-no-files-found: ignore
205-
path: ${{ runner.temp }}/artifacts/
206-
207181
build-benchmark-app:
208182
name: build-benchmark-app
209183
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
@@ -297,7 +271,7 @@ jobs:
297271
with:
298272
s3-bucket: gha-artifacts
299273
s3-prefix: |
300-
${{ github.repository }}/${{ github.run_id }}/artifact
274+
${{ github.repository }}/${{ github.run_id }}/artifacts
301275
retention-days: 14
302276
if-no-files-found: ignore
303277
path: ${{ runner.temp }}/artifacts/
@@ -306,7 +280,7 @@ jobs:
306280
needs:
307281
- set-parameters
308282
- upload-benchmark-app
309-
- upload-models
283+
- export-models
310284
permissions:
311285
id-token: write
312286
contents: read
@@ -326,7 +300,7 @@ jobs:
326300
project-arn: arn:aws:devicefarm:us-west-2:308535385114:project:02a2cf0f-6d9b-45ee-ba1a-a086587469e6
327301
device-pool-arn: ${{ matrix.device }}
328302
# Uploaded to S3 from the previous job
329-
ios-ipa-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/Benchmark.ipa
330-
ios-xctestrun-zip: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/Benchmark.xctestrun.zip
303+
ios-ipa-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/Benchmark.ipa
304+
ios-xctestrun-zip: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/Benchmark.xctestrun.zip
331305
test-spec: ${{ inputs.test_spec || 'https://ossci-ios.s3.amazonaws.com/executorch/default-ios-device-farm-appium-test-spec.yml' }}
332-
extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/${{ matrix.model }}_${{ matrix.delegate }}/model.zip
306+
extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/model.zip

.github/workflows/upload-android-test-specs.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ jobs:
2929
with:
3030
s3-bucket: gha-artifacts
3131
s3-prefix: |
32-
${{ github.repository }}/${{ github.run_id }}/artifact
32+
${{ github.repository }}/${{ github.run_id }}/artifacts
3333
retention-days: 1
3434
if-no-files-found: error
3535
path: extension/android/benchmark/android-llm-device-farm-test-spec.yml
@@ -45,7 +45,7 @@ jobs:
4545
models: stories110M
4646
devices: samsung_galaxy_s22
4747
delegates: xnnpack
48-
test_spec: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/android-llm-device-farm-test-spec.yml
48+
test_spec: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/android-llm-device-farm-test-spec.yml
4949

5050
upload-android-test-spec:
5151
needs: validate-android-test-spec

.github/workflows/upload-apple-test-specs.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ jobs:
2929
with:
3030
s3-bucket: gha-artifacts
3131
s3-prefix: |
32-
${{ github.repository }}/${{ github.run_id }}/artifact
32+
${{ github.repository }}/${{ github.run_id }}/artifacts
3333
retention-days: 1
3434
if-no-files-found: error
3535
path: examples/demo-apps/apple_ios/default-ios-device-farm-appium-test-spec.yml
@@ -46,7 +46,7 @@ jobs:
4646
models: stories110M
4747
devices: apple_iphone_15
4848
delegates: xnnpack
49-
test_spec: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/default-ios-device-farm-appium-test-spec.yml
49+
test_spec: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/default-ios-device-farm-appium-test-spec.yml
5050

5151
upload-apple-test-spec:
5252
needs: validate-apple-test-spec

backends/qualcomm/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ target_link_libraries(
181181
)
182182
target_link_libraries(
183183
qnn_executorch_backend PRIVATE qnn_executorch_header qnn_schema qnn_manager
184-
executorch_no_prim_ops qcir_utils
184+
executorch_no_prim_ops qcir_utils extension_tensor
185185
)
186186
set_target_properties(
187187
qnn_executorch_backend PROPERTIES LINK_FLAGS "-Wl,-rpath='$ORIGIN'"
@@ -246,6 +246,7 @@ if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "x86_64")
246246
qnn_executorch_header
247247
executorch
248248
qcir_utils
249+
extension_tensor
249250
)
250251
target_link_libraries(
251252
PyQnnWrapperAdaptor PRIVATE pybind11::module pybind11::lto wrappers

backends/qualcomm/runtime/QnnExecuTorchBackend.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -213,8 +213,10 @@ Error QnnExecuTorchBackend::execute(
213213
}
214214

215215
ET_CHECK_OR_RETURN_ERROR(
216-
qnn_manager->Execute(input_tensor_structs, output_tensor_structs) ==
217-
Error::Ok,
216+
qnn_manager->Execute(
217+
input_tensor_structs,
218+
output_tensor_structs,
219+
context.event_tracer()) == Error::Ok,
218220
Internal,
219221
"Fail to execute graph");
220222
ET_CHECK_OR_RETURN_ERROR(

backends/qualcomm/runtime/QnnManager.cpp

Lines changed: 19 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <executorch/backends/qualcomm/runtime/Utils.h>
1111
#include <executorch/backends/qualcomm/runtime/backends/QnnBackendCommon.h>
1212
#include <executorch/backends/qualcomm/runtime/backends/QnnImplementation.h>
13+
#include <executorch/extension/tensor/tensor.h>
1314
#include <algorithm>
1415
#include <cstdlib>
1516
#include <cstring>
@@ -57,9 +58,7 @@ QnnManager::QnnManager(
5758
"backend_type: %s", EnumNameQnnExecuTorchBackendType(backend_type));
5859
QNN_EXECUTORCH_LOG_INFO("graph_name: %s", options_->graph_name()->c_str());
5960
QNN_EXECUTORCH_LOG_INFO("library_path: %s", library_path.c_str());
60-
QNN_EXECUTORCH_LOG_INFO(
61-
"tensor_dump_output_path: %s",
62-
options_->tensor_dump_output_path()->c_str());
61+
QNN_EXECUTORCH_LOG_INFO("dump intermediate outputs: %s", IsTensorDump());
6362
QNN_EXECUTORCH_LOG_INFO(
6463
"log_level: %s", EnumNameQnnExecuTorchLogLevel(options_->log_level()));
6564
QNN_EXECUTORCH_LOG_INFO(
@@ -366,7 +365,8 @@ Error QnnManager::AllocateTensor(
366365

367366
Error QnnManager::Execute(
368367
const std::vector<Qnn_Tensor_t>& input_tensor_structs,
369-
std::vector<Qnn_Tensor_t>& output_tensor_structs) {
368+
std::vector<Qnn_Tensor_t>& output_tensor_structs,
369+
EventTracer* event_tracer) {
370370
Qnn_ErrorHandle_t error = QNN_SUCCESS;
371371

372372
error = backend_params_ptr_->qnn_graph_ptr_->GraphExecute(
@@ -377,30 +377,27 @@ Error QnnManager::Execute(
377377
"qnn_graph_execute failed. Error %d", QNN_GET_ERROR_CODE(error));
378378
return Error::Internal;
379379
}
380-
381380
if (IsTensorDump()) {
382381
// TODO: Need to handle graphs that are partitioned.
383382
// Maybe we could use the graph name.
384-
std::string dir = options_->tensor_dump_output_path()->str() + "/Result/";
385-
CreateDirectory(dir);
386-
QNN_EXECUTORCH_LOG_INFO("Dump tensor to the path: %s", dir.c_str());
387383
for (std::size_t out_idx = 0; out_idx < output_tensor_structs.size();
388384
++out_idx) {
389385
const Qnn_Tensor_t& output_tensor = output_tensor_structs[out_idx];
390-
391-
std::string output_path =
392-
dir + QNN_VER_PTR(output_tensor)->name + "_tensor.raw";
393-
394-
std::ofstream fout(output_path, std::ios::binary);
395-
if (fout.fail()) {
396-
QNN_EXECUTORCH_LOG_ERROR(
397-
"Dump tensor name: %s Failed.", QNN_VER_PTR(output_tensor)->name);
398-
return Error::Internal;
399-
}
400-
401-
fout.write(
402-
static_cast<const char*>(QNN_VER_PTR(output_tensor)->clientBuf.data),
403-
QNN_VER_PTR(output_tensor)->clientBuf.dataSize);
386+
std::vector<exec_aten::SizesType> sizes(
387+
QNN_VER_PTR(output_tensor)->dimensions,
388+
QNN_VER_PTR(output_tensor)->dimensions +
389+
QNN_VER_PTR(output_tensor)->rank);
390+
391+
auto dump_tensor = executorch::extension::from_blob(
392+
QNN_VER_PTR(output_tensor)->clientBuf.data,
393+
sizes,
394+
qnn_dtype_to_scalar_type_[QNN_VER_PTR(output_tensor)->dataType]);
395+
396+
torch::executor::event_tracer_log_output_delegate<exec_aten::Tensor>(
397+
event_tracer,
398+
QNN_VER_PTR(output_tensor)->name,
399+
/*delegate_debug_id=*/static_cast<torch::executor::DebugHandle>(-1),
400+
*dump_tensor);
404401
}
405402
}
406403

backends/qualcomm/runtime/QnnManager.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@ class QnnManager {
3737

3838
Error Execute(
3939
const std::vector<Qnn_Tensor_t>& input_tensor_structs,
40-
std::vector<Qnn_Tensor_t>& output_tensor_structs);
40+
std::vector<Qnn_Tensor_t>& output_tensor_structs,
41+
EventTracer* event_tracer);
4142

4243
Error ProfileExecuteData(EventTracer* event_tracer);
4344

@@ -52,7 +53,7 @@ class QnnManager {
5253
}
5354

5455
bool IsTensorDump() {
55-
return options_->tensor_dump_output_path()->size() > 0;
56+
return options_->dump_intermediate_outputs();
5657
}
5758

5859
bool IsNodeSupportedByBackend(

0 commit comments

Comments
 (0)