Skip to content

Commit b803bb7

Browse files
committed
Update on "[ET-VK] Introduce virtual_transpose() to vTensor for no copy transposition"
## Context

With `axis_map` integrated into matrix multiplication, we can now test no-copy transposes for texture-backed tensors. Transposing a tensor can be done without modifying the storage by swapping elements in the tensor's `axis_map`, and by also updating the layout of the tensor if the packed dimension was one of the dims that were transposed.

Differential Revision: [D62652009](https://our.internmc.facebook.com/intern/diff/D62652009/)

[ghstack-poisoned]
2 parents 5f65919 + 4127a5e commit b803bb7

File tree

104 files changed

+2722
-880
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

104 files changed

+2722
-880
lines changed

.ci/scripts/test_model.sh

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -50,24 +50,21 @@ prepare_artifacts_upload() {
5050

5151
build_cmake_executor_runner() {
5252
echo "Building executor_runner"
53-
(rm -rf ${CMAKE_OUTPUT_DIR} \
54-
&& mkdir ${CMAKE_OUTPUT_DIR} \
55-
&& cd ${CMAKE_OUTPUT_DIR} \
56-
&& retry cmake -DCMAKE_BUILD_TYPE=Release \
53+
rm -rf ${CMAKE_OUTPUT_DIR}
54+
cmake -DCMAKE_BUILD_TYPE=Debug \
5755
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
58-
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" ..)
56+
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
57+
-B${CMAKE_OUTPUT_DIR} .
5958

60-
cmake --build ${CMAKE_OUTPUT_DIR} -j4
59+
cmake --build ${CMAKE_OUTPUT_DIR} -j4 --config Debug
6160
}
6261

6362
run_portable_executor_runner() {
6463
# Run test model
6564
if [[ "${BUILD_TOOL}" == "buck2" ]]; then
6665
buck2 run //examples/portable/executor_runner:executor_runner -- --model_path "./${MODEL_NAME}.pte"
6766
elif [[ "${BUILD_TOOL}" == "cmake" ]]; then
68-
if [[ ! -f ${CMAKE_OUTPUT_DIR}/executor_runner ]]; then
69-
build_cmake_executor_runner
70-
fi
67+
build_cmake_executor_runner
7168
./${CMAKE_OUTPUT_DIR}/executor_runner --model_path "./${MODEL_NAME}.pte"
7269
else
7370
echo "Invalid build tool ${BUILD_TOOL}. Only buck2 and cmake are supported atm"
@@ -177,6 +174,7 @@ test_model_with_qnn() {
177174
fi
178175

179176
# Use SM8450 for S22, SM8550 for S23, and SM8650 for S24
177+
# TODO(guangyang): Make the QNN chipset match the target device
180178
QNN_CHIPSET=SM8450
181179

182180
"${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --compile_only

.github/workflows/android-perf.yml

Lines changed: 8 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ jobs:
105105
# Mapping devices to their corresponding device-pool-arn
106106
declare -A DEVICE_POOL_ARNS
107107
DEVICE_POOL_ARNS[samsung_galaxy_s22]="arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa"
108+
DEVICE_POOL_ARNS[samsung_galaxy_s24]="arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98f8788c-2e25-4a3c-8bb2-0d1e8897c0db"
108109
109110
# Resolve device names with their corresponding ARNs
110111
if [[ ! $(echo "$DEVICES" | jq empty 2>/dev/null) ]]; then
@@ -139,6 +140,7 @@ jobs:
139140
submodules: 'true'
140141
timeout: 60
141142
upload-artifact: android-models
143+
upload-artifact-to-s3: true
142144
script: |
143145
# The generic Linux job chooses to use base env, not the one setup by the image
144146
echo "::group::Setting up dev environment"
@@ -174,50 +176,18 @@ jobs:
174176
fi
175177
echo "::endgroup::"
176178
177-
# Upload models to S3. The artifacts are needed not only by the device farm but also TorchChat
178-
upload-models:
179-
needs: export-models
180-
runs-on: linux.2xlarge
181-
if: always() # Continue this job regardless of previous job outcome
182-
steps:
183-
- name: Download the models from GitHub
184-
uses: actions/download-artifact@v3
185-
with:
186-
# The name here needs to match the name of the upload-artifact parameter
187-
name: android-models
188-
path: ${{ runner.temp }}/artifacts/
189-
190-
- name: Verify the models
191-
shell: bash
192-
working-directory: ${{ runner.temp }}/artifacts/
193-
run: |
194-
ls -lah ./
195-
196-
- name: Upload the models to S3
197-
uses: seemethere/upload-artifact-s3@v5
198-
with:
199-
s3-bucket: gha-artifacts
200-
s3-prefix: |
201-
${{ github.repository }}/${{ github.run_id }}/artifact
202-
retention-days: 1
203-
if-no-files-found: ignore
204-
path: ${{ runner.temp }}/artifacts/
205-
206179
build-llm-demo:
207180
name: build-llm-demo
208181
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
209182
needs: set-parameters
210-
strategy:
211-
matrix:
212-
delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }}
213-
fail-fast: false
214183
with:
215184
runner: linux.2xlarge
216185
docker-image: executorch-ubuntu-22.04-clang12-android
217186
submodules: 'true'
218187
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
219188
timeout: 90
220189
upload-artifact: android-apps
190+
upload-artifact-to-s3: true
221191
script: |
222192
set -eux
223193
@@ -233,34 +203,6 @@ jobs:
233203
export ANDROID_ABIS="arm64-v8a"
234204
PYTHON_EXECUTABLE=python EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.25.0.240728 bash build/build_android_llm_demo.sh ${ARTIFACTS_DIR_NAME}
235205
236-
# Upload artifacts to S3. The artifacts are needed not only by the device farm but also TorchChat
237-
upload-android-apps:
238-
needs: build-llm-demo
239-
runs-on: linux.2xlarge
240-
steps:
241-
- name: Download the apps from GitHub
242-
uses: actions/download-artifact@v3
243-
with:
244-
# The name here needs to match the name of the upload-artifact parameter
245-
name: android-apps
246-
path: ${{ runner.temp }}/artifacts/
247-
248-
- name: Verify the apps
249-
shell: bash
250-
working-directory: ${{ runner.temp }}/artifacts/
251-
run: |
252-
ls -lah ./
253-
254-
- name: Upload the apps to S3
255-
uses: seemethere/upload-artifact-s3@v5
256-
with:
257-
s3-bucket: gha-artifacts
258-
s3-prefix: |
259-
${{ github.repository }}/${{ github.run_id }}/artifact
260-
retention-days: 14
261-
if-no-files-found: ignore
262-
path: ${{ runner.temp }}/artifacts/
263-
264206
# Let's see how expensive this job is, we might want to tone it down by running it periodically
265207
benchmark-on-device:
266208
permissions:
@@ -269,8 +211,8 @@ jobs:
269211
uses: pytorch/test-infra/.github/workflows/mobile_job.yml@main
270212
needs:
271213
- set-parameters
272-
- upload-models
273-
- upload-android-apps
214+
- build-llm-demo
215+
- export-models
274216
strategy:
275217
matrix:
276218
model: ${{ fromJson(needs.set-parameters.outputs.models) }}
@@ -288,9 +230,9 @@ jobs:
288230
# Unlike models, there is a limited number of build flavors for apps, and the model controls whether it should build with the bpe/tiktoken tokenizer.
289231
# It's okay to build all possible apps with all possible flavors in job "build-llm-demo". However, in this job, once a model is given, there is only
290232
# one app+flavor that could load and run the model.
291-
android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/minibench/app-debug.apk
292-
android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/minibench/app-debug-androidTest.apk
233+
android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/minibench/app-debug.apk
234+
android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/minibench/app-debug-androidTest.apk
293235
# NB: Need to set the default spec here so that it works for periodic too
294236
test-spec: ${{ inputs.test_spec || 'https://ossci-android.s3.amazonaws.com/executorch/android-llm-device-farm-test-spec.yml' }}
295237
# Uploaded to S3 from the previous job
296-
extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/${{ matrix.model }}_${{ matrix.delegate }}/model.zip
238+
extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/model.zip

.github/workflows/android.yml

Lines changed: 1 addition & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ jobs:
3333
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
3434
timeout: 90
3535
upload-artifact: android-apps
36+
upload-artifact-to-s3: true
3637
script: |
3738
set -eux
3839
@@ -45,38 +46,6 @@ jobs:
4546
# Build LLM Demo for Android
4647
bash build/build_android_llm_demo.sh ${ARTIFACTS_DIR_NAME}
4748
48-
# Upload artifacts to S3. The artifacts are needed not only by the device farm but also TorchChat
49-
upload-artifacts:
50-
needs: build-llm-demo
51-
runs-on: linux.2xlarge
52-
steps:
53-
- name: Download the artifacts from GitHub
54-
uses: actions/download-artifact@v3
55-
with:
56-
# The name here needs to match the name of the upload-artifact parameter
57-
name: android-apps
58-
path: ${{ runner.temp }}/artifacts/
59-
60-
- name: Verify the artifacts
61-
shell: bash
62-
working-directory: ${{ runner.temp }}/artifacts/
63-
run: |
64-
ls -lah ./
65-
66-
- name: Upload the artifacts to S3
67-
uses: seemethere/upload-artifact-s3@v5
68-
with:
69-
s3-bucket: gha-artifacts
70-
s3-prefix: |
71-
${{ github.repository }}/${{ github.run_id }}/artifact
72-
# NOTE: Consuming stale artifacts doesn't make sense for benchmarking as the goal is always to
73-
# benchmark models as fresh as possible. I'm okay to keep the 14 retention-days for now
74-
# for TorchChat until we have a periodic job that can publish it more often. Ideally I want to
75-
# reduce it to <= 2 day, meaning the benchmark job will run daily.
76-
retention-days: 14
77-
if-no-files-found: ignore
78-
path: ${{ runner.temp }}/artifacts/
79-
8049
# Running Android emulator directly on the runner and not using Docker
8150
run-emulator:
8251
needs: build-llm-demo
@@ -141,29 +110,3 @@ jobs:
141110
emulator-options: -no-snapshot-save -no-window -gpu swiftshader_indirect -noaudio -no-boot-anim -camera-back none
142111
# This is to make sure that the job doesn't fail flakily
143112
emulator-boot-timeout: 900
144-
145-
# Let's see how expensive this job is, we might want to tone it down by running it periodically
146-
test-llama-app:
147-
# Only PRs from ExecuTorch itself have permission to access AWS; forked PRs will fail to
148-
# authenticate with the cloud service
149-
if: ${{ !github.event.pull_request.head.repo.fork }}
150-
needs: upload-artifacts
151-
permissions:
152-
id-token: write
153-
contents: read
154-
uses: pytorch/test-infra/.github/workflows/mobile_job.yml@main
155-
with:
156-
device-type: android
157-
runner: linux.2xlarge
158-
test-infra-ref: ''
159-
# This is the ARN of ExecuTorch project on AWS
160-
project-arn: arn:aws:devicefarm:us-west-2:308535385114:project:02a2cf0f-6d9b-45ee-ba1a-a086587469e6
161-
# This is the custom Android device pool that only includes Samsung Galaxy S2x
162-
device-pool-arn: arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa
163-
# Uploaded to S3 from the previous job, the name of the app comes from the project itself
164-
android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo/app-debug.apk
165-
android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo/app-debug-androidTest.apk
166-
test-spec: https://ossci-android.s3.amazonaws.com/executorch/android-llm-device-farm-test-spec.yml
167-
# Among the input, this is the biggest file, so it is cached on AWS to make the test faster. Note that the file is deleted by AWS after 30
168-
# days and the job will automatically re-upload the file when that happens.
169-
extra-data: https://ossci-assets.s3.amazonaws.com/executorch-android-llama2-7b-0717.zip

.github/workflows/apple-perf.yml

Lines changed: 10 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -74,9 +74,9 @@ jobs:
7474
# Separate default values from the workflow dispatch. To ensure defaults are accessible
7575
# during scheduled runs and to provide flexibility for different defaults between
7676
# on-demand and periodic benchmarking.
77-
CRON_DEFAULT_MODELS: "stories110M"
77+
CRON_DEFAULT_MODELS: "stories110M,mv3,ic4,resnet50,edsr,mobilebert,w2l"
7878
CRON_DEFAULT_DEVICES: "apple_iphone_15"
79-
CRON_DEFAULT_DELEGATES: "xnnpack"
79+
CRON_DEFAULT_DELEGATES: "xnnpack,coreml"
8080
run: |
8181
set -ex
8282
MODELS="${{ inputs.models }}"
@@ -124,11 +124,13 @@ jobs:
124124
delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }}
125125
fail-fast: false
126126
with:
127-
runner: macos-latest-xlarge
127+
# NB: Need to use our AWS MacOS runner to upload large models to S3
128+
runner: macos-m1-stable
128129
python-version: '3.11'
129130
submodules: 'true'
130131
timeout: 60
131132
upload-artifact: ios-models
133+
upload-artifact-to-s3: true
132134
script: |
133135
set -eux
134136
@@ -176,34 +178,6 @@ jobs:
176178
fi
177179
echo "::endgroup::"
178180
179-
upload-models:
180-
needs: export-models
181-
runs-on: linux.2xlarge
182-
if: always() # Continue this job regardless of previous job outcome
183-
steps:
184-
- name: Download the models from GitHub
185-
uses: actions/download-artifact@v3
186-
with:
187-
# The name here needs to match the name of the upload-artifact parameter
188-
name: ios-models
189-
path: ${{ runner.temp }}/artifacts/
190-
191-
- name: Verify the models
192-
shell: bash
193-
working-directory: ${{ runner.temp }}/artifacts/
194-
run: |
195-
ls -lah ./
196-
197-
- name: Upload the models to S3
198-
uses: seemethere/upload-artifact-s3@v5
199-
with:
200-
s3-bucket: gha-artifacts
201-
s3-prefix: |
202-
${{ github.repository }}/${{ github.run_id }}/artifact
203-
retention-days: 1
204-
if-no-files-found: ignore
205-
path: ${{ runner.temp }}/artifacts/
206-
207181
build-benchmark-app:
208182
name: build-benchmark-app
209183
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
@@ -297,7 +271,7 @@ jobs:
297271
with:
298272
s3-bucket: gha-artifacts
299273
s3-prefix: |
300-
${{ github.repository }}/${{ github.run_id }}/artifact
274+
${{ github.repository }}/${{ github.run_id }}/artifacts
301275
retention-days: 14
302276
if-no-files-found: ignore
303277
path: ${{ runner.temp }}/artifacts/
@@ -306,7 +280,7 @@ jobs:
306280
needs:
307281
- set-parameters
308282
- upload-benchmark-app
309-
- upload-models
283+
- export-models
310284
permissions:
311285
id-token: write
312286
contents: read
@@ -326,7 +300,7 @@ jobs:
326300
project-arn: arn:aws:devicefarm:us-west-2:308535385114:project:02a2cf0f-6d9b-45ee-ba1a-a086587469e6
327301
device-pool-arn: ${{ matrix.device }}
328302
# Uploaded to S3 from the previous job
329-
ios-ipa-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/Benchmark.ipa
330-
ios-xctestrun-zip: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/Benchmark.xctestrun.zip
303+
ios-ipa-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/Benchmark.ipa
304+
ios-xctestrun-zip: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/Benchmark.xctestrun.zip
331305
test-spec: ${{ inputs.test_spec || 'https://ossci-ios.s3.amazonaws.com/executorch/default-ios-device-farm-appium-test-spec.yml' }}
332-
extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/${{ matrix.model }}_${{ matrix.delegate }}/model.zip
306+
extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/model.zip

0 commit comments

Comments
 (0)