Skip to content

Commit dc09af3

Browse files
committed
Update on "[ET-VK] Store unique ptr to Tensor in Value instead of inlined tensor object, to reduce Value struct size from 448 to 80 bytes."
This diff aims to reduce the size of the Value struct in the ExecuTorch Vulkan runtime by storing a unique pointer to the Tensor object instead of an inlined tensor object. This change reduces the size of the Value struct from 448 bytes to 80 bytes, which can improve performance and reduce memory usage. Differential Revision: [D66655991](https://our.internmc.facebook.com/intern/diff/D66655991/) [ghstack-poisoned]
2 parents 5f370c5 + af7bbea commit dc09af3

File tree

129 files changed

+22149
-915
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

129 files changed

+22149
-915
lines changed

.github/scripts/extract_benchmark_results.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -451,7 +451,7 @@ def main() -> None:
451451
continue
452452

453453
output_dir = os.path.join(args.output_dir, schema)
454-
os.mkdir(output_dir)
454+
os.makedirs(output_dir, exist_ok=True)
455455

456456
output_file = os.path.basename(args.artifacts)
457457
with open(f"{output_dir}/{output_file}", "w") as f:

.github/workflows/android-perf.yml

Lines changed: 63 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,16 @@ name: android-perf
33
on:
44
schedule:
55
- cron: 0 0 * * *
6+
pull_request:
7+
paths:
8+
- .github/workflows/android-perf.yml
9+
- extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2
10+
push:
11+
branches:
12+
- main
13+
paths:
14+
- .github/workflows/android-perf.yml
15+
- extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2
616
# Note: GitHub has an upper limit of 10 inputs
717
workflow_dispatch:
818
inputs:
@@ -30,10 +40,6 @@ on:
3040
description: The list of configs used the benchmark
3141
required: false
3242
type: string
33-
test_spec:
34-
description: The test spec to drive the test on AWS devices
35-
required: false
36-
type: string
3743
workflow_call:
3844
inputs:
3945
models:
@@ -60,10 +66,6 @@ on:
6066
description: The list of configs used the benchmark
6167
required: false
6268
type: string
63-
test_spec:
64-
description: The test spec to drive the test on AWS devices
65-
required: false
66-
type: string
6769

6870
concurrency:
6971
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
@@ -84,9 +86,9 @@ jobs:
8486
# Separate default values from the workflow dispatch to ensure defaults are accessible
8587
# during scheduled runs and to provide flexibility for different defaults between
8688
# on-demand and periodic benchmarking.
87-
CRON_DEFAULT_MODELS: "stories110M,dl3,mv3,mv2,ic4,ic3,vit"
88-
CRON_DEFAULT_DEVICES: "samsung_galaxy_s22"
89-
CRON_DEFAULT_DELEGATES: "xnnpack,qnn"
89+
CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'stories110M,dl3,mv3,mv2,ic4,ic3,vit' || 'stories110M' }}
90+
CRON_DEFAULT_DEVICES: samsung_galaxy_s22
91+
CRON_DEFAULT_DELEGATES: ${{ github.event_name == 'schedule' && 'xnnpack,qnn' || 'xnnpack' }}
9092
run: |
9193
set -ex
9294
MODELS="${{ inputs.models }}"
@@ -125,6 +127,43 @@ jobs:
125127
echo "devices=$(echo "$MAPPED_ARNS_JSON" | jq -c .)" >> $GITHUB_OUTPUT
126128
echo "delegates=$(echo $DELEGATES | jq -Rc 'split(",")')" >> $GITHUB_OUTPUT
127129
130+
prepare-test-specs:
131+
runs-on: linux.2xlarge
132+
needs: set-parameters
133+
strategy:
134+
matrix:
135+
model: ${{ fromJson(needs.set-parameters.outputs.models) }}
136+
delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }}
137+
fail-fast: false
138+
steps:
139+
- uses: actions/checkout@v3
140+
141+
- name: Prepare the spec
142+
shell: bash
143+
working-directory: extension/benchmark/android/benchmark
144+
run: |
145+
set -eux
146+
147+
# The model will be exported in the next step to this S3 path
148+
MODEL_PATH="https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/model.zip"
149+
# We could write a script to properly use jinja here, but there is only one variable,
150+
# so let's just sed it
151+
sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' android-llm-device-farm-test-spec.yml.j2
152+
cp android-llm-device-farm-test-spec.yml.j2 android-llm-device-farm-test-spec.yml
153+
154+
# Just print the test spec for debugging
155+
cat android-llm-device-farm-test-spec.yml
156+
157+
- name: Upload the spec
158+
uses: seemethere/upload-artifact-s3@v5
159+
with:
160+
s3-bucket: gha-artifacts
161+
s3-prefix: |
162+
${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}
163+
retention-days: 1
164+
if-no-files-found: error
165+
path: extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml
166+
128167
export-models:
129168
name: export-models
130169
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
@@ -170,9 +209,18 @@ jobs:
170209
echo "Unsupported delegate ${{ matrix.delegate }}"
171210
exit 1
172211
fi
173-
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh "${{ matrix.model }}" "${BUILD_MODE}" "${DTYPE}" "${DELEGATE_CONFIG}" "${ARTIFACTS_DIR_NAME}"
212+
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh \
213+
-model "${{ matrix.model }}" \
214+
-build_tool "${BUILD_MODE}" \
215+
-dtype "${DTYPE}" \
216+
-mode "${DELEGATE_CONFIG}" \
217+
-upload "${ARTIFACTS_DIR_NAME}"
174218
else
175-
PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "${{ matrix.model }}" "${BUILD_MODE}" "${{ matrix.delegate }}" "${ARTIFACTS_DIR_NAME}"
219+
PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh \
220+
"${{ matrix.model }}" \
221+
"${BUILD_MODE}" \
222+
"${{ matrix.delegate }}" \
223+
"${ARTIFACTS_DIR_NAME}"
176224
fi
177225
echo "::endgroup::"
178226
@@ -212,6 +260,7 @@ jobs:
212260
uses: pytorch/test-infra/.github/workflows/mobile_job.yml@main
213261
needs:
214262
- set-parameters
263+
- prepare-test-specs
215264
- build-benchmark-app
216265
- export-models
217266
strategy:
@@ -231,10 +280,7 @@ jobs:
231280
device-pool-arn: ${{ matrix.device }}
232281
android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/minibench/app-debug.apk
233282
android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/minibench/app-debug-androidTest.apk
234-
# NB: Need to set the default spec here so that it works for periodic too
235-
test-spec: ${{ inputs.test_spec || 'https://ossci-android.s3.amazonaws.com/executorch/android-llm-device-farm-test-spec.yml' }}
236-
# Uploaded to S3 from the previous job
237-
extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/model.zip
283+
test-spec: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/android-llm-device-farm-test-spec.yml
238284

239285
upload-benchmark-results:
240286
needs:

.github/workflows/apple-perf.yml

Lines changed: 61 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,16 @@ name: apple-perf
33
on:
44
schedule:
55
- cron: 0 1 * * *
6+
pull_request:
7+
paths:
8+
- .github/workflows/apple-perf.yml
9+
- extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml.j2
10+
push:
11+
branches:
12+
- main
13+
paths:
14+
- .github/workflows/apple-perf.yml
15+
- extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml.j2
616
# Note: GitHub has an upper limit of 10 inputs
717
workflow_dispatch:
818
inputs:
@@ -25,10 +35,6 @@ on:
2535
description: The list of configs used the benchmark
2636
required: false
2737
type: string
28-
test_spec:
29-
description: The test spec to drive the test on AWS devices
30-
required: false
31-
type: string
3238
workflow_call:
3339
inputs:
3440
models:
@@ -50,10 +56,6 @@ on:
5056
description: The list of configs used the benchmark
5157
required: false
5258
type: string
53-
test_spec:
54-
description: The test spec to drive the test on AWS devices
55-
required: false
56-
type: string
5759

5860
concurrency:
5961
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
@@ -74,9 +76,9 @@ jobs:
7476
# Separate default values from the workflow dispatch to ensure defaults are accessible
7577
# during scheduled runs and to provide flexibility for different defaults between
7678
# on-demand and periodic benchmarking.
77-
CRON_DEFAULT_MODELS: "stories110M,mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l"
78-
CRON_DEFAULT_DEVICES: "apple_iphone_15"
79-
CRON_DEFAULT_DELEGATES: "xnnpack,coreml,mps"
79+
CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'stories110M,mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l' || 'stories110M' }}
80+
CRON_DEFAULT_DEVICES: apple_iphone_15
81+
CRON_DEFAULT_DELEGATES: ${{ github.event_name == 'schedule' && 'xnnpack,coreml,mps' || 'xnnpack' }}
8082
run: |
8183
set -ex
8284
MODELS="${{ inputs.models }}"
@@ -114,6 +116,41 @@ jobs:
114116
echo "devices=$(echo "$MAPPED_ARNS_JSON" | jq -c .)" >> $GITHUB_OUTPUT
115117
echo "delegates=$(echo $DELEGATES | jq -Rc 'split(",")')" >> $GITHUB_OUTPUT
116118
119+
prepare-test-specs:
120+
runs-on: linux.2xlarge
121+
needs: set-parameters
122+
strategy:
123+
matrix:
124+
model: ${{ fromJson(needs.set-parameters.outputs.models) }}
125+
delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }}
126+
fail-fast: false
127+
steps:
128+
- uses: actions/checkout@v3
129+
130+
- name: Prepare the spec
131+
shell: bash
132+
working-directory: extension/benchmark/apple/Benchmark
133+
run: |
134+
set -eux
135+
# The model will be exported in the next step to this S3 path
136+
MODEL_PATH="https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/model.zip"
137+
# We could write a script to properly use jinja here, but there is only one variable,
138+
# so let's just sed it
139+
sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' default-ios-device-farm-appium-test-spec.yml.j2
140+
cp default-ios-device-farm-appium-test-spec.yml.j2 default-ios-device-farm-appium-test-spec.yml
141+
# Just print the test spec for debugging
142+
cat default-ios-device-farm-appium-test-spec.yml
143+
144+
- name: Upload the spec
145+
uses: seemethere/upload-artifact-s3@v5
146+
with:
147+
s3-bucket: gha-artifacts
148+
s3-prefix: |
149+
${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}
150+
retention-days: 1
151+
if-no-files-found: error
152+
path: extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml
153+
117154
export-models:
118155
name: export-models
119156
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
@@ -173,10 +210,19 @@ jobs:
173210
DELEGATE_CONFIG="mps"
174211
fi
175212
PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
176-
bash .ci/scripts/test_llama.sh "${{ matrix.model }}" "${BUILD_MODE}" "${DTYPE}" "${DELEGATE_CONFIG}" "${ARTIFACTS_DIR_NAME}"
213+
bash .ci/scripts/test_llama.sh \
214+
-model "${{ matrix.model }}" \
215+
-build_tool "${BUILD_MODE}" \
216+
-dtype "${DTYPE}" \
217+
-mode "${DELEGATE_CONFIG}" \
218+
-upload "${ARTIFACTS_DIR_NAME}"
177219
else
178220
PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
179-
bash .ci/scripts/test_model.sh "${{ matrix.model }}" "${BUILD_MODE}" "${{ matrix.delegate }}" "${ARTIFACTS_DIR_NAME}"
221+
bash .ci/scripts/test_model.sh \
222+
"${{ matrix.model }}" \
223+
"${BUILD_MODE}" \
224+
"${{ matrix.delegate }}" \
225+
"${ARTIFACTS_DIR_NAME}"
180226
fi
181227
echo "::endgroup::"
182228
@@ -282,6 +328,7 @@ jobs:
282328
if: always()
283329
needs:
284330
- set-parameters
331+
- prepare-test-specs
285332
- upload-benchmark-app
286333
- export-models
287334
permissions:
@@ -307,8 +354,7 @@ jobs:
307354
# Uploaded to S3 from the previous job
308355
ios-ipa-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/Benchmark.ipa
309356
ios-xctestrun-zip: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/Benchmark.xctestrun.zip
310-
test-spec: ${{ inputs.test_spec || 'https://ossci-ios.s3.amazonaws.com/executorch/default-ios-device-farm-appium-test-spec.yml' }}
311-
extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/model.zip
357+
test-spec: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/default-ios-device-farm-appium-test-spec.yml
312358

313359
upload-benchmark-results:
314360
needs:

.github/workflows/upload-android-test-specs.yml

Lines changed: 0 additions & 94 deletions
This file was deleted.

0 commit comments

Comments
 (0)