
Commit a3ffbb9

Update on "[4/N] Add backend options map"
This is to manage the backend <-> BackendOptions map. Users will create the backend options map, and the ET runtime will read the backend name and dispatch the list of backend options to each backend.

exported-using-ghexport

Differential Revision: [D76149466](https://our.internmc.facebook.com/intern/diff/D76149466/)

[ghstack-poisoned]
2 parents 875fdc4 + af165df commit a3ffbb9
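
As a rough illustration of the mechanism the commit message describes, here is a minimal Python sketch of a backend-name -> options map and the dispatch loop. This is only a sketch: the actual ExecuTorch runtime is C++, and every name below (backend_options_map, dispatch_to_backend, the option keys) is hypothetical, not ExecuTorch API.

    from typing import Any

    def dispatch_to_backend(backend_name: str, options: list[tuple[str, Any]]) -> None:
        # Stand-in for the runtime handing an option list to the backend
        # registered under this name.
        print(f"{backend_name} <- {options}")

    # Users create the map from backend name to its list of options.
    backend_options_map = {
        "XnnpackBackend": [("num_threads", 4)],
        "QnnBackend": [("performance_mode", "burst")],
    }

    # The runtime reads each backend name and dispatches the corresponding
    # option list to that backend.
    for name, options in backend_options_map.items():
        dispatch_to_backend(name, options)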

File tree

480 files changed: +19775 -9593 lines


.ci/scripts/build-mediatek-sdk.sh

Lines changed: 22 additions & 0 deletions
@@ -0,0 +1,22 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -eux
+
+build_neuron_backend() {
+  echo "Start building neuron backend."
+  export ANDROID_NDK=/opt/ndk
+  export MEDIATEK_SDK_ROOT=/tmp/neuropilot
+  export NEURON_BUFFER_ALLOCATOR_LIB=${MEDIATEK_SDK_ROOT}/libneuron_buffer_allocator.so
+  export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." && pwd)"
+
+
+  cd ${EXECUTORCH_ROOT}
+  ./backends/mediatek/scripts/mtk_build.sh
+}
+
+build_neuron_backend

.ci/scripts/gather_benchmark_configs.py

Lines changed: 3 additions & 4 deletions
@@ -135,12 +135,11 @@ def generate_compatible_configs(model_name: str, target_os=None) -> List[str]:
         # etLLM recipes for Llama
         repo_name = model_name.split("meta-llama/")[1]
         if "qlora" in repo_name.lower():
-            configs.append("llama3_qlora")
+            configs = ["llama3_qlora"]
         elif "spinquant" in repo_name.lower():
-            configs.append("llama3_spinquant")
+            configs = ["llama3_spinquant"]
         else:
-            configs.append("llama3_fb16")
-            configs.append("et_xnnpack_custom_spda_kv_cache_8da4w")
+            configs.extend(["llama3_fb16", "et_xnnpack_custom_spda_kv_cache_8da4w"])
         configs.extend(
             [
                 config
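
The change above replaces append with assignment so that quantized Llama variants get only their own recipe instead of inheriting the defaults as well. A simplified sketch of the resulting selection logic (llama_etllm_configs is an illustrative helper name; the real code is generate_compatible_configs above):

    def llama_etllm_configs(repo_name: str) -> list[str]:
        name = repo_name.lower()
        if "qlora" in name:
            return ["llama3_qlora"]          # quantized variant: its recipe only
        if "spinquant" in name:
            return ["llama3_spinquant"]      # quantized variant: its recipe only
        # plain model: fb16 plus the XNNPACK custom kv-cache recipe
        return ["llama3_fb16", "et_xnnpack_custom_spda_kv_cache_8da4w"]

    print(llama_etllm_configs("Llama-3.2-1B-Instruct-QLORA_INT4_EO8"))  # ['llama3_qlora']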

.ci/scripts/setup-mediatek-deps.sh

Lines changed: 42 additions & 0 deletions
@@ -0,0 +1,42 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -eux
+
+MEDIATEK_INSTALLATION_DIR=/tmp/neuropilot
+EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." && pwd)"
+
+install_neuropilot() {
+  echo "Start installing neuropilot."
+  mkdir -p "${MEDIATEK_INSTALLATION_DIR}"
+
+  curl -Lo /tmp/neuropilot-express.tar.gz "https://s3.ap-southeast-1.amazonaws.com/mediatek.neuropilot.com/06302508-4c94-4bf2-9789-b0ee44e83e27.gz"
+  echo "Finishing downloading neuropilot sdk."
+  tar zxvf /tmp/neuropilot-express.tar.gz --strip-components=1 --directory "${MEDIATEK_INSTALLATION_DIR}"
+  echo "Finishing unzip neuropilot sdk."
+
+  # Copy NP header
+  cp ${MEDIATEK_INSTALLATION_DIR}/api/NeuronAdapter.h ${EXECUTORCH_ROOT}/backends/mediatek/runtime/include/api/
+
+  # Print the content for manual verification
+  ls -lah "${MEDIATEK_INSTALLATION_DIR}"
+}
+
+setup_neuropilot() {
+  pip3 install -r ${EXECUTORCH_ROOT}/backends/mediatek/requirements.txt
+  pip3 install ${MEDIATEK_INSTALLATION_DIR}/mtk_neuron-8.2.19-py3-none-linux_x86_64.whl
+  pip3 install ${MEDIATEK_INSTALLATION_DIR}/mtk_converter-8.13.0_public_packages/mtk_converter-8.13.0+public-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
+}
+
+setup_calibration_data() {
+  curl -Lo /tmp/imagenette2-160.tgz https://s3.amazonaws.com/fast-ai-imageclas/imagenette2-160.tgz
+  tar zxvf /tmp/imagenette2-160.tgz --strip-components=1 --directory "${MEDIATEK_INSTALLATION_DIR}"
+}
+
+install_neuropilot
+setup_neuropilot
+setup_calibration_data

.ci/scripts/test_llava.sh

Lines changed: 1 addition & 1 deletion
@@ -147,7 +147,7 @@ run_and_verify() {
 
     # verify result.txt
    RESULT=$(cat result.txt)
-    EXPECTED_PREFIX="ASSISTANT: image captures a basketball game in progress, with several players on the court. "
+    EXPECTED_PREFIX="ASSISTANT: image captures a basketball game in progress, with"
 
    if [[ "${RESULT}" == *"${EXPECTED_PREFIX}"* ]]; then
        echo "Expected result prefix: ${EXPECTED_PREFIX}"

.ci/scripts/test_model.sh

Lines changed: 51 additions & 1 deletion
@@ -188,6 +188,14 @@ test_model_with_qnn() {
        EXPORT_SCRIPT=edsr
        # Additional deps for edsr
        pip install piq
+    elif [[ "${MODEL_NAME}" == "albert" ]]; then
+        EXPORT_SCRIPT=albert
+    elif [[ "${MODEL_NAME}" == "bert" ]]; then
+        EXPORT_SCRIPT=bert
+    elif [[ "${MODEL_NAME}" == "distilbert" ]]; then
+        EXPORT_SCRIPT=distilbert
+    elif [[ "${MODEL_NAME}" == "eurobert" ]]; then
+        EXPORT_SCRIPT=eurobert
    else
        echo "Unsupported model $MODEL_NAME"
        exit 1
@@ -197,7 +205,25 @@ test_model_with_qnn() {
    # TODO(guangyang): Make QNN chipset matches the target device
    QNN_CHIPSET=SM8450
 
-    "${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --ci --compile_only $EXTRA_FLAGS
+    SCRIPT_FOLDER=""
+    case "${MODEL_NAME}" in
+        "dl3"|"mv3"|"mv2"|"ic4"|"ic3"|"vit"|"mb"|"w2l")
+            SCRIPT_FOLDER=scripts
+            ;;
+        "albert"|"bert"|"distilbert")
+            pip install evaluate
+            SCRIPT_FOLDER=oss_scripts
+            # Bert models running in 16bit will encounter op validation fail on some operations,
+            # which requires CHIPSET >= SM8550.
+            QNN_CHIPSET=SM8550
+            ;;
+        *)
+            echo "Unsupported model $MODEL_NAME"
+            exit 1
+            ;;
+    esac
+
+    "${PYTHON_EXECUTABLE}" -m examples.qualcomm.${SCRIPT_FOLDER}.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --ci --compile_only $EXTRA_FLAGS
    EXPORTED_MODEL=$(find "./${EXPORT_SCRIPT}" -type f -name "${MODEL_NAME}*.pte" -print -quit)
 }
@@ -244,6 +270,24 @@ test_model_with_mps() {
    EXPORTED_MODEL=$(find "." -type f -name "${MODEL_NAME}*.pte" -print -quit)
 }
 
+test_model_with_mediatek() {
+    if [[ "${MODEL_NAME}" == "dl3" ]]; then
+        EXPORT_SCRIPT=deeplab_v3
+    elif [[ "${MODEL_NAME}" == "mv3" ]]; then
+        EXPORT_SCRIPT=mobilenet_v3
+    elif [[ "${MODEL_NAME}" == "mv2" ]]; then
+        EXPORT_SCRIPT=mobilenet_v2
+    elif [[ "${MODEL_NAME}" == "ic4" ]]; then
+        EXPORT_SCRIPT=inception_v4
+    elif [[ "${MODEL_NAME}" == "ic3" ]]; then
+        EXPORT_SCRIPT=inception_v3
+    fi
+
+    PYTHONPATH=examples/mediatek/ "${PYTHON_EXECUTABLE}" -m examples.mediatek.model_export_scripts.${EXPORT_SCRIPT} -d /tmp/neuropilot/train -a ${EXPORT_SCRIPT}
+    EXPORTED_MODEL=$(find "./${EXPORT_SCRIPT}" -type f -name "*.pte" -print -quit)
+}
+
+
 if [[ "${BACKEND}" == "portable" ]]; then
    echo "Testing ${MODEL_NAME} with portable kernels..."
    test_model
@@ -281,6 +325,12 @@ elif [[ "${BACKEND}" == *"xnnpack"* ]]; then
    if [[ $? -eq 0 ]]; then
        prepare_artifacts_upload
    fi
+elif [[ "${BACKEND}" == "mediatek" ]]; then
+    echo "Testing ${MODEL_NAME} with mediatek..."
+    test_model_with_mediatek
+    if [[ $? -eq 0 ]]; then
+        prepare_artifacts_upload
+    fi
 else
    set +e
    if [[ "${BACKEND}" == *"quantization"* ]]; then
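
The new case statement in test_model_with_qnn routes each model name to the examples.qualcomm subfolder that holds its export script, bumping the chipset for the BERT family. A hedged Python rendering of that routing table (the names here are illustrative, not part of the script):

    SCRIPT_FOLDER_BY_MODEL = {
        **dict.fromkeys(["dl3", "mv3", "mv2", "ic4", "ic3", "vit", "mb", "w2l"], "scripts"),
        **dict.fromkeys(["albert", "bert", "distilbert"], "oss_scripts"),
    }

    def script_folder(model_name: str) -> str:
        # Mirrors the case statement: unknown models abort with an error.
        try:
            return SCRIPT_FOLDER_BY_MODEL[model_name]
        except KeyError:
            raise SystemExit(f"Unsupported model {model_name}")

    print(script_folder("bert"))  # oss_scripts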

.ci/scripts/tests/test_gather_benchmark_configs.py

Lines changed: 13 additions & 4 deletions
@@ -112,15 +112,24 @@ def test_generate_compatible_configs_llama_model(self):
         result = self.gather_benchmark_configs.generate_compatible_configs(
             model_name, target_os
         )
-        expected = ["llama3_fb16", "llama3_coreml_ane"]
-        self.assertEqual(result, expected)
+        expected = [
+            "llama3_fb16",
+            "llama3_coreml_ane",
+            "et_xnnpack_custom_spda_kv_cache_8da4w",
+            "hf_xnnpack_custom_spda_kv_cache_8da4w",
+        ]
+        self.assertCountEqual(result, expected)
 
         target_os = "android"
         result = self.gather_benchmark_configs.generate_compatible_configs(
             model_name, target_os
         )
-        expected = ["llama3_fb16"]
-        self.assertEqual(result, expected)
+        expected = [
+            "llama3_fb16",
+            "et_xnnpack_custom_spda_kv_cache_8da4w",
+            "hf_xnnpack_custom_spda_kv_cache_8da4w",
+        ]
+        self.assertCountEqual(result, expected)
 
     def test_generate_compatible_configs_quantized_llama_model(self):
         model_name = "meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8"
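
The switch from assertEqual to assertCountEqual makes these assertions order-insensitive: assertCountEqual passes whenever both sequences contain the same elements with the same multiplicities, which is the right check when the function makes no ordering guarantee. A self-contained example:

    import unittest

    class ConfigListTest(unittest.TestCase):
        def test_same_elements_any_order(self):
            result = ["llama3_coreml_ane", "llama3_fb16"]
            expected = ["llama3_fb16", "llama3_coreml_ane"]
            self.assertCountEqual(result, expected)  # passes: same elements
            # self.assertEqual(result, expected) would fail: order differs

    if __name__ == "__main__":
        unittest.main()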

.ci/scripts/unittest-buck2.sh

Lines changed: 3 additions & 1 deletion
@@ -15,8 +15,10 @@ buck2 query "//backends/apple/... + //backends/example/... + \
 //kernels/optimized/... + //kernels/portable/... + //kernels/quantized/... + \
 //kernels/test/... + //runtime/... + //schema/... + //test/... + //util/..."
 
+# TODO: optimized ops are unbuildable because they now use ATen; put
+# them back after we can use PyTorch in OSS buck.
 UNBUILDABLE_OPTIMIZED_OPS_REGEX="_elu|gelu|fft|log_softmax"
-BUILDABLE_OPTIMIZED_OPS=$(buck2 query //kernels/optimized/cpu/... | grep -E -v $UNBUILDABLE_OPTIMIZED_OPS_REGEX)
+BUILDABLE_OPTIMIZED_OPS= #$(buck2 query //kernels/optimized/cpu/... | grep -E -v $UNBUILDABLE_OPTIMIZED_OPS_REGEX)
 
 # TODO: build prim_ops_test_cpp again once supported_features works in
 # OSS buck.

.ci/scripts/utils.sh

Lines changed: 3 additions & 2 deletions
@@ -156,13 +156,14 @@ build_executorch_runner() {
 }
 
 cmake_install_executorch_lib() {
+  build_type="${1:-Release}"
  echo "Installing libexecutorch.a and libportable_kernels.a"
  clean_executorch_install_folders
  retry cmake -DCMAKE_INSTALL_PREFIX=cmake-out \
-      -DCMAKE_BUILD_TYPE=Release \
+      -DCMAKE_BUILD_TYPE=${build_type} \
      -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
      -Bcmake-out .
-  cmake --build cmake-out -j9 --target install --config Release
+  cmake --build cmake-out -j9 --target install --config ${build_type}
 }
 
 download_stories_model_artifacts() {
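
In bash, "${1:-Release}" expands to the function's first positional argument, falling back to Release when it is unset or empty, so existing callers keep Release builds while new callers may pass e.g. Debug. A hedged Python rendering of the same pattern using a default argument (it omits the retry/clean helpers and the PYTHON_EXECUTABLE flag from the real function):

    import subprocess

    def cmake_install_executorch_lib(build_type: str = "Release") -> None:
        # Configure with the requested build type (defaults to Release).
        subprocess.run(
            ["cmake", "-DCMAKE_INSTALL_PREFIX=cmake-out",
             f"-DCMAKE_BUILD_TYPE={build_type}", "-Bcmake-out", "."],
            check=True,
        )
        # Build and install with the matching config.
        subprocess.run(
            ["cmake", "--build", "cmake-out", "-j9",
             "--target", "install", "--config", build_type],
            check=True,
        )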

.github/workflows/android-perf-private-device-experiment.yml

Lines changed: 3 additions & 3 deletions
@@ -18,7 +18,7 @@ on:
       description: Models to be benchmarked
       required: false
       type: string
-      default: google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf
+      default: Qwen/Qwen3-0.6B
     devices:
       description: Target devices to run benchmark
       required: false
@@ -34,7 +34,7 @@ on:
       description: Models to be benchmarked
       required: false
       type: string
-      default: google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf
+      default: Qwen/Qwen3-0.6B
     devices:
       description: Target devices to run benchmark
       required: false
@@ -57,6 +57,6 @@ jobs:
       id-token: write
       contents: read
     with:
-      models: ${{ inputs.models || 'Qwen/Qwen3-0.6B' }}
+      models: ${{ inputs.models || github.event_name == 'schedule' && 'Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf' || 'Qwen/Qwen3-0.6B' }}
       devices: samsung_galaxy_s22_private
       benchmark_configs: ${{ inputs.benchmark_configs }}
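
GitHub Actions expressions have no ternary operator; `cond && a || b` emulates one via short-circuit evaluation, so the new models line means "use inputs.models when supplied, otherwise the longer list on scheduled runs, otherwise Qwen/Qwen3-0.6B". Python's and/or return operands the same way, as this illustrative sketch shows (note the idiom misfires if the middle operand is falsy, which these non-empty strings are not):

    def pick_models(inputs_models: str, event_name: str) -> str:
        scheduled = ("Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,"
                     "meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf")
        # `or` short-circuits to the first truthy operand, like `||`;
        # `and` yields its right operand when the left is truthy, like `&&`.
        return inputs_models or (event_name == "schedule" and scheduled or "Qwen/Qwen3-0.6B")

    print(pick_models("", "schedule"))           # the full scheduled list
    print(pick_models("", "workflow_dispatch"))  # Qwen/Qwen3-0.6B
    print(pick_models("google/gemma-3-1b-it", "schedule"))  # explicit input wins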

.github/workflows/android-perf.yml

Lines changed: 7 additions & 5 deletions
@@ -6,12 +6,14 @@ on:
   pull_request:
     paths:
       - .github/workflows/android-perf.yml
+      - .ci/scripts/gather_benchmark_configs.py
       - extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2
   push:
     branches:
       - main
     paths:
       - .github/workflows/android-perf.yml
+      - .ci/scripts/gather_benchmark_configs.py
       - extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2
   # Note: GitHub has an upper limit of 10 inputs
   workflow_dispatch:
@@ -20,7 +22,7 @@ on:
       description: Models to be benchmarked
       required: false
       type: string
-      default: llama
+      default: Qwen/Qwen3-0.6B
     devices:
       description: Target devices to run benchmark
       required: false
@@ -36,7 +38,7 @@ on:
       description: Models to be benchmarked
       required: false
       type: string
-      default: llama
+      default: Qwen/Qwen3-0.6B
     devices:
       description: Target devices to run benchmark
       required: false
@@ -70,7 +72,7 @@ jobs:
        # Separate default values from the workflow dispatch. To ensure defaults are accessible
        # during scheduled runs and to provide flexibility for different defaults between
        # on-demand and periodic benchmarking.
-        CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'llama,mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,allenai/OLMo-1B-hf' || 'llama' }}
+        CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,allenai/OLMo-1B-hf' || 'Qwen/Qwen3-0.6B' }}
        CRON_DEFAULT_DEVICES: samsung_galaxy_s22
      run: |
        set -eux
@@ -340,8 +342,8 @@
        git clone https://github.com/huggingface/optimum-executorch
        pushd optimum-executorch
        # There is no release yet, for CI stability, always test from the same commit on main
-        git checkout 1c653dc49812fc431a22312c7295d97005d22e12
-        python install_dev.py
+        git checkout 4c3b18f6cca68c5ccff809131d570062723d7188
+        python install_dev.py --skip_override_torch
        pip list
 
        ARGS=(

.github/workflows/apple-perf-private-device-experiment.yml

Lines changed: 3 additions & 3 deletions
@@ -18,7 +18,7 @@ on:
       description: Models to be benchmarked
       required: false
       type: string
-      default: google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf
+      default: Qwen/Qwen3-0.6B
     devices:
       description: Target devices to run benchmark
       required: false
@@ -34,7 +34,7 @@ on:
       description: Models to be benchmarked
       required: false
       type: string
-      default: Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf
+      default: Qwen/Qwen3-0.6B
     devices:
       description: Target devices to run benchmark
       required: false
@@ -57,6 +57,6 @@ jobs:
       id-token: write
       contents: read
     with:
-      models: ${{ inputs.models || 'Qwen/Qwen3-0.6B' }}
+      models: ${{ inputs.models || github.event_name == 'schedule' && 'Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf' || 'Qwen/Qwen3-0.6B' }}
       devices: apple_iphone_15_private
       benchmark_configs: ${{ inputs.benchmark_configs }}

.github/workflows/apple-perf.yml

Lines changed: 7 additions & 5 deletions
@@ -6,12 +6,14 @@ on:
   pull_request:
     paths:
       - .github/workflows/apple-perf.yml
+      - .ci/scripts/gather_benchmark_configs.py
       - extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml.j2
   push:
     branches:
       - main
     paths:
       - .github/workflows/apple-perf.yml
+      - .ci/scripts/gather_benchmark_configs.py
       - extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml.j2
   # Note: GitHub has an upper limit of 10 inputs
   workflow_dispatch:
@@ -20,7 +22,7 @@ on:
       description: Models to be benchmarked
       required: false
       type: string
-      default: llama
+      default: Qwen/Qwen3-0.6B
     devices:
       description: Target devices to run benchmark
       required: false
@@ -36,7 +38,7 @@ on:
       description: Models to be benchmarked
       required: false
       type: string
-      default: llama
+      default: Qwen/Qwen3-0.6B
     devices:
       description: Target devices to run benchmark
       required: false
@@ -70,7 +72,7 @@ jobs:
        # Separate default values from the workflow dispatch. To ensure defaults are accessible
        # during scheduled runs and to provide flexibility for different defaults between
        # on-demand and periodic benchmarking.
-        CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'llama,mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf' || 'llama' }}
+        CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf' || 'Qwen/Qwen3-0.6B' }}
        CRON_DEFAULT_DEVICES: apple_iphone_15
      run: |
        set -eux
@@ -345,8 +347,8 @@
        git clone https://github.com/huggingface/optimum-executorch
        pushd optimum-executorch
        # There is no release yet, for CI stability, always test from the same commit on main
-        git checkout 1c653dc49812fc431a22312c7295d97005d22e12
-        ${CONDA_RUN} python install_dev.py
+        git checkout 4c3b18f6cca68c5ccff809131d570062723d7188
+        ${CONDA_RUN} python install_dev.py --skip_override_torch
        pip list
 
        ARGS=(

.github/workflows/lint.yml

Lines changed: 1 addition & 1 deletion
@@ -46,7 +46,7 @@ jobs:
        fi
 
        # This has already been cached in the docker image
-        lintrunner init 2> /dev/null
+        lintrunner init
 
        RC=0
        # Run lintrunner on all files
