Skip to content

Commit 69879d5

Browse files
authored
Merge branch 'main' into ops
2 parents 3c7cbb8 + 043c7a0 commit 69879d5

File tree

177 files changed

+4342
-2583
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

177 files changed

+4342
-2583
lines changed

.ci/docker/ci_commit_pins/pytorch.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
08434df1f2f88c9770e59246caa2ff9c6f613270
1+
295f2ed4d103017f7e19a7b8263ece606cd629db

.ci/docker/common/install_android.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ install_sdk() {
7070
# These are the tools needed to build Android apps
7171
yes | /opt/cmdline-tools/bin/sdkmanager --sdk_root="${SDK_INSTALLATION_DIR}" --install "platforms;android-34"
7272
yes | /opt/cmdline-tools/bin/sdkmanager --sdk_root="${SDK_INSTALLATION_DIR}" --install "build-tools;33.0.1"
73+
yes | /opt/cmdline-tools/bin/sdkmanager --sdk_root="${SDK_INSTALLATION_DIR}" --install "build-tools;35.0.0"
7374
# And some more tools for future emulator tests
7475
yes | /opt/cmdline-tools/bin/sdkmanager --sdk_root="${SDK_INSTALLATION_DIR}" --install "platform-tools"
7576
yes | /opt/cmdline-tools/bin/sdkmanager --sdk_root="${SDK_INSTALLATION_DIR}" --install "tools"

.ci/docker/conda-env-ci.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
cmake=3.22.1
1+
cmake=3.26.4
22
ninja=1.10.2
33
libuv
44
llvm-openmp

.ci/scripts/build_android_instrumentation.sh

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,11 @@ fi
1313
which "${PYTHON_EXECUTABLE}"
1414

1515
build_android_test() {
16-
pushd extension/android_test
17-
ANDROID_HOME="${ANDROID_SDK:-/opt/android/sdk}" ./gradlew testDebugUnitTest
18-
ANDROID_HOME="${ANDROID_SDK:-/opt/android/sdk}" ./gradlew build assembleAndroidTest
16+
mkdir -p extension/android/executorch_android/src/androidTest/resources
17+
cp extension/module/test/resources/add.pte extension/android/executorch_android/src/androidTest/resources
18+
pushd extension/android
19+
ANDROID_HOME="${ANDROID_SDK:-/opt/android/sdk}" ./gradlew :executorch_android:testDebugUnitTest
20+
ANDROID_HOME="${ANDROID_SDK:-/opt/android/sdk}" ./gradlew :executorch_android:assembleAndroidTest
1921
popd
2022
}
2123

@@ -24,8 +26,7 @@ collect_artifacts_to_be_uploaded() {
2426
# Collect Java library test
2527
JAVA_LIBRARY_TEST_DIR="${ARTIFACTS_DIR_NAME}/library_test_dir"
2628
mkdir -p "${JAVA_LIBRARY_TEST_DIR}"
27-
cp extension/android_test/build/outputs/apk/debug/*.apk "${JAVA_LIBRARY_TEST_DIR}"
28-
cp extension/android_test/build/outputs/apk/androidTest/debug/*.apk "${JAVA_LIBRARY_TEST_DIR}"
29+
cp extension/android/executorch_android/build/outputs/apk/androidTest/debug/*.apk "${JAVA_LIBRARY_TEST_DIR}"
2930
}
3031

3132
main() {
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
#!/bin/bash
2+
# Copyright (c) Qualcomm Innovation Center, Inc.
3+
# All rights reserved
4+
#
5+
# This source code is licensed under the BSD-style license found in the
6+
# LICENSE file in the root directory of this source tree.
7+
8+
set -exu
9+
10+
source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
11+
12+
export EXECUTORCH_ROOT="$(dirname "${BASH_SOURCE[0]}")/../.."
13+
14+
if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
15+
PYTHON_EXECUTABLE=python3
16+
fi
17+
18+
which "${PYTHON_EXECUTABLE}"
19+
20+
# Update tokenizers submodule
21+
pushd $EXECUTORCH_ROOT/extension/llm/tokenizers
22+
echo "Update tokenizers submodule"
23+
git submodule update --init
24+
popd
25+
26+
# Install ET with CMake
27+
cmake -DPYTHON_EXECUTABLE=python \
28+
-DCMAKE_INSTALL_PREFIX=cmake-out \
29+
-DEXECUTORCH_ENABLE_LOGGING=1 \
30+
-DCMAKE_BUILD_TYPE=Release \
31+
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
32+
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
33+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
34+
-DEXECUTORCH_BUILD_XNNPACK=OFF \
35+
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
36+
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
37+
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
38+
-Bcmake-out .
39+
cmake --build cmake-out -j16 --target install --config Release
40+
41+
# Install llama runner with torchao
42+
cmake -DPYTHON_EXECUTABLE=python \
43+
-DCMAKE_PREFIX_PATH=$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())') \
44+
-DCMAKE_BUILD_TYPE=Release \
45+
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
46+
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
47+
-DEXECUTORCH_BUILD_XNNPACK=OFF \
48+
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
49+
-DEXECUTORCH_BUILD_TORCHAO=ON \
50+
-Bcmake-out/examples/models/llama \
51+
examples/models/llama
52+
cmake --build cmake-out/examples/models/llama -j16 --config Release
53+
54+
# Download stories llama110m artifacts
55+
download_stories_model_artifacts
56+
57+
echo "Creating tokenizer.bin"
58+
$PYTHON_EXECUTABLE -m extension.llm.tokenizer.tokenizer -t tokenizer.model -o tokenizer.bin
59+
60+
# Export model
61+
LLAMA_CHECKPOINT=stories110M.pt
62+
LLAMA_PARAMS=params.json
63+
MODEL_OUT=model.pte
64+
TOKENIZER=tokenizer.bin
65+
66+
# Set low-bit quantization parameters
67+
QLINEAR_BITWIDTH=3 # Can be 1-8
68+
QLINEAR_GROUP_SIZE=128 # Must be multiple of 16
69+
QEMBEDDING_BITWIDTH=4 # Can be 1-8
70+
QEMBEDDING_GROUP_SIZE=32 # Must be multiple of 16
71+
72+
${PYTHON_EXECUTABLE} -m examples.models.llama.export_llama \
73+
--checkpoint "${LLAMA_CHECKPOINT:?}" \
74+
--params "${LLAMA_PARAMS:?}" \
75+
-kv \
76+
--use_sdpa_with_kv_cache \
77+
--output_name=${MODEL_OUT} \
78+
-qmode "torchao:8da${QLINEAR_BITWIDTH}w" \
79+
--group_size ${QLINEAR_GROUP_SIZE} \
80+
-E "torchao:${QEMBEDDING_BITWIDTH},${QEMBEDDING_GROUP_SIZE}" \
81+
--disable_dynamic_shape \
82+
-d fp32
83+
84+
# Test run
85+
./cmake-out/examples/models/llama/llama_main --model_path=$MODEL_OUT --tokenizer_path=$TOKENIZER --prompt="Once upon a time,"

.github/workflows/_android.yml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,16 @@ jobs:
2828
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool buck2
2929
export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded
3030
31+
mkdir -p ${ARTIFACTS_DIR_NAME}/fp32-xnnpack-custom
32+
bash examples/models/llama/install_requirements.sh
33+
bash ".ci/scripts/test_llama.sh" -model stories110M -build_tool cmake -dtype fp16 -mode portable -upload ${ARTIFACTS_DIR_NAME}/fp32-xnnpack-custom
34+
3135
# Build LLM Demo for Android
3236
export BUILD_AAR_DIR=aar-out
37+
mkdir -p $BUILD_AAR_DIR
3338
bash build/build_android_library.sh ${ARTIFACTS_DIR_NAME}
3439
bash .ci/scripts/build_android_instrumentation.sh ${ARTIFACTS_DIR_NAME}
3540
36-
mkdir -p ${ARTIFACTS_DIR_NAME}/fp32-xnnpack-custom
37-
bash ".ci/scripts/test_llama.sh" -model stories110M -build_tool cmake -dtype fp16 -mode portable -upload ${ARTIFACTS_DIR_NAME}/fp32-xnnpack-custom
38-
3941
mkdir -p examples/demo-apps/android/LlamaDemo/app/libs
4042
cp aar-out/executorch.aar examples/demo-apps/android/LlamaDemo/app/libs
4143
pushd examples/demo-apps/android/LlamaDemo
@@ -94,7 +96,6 @@ jobs:
9496
curl -O https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/llm_demo/app-debug.apk
9597
curl -O https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/llm_demo/app-debug-androidTest.apk
9698
curl -O https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/fp32-xnnpack-custom/model.zip
97-
curl -o android-test-debug.apk https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/library_test_dir/executorch-debug.apk
9899
curl -o android-test-debug-androidTest.apk https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/library_test_dir/executorch-debug-androidTest.apk
99100
unzip model.zip
100101
mv *.pte model.pte

.github/workflows/android-perf.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ name: android-perf
22

33
on:
44
schedule:
5-
- cron: 0 0 * * *
5+
- cron: 0 0,8,16 * * *
66
pull_request:
77
paths:
88
- .github/workflows/android-perf.yml

.github/workflows/doc-build.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ jobs:
2626
with:
2727
job-name: Build doc
2828
runner: linux.2xlarge
29-
docker-image: executorch-ubuntu-22.04-clang12
29+
docker-image: executorch-ubuntu-22.04-clang12-android
3030
submodules: 'true'
3131
repository: pytorch/executorch
3232
upload-artifact: docs
@@ -70,8 +70,8 @@ jobs:
7070
7171
# Build javadoc:
7272
cd extension/android
73-
./gradlew javadoc
74-
cp -rf build/docs/javadoc "${RUNNER_DOCS_DIR}"
73+
ANDROID_HOME="${ANDROID_SDK:-/opt/android/sdk}" ./gradlew :executorch_android:javaDocReleaseGeneration
74+
cp -rf executorch_android/build/intermediates/java_doc_dir/release/javaDocReleaseGeneration "${RUNNER_DOCS_DIR}/javadoc"
7575
cd ../..
7676
7777
# If it's main branch, add noindex tag to all .html files to exclude from Google Search indexing.

.github/workflows/lint.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,8 @@ jobs:
7676
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
7777
timeout: 90
7878
script: |
79-
FILES_NEEDS_FORMAT=$(/opt/google-java-format -n extension/android/src/main/java/org/pytorch/executorch/*.java \
80-
examples/demo-apps/android/ExecuTorchDemo/app/src/main/java/com/example/executorchdemo/*.java \
79+
FILES_NEEDS_FORMAT=$(/opt/google-java-format -n \
80+
extension/android/executorch_android/src/main/java/org/pytorch/executorch/*.java \
8181
examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/*.java \
8282
extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/*.java)
8383
if [ -n "$FILES_NEEDS_FORMAT" ]; then

.github/workflows/pull.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ jobs:
6060
- runner: linux.arm64.2xlarge
6161
docker-image: executorch-ubuntu-22.04-clang12
6262
# TODO: Need to figure out why buck2 doesnt work on Graviton instances.
63-
- runner: linux.arm64.2xlarge
63+
- runner: linux.arm64.2xlarge
6464
build-tool: buck2
6565
fail-fast: false
6666
with:
@@ -420,7 +420,6 @@ jobs:
420420
permissions:
421421
id-token: write
422422
contents: read
423-
needs: test-llama-runner-linux
424423

425424
unittest:
426425
uses: ./.github/workflows/_unittest.yml

.github/workflows/trunk.yml

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ jobs:
2323
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
2424
strategy:
2525
matrix:
26-
# Mac runners are expensive and limited, and non reliable.
27-
# Do some basic testing for macos jobs, and rely mostly on
26+
# Mac runners are expensive and limited, and non reliable.
27+
# Do some basic testing for macos jobs, and rely mostly on
2828
# test-models-linux-aarch64 job instead.
2929
model: [emformer_join, ic4, llama2, mobilebert, mv3, resnet50, vit, w2l]
3030
backend: [xnnpack-quantization-delegation]
@@ -288,6 +288,26 @@ jobs:
288288
# Test ANE llama
289289
${CONDA_RUN} sh .ci/scripts/test_ane_static_llama.sh
290290
291+
test-llama-torchao-lowbit:
292+
name: test-llama-torchao-lowbit
293+
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
294+
with:
295+
runner: macos-m1-stable
296+
python-version: '3.11'
297+
submodules: 'true'
298+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
299+
script: |
300+
set -eux
301+
bash .ci/scripts/setup-conda.sh
302+
eval "$(conda shell.bash hook)"
303+
304+
# Install requirements
305+
${CONDA_RUN} python install_executorch.py
306+
${CONDA_RUN} sh examples/models/llama/install_requirements.sh
307+
308+
# Run test
309+
${CONDA_RUN} sh .ci/scripts/test_llama_torchao_lowbit.sh
310+
291311
test-llama-runner-linux:
292312
# Test Both linux x86 and linux aarch64
293313
name: test-llama-runner-linux

backends/arm/_passes/insert_rescales_pass.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,17 +38,17 @@ def rescale_fake(
3838
"""Casts the input tensor to dtype `dtype` to produce the correct tensor meta for a _rescale op.
3939
Additionally validates TOSA constraints of a RESCALE op.
4040
"""
41-
if not (dtype == torch.int32 or dtype == torch.int8):
41+
if dtype not in (torch.int32, torch.int8, torch.int16):
4242
raise NotImplementedError(
43-
"tosa::rescale currently only supports int32 and int8."
43+
f"tosa::rescale currently only supports int32, int16 and int8, not {dtype}"
4444
)
45-
if dtype == torch.int32 and out_zp != 0:
45+
if dtype in (torch.int32, torch.int16) and out_zp != 0:
4646
raise ValueError(
47-
"TOSA requires output_zp to be zero when the output dtype is int32."
47+
f"TOSA requires output_zp to be zero when the output dtype is {dtype}."
4848
)
49-
if x.dtype == torch.int32 and in_zp != 0:
49+
if x.dtype in (torch.int32, torch.int16) and in_zp != 0:
5050
raise ValueError(
51-
"TOSA requires input_zp to be zero when the input dtype is int32."
51+
f"TOSA requires input_zp to be zero when the input dtype is {dtype}"
5252
)
5353
if x.dtype == torch.int8 and not -128 <= in_zp <= 127:
5454
raise ValueError(f"{in_zp=} outside valid range (-128,127) for int8.")

0 commit comments

Comments
 (0)