Skip to content

Commit f2456fb

Browse files
committed
Update on "[executorch] Add vectorized scalar path for single-element Tensor passed to optimized mul"
We are currently doing slow broadcasting for this case. After this diff, we should get nice vectorization. Differential Revision: [D61560825](https://our.internmc.facebook.com/intern/diff/D61560825/) [ghstack-poisoned]
2 parents 9c11424 + 19365b2 commit f2456fb

File tree

447 files changed

+9178
-4681
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

447 files changed

+9178
-4681
lines changed

.ci/docker/ci_commit_pins/pytorch.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
b556d31586845fb1e296a975d2b85d9d325205c9
1+
c42ac54d9e817bf0a0366eb78e6c8beba4d5eff5

.ci/docker/common/install_linter.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,7 @@ source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
1313
# NB: Install all linter dependencies, the caching of lintrunner init could be
1414
# done after Executorch becomes public
1515
pip_install -r requirements-lintrunner.txt
16+
17+
# Install google-java-format
18+
curl -L --retry 3 https://github.com/google/google-java-format/releases/download/v1.23.0/google-java-format_linux-x86-64 > /opt/google-java-format
19+
chmod +x /opt/google-java-format

.ci/scripts/build-qnn-sdk.sh

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,43 @@
55
# This source code is licensed under the BSD-style license found in the
66
# LICENSE file in the root directory of this source tree.
77

8-
set -ex
8+
set -eux
99

1010
#######################################
# Build the Qualcomm QNN backend via backends/qualcomm/scripts/build.sh
# (aarch64 build skipped, release mode, 2 jobs).
# Globals:
#   ANDROID_NDK_ROOT (exported), QNN_SDK_ROOT (exported),
#   EXECUTORCH_ROOT  (exported; two directories above this script)
# Notes:
#   build.sh is invoked through a path relative to the current working
#   directory, not relative to EXECUTORCH_ROOT.
# Returns:
#   non-zero if the root directory cannot be resolved, otherwise the status
#   of build.sh.
#######################################
build_qnn_backend() {
  echo "Start building qnn backend."
  export ANDROID_NDK_ROOT=/opt/ndk
  export QNN_SDK_ROOT=/tmp/qnn/2.23.0.240531

  # Assign first, export afterwards: `export VAR="$(cmd)"` reports the status
  # of `export` itself, so a failing `cd` would go unnoticed even with set -e.
  EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." && pwd)" || return 1
  export EXECUTORCH_ROOT

  bash backends/qualcomm/scripts/build.sh --skip_aarch64 --job_number 2 --release
}
1818

19+
#######################################
# Configure and build the QNN Python adaptors (PyQnnManagerAdaptor and
# PyQnnWrapperAdaptor) in ${EXECUTORCH_ROOT}/cmake-out, then copy the built
# modules and the flatbuffer schemas to where the Python code imports them.
# Globals:
#   EXECUTORCH_ROOT (read), QNN_SDK_ROOT (read)
# Side effects:
#   Leaves the working directory at EXECUTORCH_ROOT.
# Returns:
#   non-zero if a directory change fails, otherwise the status of the last
#   copy.
#######################################
set_up_aot() {
  local python_dir="${EXECUTORCH_ROOT}/backends/qualcomm/python"

  cd "${EXECUTORCH_ROOT}" || return 1
  mkdir -p cmake-out
  pushd cmake-out || return 1
  cmake .. \
    -DCMAKE_INSTALL_PREFIX="${PWD}" \
    -DEXECUTORCH_BUILD_QNN=ON \
    -DQNN_SDK_ROOT="${QNN_SDK_ROOT}" \
    -DEXECUTORCH_BUILD_SDK=ON \
    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
    -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
    -DPYTHON_EXECUTABLE=python3 \
    -DEXECUTORCH_SEPARATE_FLATCC_HOST_PROJECT=OFF
  cmake --build "${PWD}" --target "PyQnnManagerAdaptor" "PyQnnWrapperAdaptor" -j"$(nproc)"

  # Install Python APIs to the correct import path.
  # The module filename embeds the Python version and host platform (for
  # example cpython-310-x86_64-linux-gnu), so match it with a glob instead of
  # pinning one interpreter. An unmatched glob is passed through literally and
  # makes cp fail, exactly as a missing hard-coded file would.
  cp -f backends/qualcomm/PyQnnManagerAdaptor.*.so "${python_dir}"
  cp -f backends/qualcomm/PyQnnWrapperAdaptor.*.so "${python_dir}"
  popd || return 1

  # Workaround for fbs files in exir/_serialize
  cp schema/program.fbs exir/_serialize/program.fbs
  cp schema/scalar_type.fbs exir/_serialize/scalar_type.fbs
}
45+
1946
build_qnn_backend
47+
set_up_aot

.ci/scripts/setup-linux.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,5 @@ fi
2020

2121
# As Linux job is running inside a Docker container, all of its dependencies
2222
# have already been installed
23-
install_flatc_from_source
2423
install_executorch
2524
build_executorch_runner "${BUILD_TOOL}"

.ci/scripts/setup-macos.sh

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,5 @@ if [[ -z "${GITHUB_RUNNER:-}" ]]; then
128128
fi
129129

130130
print_cmake_info
131-
install_pytorch_and_domains
132-
install_flatc_from_source
133131
install_executorch
134132
build_executorch_runner "${BUILD_TOOL}"

.ci/scripts/test.sh

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,25 @@ if [[ -z "${BACKEND:-}" ]]; then
2828
exit 1
2929
fi
3030

31+
UPLOAD_DIR=${4:-}
32+
33+
if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
34+
PYTHON_EXECUTABLE=python3
35+
fi
3136
which "${PYTHON_EXECUTABLE}"
37+
3238
# Just set this variable here, it's cheap even if we use buck2
3339
CMAKE_OUTPUT_DIR=cmake-out
40+
EXPORTED_MODEL=${MODEL_NAME}
41+
42+
#######################################
# Zip the exported model and move the archive into the upload directory.
# Globals:
#   UPLOAD_DIR     (read) destination directory; nothing is done when it is
#                  empty or unset
#   EXPORTED_MODEL (read) path of the model file to archive
# Outputs:
#   A progress message on stdout.
# Returns:
#   0 on success or when there is nothing to upload; non-zero as soon as one
#   step fails, so later steps never run on a missing archive even when the
#   caller has disabled errexit.
#######################################
prepare_artifacts_upload() {
  if [[ -z "${UPLOAD_DIR:-}" ]]; then
    return 0
  fi

  echo "Preparing for uploading generated artifacts"
  # -j stores only the file name, without its directory components.
  zip -j model.zip "${EXPORTED_MODEL}" || return 1
  mkdir -p "${UPLOAD_DIR}" || return 1
  mv model.zip "${UPLOAD_DIR}" || return 1
}
3450

3551
build_cmake_executor_runner() {
3652
echo "Building executor_runner"
@@ -114,6 +130,7 @@ test_model_with_xnnpack() {
114130
fi
115131

116132
OUTPUT_MODEL_PATH="${MODEL_NAME}_xnnpack_${SUFFIX}.pte"
133+
EXPORTED_MODEL=${OUTPUT_MODEL_PATH}
117134

118135
# Run test model
119136
if [[ "${BUILD_TOOL}" == "buck2" ]]; then
@@ -129,9 +146,36 @@ test_model_with_xnnpack() {
129146
fi
130147
}
131148

149+
#######################################
# Export a model for the QNN backend.
# Sources build-qnn-sdk.sh from this script's directory, points the dynamic
# loader and Python at the QNN SDK / ExecuTorch checkout, and for the "dl3"
# model runs the deeplab_v3 example in compile-only mode.
# Globals:
#   MODEL_NAME, PYTHON_EXECUTABLE, CMAKE_OUTPUT_DIR (read)
#   ANDROID_NDK_ROOT, QNN_SDK_ROOT, EXECUTORCH_ROOT (read; expected to be set
#     by the sourced script)
#   LD_LIBRARY_PATH, PYTHONPATH (exported, overwritten)
#   EXPORTED_MODEL (written on a successful dl3 export)
# Returns:
#   non-zero if sourcing or the export fails, so the caller's status check
#   does not report success for a model that was never produced.
#######################################
test_model_with_qnn() {
  source "$(dirname "${BASH_SOURCE[0]}")/build-qnn-sdk.sh" || return 1
  echo "ANDROID_NDK_ROOT: $ANDROID_NDK_ROOT"
  echo "QNN_SDK_ROOT: $QNN_SDK_ROOT"
  echo "EXECUTORCH_ROOT: $EXECUTORCH_ROOT"

  export LD_LIBRARY_PATH=$QNN_SDK_ROOT/lib/x86_64-linux-clang/
  export PYTHONPATH=$EXECUTORCH_ROOT/..

  if [[ "${MODEL_NAME}" == "dl3" ]]; then
    "${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.deeplab_v3 \
      -b "${CMAKE_OUTPUT_DIR}" -m SM8550 --compile_only --download || return 1
    # Only record the artifact once the export has actually succeeded.
    EXPORTED_MODEL=./deeplab_v3/dlv3_qnn.pte
  fi
}
163+
132164
if [[ "${BACKEND}" == "portable" ]]; then
133165
echo "Testing ${MODEL_NAME} with portable kernels..."
134166
test_model
167+
elif [[ "${BACKEND}" == "qnn" ]]; then
168+
echo "Testing ${MODEL_NAME} with qnn..."
169+
test_model_with_qnn
170+
if [[ $? -eq 0 ]]; then
171+
prepare_artifacts_upload
172+
fi
173+
elif [[ "${BACKEND}" == "xnnpack" ]]; then
174+
echo "Testing ${MODEL_NAME} with xnnpack..."
175+
test_model_with_xnnpack true true
176+
if [[ $? -eq 0 ]]; then
177+
prepare_artifacts_upload
178+
fi
135179
else
136180
set +e
137181
if [[ "${BACKEND}" == *"quantization"* ]]; then
@@ -153,5 +197,7 @@ else
153197
if [[ -n "${Q_ERROR:-}" ]] || [[ -n "${D_ERROR:-}" ]] || [[ -n "${Q_D_ERROR:-}" ]]; then
154198
echo "Portable q8 ${Q_ERROR:-ok}," "Delegation fp32 ${D_ERROR:-ok}," "Delegation q8 ${Q_D_ERROR:-ok}"
155199
exit 1
200+
else
201+
prepare_artifacts_upload
156202
fi
157203
fi

.ci/scripts/test_llama.sh

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ set -exu
99
# shellcheck source=/dev/null
1010
source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
1111

12-
MODEL_NAME=$1 # stories110M.pt
12+
MODEL_NAME=$1 # stories110M
1313
BUILD_TOOL=$2 # buck2 or cmake
1414
DTYPE=$3 # fp16 or fp32
1515
MODE=${4:-"xnnpack+custom"} # portable or xnnpack+custom or xnnpack+custom+qe
@@ -140,7 +140,7 @@ cmake_build_llama_runner() {
140140

141141
cleanup_files() {
142142
echo "Deleting downloaded and generated files"
143-
rm "${MODEL_NAME}"
143+
rm "${CHECKPOINT_FILE_NAME}"
144144
rm tokenizer.model
145145
rm tokenizer.bin
146146
rm "${EXPORTED_MODEL_NAME}"
@@ -159,8 +159,10 @@ prepare_artifacts_upload() {
159159

160160
# Download and create artifacts.
161161
PARAMS="params.json"
162+
CHECKPOINT_FILE_NAME=""
162163
touch "${PARAMS}"
163-
if [[ "${MODEL_NAME}" == "stories110M.pt" ]]; then
164+
if [[ "${MODEL_NAME}" == "stories110M" ]]; then
165+
CHECKPOINT_FILE_NAME="stories110M.pt"
164166
download_stories_model_artifacts
165167
else
166168
echo "Unsupported model name ${MODEL_NAME}"
@@ -181,7 +183,7 @@ fi
181183
# Export model.
182184
EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}.pte"
183185
echo "Exporting ${EXPORTED_MODEL_NAME}"
184-
EXPORT_ARGS="-c stories110M.pt -p ${PARAMS} -d ${DTYPE} -n ${EXPORTED_MODEL_NAME} -kv"
186+
EXPORT_ARGS="-c ${CHECKPOINT_FILE_NAME} -p ${PARAMS} -d ${DTYPE} -n ${EXPORTED_MODEL_NAME} -kv"
185187
if [[ "${XNNPACK}" == "ON" ]]; then
186188
EXPORT_ARGS="${EXPORT_ARGS} -X -qmode 8da4w -G 128"
187189
fi

.ci/scripts/utils.sh

Lines changed: 3 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,9 @@ retry () {
1919
install_executorch() {
2020
which pip
2121
# Install executorch, this assumes that Executorch is checked out in the
22-
# current directory. The --extra-index-url options tell pip to look on the
23-
# pytorch servers for nightly and pre-release versions of torch packages.
24-
pip install . --no-build-isolation -v \
25-
--extra-index-url https://download.pytorch.org/whl/test/cpu \
26-
--extra-index-url https://download.pytorch.org/whl/nightly/cpu
22+
# current directory.
23+
# TODO(T199538337): clean up install scripts to use install_requirements.sh
24+
./install_requirements.sh --pybind xnnpack
2725
# Just print out the list of packages for debugging
2826
pip list
2927
}
@@ -35,42 +33,6 @@ install_pip_dependencies() {
3533
popd || return
3634
}
3735

38-
install_domains() {
39-
echo "Install torchvision and torchaudio"
40-
pip install --no-use-pep517 --user "git+https://github.com/pytorch/audio.git@${TORCHAUDIO_VERSION}"
41-
pip install --no-use-pep517 --user "git+https://github.com/pytorch/vision.git@${TORCHVISION_VERSION}"
42-
}
43-
44-
install_pytorch_and_domains() {
45-
pushd .ci/docker || return
46-
TORCH_VERSION=$(cat ci_commit_pins/pytorch.txt)
47-
popd || return
48-
49-
git clone https://github.com/pytorch/pytorch.git
50-
51-
# Fetch the target commit
52-
pushd pytorch || return
53-
git checkout "${TORCH_VERSION}"
54-
git submodule update --init --recursive
55-
56-
export _GLIBCXX_USE_CXX11_ABI=0
57-
# Then build and install PyTorch
58-
python setup.py bdist_wheel
59-
pip install "$(echo dist/*.whl)"
60-
61-
# Grab the pinned audio and vision commits from PyTorch
62-
TORCHAUDIO_VERSION=$(cat .github/ci_commit_pins/audio.txt)
63-
export TORCHAUDIO_VERSION
64-
TORCHVISION_VERSION=$(cat .github/ci_commit_pins/vision.txt)
65-
export TORCHVISION_VERSION
66-
67-
install_domains
68-
69-
popd || return
70-
# Print sccache stats for debugging
71-
sccache --show-stats || true
72-
}
73-
7436
install_flatc_from_source() {
7537
# NB: This function could be used to install flatbuffer from source
7638
pushd third-party/flatbuffers || return

.github/workflows/android-perf.yml

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -135,24 +135,39 @@ jobs:
135135
fail-fast: false
136136
with:
137137
runner: linux.2xlarge
138-
docker-image: executorch-ubuntu-22.04-clang12
138+
docker-image: executorch-ubuntu-22.04-clang12-android
139139
submodules: 'true'
140140
timeout: 60
141141
upload-artifact: android-models
142142
script: |
143143
# The generic Linux job chooses to use base env, not the one setup by the image
144+
echo "::group::Setting up dev environment"
144145
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
145146
conda activate "${CONDA_ENV}"
146-
147+
if [[ ${{ matrix.delegate }} == "qnn" ]]; then
148+
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
149+
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
150+
fi
147151
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
148-
echo "Exporting model: ${{ matrix.model }}"
149-
export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded/${{ matrix.model }}_${{ matrix.delegate }}
152+
ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded/${{ matrix.model }}_${{ matrix.delegate }}
153+
echo "::endgroup::"
150154
151-
# TODO(T197546696): Note that the following scripts/steps only work for llama. It's expected to fail for other models+delegates.
152-
# Install requirements for export_llama
153-
PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
154-
# Test llama2
155-
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh "${{ matrix.model }}.pt" "cmake" "fp32" "xnnpack+custom+qe" "${ARTIFACTS_DIR_NAME}"\
155+
echo "::group::Exporting ${{ matrix.delegate }} model: ${{ matrix.model }}"
156+
BUILD_MODE="cmake"
157+
DTYPE="fp32"
158+
159+
if [[ ${{ matrix.model }} =~ ^stories* ]]; then
160+
# Install requirements for export_llama
161+
PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
162+
# Test llama2
163+
if [[ ${{ matrix.delegate }} == "xnnpack" ]]; then
164+
DELEGATE_CONFIG="xnnpack+custom+qe"
165+
fi
166+
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh "${{ matrix.model }}" "${BUILD_MODE}" "${DTYPE}" "${DELEGATE_CONFIG}" "${ARTIFACTS_DIR_NAME}"
167+
else
168+
PYTHON_EXECUTABLE=python bash .ci/scripts/test.sh "${{ matrix.model }}" "${BUILD_MODE}" "${{ matrix.delegate }}" "${ARTIFACTS_DIR_NAME}"
169+
fi
170+
echo "::endgroup::"
156171
157172
# Upload models to S3. The artifacts are needed not only by the device farm but also TorchChat
158173
upload-models:

.github/workflows/lint.yml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,3 +54,20 @@ jobs:
5454
lint.json || true
5555
5656
exit $RC
57+
58+
android-java-format:
59+
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
60+
with:
61+
runner: linux.2xlarge
62+
docker-image: executorch-ubuntu-22.04-linter
63+
fetch-depth: 0
64+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
65+
script: |
66+
FILES_NEEDS_FORMAT=$(/opt/google-java-format -n extension/android/src/main/java/org/pytorch/executorch/*.java \
67+
examples/demo-apps/android/ExecuTorchDemo/app/src/main/java/com/example/executorchdemo/*.java \
68+
examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/*.java)
69+
if [ -n "$FILES_NEEDS_FORMAT" ]; then
70+
echo "Warning: The following files need formatting. Please use google-java-format."
71+
echo "$FILES_NEEDS_FORMAT"
72+
exit 1
73+
fi

.github/workflows/pull.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ jobs:
112112
# Install requirements for export_llama
113113
PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
114114
# Test llama2
115-
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M.pt "${BUILD_TOOL}" "${DTYPE}" "${MODE}"
115+
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M "${BUILD_TOOL}" "${DTYPE}" "${MODE}"
116116
117117
test-llama-runner-linux-android:
118118
name: test-llama-runner-linux-android
@@ -406,4 +406,4 @@ jobs:
406406
# Install requirements for export_llama
407407
PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
408408
# Test llama2
409-
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M.pt "${BUILD_TOOL}" "${DTYPE}" "${MODE}"
409+
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M "${BUILD_TOOL}" "${DTYPE}" "${MODE}"

.github/workflows/trunk.yml

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ on:
99
- ciflow/trunk/*
1010
pull_request:
1111
paths:
12+
- .ci/docker/ci_commit_pins/pytorch.txt
1213
- .ci/scripts/**
1314
workflow_dispatch:
1415

@@ -142,7 +143,6 @@ jobs:
142143
conda activate "${CONDA_ENV}"
143144
144145
source .ci/scripts/utils.sh
145-
install_flatc_from_source
146146
install_executorch
147147
148148
install_arm
@@ -168,7 +168,6 @@ jobs:
168168
conda activate "${CONDA_ENV}"
169169
170170
source .ci/scripts/utils.sh
171-
install_flatc_from_source
172171
install_executorch
173172
174173
install_arm
@@ -269,4 +268,27 @@ jobs:
269268
# Install requirements for export_llama
270269
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama2/install_requirements.sh
271270
# Test llama2
272-
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh stories110M.pt "${BUILD_TOOL}" "${DTYPE}" "${MODE}"
271+
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh stories110M "${BUILD_TOOL}" "${DTYPE}" "${MODE}"
272+
273+
test-qnn-model:
274+
name: test-qnn-model
275+
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
276+
strategy:
277+
matrix:
278+
dtype: [fp32]
279+
model: [dl3]
280+
fail-fast: false
281+
with:
282+
runner: linux.2xlarge
283+
docker-image: executorch-ubuntu-22.04-clang12-android
284+
submodules: 'true'
285+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
286+
timeout: 900
287+
script: |
288+
# The generic Linux job chooses to use base env, not the one setup by the image
289+
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
290+
conda activate "${CONDA_ENV}"
291+
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake
292+
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
293+
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
294+
PYTHON_EXECUTABLE=python bash .ci/scripts/test.sh ${{ matrix.model }} "cmake" "qnn"

.github/workflows/update-viablestrict.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,6 @@ jobs:
2020
with:
2121
repository: pytorch/executorch
2222
stable-branch: viable/strict
23-
requires: '[\"pull\", \"lint\", \"trunk\", \"Build documentation\", \"Android\", \"Apple\"]'
23+
requires: '[\"pull\", \"lint\", \"trunk\", \"Build documentation\", \"^Android$\", \"^Apple$\"]'
2424
secret-bot-token: ${{ secrets.UPDATEBOT_TOKEN }}
2525
rockset-api-key: ${{ secrets.ROCKSET_API_KEY }}

0 commit comments

Comments
 (0)