Skip to content

Commit db5ccb8

Browse files
committed
Update base for Update on "Dont quantize the current token for attention"
Differential Revision: [D63497872](https://our.internmc.facebook.com/intern/diff/D63497872/) [ghstack-poisoned]
2 parents 80ff73f + 54899fe commit db5ccb8

File tree

1,232 files changed

+46343
-14334
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,232 files changed

+46343
-14334
lines changed

.ci/docker/build.sh

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,15 @@ case "${IMAGE_NAME}" in
3737
ARM_SDK=yes
3838
CLANG_VERSION=12
3939
;;
40+
executorch-ubuntu-22.04-qnn-sdk)
41+
QNN_SDK=yes
42+
CLANG_VERSION=12
43+
;;
4044
executorch-ubuntu-22.04-clang12-android)
4145
LINTRUNNER=""
4246
CLANG_VERSION=12
4347
# From https://developer.android.com/ndk/downloads
44-
ANDROID_NDK_VERSION=r26c
48+
ANDROID_NDK_VERSION=r27b
4549
;;
4650
*)
4751
echo "Invalid image name ${IMAGE_NAME}"
@@ -72,6 +76,7 @@ docker build \
7276
--build-arg "LINTRUNNER=${LINTRUNNER:-}" \
7377
--build-arg "BUILD_DOCS=${BUILD_DOCS}" \
7478
--build-arg "ARM_SDK=${ARM_SDK:-}" \
79+
--build-arg "QNN_SDK=${QNN_SDK:-}" \
7580
--build-arg "ANDROID_NDK_VERSION=${ANDROID_NDK_VERSION:-}" \
7681
-f "${OS}"/Dockerfile \
7782
"$@" \

.ci/docker/ci_commit_pins/pytorch.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
5ba404f68775bb06a1125a100687f86b6d6de6a8
1+
19eff28ff3f19b50da46f5a9ff5f4d4d213806fe

.ci/docker/ci_commit_pins/torchao.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

.ci/docker/common/install_clang.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ install_ubuntu() {
1313
apt-get install -y --no-install-recommends clang-"$CLANG_VERSION"
1414
apt-get install -y --no-install-recommends llvm-"$CLANG_VERSION"
1515
# Also require LLD linker from llvm and libomp to build PyTorch from source
16-
apt-get install -y lld "libomp-${CLANG_VERSION}-dev"
16+
apt-get install -y lld "libomp-${CLANG_VERSION}-dev" "libc++-${CLANG_VERSION}-dev"
1717

1818
# Use update-alternatives to make this version the default
1919
update-alternatives --install /usr/bin/clang clang /usr/bin/clang-"$CLANG_VERSION" 50

.ci/docker/ubuntu/Dockerfile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,5 +82,7 @@ COPY --chown=ci-user:ci-user ./arm /opt/arm
8282
# Set up ARM SDK if needed
8383
RUN if [ -n "${ARM_SDK}" ]; then git config --global user.email "[email protected]"; git config --global user.name "OSS CI"; bash /opt/arm/setup.sh --i-agree-to-the-contained-eula /opt/arm-sdk; chown -R ci-user:ci-user /opt/arm-sdk; fi
8484

85+
ARG QNN_SDK
86+
8587
USER ci-user
8688
CMD ["bash"]

.ci/scripts/build_llama_android.sh

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ install_executorch_and_backend_lib() {
1919
cmake -DBUCK2="${BUCK2}" \
2020
-DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \
2121
-DANDROID_ABI="${ANDROID_ABI}" \
22-
-DANDROID_PLATFORM=android-23 \
2322
-DCMAKE_INSTALL_PREFIX=cmake-android-out \
2423
-DCMAKE_BUILD_TYPE=Release \
2524
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
@@ -41,16 +40,15 @@ build_llama_runner() {
4140
cmake -DBUCK2="${BUCK2}" \
4241
-DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK"/build/cmake/android.toolchain.cmake \
4342
-DANDROID_ABI="${ANDROID_ABI}" \
44-
-DANDROID_PLATFORM=android-23 \
4543
-DCMAKE_INSTALL_PREFIX=cmake-android-out \
4644
-DCMAKE_BUILD_TYPE=Release -DPYTHON_EXECUTABLE=python \
4745
-DEXECUTORCH_BUILD_XNNPACK=ON \
4846
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
4947
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
5048
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
51-
-Bcmake-android-out/examples/models/llama2 examples/models/llama2
49+
-Bcmake-android-out/examples/models/llama examples/models/llama
5250

53-
cmake --build cmake-android-out/examples/models/llama2 -j4 --config Release
51+
cmake --build cmake-android-out/examples/models/llama -j4 --config Release
5452
}
5553
install_flatc_from_source
5654
install_executorch_and_backend_lib

.ci/scripts/gather_test_models.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
"ic4": "linux.12xlarge",
2525
"resnet50": "linux.12xlarge",
2626
"llava": "linux.12xlarge",
27+
"llama3_2_vision_encoder": "linux.12xlarge",
28+
"llama3_2_text_decoder": "linux.12xlarge",
2729
# This one causes timeout on smaller runner, the root cause is unclear (T161064121)
2830
"dl3": "linux.12xlarge",
2931
"emformer_join": "linux.12xlarge",
@@ -88,8 +90,8 @@ def model_should_run_on_event(model: str, event: str) -> bool:
8890
if event == "pull_request":
8991
return model in ["mv3", "vit"]
9092
elif event == "push":
91-
# 'emformer_predict' is running super slow. Only run it periodically
92-
return model not in ["emformer_predict"]
93+
# These models are super slow, so only run them periodically
94+
return model not in ["dl3", "edsr", "emformer_predict"]
9395
else:
9496
return True
9597

.ci/scripts/setup-linux.sh

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ else
1919
fi
2020

2121
# As Linux job is running inside a Docker container, all of its dependencies
22-
# have already been installed
23-
install_executorch
22+
# have already been installed, so we use a PyTorch build from source here instead
23+
# of the nightly build. This allows CI to test against the latest commits from PyTorch
24+
install_executorch "use-pt-pinned-commit"
2425
build_executorch_runner "${BUILD_TOOL}"
26+
do_not_use_nightly_on_ci

.ci/scripts/setup-qnn-deps.sh

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,9 @@ install_qnn() {
3131
}
3232

3333
setup_libc++() {
34+
clang_version=$1
3435
sudo apt-get update
35-
pkgs_to_check=('libc++-dev')
36+
pkgs_to_check=("libc++-${clang_version}-dev")
3637
j=0
3738
while [ $j -lt ${#pkgs_to_check[*]} ]; do
3839
install_status=$(verify_pkg_installed ${pkgs_to_check[$j]})
@@ -47,5 +48,6 @@ setup_libc++() {
4748
done
4849
}
4950

50-
setup_libc++
51+
# This needs to match the clang version from the Docker image
52+
setup_libc++ 12
5153
install_qnn

.ci/scripts/test_eval_llama_mmlu.sh

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
#!/bin/bash
2+
# Copyright (c) Meta Platforms, Inc. and affiliates.
3+
# All rights reserved.
4+
#
5+
# This source code is licensed under the BSD-style license found in the
6+
# LICENSE file in the root directory of this source tree.
7+
8+
set -exu
9+
10+
if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
11+
PYTHON_EXECUTABLE=python3
12+
fi
13+
14+
# Download and prepare stories model artifacts
15+
prepare_model_artifacts() {
16+
echo "Preparing stories model artifacts"
17+
wget -O stories110M.pt "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt"
18+
wget -O tokenizer.model "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.model"
19+
echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps": 1e-05, "vocab_size": 32000}' > params.json
20+
}
21+
22+
run_and_verify() {
23+
NOW=$(date +"%H:%M:%S")
24+
echo "Starting to run eval_llama at ${NOW}"
25+
if [[ ! -f "stories110M.pt" ]]; then
26+
echo "stories110M.pt is missing."
27+
exit 1
28+
fi
29+
if [[ ! -f "tokenizer.model" ]]; then
30+
echo "tokenizer.model is missing."
31+
exit 1
32+
fi
33+
if [[ ! -f "params.json" ]]; then
34+
echo "params.json is missing."
35+
exit 1
36+
fi
37+
$PYTHON_EXECUTABLE -m examples.models.llama.eval_llama \
38+
-c stories110M.pt \
39+
-p params.json \
40+
-t tokenizer.model \
41+
-kv \
42+
-d fp32 \
43+
--tasks mmlu \
44+
-f 5 \
45+
--max_seq_length 2048 \
46+
--limit 5 > result.txt
47+
48+
# Verify result.txt
49+
RESULT=$(cat result.txt)
50+
EXPECTED_TASK="mmlu"
51+
EXPECTED_RESULT="acc"
52+
if [[ "${RESULT}" == "${EXPECTED_TASK}: {"*"${EXPECTED_RESULT}"* ]]; then
53+
echo "Actual result: ${RESULT}"
54+
echo "Success"
55+
exit 0
56+
else
57+
echo "Actual result: ${RESULT}"
58+
echo "Failure; results not the same"
59+
exit 1
60+
fi
61+
}
62+
63+
prepare_model_artifacts
64+
run_and_verify
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
#!/bin/bash
2+
# Copyright (c) Meta Platforms, Inc. and affiliates.
3+
# All rights reserved.
4+
#
5+
# This source code is licensed under the BSD-style license found in the
6+
# LICENSE file in the root directory of this source tree.
7+
8+
set -exu
9+
10+
if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
11+
PYTHON_EXECUTABLE=python3
12+
fi
13+
14+
# Download and prepare stories model artifacts
15+
prepare_model_artifacts() {
16+
echo "Preparing stories model artifacts"
17+
wget -O stories110M.pt "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt"
18+
wget -O tokenizer.model "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.model"
19+
echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps": 1e-05, "vocab_size": 32000}' > params.json
20+
}
21+
22+
run_and_verify() {
23+
NOW=$(date +"%H:%M:%S")
24+
echo "Starting to run eval_llama at ${NOW}"
25+
if [[ ! -f "stories110M.pt" ]]; then
26+
echo "stories110M.pt is missing."
27+
exit 1
28+
fi
29+
if [[ ! -f "tokenizer.model" ]]; then
30+
echo "tokenizer.model is missing."
31+
exit 1
32+
fi
33+
if [[ ! -f "params.json" ]]; then
34+
echo "params.json is missing."
35+
exit 1
36+
fi
37+
$PYTHON_EXECUTABLE -m examples.models.llama.eval_llama \
38+
-c stories110M.pt \
39+
-p params.json \
40+
-t tokenizer.model \
41+
-kv \
42+
-d fp32 \
43+
--max_seq_length 2048 \
44+
--limit 5 > result.txt
45+
46+
# Verify result.txt
47+
RESULT=$(cat result.txt)
48+
EXPECTED_TASK="wikitext"
49+
EXPECTED_RESULT="word_perplexity"
50+
if [[ "${RESULT}" == "${EXPECTED_TASK}: {"*"${EXPECTED_RESULT}"* ]]; then
51+
echo "Actual result: ${RESULT}"
52+
echo "Success"
53+
exit 0
54+
else
55+
echo "Actual result: ${RESULT}"
56+
echo "Failure; results not the same"
57+
exit 1
58+
fi
59+
}
60+
61+
prepare_model_artifacts
62+
run_and_verify

.ci/scripts/test_llama.sh

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ cmake_install_executorch_libraries() {
125125

126126
cmake_build_llama_runner() {
127127
echo "Building llama runner"
128-
dir="examples/models/llama2"
128+
dir="examples/models/llama"
129129
retry cmake \
130130
-DCMAKE_INSTALL_PREFIX=cmake-out \
131131
-DCMAKE_BUILD_TYPE=Debug \
@@ -171,7 +171,7 @@ else
171171
fi
172172

173173
# Check dtype.
174-
EXPORTED_MODEL_NAME="llama2"
174+
EXPORTED_MODEL_NAME="tinyllama_${MODE}_${DTYPE}"
175175
if [[ "${DTYPE}" == "fp16" ]]; then
176176
EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}_h"
177177
elif [[ "${DTYPE}" == "bf16" ]]; then
@@ -206,7 +206,7 @@ if [[ "${QNN}" == "ON" ]]; then
206206
EXPORT_ARGS="${EXPORT_ARGS} -kv -v --qnn --disable_dynamic_shape"
207207
fi
208208
# Add dynamically linked library location
209-
$PYTHON_EXECUTABLE -m examples.models.llama2.export_llama ${EXPORT_ARGS}
209+
$PYTHON_EXECUTABLE -m examples.models.llama.export_llama ${EXPORT_ARGS}
210210

211211
# Create tokenizer.bin.
212212
echo "Creating tokenizer.bin"
@@ -219,15 +219,15 @@ echo "Running ${EXPORTED_MODEL_NAME} in portable mode"
219219
if [[ "${BUILD_TOOL}" == "buck2" ]]; then
220220
# Run model.
221221
# shellcheck source=/dev/null
222-
$BUCK run examples/models/llama2:main -- ${RUNTIME_ARGS} > result.txt
222+
$BUCK run examples/models/llama:main -- ${RUNTIME_ARGS} > result.txt
223223
elif [[ "${BUILD_TOOL}" == "cmake" ]]; then
224224
cmake_install_executorch_libraries
225225
cmake_build_llama_runner
226226
# Run llama runner
227227
NOW=$(date +"%H:%M:%S")
228228
echo "Starting to run llama runner at ${NOW}"
229229
# shellcheck source=/dev/null
230-
cmake-out/examples/models/llama2/llama_main ${RUNTIME_ARGS} > result.txt
230+
cmake-out/examples/models/llama/llama_main ${RUNTIME_ARGS} > result.txt
231231
NOW=$(date +"%H:%M:%S")
232232
echo "Finished at ${NOW}"
233233
else
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
#!/bin/bash
2+
# Copyright (c) Meta Platforms, Inc. and affiliates.
3+
# All rights reserved.
4+
#
5+
# This source code is licensed under the BSD-style license found in the
6+
# LICENSE file in the root directory of this source tree.
7+
8+
set -exu
9+
10+
if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
11+
PYTHON_EXECUTABLE=python3
12+
fi
13+
14+
# Download and prepare stories model artifacts
15+
prepare_model_artifacts() {
16+
echo "Preparing stories model artifacts"
17+
wget -O stories110M.pt "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt"
18+
wget -O tokenizer.model "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.model"
19+
echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps": 1e-05, "vocab_size": 32000}' > params.json
20+
}
21+
22+
run_and_verify() {
23+
NOW=$(date +"%H:%M:%S")
24+
echo "Starting to run eval_llama at ${NOW}"
25+
if [[ ! -f "stories110M.pt" ]]; then
26+
echo "stories110M.pt is missing."
27+
exit 1
28+
fi
29+
if [[ ! -f "tokenizer.model" ]]; then
30+
echo "tokenizer.model is missing."
31+
exit 1
32+
fi
33+
if [[ ! -f "params.json" ]]; then
34+
echo "params.json is missing."
35+
exit 1
36+
fi
37+
$PYTHON_EXECUTABLE -m examples.models.llama.runner.eager \
38+
-c stories110M.pt \
39+
-p params.json \
40+
-t tokenizer.model \
41+
-kv \
42+
-d fp32 \
43+
--max_seq_length 32 \
44+
--temperature 0 \
45+
--show_tokens \
46+
--prompt "Once upon a time," > result.txt
47+
48+
# Verify result.txt
49+
RESULT=$(cat result.txt)
50+
EXPECTED_RESULT="727, 471, 263, 2217, 7826, 4257, 365, 2354, 29889, 2296, 18012, 304, 1708, 5377, 297, 278, 6575, 845, 457, 29889, 3118, 2462, 29892, 1183, 4446, 263"
51+
if [[ "${RESULT}" == *"${EXPECTED_RESULT}"* ]]; then
52+
echo "Actual result: ${RESULT}"
53+
echo "Success"
54+
exit 0
55+
else
56+
echo "Actual result: ${RESULT}"
57+
echo "Failure; results not the same"
58+
exit 1
59+
fi
60+
}
61+
62+
prepare_model_artifacts
63+
run_and_verify

.ci/scripts/test_llava.sh

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,6 @@ cmake_install_executorch_libraries_for_android() {
5656
cmake \
5757
-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
5858
-DANDROID_ABI=arm64-v8a \
59-
-DANDROID_PLATFORM=android-23 \
6059
${EXECUTORCH_COMMON_CMAKE_ARGS} \
6160
-B${BUILD_DIR} .
6261

@@ -93,7 +92,6 @@ cmake_build_llava_runner_for_android() {
9392
cmake \
9493
-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
9594
-DANDROID_ABI=arm64-v8a \
96-
-DANDROID_PLATFORM=android-23 \
9795
${LLAVA_COMMON_CMAKE_ARGS} \
9896
-DCMAKE_PREFIX_PATH="$python_lib" \
9997
-DLLAVA_RUNNER_NO_TORCH_DUMMY_IMAGE=ON \

0 commit comments

Comments
 (0)