
Commit 009f932

Author: Guang Yang

Add compatible HuggingFace models to benchmark workflow
1 parent 1645af0 commit 009f932

File tree

3 files changed: +126 -57 lines changed

.ci/scripts/test_hf_model.sh

Lines changed: 110 additions & 0 deletions
@@ -0,0 +1,110 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -exu
+
+# shellcheck source=/dev/null
+source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
+
+# Input parameter: Hugging Face model repo (e.g., 'google/gemma-2b')
+HF_MODEL_REPO=$1
+UPLOAD_DIR=${2:-}
+
+if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
+  PYTHON_EXECUTABLE=python
+fi
+which "${PYTHON_EXECUTABLE}"
+
+# Extract the model name from the HF_MODEL_REPO by splitting on '/' and replacing '_' with '-'
+ET_MODEL_NAME=$(echo "$HF_MODEL_REPO" | awk -F'/' '{print $2}' | sed 's/_/-/g')
+# Add the suffix "_xnnpack_fp32" to the model name (currently supported delegate and dtype)
+OUT_ET_MODEL_NAME="${ET_MODEL_NAME}_xnnpack_fp32"
+
+# Files to be handled
+TOKENIZER_FILE="tokenizer.model"
+OUT_TOKENIZER_BIN_FILE="tokenizer.bin"
+
+# Download the tokenizer model using Hugging Face hub
+DOWNLOADED_TOKENIZER_FILE_PATH=$(${PYTHON_EXECUTABLE} -c "
+from huggingface_hub import hf_hub_download
+# Download the tokenizer file from the Hugging Face Hub
+downloaded_path = hf_hub_download(
+    repo_id='${HF_MODEL_REPO}',
+    filename='${TOKENIZER_FILE}'
+)
+print(downloaded_path)
+")
+
+# Check if the tokenizer file was successfully downloaded
+if [ -f "$DOWNLOADED_TOKENIZER_FILE_PATH" ]; then
+  echo "${TOKENIZER_FILE} downloaded successfully at: $DOWNLOADED_TOKENIZER_FILE_PATH"
+
+  # Convert the tokenizer to binary using the Python module
+  echo "Convert the tokenizer to binary format"
+  "${PYTHON_EXECUTABLE}" -m extension.llm.tokenizer.tokenizer -t "$DOWNLOADED_TOKENIZER_FILE_PATH" -o "./${OUT_TOKENIZER_BIN_FILE}"
+  ls "./${OUT_TOKENIZER_BIN_FILE}"
+else
+  echo "Failed to download ${TOKENIZER_FILE} from ${HF_MODEL_REPO}."
+  exit 1
+fi
+
+# Export the Hugging Face model
+echo "Export the Hugging Face model ${HF_MODEL_REPO} to ExecuTorch"
+"${PYTHON_EXECUTABLE}" -m extension.export_util.export_hf_model -hfm="$HF_MODEL_REPO" -o "$OUT_ET_MODEL_NAME"
+ls -All "./${OUT_ET_MODEL_NAME}.pte"
+
+if [ -n "$UPLOAD_DIR" ]; then
echo "Preparing for uploading generated artifacs"
62+
+  zip -j model.zip "${OUT_ET_MODEL_NAME}.pte" "${OUT_TOKENIZER_BIN_FILE}"
+  mkdir -p "${UPLOAD_DIR}"
+  mv model.zip "${UPLOAD_DIR}"
+fi
+
+if [ "$(uname)" == "Darwin" ]; then
+  CMAKE_JOBS=$(( $(sysctl -n hw.ncpu) - 1 ))
+else
+  CMAKE_JOBS=$(( $(nproc) - 1 ))
+fi
+
+cmake_install_executorch_libraries() {
+  echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
+  rm -rf cmake-out
+  retry cmake \
+    -DCMAKE_INSTALL_PREFIX=cmake-out \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
+    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
+    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
+    -DEXECUTORCH_BUILD_XNNPACK=ON \
+    -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
+    -Bcmake-out .
+  cmake --build cmake-out -j "${CMAKE_JOBS}" --target install --config Release
+}
+
+cmake_build_llama_runner() {
+  echo "Building llama runner"
+  dir="examples/models/llama2"
+  retry cmake \
+    -DCMAKE_INSTALL_PREFIX=cmake-out \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
+    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
+    -DEXECUTORCH_BUILD_XNNPACK=ON \
+    -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
+    -Bcmake-out/${dir} \
+    ${dir}
+  cmake --build cmake-out/${dir} -j "${CMAKE_JOBS}" --config Release
+}

+cmake_install_executorch_libraries
+cmake_build_llama_runner
+
+./cmake-out/examples/models/llama2/llama_main --model_path="${OUT_ET_MODEL_NAME}.pte" --tokenizer_path="${OUT_TOKENIZER_BIN_FILE}" --prompt="My name is"
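
For reference, the new script can also be exercised outside CI. A minimal sketch, assuming it is run from the root of an ExecuTorch checkout (so utils.sh and the extension.* Python modules resolve) and that the Hugging Face repo is accessible to the logged-in token; the repo name comes from the script's own example comment, and the upload directory is a hypothetical path:

# Export google/gemma-2b to XNNPACK/fp32, build and smoke-test the llama runner,
# and stage the zipped .pte + tokenizer.bin under /tmp/artifacts (hypothetical).
PYTHON_EXECUTABLE=python bash .ci/scripts/test_hf_model.sh google/gemma-2b /tmp/artifacts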

.github/workflows/android-perf.yml

Lines changed: 14 additions & 2 deletions
@@ -106,6 +106,7 @@ jobs:
           declare -A DEVICE_POOL_ARNS
           DEVICE_POOL_ARNS[samsung_galaxy_s22]="arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa"
           DEVICE_POOL_ARNS[samsung_galaxy_s24]="arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98f8788c-2e25-4a3c-8bb2-0d1e8897c0db"
+          DEVICE_POOL_ARNS[google_pixel_8_pro]="arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/d65096ab-900b-4521-be8b-a3619b69236a"

           # Resolve device names with their corresponding ARNs
           if [[ ! $(echo "$DEVICES" | jq empty 2>/dev/null) ]]; then
@@ -129,18 +130,20 @@ jobs:
     name: export-models
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     needs: set-parameters
+    secrets: inherit
     strategy:
       matrix:
         model: ${{ fromJson(needs.set-parameters.outputs.models) }}
         delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }}
       fail-fast: false
     with:
-      runner: linux.2xlarge
+      runner: linux.12xlarge
       docker-image: executorch-ubuntu-22.04-clang12-android
       submodules: 'true'
       timeout: 60
       upload-artifact: android-models
       upload-artifact-to-s3: true
+      secrets-env: EXECUTORCH_HF_TOKEN
       script: |
         # The generic Linux job chooses to use base env, not the one setup by the image
         echo "::group::Setting up dev environment"
@@ -158,7 +161,16 @@ jobs:
         BUILD_MODE="cmake"
         DTYPE="fp32"

-        if [[ ${{ matrix.model }} =~ ^stories* ]]; then
+        if [[ ${{ matrix.model }} =~ ^[^/]+/[^/]+$ ]] && [[ ${{ matrix.delegate }} == "xnnpack" ]]; then
+          pip install -U "huggingface_hub[cli]"
+          huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
+          pip install accelerate sentencepiece
+          # TODO(guangyang): Switch to use released transformers library after all required patches are included
+          pip install "git+https://github.com/huggingface/transformers.git@6cc4dfe3f1e8d421c6d6351388e06e9b123cbfe1"
+          # HuggingFace model. Assume the pattern is always like "<org>/<repo>"
+          HF_MODEL_REPO=${{ matrix.model }}
+          PYTHON_EXECUTABLE=python bash .ci/scripts/test_hf_model.sh ${{ matrix.model }} ${ARTIFACTS_DIR_NAME}
+        elif [[ ${{ matrix.model }} =~ ^stories* ]]; then
          # Install requirements for export_llama
          PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
          # Test llama2
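
The new branch above routes models by the shape of their name: anything matching "<org>/<repo>" with the xnnpack delegate is treated as a Hugging Face repo and handed to the shared script, while "stories*" names fall through to the existing llama2 path. A minimal standalone sketch of that routing, with illustrative model names:

# Illustrative routing demo; "stories110M" would take the elif branch instead.
MODEL="google/gemma-2b"
DELEGATE="xnnpack"
if [[ "$MODEL" =~ ^[^/]+/[^/]+$ ]] && [[ "$DELEGATE" == "xnnpack" ]]; then
  echo "$MODEL -> Hugging Face repo: run .ci/scripts/test_hf_model.sh"
elif [[ "$MODEL" =~ ^stories* ]]; then
  echo "$MODEL -> stories model: use the existing export_llama path"
fi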

.github/workflows/trunk.yml

Lines changed: 2 additions & 55 deletions
@@ -373,36 +373,6 @@ jobs:
           CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
           conda activate "${CONDA_ENV}"
           PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake
-
-          echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
-          rm -rf cmake-out
-          cmake \
-            -DCMAKE_INSTALL_PREFIX=cmake-out \
-            -DCMAKE_BUILD_TYPE=Release \
-            -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-            -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-            -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
-            -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-            -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-            -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-            -DEXECUTORCH_BUILD_XNNPACK=ON \
-            -DPYTHON_EXECUTABLE=python \
-            -Bcmake-out .
-          cmake --build cmake-out -j9 --target install --config Release
-
-          echo "Build llama runner"
-          dir="examples/models/llama2"
-          cmake \
-            -DCMAKE_INSTALL_PREFIX=cmake-out \
-            -DCMAKE_BUILD_TYPE=Release \
-            -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-            -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-            -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-            -DEXECUTORCH_BUILD_XNNPACK=ON \
-            -DPYTHON_EXECUTABLE=python \
-            -Bcmake-out/${dir} \
-            ${dir}
-          cmake --build cmake-out/${dir} -j9 --config Release
           echo "::endgroup::"

           echo "::group::Set up HuggingFace Dependencies"
@@ -415,29 +385,6 @@ jobs:
           echo "::endgroup::"

           echo "::group::Export to ExecuTorch"
-          TOKENIZER_FILE=tokenizer.model
-          TOKENIZER_BIN_FILE=tokenizer.bin
-          ET_MODEL_NAME=et_model
-          # Fetch the file using a Python one-liner
-          DOWNLOADED_TOKENIZER_FILE_PATH=$(python -c "
-          from huggingface_hub import hf_hub_download
-          # Download the file from the Hugging Face Hub
-          downloaded_path = hf_hub_download(
-              repo_id='${{ matrix.hf_model_repo }}',
-              filename='${TOKENIZER_FILE}'
-          )
-          print(downloaded_path)
-          ")
-          if [ -f "$DOWNLOADED_TOKENIZER_FILE_PATH" ]; then
-            echo "${TOKENIZER_FILE} downloaded successfully at: $DOWNLOADED_TOKENIZER_FILE_PATH"
-            python -m extension.llm.tokenizer.tokenizer -t $DOWNLOADED_TOKENIZER_FILE_PATH -o ./${TOKENIZER_BIN_FILE}
-            ls ./tokenizer.bin
-          else
-            echo "Failed to download ${TOKENIZER_FILE} from ${{ matrix.hf_model_repo }}."
-            exit 1
-          fi
-
-          python -m extension.export_util.export_hf_model -hfm=${{ matrix.hf_model_repo }} -o ${ET_MODEL_NAME}
-
-          cmake-out/examples/models/llama2/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is"
+          # HuggingFace model. Assume the pattern is always like "<org>/<repo>"
+          PYTHON_EXECUTABLE=python bash .ci/scripts/test_hf_model.sh ${{ matrix.hf_model_repo }}
           echo "::endgroup::"
