
Commit b2d837e

Guang Yang committed
Add compatible HuggingFace models to benchmark workflow
1 parent 9c38cf7

File tree

3 files changed: +128 additions, -57 deletions

.ci/scripts/test_hf_model.sh

Lines changed: 111 additions & 0 deletions
@@ -0,0 +1,111 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -exu
+
+# shellcheck source=/dev/null
+source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
+
+# Input parameter: Hugging Face model repo (e.g., 'google/gemma-2b')
+HF_MODEL_REPO=$1
+UPLOAD_DIR=${2:-}
+DTYPE=${3:-"float32"}
+
+if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
+  PYTHON_EXECUTABLE=python
+fi
+which "${PYTHON_EXECUTABLE}"
+
+# Extract the model name from HF_MODEL_REPO by splitting on '/' and replacing '_' with '-'
+ET_MODEL_NAME=$(echo "$HF_MODEL_REPO" | awk -F'/' '{print $2}' | sed 's/_/-/g')
+# Add the suffix "_xnnpack_fp32" to the model name (the currently supported delegate and dtype)
+OUT_ET_MODEL_NAME="${ET_MODEL_NAME}_xnnpack_fp32"
+
+# Files to be handled
+TOKENIZER_FILE="tokenizer.model"
+OUT_TOKENIZER_BIN_FILE="tokenizer.bin"
+
+# Download the tokenizer model using the Hugging Face hub
+DOWNLOADED_TOKENIZER_FILE_PATH=$(${PYTHON_EXECUTABLE} -c "
+from huggingface_hub import hf_hub_download
+# Download the tokenizer file from the Hugging Face Hub
+downloaded_path = hf_hub_download(
+    repo_id='${HF_MODEL_REPO}',
+    filename='${TOKENIZER_FILE}'
+)
+print(downloaded_path)
+")
+
+# Check if the tokenizer file was successfully downloaded
+if [ -f "$DOWNLOADED_TOKENIZER_FILE_PATH" ]; then
+  echo "${TOKENIZER_FILE} downloaded successfully at: $DOWNLOADED_TOKENIZER_FILE_PATH"
+
+  # Convert the tokenizer to binary using the Python module
+  echo "Convert the tokenizer to binary format"
+  "${PYTHON_EXECUTABLE}" -m extension.llm.tokenizer.tokenizer -t "$DOWNLOADED_TOKENIZER_FILE_PATH" -o "./${OUT_TOKENIZER_BIN_FILE}"
+  ls "./${OUT_TOKENIZER_BIN_FILE}"
+else
+  echo "Failed to download ${TOKENIZER_FILE} from ${HF_MODEL_REPO}."
+  exit 1
+fi
+
+# Export the Hugging Face model
+echo "Export the Hugging Face model ${HF_MODEL_REPO} to ExecuTorch"
+"${PYTHON_EXECUTABLE}" -m extension.export_util.export_hf_model -hfm="$HF_MODEL_REPO" -o "$OUT_ET_MODEL_NAME" -d "$DTYPE"
+ls -All "./${OUT_ET_MODEL_NAME}.pte"
+
+if [ -n "$UPLOAD_DIR" ]; then
+  echo "Preparing to upload the generated artifacts"
+  zip -j model.zip "${OUT_ET_MODEL_NAME}.pte" "${OUT_TOKENIZER_BIN_FILE}"
+  mkdir -p "${UPLOAD_DIR}"
+  mv model.zip "${UPLOAD_DIR}"
+fi
+
+if [ "$(uname)" == "Darwin" ]; then
+  CMAKE_JOBS=$(( $(sysctl -n hw.ncpu) - 1 ))
+else
+  CMAKE_JOBS=$(( $(nproc) - 1 ))
+fi
+
+cmake_install_executorch_libraries() {
+  echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
+  rm -rf cmake-out
+  retry cmake \
+    -DCMAKE_INSTALL_PREFIX=cmake-out \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
+    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
+    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
+    -DEXECUTORCH_BUILD_XNNPACK=ON \
+    -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
+    -Bcmake-out .
+  cmake --build cmake-out -j "${CMAKE_JOBS}" --target install --config Release
+}
+
+cmake_build_llama_runner() {
+  echo "Building llama runner"
+  dir="examples/models/llama2"
+  retry cmake \
+    -DCMAKE_INSTALL_PREFIX=cmake-out \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
+    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
+    -DEXECUTORCH_BUILD_XNNPACK=ON \
+    -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
+    -Bcmake-out/${dir} \
+    ${dir}
+  cmake --build cmake-out/${dir} -j "${CMAKE_JOBS}" --config Release
+}

+cmake_install_executorch_libraries
+cmake_build_llama_runner
+
+./cmake-out/examples/models/llama2/llama_main --model_path="${OUT_ET_MODEL_NAME}.pte" --tokenizer_path="${OUT_TOKENIZER_BIN_FILE}" --prompt="My name is"
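
The script chains the whole per-model flow: fetch and convert the tokenizer, export the model to a .pte through XNNPACK, optionally zip the artifacts for upload, build the ExecuTorch libraries and the llama runner, then run a smoke-test prompt. A minimal invocation sketch, using the 'google/gemma-2b' example from the script's own comment (the ./artifacts upload directory is a hypothetical value):

    # Positional args: <hf_repo> [upload_dir] [dtype]; defaults: no upload, float32.
    # "google/gemma-2b" yields ET_MODEL_NAME="gemma-2b", so the export is written
    # to gemma-2b_xnnpack_fp32.pte.
    PYTHON_EXECUTABLE=python bash .ci/scripts/test_hf_model.sh google/gemma-2b

    # Stage model.zip (the .pte plus tokenizer.bin) into ./artifacts and export in bfloat16.
    PYTHON_EXECUTABLE=python bash .ci/scripts/test_hf_model.sh google/gemma-2b ./artifacts bfloat16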

.github/workflows/android-perf.yml

Lines changed: 15 additions & 2 deletions
@@ -106,6 +106,7 @@ jobs:
       declare -A DEVICE_POOL_ARNS
       DEVICE_POOL_ARNS[samsung_galaxy_s22]="arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa"
       DEVICE_POOL_ARNS[samsung_galaxy_s24]="arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98f8788c-2e25-4a3c-8bb2-0d1e8897c0db"
+      DEVICE_POOL_ARNS[google_pixel_8_pro]="arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/d65096ab-900b-4521-be8b-a3619b69236a"

       # Resolve device names with their corresponding ARNs
       if [[ ! $(echo "$DEVICES" | jq empty 2>/dev/null) ]]; then
@@ -129,18 +130,20 @@ jobs:
     name: export-models
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     needs: set-parameters
+    secrets: inherit
     strategy:
       matrix:
         model: ${{ fromJson(needs.set-parameters.outputs.models) }}
         delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }}
       fail-fast: false
     with:
-      runner: linux.2xlarge
+      runner: linux.12xlarge
       docker-image: executorch-ubuntu-22.04-clang12-android
       submodules: 'true'
       timeout: 60
       upload-artifact: android-models
       upload-artifact-to-s3: true
+      secrets-env: EXECUTORCH_HF_TOKEN
       script: |
         # The generic Linux job chooses to use base env, not the one setup by the image
         echo "::group::Setting up dev environment"
@@ -158,7 +161,17 @@ jobs:
         BUILD_MODE="cmake"
         DTYPE="fp32"

-        if [[ ${{ matrix.model }} =~ ^stories* ]]; then
+        if [[ ${{ matrix.model }} =~ ^[^/]+/[^/]+$ ]] && [[ ${{ matrix.delegate }} == "xnnpack" ]]; then
+          pip install -U "huggingface_hub[cli]"
+          huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
+          pip install accelerate sentencepiece
+          # TODO(guangyang): Switch to use released transformers library after all required patches are included
+          pip install "git+https://github.com/huggingface/transformers.git@6cc4dfe3f1e8d421c6d6351388e06e9b123cbfe1"
+          # HuggingFace model. Assume the pattern is always like "<org>/<repo>"
+          HF_MODEL_REPO=${{ matrix.model }}
+          DTYPE="bfloat16"
+          PYTHON_EXECUTABLE=python bash .ci/scripts/test_hf_model.sh ${{ matrix.model }} ${ARTIFACTS_DIR_NAME} ${DTYPE}
+        elif [[ ${{ matrix.model }} =~ ^stories* ]]; then
          # Install requirements for export_llama
          PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
          # Test llama2
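
The new branch dispatches on the shape of the matrix entry rather than a hard-coded model list: anything matching "<org>/<repo>" on the xnnpack delegate is treated as a Hugging Face repo. A standalone sketch of that gate (the variable values are hypothetical stand-ins for the ${{ matrix.* }} context):

    model="google/gemma-2b"
    delegate="xnnpack"
    if [[ $model =~ ^[^/]+/[^/]+$ ]] && [[ $delegate == "xnnpack" ]]; then
      echo "Hugging Face model: org='${model%%/*}' repo='${model##*/}'"
    fi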

.github/workflows/trunk.yml

Lines changed: 2 additions & 55 deletions
@@ -362,36 +362,6 @@ jobs:
        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
        conda activate "${CONDA_ENV}"
        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake
-
-       echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
-       rm -rf cmake-out
-       cmake \
-         -DCMAKE_INSTALL_PREFIX=cmake-out \
-         -DCMAKE_BUILD_TYPE=Release \
-         -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-         -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-         -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
-         -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-         -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-         -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-         -DEXECUTORCH_BUILD_XNNPACK=ON \
-         -DPYTHON_EXECUTABLE=python \
-         -Bcmake-out .
-       cmake --build cmake-out -j9 --target install --config Release
-
-       echo "Build llama runner"
-       dir="examples/models/llama2"
-       cmake \
-         -DCMAKE_INSTALL_PREFIX=cmake-out \
-         -DCMAKE_BUILD_TYPE=Release \
-         -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-         -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-         -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-         -DEXECUTORCH_BUILD_XNNPACK=ON \
-         -DPYTHON_EXECUTABLE=python \
-         -Bcmake-out/${dir} \
-         ${dir}
-       cmake --build cmake-out/${dir} -j9 --config Release
        echo "::endgroup::"

        echo "::group::Set up HuggingFace Dependencies"
@@ -408,29 +378,6 @@ jobs:
        echo "::endgroup::"

        echo "::group::Export to ExecuTorch"
-       TOKENIZER_FILE=tokenizer.model
-       TOKENIZER_BIN_FILE=tokenizer.bin
-       ET_MODEL_NAME=et_model
-       # Fetch the file using a Python one-liner
-       DOWNLOADED_TOKENIZER_FILE_PATH=$(python -c "
-       from huggingface_hub import hf_hub_download
-       # Download the file from the Hugging Face Hub
-       downloaded_path = hf_hub_download(
-         repo_id='${{ matrix.hf_model_repo }}',
-         filename='${TOKENIZER_FILE}'
-       )
-       print(downloaded_path)
-       ")
-       if [ -f "$DOWNLOADED_TOKENIZER_FILE_PATH" ]; then
-         echo "${TOKENIZER_FILE} downloaded successfully at: $DOWNLOADED_TOKENIZER_FILE_PATH"
-         python -m extension.llm.tokenizer.tokenizer -t $DOWNLOADED_TOKENIZER_FILE_PATH -o ./${TOKENIZER_BIN_FILE}
-         ls ./tokenizer.bin
-       else
-         echo "Failed to download ${TOKENIZER_FILE} from ${{ matrix.hf_model_repo }}."
-         exit 1
-       fi
-
-       python -m extension.export_util.export_hf_model -hfm=${{ matrix.hf_model_repo }} -o ${ET_MODEL_NAME}
-
-       cmake-out/examples/models/llama2/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is"
+       # HuggingFace model. Assume the pattern is always like "<org>/<repo>"
+       PYTHON_EXECUTABLE=python bash .ci/scripts/test_hf_model.sh ${{ matrix.hf_model_repo }}
        echo "::endgroup::"
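
trunk.yml now funnels through the same shared script and passes only the repo, so the script's parameter defaults apply: no upload directory and a float32 export. A minimal sketch of the default-expansion behavior it relies on (the "org/repo" value is illustrative):

    set -- "org/repo"        # only $1 is supplied, as in the trunk.yml call
    HF_MODEL_REPO=$1
    UPLOAD_DIR=${2:-}        # empty, so the zip-and-upload step is skipped
    DTYPE=${3:-"float32"}    # falls back to the default dtype
    echo "repo=${HF_MODEL_REPO} upload='${UPLOAD_DIR}' dtype=${DTYPE}"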
