
Commit 009f932

Author: Guang Yang

Add compatible HuggingFace models to benchmark workflow
1 parent 1645af0 commit 009f932

File tree

3 files changed: +126 -57 lines changed

.ci/scripts/test_hf_model.sh

Lines changed: 110 additions & 0 deletions
@@ -0,0 +1,110 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -exu
+
+# shellcheck source=/dev/null
+source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
+
+# Input parameter: Hugging Face model repo (e.g., 'google/gemma-2b')
+HF_MODEL_REPO=$1
+UPLOAD_DIR=${2:-}
+
+if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
+  PYTHON_EXECUTABLE=python
+fi
+which "${PYTHON_EXECUTABLE}"
+
+# Extract the model name from the HF_MODEL_REPO by splitting on '/' and replacing '_' with '-'
+ET_MODEL_NAME=$(echo "$HF_MODEL_REPO" | awk -F'/' '{print $2}' | sed 's/_/-/g')
+# Add the suffix "_xnnpack_fp32" to the model name (currently supported delegate and dtype)
+OUT_ET_MODEL_NAME="${ET_MODEL_NAME}_xnnpack_fp32"
+
+# Files to be handled
+TOKENIZER_FILE="tokenizer.model"
+OUT_TOKENIZER_BIN_FILE="tokenizer.bin"
+
+# Download the tokenizer model using Hugging Face hub
+DOWNLOADED_TOKENIZER_FILE_PATH=$(${PYTHON_EXECUTABLE} -c "
+from huggingface_hub import hf_hub_download
+# Download the tokenizer file from the Hugging Face Hub
+downloaded_path = hf_hub_download(
+    repo_id='${HF_MODEL_REPO}',
+    filename='${TOKENIZER_FILE}'
+)
+print(downloaded_path)
+")
+
+# Check if the tokenizer file was successfully downloaded
+if [ -f "$DOWNLOADED_TOKENIZER_FILE_PATH" ]; then
+  echo "${TOKENIZER_FILE} downloaded successfully at: $DOWNLOADED_TOKENIZER_FILE_PATH"
+
+  # Convert the tokenizer to binary using the Python module
+  echo "Convert the tokenizer to binary format"
+  "${PYTHON_EXECUTABLE}" -m extension.llm.tokenizer.tokenizer -t "$DOWNLOADED_TOKENIZER_FILE_PATH" -o "./${OUT_TOKENIZER_BIN_FILE}"
+  ls "./${OUT_TOKENIZER_BIN_FILE}"
+else
+  echo "Failed to download ${TOKENIZER_FILE} from ${HF_MODEL_REPO}."
+  exit 1
+fi
+
+# Export the Hugging Face model
+echo "Export the Hugging Face model ${HF_MODEL_REPO} to ExecuTorch"
+"${PYTHON_EXECUTABLE}" -m extension.export_util.export_hf_model -hfm="$HF_MODEL_REPO" -o "$OUT_ET_MODEL_NAME"
+ls -All "./${OUT_ET_MODEL_NAME}.pte"
+
+if [ -n "$UPLOAD_DIR" ]; then
echo "Preparing for uploading generated artifacs"
62+
+  zip -j model.zip "${OUT_ET_MODEL_NAME}.pte" "${OUT_TOKENIZER_BIN_FILE}"
+  mkdir -p "${UPLOAD_DIR}"
+  mv model.zip "${UPLOAD_DIR}"
+fi
+
+if [ "$(uname)" == "Darwin" ]; then
+  CMAKE_JOBS=$(( $(sysctl -n hw.ncpu) - 1 ))
+else
+  CMAKE_JOBS=$(( $(nproc) - 1 ))
+fi
+
+cmake_install_executorch_libraries() {
+  echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
+  rm -rf cmake-out
+  retry cmake \
+    -DCMAKE_INSTALL_PREFIX=cmake-out \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
+    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
+    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
+    -DEXECUTORCH_BUILD_XNNPACK=ON \
+    -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
+    -Bcmake-out .
+  cmake --build cmake-out -j "${CMAKE_JOBS}" --target install --config Release
+}
+
+cmake_build_llama_runner() {
+  echo "Building llama runner"
+  dir="examples/models/llama2"
+  retry cmake \
+    -DCMAKE_INSTALL_PREFIX=cmake-out \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
+    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
+    -DEXECUTORCH_BUILD_XNNPACK=ON \
+    -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
+    -Bcmake-out/${dir} \
+    ${dir}
+  cmake --build cmake-out/${dir} -j "${CMAKE_JOBS}" --config Release
+}

+cmake_install_executorch_libraries
+cmake_build_llama_runner
+
+./cmake-out/examples/models/llama2/llama_main --model_path="${OUT_ET_MODEL_NAME}.pte" --tokenizer_path="${OUT_TOKENIZER_BIN_FILE}" --prompt="My name is"
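
For reference, the new script can also be exercised outside CI. A minimal sketch, assuming it is run from the root of an ExecuTorch checkout (so utils.sh and the extension.* Python modules resolve) and that the Hugging Face repo is accessible to the logged-in token; the repo name comes from the script's own example comment, and the upload directory is a hypothetical path:

# Export google/gemma-2b to XNNPACK/fp32, build and smoke-test the llama runner,
# and stage the zipped .pte + tokenizer.bin under /tmp/artifacts (hypothetical).
PYTHON_EXECUTABLE=python bash .ci/scripts/test_hf_model.sh google/gemma-2b /tmp/artifacts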

.github/workflows/android-perf.yml

Lines changed: 14 additions & 2 deletions
@@ -106,6 +106,7 @@ jobs:
           declare -A DEVICE_POOL_ARNS
           DEVICE_POOL_ARNS[samsung_galaxy_s22]="arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa"
           DEVICE_POOL_ARNS[samsung_galaxy_s24]="arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98f8788c-2e25-4a3c-8bb2-0d1e8897c0db"
+          DEVICE_POOL_ARNS[google_pixel_8_pro]="arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/d65096ab-900b-4521-be8b-a3619b69236a"

           # Resolve device names with their corresponding ARNs
           if [[ ! $(echo "$DEVICES" | jq empty 2>/dev/null) ]]; then
@@ -129,18 +130,20 @@ jobs:
     name: export-models
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     needs: set-parameters
+    secrets: inherit
     strategy:
       matrix:
         model: ${{ fromJson(needs.set-parameters.outputs.models) }}
         delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }}
       fail-fast: false
     with:
-      runner: linux.2xlarge
+      runner: linux.12xlarge
       docker-image: executorch-ubuntu-22.04-clang12-android
       submodules: 'true'
       timeout: 60
       upload-artifact: android-models
       upload-artifact-to-s3: true
+      secrets-env: EXECUTORCH_HF_TOKEN
       script: |
         # The generic Linux job chooses to use base env, not the one setup by the image
         echo "::group::Setting up dev environment"
@@ -158,7 +161,16 @@ jobs:
         BUILD_MODE="cmake"
         DTYPE="fp32"

-        if [[ ${{ matrix.model }} =~ ^stories* ]]; then
+        if [[ ${{ matrix.model }} =~ ^[^/]+/[^/]+$ ]] && [[ ${{ matrix.delegate }} == "xnnpack" ]]; then
+          pip install -U "huggingface_hub[cli]"
+          huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
+          pip install accelerate sentencepiece
+          # TODO(guangyang): Switch to use released transformers library after all required patches are included
+          pip install "git+https://github.com/huggingface/transformers.git@6cc4dfe3f1e8d421c6d6351388e06e9b123cbfe1"
+          # HuggingFace model. Assume the pattern is always like "<org>/<repo>"
+          HF_MODEL_REPO=${{ matrix.model }}
+          PYTHON_EXECUTABLE=python bash .ci/scripts/test_hf_model.sh ${{ matrix.model }} ${ARTIFACTS_DIR_NAME}
+        elif [[ ${{ matrix.model }} =~ ^stories* ]]; then
          # Install requirements for export_llama
          PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
          # Test llama2
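
The new branch above routes models by the shape of their name: anything matching "<org>/<repo>" with the xnnpack delegate is treated as a Hugging Face repo and handed to the shared script, while "stories*" names fall through to the existing llama2 path. A minimal standalone sketch of that routing, with illustrative model names:

# Illustrative routing demo; "stories110M" would take the elif branch instead.
MODEL="google/gemma-2b"
DELEGATE="xnnpack"
if [[ "$MODEL" =~ ^[^/]+/[^/]+$ ]] && [[ "$DELEGATE" == "xnnpack" ]]; then
  echo "$MODEL -> Hugging Face repo: run .ci/scripts/test_hf_model.sh"
elif [[ "$MODEL" =~ ^stories* ]]; then
  echo "$MODEL -> stories model: use the existing export_llama path"
fi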

.github/workflows/trunk.yml

Lines changed: 2 additions & 55 deletions
@@ -373,36 +373,6 @@ jobs:
           CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
           conda activate "${CONDA_ENV}"
           PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake
-
-          echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
-          rm -rf cmake-out
-          cmake \
-            -DCMAKE_INSTALL_PREFIX=cmake-out \
-            -DCMAKE_BUILD_TYPE=Release \
-            -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-            -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-            -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
-            -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-            -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-            -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-            -DEXECUTORCH_BUILD_XNNPACK=ON \
-            -DPYTHON_EXECUTABLE=python \
-            -Bcmake-out .
-          cmake --build cmake-out -j9 --target install --config Release
-
-          echo "Build llama runner"
-          dir="examples/models/llama2"
-          cmake \
-            -DCMAKE_INSTALL_PREFIX=cmake-out \
-            -DCMAKE_BUILD_TYPE=Release \
-            -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-            -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-            -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-            -DEXECUTORCH_BUILD_XNNPACK=ON \
-            -DPYTHON_EXECUTABLE=python \
-            -Bcmake-out/${dir} \
-            ${dir}
-          cmake --build cmake-out/${dir} -j9 --config Release
           echo "::endgroup::"

           echo "::group::Set up HuggingFace Dependencies"
@@ -415,29 +385,6 @@ jobs:
           echo "::endgroup::"

           echo "::group::Export to ExecuTorch"
-          TOKENIZER_FILE=tokenizer.model
-          TOKENIZER_BIN_FILE=tokenizer.bin
-          ET_MODEL_NAME=et_model
-          # Fetch the file using a Python one-liner
-          DOWNLOADED_TOKENIZER_FILE_PATH=$(python -c "
-          from huggingface_hub import hf_hub_download
-          # Download the file from the Hugging Face Hub
-          downloaded_path = hf_hub_download(
-              repo_id='${{ matrix.hf_model_repo }}',
-              filename='${TOKENIZER_FILE}'
-          )
-          print(downloaded_path)
-          ")
-          if [ -f "$DOWNLOADED_TOKENIZER_FILE_PATH" ]; then
-            echo "${TOKENIZER_FILE} downloaded successfully at: $DOWNLOADED_TOKENIZER_FILE_PATH"
-            python -m extension.llm.tokenizer.tokenizer -t $DOWNLOADED_TOKENIZER_FILE_PATH -o ./${TOKENIZER_BIN_FILE}
-            ls ./tokenizer.bin
-          else
-            echo "Failed to download ${TOKENIZER_FILE} from ${{ matrix.hf_model_repo }}."
-            exit 1
-          fi
-
-          python -m extension.export_util.export_hf_model -hfm=${{ matrix.hf_model_repo }} -o ${ET_MODEL_NAME}
-
-          cmake-out/examples/models/llama2/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is"
+          # HuggingFace model. Assume the pattern is always like "<org>/<repo>"
+          PYTHON_EXECUTABLE=python bash .ci/scripts/test_hf_model.sh ${{ matrix.hf_model_repo }}
           echo "::endgroup::"
