
Commit 8fe00b5

lucylq authored and facebook-github-bot committed
Llama stories oss ci (#1973)
Summary: Pull Request resolved: #1973. Test fp16 and fp32 with buck2. Follow-up: add tests for kv-cache and XNNPACK. Reviewed By: larryliu0820. Differential Revision: D53729839. fbshipit-source-id: 5a25ff6538ba9d9df565249cccc89c213319f903
1 parent bdde144 commit 8fe00b5
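The new CI leg can be reproduced locally. A minimal sketch, assuming an ExecuTorch checkout with buck2 on the PATH and an active conda/Python environment (the commands mirror the workflow steps added below):

PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh buck2
PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M.pt buck2 fp32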

File tree

2 files changed: +129 −0 lines changed


.ci/scripts/test_llama.sh

Lines changed: 102 additions & 0 deletions
@@ -0,0 +1,102 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -exu

MODEL_NAME=$1 # stories110M.pt
BUILD_TOOL=$2 # buck2
DTYPE=$3 # fp16 or fp32

if [[ -z "${MODEL_NAME:-}" ]]; then
  echo "Missing model name, exiting..."
  exit 1
fi

if [[ -z "${BUILD_TOOL:-}" ]]; then
  echo "Missing build tool (require buck2 or cmake), exiting..."
  exit 1
fi

if [[ -z "${DTYPE:-}" ]]; then
  echo "Missing dtype, choose fp16 or fp32, exiting..."
  exit 1
fi

which "${PYTHON_EXECUTABLE}"

# Check build tool.
if [[ "${BUILD_TOOL}" == "buck2" ]]; then
  :
else
  echo "Invalid build tool ${BUILD_TOOL}. Only buck2 is supported atm"
  exit 1
fi

cleanup_files() {
  echo "Deleting downloaded and generated files"
  rm "${MODEL_NAME}"
  rm tokenizer.model
  rm tokenizer.bin
  rm "${EXPORTED_MODEL_NAME}"
}

# Download and create artifacts.
PARAMS="params.json"
touch "${PARAMS}"
if [[ "${MODEL_NAME}" == "stories110M.pt" ]]; then
  # Download stories110M.pt from Hugging Face and tokenizer.model from GitHub
  wget "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt"
  wget "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.model"
  # Create params.json file
  echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps": 1e-05, "vocab_size": 32000}' > "${PARAMS}"
else
  echo "Unsupported model name ${MODEL_NAME}"
  exit 1
fi

# Check dtype.
EXPORTED_MODEL_NAME="llama2"
if [[ "${DTYPE}" == "fp16" ]]; then
  EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}_h"
elif [[ "${DTYPE}" == "fp32" ]]; then
  :
else
  echo "Unsupported dtype ${DTYPE}"
  exit 1
fi
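# Note: fp16 exports get an "_h" suffix (presumably "half"), i.e. llama2_h.pte vs. llama2.pte for fp32.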

# Export model.
EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}.pte"
echo "Exporting ${EXPORTED_MODEL_NAME}"
python3 -m examples.models.llama2.export_llama -c stories110M.pt -p "${PARAMS}" -d "${DTYPE}"

# Create tokenizer.bin.
echo "Creating tokenizer.bin"
buck2 run examples/models/llama2/tokenizer:tokenizer_py -- -t tokenizer.model -o tokenizer.bin

# Run model.
echo "Running ${EXPORTED_MODEL_NAME} in portable mode"
RESULT=$(timeout 500s buck2 run examples/models/llama2:main -- --model_path="${EXPORTED_MODEL_NAME}" --tokenizer_path=tokenizer.bin --prompt="Once" --temperature=0) || true

# Check results.
EXPECTED_PREFIX="Once upon a time,"
# Expected result - may take too long to generate:
# "Once upon a time, there was a little girl named Lily. She loved to play outside" ...
if [[ "${RESULT}" == "${EXPECTED_PREFIX}"* ]]; then
  echo "Expected result prefix: ${EXPECTED_PREFIX}"
  echo "Actual result: ${RESULT}"
  echo "Success"

  cleanup_files
else
  echo "Expected result prefix: ${EXPECTED_PREFIX}"
  echo "Actual result: ${RESULT}"
  echo "Failure; results not the same"

  cleanup_files
  exit 1
fi
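The "|| true" on the run step matters: with "set -exu" in effect, a timeout or nonzero exit from the runner would otherwise abort the script before the result check. Success is instead gated on the output prefix, which is stable because the prompt is fixed and --temperature=0 makes decoding deterministic (greedy). A minimal sketch of the pattern, with a hypothetical generate_cmd standing in for the buck2 target:

# Capture output without aborting under "set -e", even on timeout or crash.
RESULT=$(timeout 500s generate_cmd --prompt="Once" --temperature=0) || true
# Gate success on a stable prefix rather than an exact full-output match.
if [[ "${RESULT}" == "Once upon a time,"* ]]; then
  echo "Success"
else
  echo "Failure; results not the same"
  exit 1
fi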

.github/workflows/pull.yml

Lines changed: 27 additions & 0 deletions
@@ -83,6 +83,33 @@ jobs:
      # Build and test ExecuTorch
      PYTHON_EXECUTABLE=python bash .ci/scripts/test.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" "${DEMO_BACKEND_DELEGATION}"

  test-llama-linux:
    name: test-llama-linux
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    strategy:
      matrix:
        dtype: [fp16, fp32]
      fail-fast: false
    with:
      runner: linux.2xlarge
      docker-image: executorch-ubuntu-22.04-clang12
      submodules: 'true'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 900
      script: |
        # The generic Linux job chooses to use base env, not the one set up by the image
        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
        conda activate "${CONDA_ENV}"

        DTYPE=${{ matrix.dtype }}

        # Setup executorch
        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh buck2
        # Install requirements for export_llama
        PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
        # Test llama2
        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M.pt buck2 "${DTYPE}"

  test-custom-ops-linux:
    name: test-custom-ops-linux
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
