Skip to content

Commit 18c5c3c

Browse files
authored
Merge branch 'pytorch:main' into pr_conv_combos
2 parents 5620410 + c731673 commit 18c5c3c

File tree

374 files changed

+19831
-1923
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

374 files changed

+19831
-1923
lines changed

.ci/scripts/build_llama_android.sh

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -exu

# shellcheck source=/dev/null
source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"

# Build configuration shared by both build steps. Hoisted to file scope so
# build_llama_runner does not depend on install_executorch_and_backend_lib
# having run first (BUCK2/ANDROID_NDK were previously assigned only inside
# that function, which would trip `set -u` if the call order ever changed).
ANDROID_NDK=/opt/ndk
BUCK2=buck2
ANDROID_ABI=arm64-v8a

# Cross-compile the executorch core libraries plus the XNNPACK backend with
# the Android NDK toolchain and install them into cmake-android-out.
install_executorch_and_backend_lib() {
  echo "Installing executorch and xnnpack backend"
  rm -rf cmake-android-out && mkdir cmake-android-out
  cmake -DBUCK2="${BUCK2}" \
    -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \
    -DANDROID_ABI="${ANDROID_ABI}" \
    -DANDROID_PLATFORM=android-23 \
    -DCMAKE_INSTALL_PREFIX=cmake-android-out \
    -DCMAKE_BUILD_TYPE=Release \
    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
    -DEXECUTORCH_BUILD_XNNPACK=ON \
    -DEXECUTORCH_BUILD_OPTIMIZED=ON \
    -DXNNPACK_ENABLE_ARM_BF16=OFF \
    -Bcmake-android-out .

  cmake --build cmake-android-out -j4 --target install --config Release
}

# Build the llama2 example runner against the libraries installed above
# (CMAKE_INSTALL_PREFIX must match the install step's prefix).
build_llama_runner() {
  echo "Building llama runner for Android..."
  cmake -DBUCK2="${BUCK2}" \
    -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \
    -DANDROID_ABI="${ANDROID_ABI}" \
    -DANDROID_PLATFORM=android-23 \
    -DCMAKE_INSTALL_PREFIX=cmake-android-out \
    -DCMAKE_BUILD_TYPE=Release -DPYTHON_EXECUTABLE=python \
    -DEXECUTORCH_BUILD_OPTIMIZED=ON \
    -Bcmake-android-out/examples/models/llama2 examples/models/llama2

  cmake --build cmake-android-out/examples/models/llama2 -j4 --config Release
}

# install_flatc_from_source is provided by the sourced utils.sh.
install_flatc_from_source
install_executorch_and_backend_lib
build_llama_runner

.ci/scripts/gather_test_models.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
from examples.models import MODEL_NAME_TO_MODEL
1414
from examples.xnnpack import MODEL_NAME_TO_OPTIONS
1515

16-
1716
DEFAULT_RUNNERS = {
1817
"linux": "linux.2xlarge",
1918
"macos": "macos-m1-stable",
@@ -24,6 +23,7 @@
2423
"w2l": "linux.12xlarge",
2524
"ic4": "linux.12xlarge",
2625
"resnet50": "linux.12xlarge",
26+
"llava_encoder": "linux.4xlarge",
2727
# This one causes timeout on smaller runner, the root cause is unclear (T161064121)
2828
"dl3": "linux.12xlarge",
2929
"emformer_join": "linux.12xlarge",
@@ -83,7 +83,17 @@ def model_should_run_on_event(model: str, event: str) -> bool:
8383
We put higher priority and fast models to pull request and rest to push.
8484
"""
8585
if event == "pull_request":
86-
return model in ["add", "ic3", "mv2", "mv3", "resnet18", "vit"]
86+
return model in ["add", "ic3", "mv2", "mv3", "resnet18", "vit", "llava_encoder"]
87+
return True
88+
89+
90+
def model_should_run_on_target_os(model: str, target_os: str) -> bool:
    """
    A helper function to decide whether a model should be tested on a target os (linux/macos).
    For example, a big model can be disabled in macos due to the limited macos resources.
    """
    if target_os == "macos":
        # llava_encoder is too heavy for the macOS CI runners; run it on linux only.
        return model not in ["llava_encoder"]
    return True
8898

8999

@@ -119,6 +129,9 @@ def export_models_for_ci() -> dict[str, dict]:
119129
if not model_should_run_on_event(name, event):
120130
continue
121131

132+
if not model_should_run_on_target_os(name, target_os):
133+
continue
134+
122135
if backend == "xnnpack":
123136
if name not in MODEL_NAME_TO_OPTIONS:
124137
continue

.ci/scripts/setup-macos.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,12 @@ print_cmake_info() {
104104
codesign -f -s - "${CMAKE_EXEC}" || true
105105
}
106106

107+
setup_macos_env_variables() {
108+
CMAKE_PREFIX_PATH=$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')
109+
export CMAKE_PREFIX_PATH
110+
}
111+
112+
setup_macos_env_variables
107113
# NB: we need buck2 in all cases because cmake build also depends on calling
108114
# buck2 atm
109115
install_buck

.ci/scripts/test.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,10 @@ test_model() {
6767
run_portable_executor_runner
6868
rm "./${MODEL_NAME}.pte"
6969
fi
70+
if [[ "${MODEL_NAME}" == "llava_encoder" ]]; then
71+
# Install requirements for llava
72+
bash examples/models/llava_encoder/install_requirements.sh
73+
fi
7074
# python3 -m examples.portable.scripts.export --model_name="llama2" should works too
7175
"${PYTHON_EXECUTABLE}" -m examples.portable.scripts.export --model_name="${MODEL_NAME}"
7276
run_portable_executor_runner

.ci/scripts/test_llama.sh

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,11 @@ source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
1212
MODEL_NAME=$1 # stories110M.pt
1313
BUILD_TOOL=$2 # buck2 or cmake
1414
DTYPE=$3 # fp16 or fp32
15-
15+
MODE=${4:-"xnnpack"} # portable or xnnpack
16+
if [[ $# -lt 4 ]]; then # Assuming 4 mandatory args
17+
echo "Expecting atleast 4 positional arguments"
18+
echo "Usage: [...]"
19+
fi
1620
if [[ -z "${MODEL_NAME:-}" ]]; then
1721
echo "Missing model name, exiting..."
1822
exit 1
@@ -28,6 +32,11 @@ if [[ -z "${DTYPE:-}" ]]; then
2832
exit 1
2933
fi
3034

35+
if [[ -z "${MODE:-}" ]]; then
36+
echo "Missing mode, choose portable or xnnpack, exiting..."
37+
exit 1
38+
fi
39+
3140
if [[ -z "${BUCK:-}" ]]; then
3241
BUCK=buck2
3342
fi
@@ -42,11 +51,18 @@ which "${PYTHON_EXECUTABLE}"
4251
cmake_install_executorch_libraries() {
4352
echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
4453
rm -rf cmake-out
54+
if [[ "${MODE}" == "xnnpack" ]]; then
55+
XNNPACK=ON
56+
else
57+
XNNPACK=OFF
58+
fi
4559
retry cmake -DBUCK2="$BUCK" \
4660
-DCMAKE_INSTALL_PREFIX=cmake-out \
4761
-DCMAKE_BUILD_TYPE=Release \
4862
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
4963
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
64+
-DEXECUTORCH_BUILD_OPTIMIZED=ON \
65+
-DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
5066
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
5167
-Bcmake-out .
5268
cmake --build cmake-out -j9 --target install --config Release
@@ -58,6 +74,7 @@ cmake_build_llama_runner() {
5874
retry cmake -DBUCK2="$BUCK" \
5975
-DCMAKE_INSTALL_PREFIX=cmake-out \
6076
-DCMAKE_BUILD_TYPE=Release \
77+
-DEXECUTORCH_BUILD_OPTIMIZED=ON \
6178
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
6279
-Bcmake-out/${dir} \
6380
${dir}
@@ -99,7 +116,11 @@ fi
99116
# Export model.
100117
EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}.pte"
101118
echo "Exporting ${EXPORTED_MODEL_NAME}"
102-
$PYTHON_EXECUTABLE -m examples.models.llama2.export_llama -c stories110M.pt -p "${PARAMS}" -d "${DTYPE}"
119+
EXPORT_ARGS="-c stories110M.pt -p ${PARAMS} -d ${DTYPE} -n ${EXPORTED_MODEL_NAME}"
120+
if [[ "${MODE}" == "xnnpack" ]]; then
121+
EXPORT_ARGS="${EXPORT_ARGS} --pt2e_quantize xnnpack_dynamic"
122+
fi
123+
$PYTHON_EXECUTABLE -m examples.models.llama2.export_llama ${EXPORT_ARGS}
103124

104125
# Create tokenizer.bin.
105126
echo "Creating tokenizer.bin"

.ci/scripts/test_quantized_aot_lib.sh

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -exu

# shellcheck source=/dev/null
source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"

# Fail fast (with the resolved path echoed by -x) if the interpreter is
# missing; PYTHON_EXECUTABLE must be set by the caller (enforced by set -u).
which "${PYTHON_EXECUTABLE}"
# Just set this variable here, it's cheap even if we use buck2
CMAKE_OUTPUT_DIR=cmake-out

# Configure and build the quantized AOT ops library in a fresh cmake-out.
# CMAKE_PREFIX_PATH points at the installed torch package so CMake can find
# the Torch config files; `retry` is provided by the sourced utils.sh.
build_cmake_quantized_aot_lib() {
  echo "Building quantized aot lib"
  SITE_PACKAGES="$(${PYTHON_EXECUTABLE} -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
  CMAKE_PREFIX_PATH="${SITE_PACKAGES}/torch"
  # The subshell keeps the `cd` from leaking into the rest of the script.
  (rm -rf ${CMAKE_OUTPUT_DIR} \
    && mkdir ${CMAKE_OUTPUT_DIR} \
    && cd ${CMAKE_OUTPUT_DIR} \
    && retry cmake -DBUCK2=buck2 \
      -DCMAKE_BUILD_TYPE=Release \
      -DCMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH" \
      -DEXECUTORCH_BUILD_QUANTIZED=ON \
      -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" ..)

  cmake --build ${CMAKE_OUTPUT_DIR} -j4
}

build_cmake_quantized_aot_lib

.ci/scripts/utils.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,8 +134,8 @@ cmake_install_executorch_lib() {
134134

135135
download_stories_model_artifacts() {
136136
# Download stories110M.pt and tokenizer from Github
137-
wget "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt"
138-
wget "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.model"
137+
curl -Ls "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt" --output stories110M.pt
138+
curl -Ls "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.model" --output tokenizer.model
139139
# Create params.json file
140140
touch params.json
141141
echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps": 1e-05, "vocab_size": 32000}' > params.json

0 commit comments

Comments
 (0)