pytorch
diff --git a/.ci/docker/build.sh b/.ci/docker/build.sh
Lines changed: 6 additions & 1 deletion
diff --git a/.ci/docker/ci_commit_pins/pytorch.txt b/.ci/docker/ci_commit_pins/pytorch.txt
Lines changed: 1 addition & 1 deletion
diff --git a/.ci/docker/ci_commit_pins/torchao.txt b/.ci/docker/ci_commit_pins/torchao.txt
Lines changed: 0 additions & 1 deletion
diff --git a/.ci/docker/common/install_clang.sh b/.ci/docker/common/install_clang.sh
Lines changed: 1 addition & 1 deletion
diff --git a/.ci/docker/ubuntu/Dockerfile b/.ci/docker/ubuntu/Dockerfile
Lines changed: 2 additions & 0 deletions
diff --git a/.ci/scripts/build_llama_android.sh b/.ci/scripts/build_llama_android.sh
Lines changed: 2 additions & 4 deletions
diff --git a/.ci/scripts/gather_test_models.py b/.ci/scripts/gather_test_models.py
Lines changed: 4 additions & 2 deletions
diff --git a/.ci/scripts/setup-linux.sh b/.ci/scripts/setup-linux.sh
Lines changed: 4 additions & 2 deletions
diff --git a/.ci/scripts/setup-qnn-deps.sh b/.ci/scripts/setup-qnn-deps.sh
Lines changed: 4 additions & 2 deletions
diff --git a/.ci/scripts/test_eval_llama_mmlu.sh b/.ci/scripts/test_eval_llama_mmlu.sh
Lines changed: 64 additions & 0 deletions
diff --git a/.ci/scripts/test_eval_llama_wikitext.sh b/.ci/scripts/test_eval_llama_wikitext.sh
Lines changed: 62 additions & 0 deletions
diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh
Lines changed: 5 additions & 5 deletions
diff --git a/.ci/scripts/test_llama_runner_eager.sh b/.ci/scripts/test_llama_runner_eager.sh
Lines changed: 63 additions & 0 deletions
diff --git a/.ci/scripts/test_llava.sh b/.ci/scripts/test_llava.sh
Lines changed: 0 additions & 2 deletions
@@ -37,11 +37,15 @@ case "${IMAGE_NAME}" in
     ARM_SDK=yes
     CLANG_VERSION=12
     ;;
+  executorch-ubuntu-22.04-qnn-sdk)
+    QNN_SDK=yes
+    CLANG_VERSION=12
+    ;;
   executorch-ubuntu-22.04-clang12-android)
     LINTRUNNER=""
     CLANG_VERSION=12
     # From https://developer.android.com/ndk/downloads
-    ANDROID_NDK_VERSION=r26c
+    ANDROID_NDK_VERSION=r27b
     ;;
   *)
     echo "Invalid image name ${IMAGE_NAME}"
@@ -72,6 +76,7 @@ docker build \
   --build-arg "LINTRUNNER=${LINTRUNNER:-}" \
   --build-arg "BUILD_DOCS=${BUILD_DOCS}" \
   --build-arg "ARM_SDK=${ARM_SDK:-}" \
+  --build-arg "QNN_SDK=${QNN_SDK:-}" \
   --build-arg "ANDROID_NDK_VERSION=${ANDROID_NDK_VERSION:-}" \
   -f "${OS}"/Dockerfile \
   "$@" \
 
@@ -1 +1 @@
-5ba404f68775bb06a1125a100687f86b6d6de6a8
+19eff28ff3f19b50da46f5a9ff5f4d4d213806fe
@@ -13,7 +13,7 @@ install_ubuntu() {
   apt-get install -y --no-install-recommends clang-"$CLANG_VERSION"
   apt-get install -y --no-install-recommends llvm-"$CLANG_VERSION"
   # Also require LLD linker from llvm and libomp to build PyTorch from source
-  apt-get install -y lld "libomp-${CLANG_VERSION}-dev"
+  apt-get install -y lld "libomp-${CLANG_VERSION}-dev" "libc++-${CLANG_VERSION}-dev"
 
   # Use update-alternatives to make this version the default
   update-alternatives --install /usr/bin/clang clang /usr/bin/clang-"$CLANG_VERSION" 50
 
@@ -82,5 +82,7 @@ COPY --chown=ci-user:ci-user ./arm /opt/arm
 # Set up ARM SDK if needed
 RUN if [ -n "${ARM_SDK}" ]; then git config --global user.email "[email protected]"; git config --global user.name "OSS CI"; bash /opt/arm/setup.sh --i-agree-to-the-contained-eula /opt/arm-sdk; chown -R ci-user:ci-user /opt/arm-sdk; fi
 
+ARG QNN_SDK
+
 USER ci-user
 CMD ["bash"]
@@ -19,7 +19,6 @@ install_executorch_and_backend_lib() {
   cmake -DBUCK2="${BUCK2}" \
     -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \
     -DANDROID_ABI="${ANDROID_ABI}" \
-    -DANDROID_PLATFORM=android-23 \
     -DCMAKE_INSTALL_PREFIX=cmake-android-out \
     -DCMAKE_BUILD_TYPE=Release \
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
@@ -41,16 +40,15 @@ build_llama_runner() {
     cmake -DBUCK2="${BUCK2}" \
     -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK"/build/cmake/android.toolchain.cmake  \
     -DANDROID_ABI="${ANDROID_ABI}" \
-    -DANDROID_PLATFORM=android-23 \
     -DCMAKE_INSTALL_PREFIX=cmake-android-out \
     -DCMAKE_BUILD_TYPE=Release -DPYTHON_EXECUTABLE=python \
     -DEXECUTORCH_BUILD_XNNPACK=ON \
     -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
     -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
     -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-    -Bcmake-android-out/examples/models/llama2 examples/models/llama2
+    -Bcmake-android-out/examples/models/llama examples/models/llama
 
-    cmake --build cmake-android-out/examples/models/llama2 -j4 --config Release
+    cmake --build cmake-android-out/examples/models/llama -j4 --config Release
 }
 install_flatc_from_source
 install_executorch_and_backend_lib
 
@@ -24,6 +24,8 @@
         "ic4": "linux.12xlarge",
         "resnet50": "linux.12xlarge",
         "llava": "linux.12xlarge",
+        "llama3_2_vision_encoder": "linux.12xlarge",
+        "llama3_2_text_decoder": "linux.12xlarge",
         # This one causes timeout on smaller runner, the root cause is unclear (T161064121)
         "dl3": "linux.12xlarge",
         "emformer_join": "linux.12xlarge",
@@ -88,8 +90,8 @@ def model_should_run_on_event(model: str, event: str) -> bool:
     if event == "pull_request":
         return model in ["mv3", "vit"]
     elif event == "push":
-        # 'emformer_predict' is running super slow. Only run it periodically
-        return model not in ["emformer_predict"]
+        # These models are super slow, so only run them periodically
+        return model not in ["dl3", "edsr", "emformer_predict"]
     else:
         return True
 
 
@@ -19,6 +19,8 @@ else
 fi
 
 # As Linux job is running inside a Docker container, all of its dependencies
-# have already been installed
-install_executorch
+# have already been installed, so we use a PyTorch build from source here instead
+# of the nightly build. This allows CI to test against the latest PyTorch commits.
+install_executorch "use-pt-pinned-commit"
 build_executorch_runner "${BUILD_TOOL}"
+do_not_use_nightly_on_ci
@@ -31,8 +31,9 @@ install_qnn() {
 }
 
 setup_libc++() {
+  clang_version=$1
   sudo apt-get update
-  pkgs_to_check=('libc++-dev')
+  pkgs_to_check=("libc++-${clang_version}-dev")
   j=0
   while [ $j -lt ${#pkgs_to_check[*]} ]; do
     install_status=$(verify_pkg_installed ${pkgs_to_check[$j]})
@@ -47,5 +48,6 @@ setup_libc++() {
   done
 }
 
-setup_libc++
+# The version passed here must match the clang version in the Docker image
+setup_libc++ 12
 install_qnn
@@ -0,0 +1,64 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -exu
+
+if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
+    PYTHON_EXECUTABLE=python3
+fi
+
+# Download the stories110M checkpoint and tokenizer, and write the matching params.json
+prepare_model_artifacts() {
+    echo "Preparing stories model artifacts"
+    wget -O stories110M.pt "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt"
+    wget -O tokenizer.model "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.model"
+    echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps": 1e-05, "vocab_size": 32000}' > params.json
+}
+
+run_and_verify() {
+    NOW=$(date +"%H:%M:%S")
+    echo "Starting to run eval_llama at ${NOW}"
+    if [[ ! -f "stories110M.pt" ]]; then
+        echo "stories110M.pt is missing."
+        exit 1
+    fi
+    if [[ ! -f "tokenizer.model" ]]; then
+        echo "tokenizer.model is missing."
+        exit 1
+    fi
+    if [[ ! -f "params.json" ]]; then
+        echo "params.json is missing."
+        exit 1
+    fi
+    $PYTHON_EXECUTABLE -m examples.models.llama.eval_llama \
+	-c stories110M.pt \
+	-p params.json \
+	-t tokenizer.model \
+	-kv \
+	-d fp32 \
+	--tasks mmlu \
+	-f 5 \
+	--max_seq_length 2048 \
+	--limit 5 > result.txt
+
+    # Verify result.txt
+    RESULT=$(cat result.txt)
+    EXPECTED_TASK="mmlu"
+    EXPECTED_RESULT="acc"
+    if [[ "${RESULT}" == "${EXPECTED_TASK}: {"*"${EXPECTED_RESULT}"* ]]; then
+        echo "Actual result: ${RESULT}"
+        echo "Success"
+        exit 0
+    else
+        echo "Actual result: ${RESULT}"
+        echo "Failure; results not the same"
+        exit 1
+    fi
+}
+
+prepare_model_artifacts
+run_and_verify
@@ -0,0 +1,62 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -exu
+
+if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
+    PYTHON_EXECUTABLE=python3
+fi
+
+# Download the stories110M checkpoint and tokenizer, and write the matching params.json
+prepare_model_artifacts() {
+    echo "Preparing stories model artifacts"
+    wget -O stories110M.pt "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt"
+    wget -O tokenizer.model "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.model"
+    echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps": 1e-05, "vocab_size": 32000}' > params.json
+}
+
+run_and_verify() {
+    NOW=$(date +"%H:%M:%S")
+    echo "Starting to run eval_llama at ${NOW}"
+    if [[ ! -f "stories110M.pt" ]]; then
+        echo "stories110M.pt is missing."
+        exit 1
+    fi
+    if [[ ! -f "tokenizer.model" ]]; then
+        echo "tokenizer.model is missing."
+        exit 1
+    fi
+    if [[ ! -f "params.json" ]]; then
+        echo "params.json is missing."
+        exit 1
+    fi
+    $PYTHON_EXECUTABLE -m examples.models.llama.eval_llama \
+	-c stories110M.pt \
+	-p params.json \
+	-t tokenizer.model \
+	-kv \
+	-d fp32 \
+	--max_seq_length 2048 \
+	--limit 5 > result.txt
+
+    # Verify result.txt
+    RESULT=$(cat result.txt)
+    EXPECTED_TASK="wikitext"
+    EXPECTED_RESULT="word_perplexity"
+    if [[ "${RESULT}" == "${EXPECTED_TASK}: {"*"${EXPECTED_RESULT}"* ]]; then
+        echo "Actual result: ${RESULT}"
+        echo "Success"
+        exit 0
+    else
+        echo "Actual result: ${RESULT}"
+        echo "Failure; results not the same"
+        exit 1
+    fi
+}
+
+prepare_model_artifacts
+run_and_verify
@@ -125,7 +125,7 @@ cmake_install_executorch_libraries() {
 
 cmake_build_llama_runner() {
     echo "Building llama runner"
-    dir="examples/models/llama2"
+    dir="examples/models/llama"
     retry cmake \
         -DCMAKE_INSTALL_PREFIX=cmake-out \
         -DCMAKE_BUILD_TYPE=Debug \
@@ -171,7 +171,7 @@ else
 fi
 
 # Check dtype.
-EXPORTED_MODEL_NAME="llama2"
+EXPORTED_MODEL_NAME="tinyllama_${MODE}_${DTYPE}"
 if [[ "${DTYPE}" == "fp16" ]]; then
   EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}_h"
 elif [[ "${DTYPE}" == "bf16" ]]; then
@@ -206,7 +206,7 @@ if [[ "${QNN}" == "ON" ]]; then
   EXPORT_ARGS="${EXPORT_ARGS} -kv -v --qnn --disable_dynamic_shape"
 fi
 # Add dynamically linked library location
-$PYTHON_EXECUTABLE -m examples.models.llama2.export_llama ${EXPORT_ARGS}
+$PYTHON_EXECUTABLE -m examples.models.llama.export_llama ${EXPORT_ARGS}
 
 # Create tokenizer.bin.
 echo "Creating tokenizer.bin"
@@ -219,15 +219,15 @@ echo "Running ${EXPORTED_MODEL_NAME} in portable mode"
 if [[ "${BUILD_TOOL}" == "buck2" ]]; then
   # Run model.
   # shellcheck source=/dev/null
-  $BUCK run examples/models/llama2:main -- ${RUNTIME_ARGS} > result.txt
+  $BUCK run examples/models/llama:main -- ${RUNTIME_ARGS} > result.txt
 elif [[ "${BUILD_TOOL}" == "cmake" ]]; then
   cmake_install_executorch_libraries
   cmake_build_llama_runner
   # Run llama runner
   NOW=$(date +"%H:%M:%S")
   echo "Starting to run llama runner at ${NOW}"
   # shellcheck source=/dev/null
-  cmake-out/examples/models/llama2/llama_main ${RUNTIME_ARGS} > result.txt
+  cmake-out/examples/models/llama/llama_main ${RUNTIME_ARGS} > result.txt
   NOW=$(date +"%H:%M:%S")
   echo "Finished at ${NOW}"
 else
 
@@ -0,0 +1,63 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -exu
+
+if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
+    PYTHON_EXECUTABLE=python3
+fi
+
+# Download the stories110M checkpoint and tokenizer, and write the matching params.json
+prepare_model_artifacts() {
+    echo "Preparing stories model artifacts"
+    wget -O stories110M.pt "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt"
+    wget -O tokenizer.model "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.model"
+    echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps": 1e-05, "vocab_size": 32000}' > params.json
+}
+
+run_and_verify() {
+    NOW=$(date +"%H:%M:%S")
+    echo "Starting to run eval_llama at ${NOW}"
+    if [[ ! -f "stories110M.pt" ]]; then
+        echo "stories110M.pt is missing."
+        exit 1
+    fi
+    if [[ ! -f "tokenizer.model" ]]; then
+        echo "tokenizer.model is missing."
+        exit 1
+    fi
+    if [[ ! -f "params.json" ]]; then
+        echo "params.json is missing."
+        exit 1
+    fi
+    $PYTHON_EXECUTABLE -m examples.models.llama.runner.eager \
+	-c stories110M.pt \
+	-p params.json \
+	-t tokenizer.model \
+	-kv \
+	-d fp32 \
+	--max_seq_length 32 \
+	--temperature 0 \
+    --show_tokens \
+	--prompt "Once upon a time," > result.txt
+
+    # Verify result.txt
+    RESULT=$(cat result.txt)
+    EXPECTED_RESULT="727, 471, 263, 2217, 7826, 4257, 365, 2354, 29889, 2296, 18012, 304, 1708, 5377, 297, 278, 6575, 845, 457, 29889, 3118, 2462, 29892, 1183, 4446, 263"
+    if [[ "${RESULT}" == *"${EXPECTED_RESULT}"* ]]; then
+        echo "Actual result: ${RESULT}"
+        echo "Success"
+        exit 0
+    else
+        echo "Actual result: ${RESULT}"
+        echo "Failure; results not the same"
+        exit 1
+    fi
+}
+
+prepare_model_artifacts
+run_and_verify
@@ -56,7 +56,6 @@ cmake_install_executorch_libraries_for_android() {
     cmake                                                                       \
         -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
         -DANDROID_ABI=arm64-v8a                                                 \
-        -DANDROID_PLATFORM=android-23                                           \
         ${EXECUTORCH_COMMON_CMAKE_ARGS}                                         \
         -B${BUILD_DIR} .
 
@@ -93,7 +92,6 @@ cmake_build_llava_runner_for_android() {
     cmake                                                                       \
         -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
         -DANDROID_ABI=arm64-v8a                                                 \
-        -DANDROID_PLATFORM=android-23                                           \
         ${LLAVA_COMMON_CMAKE_ARGS}                                              \
         -DCMAKE_PREFIX_PATH="$python_lib"                                       \
         -DLLAVA_RUNNER_NO_TORCH_DUMMY_IMAGE=ON                                  \
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-5ba404f68775bb06a1125a100687f86b6d6de6a8`
	`1`	`+19eff28ff3f19b50da46f5a9ff5f4d4d213806fe`