
Commit b484ea9 (2 parents: a41d36d + 602be10)

Update on "[executorch] Migrate most of extension/... to new namespace"

Migrate these headers to the new `::executorch::extension` namespace. Add temporary aliases from the old `::torch::executor` namespace so we can migrate users incrementally.

Differential Revision: [D60938936](https://our.internmc.facebook.com/intern/diff/D60938936/)

[ghstack-poisoned]
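In code terms, the incremental migration works roughly like the sketch below. This is an illustration only, not the actual headers: `Module` is a placeholder for whichever symbols each migrated header actually defines.

```cpp
// New canonical location for the extension APIs.
namespace executorch {
namespace extension {

class Module {
 public:
  explicit Module(const char* file_path);
};

} // namespace extension
} // namespace executorch

// Temporary compatibility alias: existing code that refers to
// ::torch::executor::Module keeps compiling while callers are
// migrated incrementally; once all users are updated, the alias
// block can be deleted.
namespace torch {
namespace executor {

using ::executorch::extension::Module;

} // namespace executor
} // namespace torch
```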

26 files changed: +457 −235 lines


.ci/scripts/test_llama.sh (6 additions, 4 deletions)

@@ -9,7 +9,7 @@ set -exu
 # shellcheck source=/dev/null
 source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"

-MODEL_NAME=$1 # stories110M.pt
+MODEL_NAME=$1 # stories110M
 BUILD_TOOL=$2 # buck2 or cmake
 DTYPE=$3 # fp16 or fp32
 MODE=${4:-"xnnpack+custom"} # portable or xnnpack+custom or xnnpack+custom+qe
@@ -140,7 +140,7 @@ cmake_build_llama_runner() {

 cleanup_files() {
   echo "Deleting downloaded and generated files"
-  rm "${MODEL_NAME}"
+  rm "${CHECKPOINT_FILE_NAME}"
   rm tokenizer.model
   rm tokenizer.bin
   rm "${EXPORTED_MODEL_NAME}"
@@ -159,8 +159,10 @@ prepare_artifacts_upload() {

 # Download and create artifacts.
 PARAMS="params.json"
+CHECKPOINT_FILE_NAME=""
 touch "${PARAMS}"
-if [[ "${MODEL_NAME}" == "stories110M.pt" ]]; then
+if [[ "${MODEL_NAME}" == "stories110M" ]]; then
+  CHECKPOINT_FILE_NAME="stories110M.pt"
   download_stories_model_artifacts
 else
   echo "Unsupported model name ${MODEL_NAME}"
@@ -181,7 +183,7 @@ fi
 # Export model.
 EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}.pte"
 echo "Exporting ${EXPORTED_MODEL_NAME}"
-EXPORT_ARGS="-c stories110M.pt -p ${PARAMS} -d ${DTYPE} -n ${EXPORTED_MODEL_NAME} -kv"
+EXPORT_ARGS="-c ${CHECKPOINT_FILE_NAME} -p ${PARAMS} -d ${DTYPE} -n ${EXPORTED_MODEL_NAME} -kv"
 if [[ "${XNNPACK}" == "ON" ]]; then
   EXPORT_ARGS="${EXPORT_ARGS} -X -qmode 8da4w -G 128"
 fi

.github/workflows/android-perf.yml (2 additions, 2 deletions)

@@ -156,14 +156,14 @@ jobs:
 BUILD_MODE="cmake"
 DTYPE="fp32"

-if [[ ${{ matrix.model }} == "llama*" ]]; then
+if [[ ${{ matrix.model }} == "stories*" ]]; then
   # Install requirements for export_llama
   PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
   # Test llama2
   if [[ ${{ matrix.delegate }} == "xnnpack" ]]; then
     DELEGATE_CONFIG="xnnpack+custom+qe"
   fi
-  PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh "${{ matrix.model }}.pt" "${BUILD_MODE}" "${DTYPE}" "${DELEGATE_CONFIG}" "${ARTIFACTS_DIR_NAME}"
+  PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh "${{ matrix.model }}" "${BUILD_MODE}" "${DTYPE}" "${DELEGATE_CONFIG}" "${ARTIFACTS_DIR_NAME}"
 else
   PYTHON_EXECUTABLE=python bash .ci/scripts/test.sh "${{ matrix.model }}" "${BUILD_MODE}" "${{ matrix.delegate }}" "${ARTIFACTS_DIR_NAME}"
 fi

.github/workflows/pull.yml (2 additions, 2 deletions)

@@ -112,7 +112,7 @@ jobs:
 # Install requirements for export_llama
 PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
 # Test llama2
-PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M.pt "${BUILD_TOOL}" "${DTYPE}" "${MODE}"
+PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M "${BUILD_TOOL}" "${DTYPE}" "${MODE}"

 test-llama-runner-linux-android:
   name: test-llama-runner-linux-android
@@ -406,4 +406,4 @@ jobs:
 # Install requirements for export_llama
 PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
 # Test llama2
-PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M.pt "${BUILD_TOOL}" "${DTYPE}" "${MODE}"
+PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M "${BUILD_TOOL}" "${DTYPE}" "${MODE}"

.github/workflows/trunk.yml (1 addition, 1 deletion)

@@ -269,7 +269,7 @@ jobs:
 # Install requirements for export_llama
 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama2/install_requirements.sh
 # Test llama2
-PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh stories110M.pt "${BUILD_TOOL}" "${DTYPE}" "${MODE}"
+PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh stories110M "${BUILD_TOOL}" "${DTYPE}" "${MODE}"

 test-qnn-model:
   name: test-qnn-model

.github/workflows/update-viablestrict.yml (1 addition, 1 deletion)

@@ -20,6 +20,6 @@ jobs:
 with:
   repository: pytorch/executorch
   stable-branch: viable/strict
-  requires: '[\"pull\", \"lint\", \"trunk\", \"Build documentation\", "^Android$", "^Apple$"]'
+  requires: '[\"pull\", \"lint\", \"trunk\", \"Build documentation\", \"^Android$\", \"^Apple$\"]'
   secret-bot-token: ${{ secrets.UPDATEBOT_TOKEN }}
   rockset-api-key: ${{ secrets.ROCKSET_API_KEY }}

build/build_android_llm_demo.sh (6 additions, 0 deletions)

@@ -30,7 +30,10 @@ build_android_native_library() {
 -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \
 -DANDROID_ABI="${ANDROID_ABI}" \
 -DANDROID_PLATFORM=android-23 \
+-DEXECUTORCH_ENABLE_LOGGING=ON \
+-DEXECUTORCH_LOG_LEVEL=Info \
 -DEXECUTORCH_BUILD_XNNPACK=ON \
+-DEXECUTORCH_XNNPACK_SHARED_WORKSPACE=ON \
 -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
 -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
 -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
@@ -60,11 +63,14 @@ build_android_native_library() {

 cmake --build "${CMAKE_OUT}"/examples/models/llama2 -j "${CMAKE_JOBS}" --config Release

+
 cmake extension/android \
 -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake \
 -DANDROID_ABI="${ANDROID_ABI}" \
 -DANDROID_PLATFORM=android-23 \
 -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
+-DEXECUTORCH_ENABLE_LOGGING=ON \
+-DEXECUTORCH_LOG_LEVEL=Info \
 -DEXECUTORCH_BUILD_LLAMA_JNI=ON \
 -DEXECUTORCH_USE_TIKTOKEN="${EXECUTORCH_USE_TIKTOKEN}" \
 -DCMAKE_BUILD_TYPE=Release \

docs/source/api-life-cycle.md (13 additions, 11 deletions)

@@ -91,19 +91,21 @@ communicate state to developers.
 <td>

 Use the
-<a href="https://typing-extensions.readthedocs.io/en/latest/#typing_extensions.deprecated">typing_extensions.deprecated</a>
-decorator
+<a href="https://github.com/pytorch/executorch/blob/main/exir/_warnings.py">executorch.exir._warnings.deprecated</a>
+decorator.

 <p>
-Use ExecuTorch's native experimental decorator (TODO not yet implemented)
+Use the
+<a href="https://github.com/pytorch/executorch/blob/main/exir/_warnings.py">executorch.exir._warnings.experimental</a>
+decorator.

 </td>
 <td>

 Use <code>.. warning::</code> in the docstrings of deprecated and experimental
 APIs. See
 <a href="https://github.com/pytorch/pytorch/blob/cd8bbdc71a0258292381a7d54c8b353988d02ff4/torch/nn/utils/stateless.py#L170">example
-usage</a>
+usage</a>.

 </ul>
 </td>
@@ -113,35 +115,35 @@ usage</a>
 </td>
 <td>

-Use <code>ET_DEPRECATED</code> macros. See <a href="https://github.com/pytorch/executorch/blob/8e0f856ee269b319ac4195509cf31e3f548aa0e8/runtime/executor/program.h#L81">example usage</a>
+Use the <code>ET_DEPRECATED</code> annotation macro. See <a href="https://github.com/pytorch/executorch/blob/8e0f856ee269b319ac4195509cf31e3f548aa0e8/runtime/executor/program.h#L81">example usage</a>.

 <p>
 <p>
-Use <code>ET_EXPERIMENTAL</code> macros (TODO not yet implemented)
+Use the <code>ET_EXPERIMENTAL</code> annotation macro.
 </ul>
 </td>
 <td>

 Start Doxygen comments with <code>DEPRECATED:</code> See
 <a href="https://github.com/pytorch/executorch/blob/9d859653ae916d0a72f6b2b5c5925bed38832140/runtime/executor/program.h#L139">example
-usage</a>
+usage</a>.

 <p>
 <p>
-Start Doxygen comments with <code>EXPERIMENTAL:</code>
+Start Doxygen comments with <code>EXPERIMENTAL:</code>.
 </td>
 </tr>
 <tr>
 <td>Java
 </td>
 <td>

-Use <a href="https://docs.oracle.com/javase/9/docs/api/java/lang/Deprecated.html">java.lang.Deprecated</a>
+Use <a href="https://docs.oracle.com/javase/9/docs/api/java/lang/Deprecated.html">java.lang.Deprecated</a>.

 <p>
 <p>

-Use <a href="https://cs.android.com/androidx/platform/frameworks/support/+/androidx-main:docs/api_guidelines/annotations.md">androidx.annotation.RequiresOptIn</a>
+Use <a href="https://cs.android.com/androidx/platform/frameworks/support/+/androidx-main:docs/api_guidelines/annotations.md">androidx.annotation.RequiresOptIn</a>.

 </td>
 <td>
@@ -164,7 +166,7 @@ Use <a href="https://cs.android.com/androidx/platform/frameworks/support/+/andro
 <code>__attribute__((deprecated("Use newMethod instead")));</code>
 <p>
 <p>
-<code>__attribute__((experimental("Use newMethod instead")));</code> (TODO not yet implemented)
+<code>__attribute__((deprecated("This API is experimental and may change without notice.")));</code>
 </td>
 <td>
 <p>
examples/models/llama2/install_requirements.sh (1 addition, 1 deletion)

@@ -8,7 +8,7 @@
 # Install snakeviz for cProfile flamegraph
 # Install sentencepiece for llama tokenizer
 pip install snakeviz sentencepiece
-pip install torchao==0.4
+pip install torchao==0.1

 # Install lm-eval for Model Evaluation with lm-evalution-harness
 # Install tiktoken for tokenizer

examples/models/llava/runner/llava_image_prefiller.h (2 additions, 3 deletions)

@@ -24,9 +24,8 @@ class LlavaImagePrefiller : public ImagePrefiller {
  * @param start_pos The starting position in KV cache of the input in the LLM
  * @return logits of the image prefill.
  */
- inline Result<exec_aten::Tensor> prefill(
-     Image& image,
-     int64_t start_pos = 0) {
+ inline Result<exec_aten::Tensor> prefill(Image& image, int64_t start_pos = 0)
+     override {
   ManagedTensor managed_images(
       image.data.data(), {3, image.height, image.width}, ScalarType::Byte);
   // Run image encoder
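The fix above adds the missing `override` specifier to a virtual method. As a general C++ point, illustrated with simplified hypothetical types rather than the actual LLaVA runner classes, `override` turns a silent signature mismatch into a compile error:

```cpp
#include <cstdint>

struct Prefiller {
  virtual ~Prefiller() = default;
  virtual int prefill(int64_t start_pos) = 0;
};

struct GoodPrefiller : Prefiller {
  // Signature matches the base declaration, so `override` compiles cleanly.
  int prefill(int64_t start_pos) override { return 0; }
};

struct BadPrefiller : Prefiller {
  // Without `override`, a mismatched signature would silently declare a
  // brand-new overload and leave the base method unimplemented; with
  // `override`, the compiler rejects it. (Commented out so this compiles.)
  // int prefill(int32_t start_pos) override { return 0; } // error
  int prefill(int64_t start_pos) override { return 1; }
};
```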
New file (22 additions, 0 deletions)

@@ -0,0 +1,22 @@
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
+
+def define_common_targets():
+    runtime.cxx_library(
+        name = "runner",
+        srcs = ["llava_runner.cpp"],
+        exported_headers = ["llava_runner.h", "llava_image_prefiller.h", "llava_text_decoder_runner.h"],
+        visibility = [
+            "@EXECUTORCH_CLIENTS",
+        ],
+        exported_deps = [
+            "//executorch/backends/xnnpack:xnnpack_backend",
+            "//executorch/extension/llm/runner:runner_lib",
+            "//executorch/extension/llm/tokenizer:bpe_tokenizer",
+            "//executorch/extension/evalue_util:print_evalue",
+            "//executorch/extension/runner_util:managed_tensor",
+            "//executorch/extension/module:module",
+            "//executorch/kernels/quantized:generated_lib",
+            "//executorch/runtime/core/exec_aten:lib",
+            "//executorch/runtime/core/exec_aten/util:tensor_util",
+        ],
+    )

exir/tests/TARGETS (2 additions, 0 deletions)

@@ -109,6 +109,7 @@ python_unittest(
 deps = [
     "//caffe2:torch",
     "//executorch/exir:lib",
+    "//executorch/extension/pybindings:portable_lib",
 ],
 )

@@ -209,6 +210,7 @@ python_unittest(
 "//executorch/exir/passes:debug_handle_generator_pass",
 "//executorch/exir/passes:insert_write_back_for_buffers_pass",
 "//executorch/exir/passes:lib",
+"//executorch/exir/passes:memory_format_ops_pass",
 "//executorch/exir/passes:normalize_view_copy_base_pass",
 "//executorch/exir/passes:remove_graph_asserts_pass",
 "//executorch/exir/passes:remove_mixed_type_operators",

exir/tests/test_joint_graph.py (19 additions, 0 deletions)

@@ -11,6 +11,10 @@
 import torch._dynamo

 from executorch.exir import to_edge
+
+from executorch.extension.pybindings.portable_lib import (
+    _load_for_executorch_from_buffer,
+)
 from torch.export._trace import _export
 from torch.export.experimental import _export_forward_backward
 from torch.export.exported_program import OutputKind
@@ -89,3 +93,18 @@ def forward(self, x, y):
     .val.allocation_info.memory_offset_low,
     48,
 )
+
+loss = m(*example_inputs)
+loss.backward()
+et_mod = _load_for_executorch_from_buffer(et.buffer)
+et_outputs = et_mod.forward(
+    example_inputs
+)  # ET outputs are [loss, grads, weights]
+
+self.assertTrue(torch.allclose(loss, et_outputs[0]))
+self.assertTrue(
+    torch.allclose(m.linear.weight.grad, et_outputs[1])  # pyre-ignore[6]
+)
+self.assertTrue(torch.allclose(m.linear.bias.grad, et_outputs[2]))
+self.assertTrue(torch.allclose(m.linear.weight, et_outputs[3]))
+self.assertTrue(torch.allclose(m.linear.bias, et_outputs[4]))

extension/android/CMakeLists.txt (6 additions, 0 deletions)

@@ -79,6 +79,11 @@ if(EXECUTORCH_BUILD_LLAMA_JNI)
 TARGET llama_runner PROPERTY IMPORTED_LOCATION ${LLAMA_RUNNER_PATH}
 )

+add_subdirectory(
+  ${EXECUTORCH_ROOT}/examples/models/llava/runner
+  ${CMAKE_CURRENT_BINARY_DIR}/../../examples/models/llava/runner
+)
+
 set(CUSTOM_OPS_PATH
 ${CMAKE_CURRENT_BINARY_DIR}/../../extension/llm/custom_ops/libcustom_ops.a
 )
@@ -116,6 +121,7 @@ if(EXECUTORCH_BUILD_LLAMA_JNI)
 executorch_llama_jni
 ${link_libraries}
 llama_runner
+llava_runner
 custom_ops
 cpublas
 eigen_blas
