
Commit 45e9f6b

[llava][21/N] Add llava runner test binary and build script (#4667)
* [llava][18/N] Move token generation loop to a class

  As titled. This PR moves the token generation loop in the llama2 runner into a new class so it can be reused.

* [llava][19/N] Add multimodal runner base class and build file

* [llava][20/N] Add llava runner using building blocks in e/llm/runner

* [llava][21/N] Add llava runner test binary and build script

Add a `main.cpp` and a CMakeLists.txt for the llava runner. This runner takes in an image in the form of a `.pt` file (a serialized PyTorch module) along with a text prompt, and generates text tokens in a way similar to the llama runner.

Run `build.sh` to build the runner.

To serialize the image into a `.pt` file, run the following script:

```python
import torch
from torch import nn

copy = torch.tensor(resized)  # `resized`: the preprocessed image tensor, prepared beforehand
m = nn.Module()
par = nn.Parameter(copy, requires_grad=False)
m.register_parameter("0", par)
tensors = torch.jit.script(m)
tensors.save("image.pt")
```

To run the runner, use the following command:

```
cmake-out/examples/models/llava/llava_main \
    --tokenizer_path tokenizer.bin \
    --model_path llava_kv_768.pte \
    --prompt "\nWhat are the things I should be cautious about when I visit here?" \
    --image_path image.pt \
    --temperature 0
```

Differential Revision: [D61146432](https://www.internalfb.com/diff/D61146432)
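As a quick sanity check on the serialization recipe above, the saved module can be loaded back and the tensor inspected (a minimal sketch: `torch.jit.load` restores the scripted module, and the image tensor sits under the parameter name `"0"` registered above):

```python
import torch

# Load the scripted module saved as image.pt and pull the image tensor
# back out via its registered parameter name "0".
m = torch.jit.load("image.pt")
image = dict(m.named_parameters())["0"]
print(image.shape, image.dtype)  # should match the original resized tensor
```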
1 parent 622de2d commit 45e9f6b

File tree

13 files changed: +670 -41 lines

.ci/scripts/test_llava.sh

Lines changed: 98 additions & 0 deletions
```bash
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -exu
# shellcheck source=/dev/null

if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
  PYTHON_EXECUTABLE=python3
fi

cmake_install_executorch_libraries() {
  cmake \
    -DCMAKE_INSTALL_PREFIX=cmake-out \
    -DCMAKE_BUILD_TYPE=Debug \
    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
    -DEXECUTORCH_BUILD_XNNPACK=ON \
    -DEXECUTORCH_DO_NOT_USE_CXX11_ABI=ON \
    -Bcmake-out .

  cmake --build cmake-out -j9 --target install --config Debug
}

cmake_build_llava_runner() {
  dir=examples/models/llava
  python_lib=$($PYTHON_EXECUTABLE -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')

  cmake \
    -DCMAKE_INSTALL_PREFIX=cmake-out \
    -DCMAKE_BUILD_TYPE=Debug \
    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
    -DEXECUTORCH_BUILD_XNNPACK=ON \
    -DCMAKE_PREFIX_PATH="$python_lib" \
    -Bcmake-out/${dir} \
    ${dir}

  cmake --build cmake-out/${dir} -j9 --config Debug
}

# only export the one without custom op for now since it's
export_llava() {
  echo "Starting to export Llava. This will take about 6 mins"
  $PYTHON_EXECUTABLE -m executorch.examples.models.llava.export_llava --pte-name llava.pte --with-artifacts
}

run_and_verify() {
  NOW=$(date +"%H:%M:%S")
  echo "Starting to run llava runner at ${NOW}"
  if [[ ! -f "llava.pte" ]]; then
    echo "Export failed. Abort"
    exit 1
  fi
  if [[ ! -f "image.pt" ]]; then
    echo "image.pt is missing."
    exit 1
  fi
  if [[ ! -f "tokenizer.bin" ]]; then
    echo "tokenizer.bin is missing."
    exit 1
  fi
  RUNTIME_ARGS="--model_path=llava.pte \
    --tokenizer_path=tokenizer.bin \
    --image_path=image.pt \
    --prompt=ASSISTANT: \
    --temperature=0 \
    --seq_len=650"
  cmake-out/examples/models/llava/llava_main ${RUNTIME_ARGS} > result.txt
  # verify result.txt
  RESULT=$(cat result.txt)
  # set the expected prefix to be the same as prompt because there's a bug in sdpa_with_kv_cache that causes <unk> tokens.
  EXPECTED_PREFIX="ASSISTANT:"
  if [[ "${RESULT}" == *"${EXPECTED_PREFIX}"* ]]; then
    echo "Expected result prefix: ${EXPECTED_PREFIX}"
    echo "Actual result: ${RESULT}"
    echo "Success"
    exit 0
  else
    echo "Expected result prefix: ${EXPECTED_PREFIX}"
    echo "Actual result: ${RESULT}"
    echo "Failure; results not the same"
    exit 1
  fi
}

cmake_install_executorch_libraries
cmake_build_llava_runner
export_llava
run_and_verify
```
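Note that `run_and_verify` assumes the export step (run with `--with-artifacts`) has left `image.pt` and `tokenizer.bin` in the working directory. If an `image.pt` ever needs to be produced by hand, the recipe from the commit message amounts to something like the following sketch (the `photo.jpg` filename and the 336x336 resize are illustrative assumptions; the real preprocessing must match what the exported model expects):

```python
import torch
from torch import nn
from PIL import Image
from torchvision.transforms.functional import pil_to_tensor

# Illustrative preprocessing only -- the exact resize/normalization must
# match what the exported llava .pte expects.
img = Image.open("photo.jpg").resize((336, 336))
resized = pil_to_tensor(img)

# Same recipe as the commit message: register the tensor as parameter "0",
# script the wrapper module, and save it where the runner expects it.
m = nn.Module()
m.register_parameter("0", nn.Parameter(resized, requires_grad=False))
torch.jit.script(m).save("image.pt")
```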

.github/workflows/pull.yml

Lines changed: 4 additions & 1 deletion
```diff
@@ -187,7 +187,7 @@ jobs:
       # Test selective build
       PYTHON_EXECUTABLE=python bash examples/selective_build/test_selective_build.sh "${BUILD_TOOL}"
 
-  test-export-llava-linux:
+  test-llava-runner-linux:
     name: test-export-llava-linux
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     strategy:
@@ -215,6 +215,9 @@ jobs:
       # run python unittest
       python -m unittest examples.models.llava.test.test_llava
 
+      # run e2e (export, tokenizer and runner)
+      PYTHON_EXECUTABLE=python bash .ci/scripts/test_llava.sh
+
   test-quantized-aot-lib-linux:
     name: test-quantized-aot-lib-linux
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
```

CMakeLists.txt

Lines changed: 6 additions & 0 deletions
```diff
@@ -130,6 +130,12 @@ if(EXECUTORCH_ENABLE_EVENT_TRACER)
   add_definitions(-DET_EVENT_TRACER_ENABLED)
 endif()
 
+option(EXECUTORCH_DO_NOT_USE_CXX11_ABI "Define _GLIBCXX_USE_CXX11_ABI=0 if ON"
+       OFF
+)
+if(EXECUTORCH_DO_NOT_USE_CXX11_ABI)
+  add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
+endif()
 # -ffunction-sections -fdata-sections: breaks function and data into sections so
 # they can be properly gc'd. -s: strip symbol. -fno-exceptions -fno-rtti:
 # disables exceptions and runtime type.
```

examples/models/llava/CMakeLists.txt

Lines changed: 218 additions & 0 deletions
```cmake
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

#
# Simple CMake build system for llava runner.
#
# ### Editing this file ###
#
# This file should be formatted with
# ~~~
# cmake-format -i CMakeLists.txt
# ~~~
# It should also be cmake-lint clean.
#
cmake_minimum_required(VERSION 3.19)
project(llava)

# Duplicating options as root CMakeLists.txt
option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED "Build the optimized kernels" OFF)

include(CMakeDependentOption)
#
# pthreadpool: build pthreadpool library. Disable on unsupported platforms
#
cmake_dependent_option(
  EXECUTORCH_BUILD_PTHREADPOOL "Build pthreadpool library." ON
  "NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF
)
#
# cpuinfo: build cpuinfo library. Disable on unsupported platforms
#
cmake_dependent_option(
  EXECUTORCH_BUILD_CPUINFO "Build cpuinfo library." ON
  "NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF
)

if(NOT PYTHON_EXECUTABLE)
  set(PYTHON_EXECUTABLE python3)
endif()

set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../..)

include(${EXECUTORCH_ROOT}/build/Utils.cmake)

if(NOT PYTHON_EXECUTABLE)
  resolve_python_executable()
endif()

if(NOT CMAKE_CXX_STANDARD)
  set(CMAKE_CXX_STANDARD 17)
  # Can't set to 11 due to executor_runner.cpp make_unique
endif()

if(CMAKE_TOOLCHAIN_FILE MATCHES ".*(iOS|ios\.toolchain)\.cmake$")
  set(CMAKE_TOOLCHAIN_IOS ON)
else()
  set(CMAKE_TOOLCHAIN_IOS OFF)
endif()

set(_common_compile_options -Wno-deprecated-declarations -fPIC)

# Let files say "include <executorch/path/to/header.h>".
set(_common_include_directories ${EXECUTORCH_ROOT}/..)

# For some reason android build is not able to find where gflags is and hence
# cannot find corresponding .cmake file
set(gflags_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../../third-party/gflags)
find_package(gflags REQUIRED)

find_package(Torch CONFIG REQUIRED)
add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)

#
# llava_main: test binary to run llava, with tokenizer and sampler integrated
#

# find `executorch` libraries. Same as for gflags
set(executorch_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../../lib/cmake/ExecuTorch)
find_package(executorch CONFIG REQUIRED)
if(CMAKE_TOOLCHAIN_IOS OR ANDROID)
  target_link_options_shared_lib(executorch)
endif()

# custom ops library
if(EXECUTORCH_BUILD_KERNELS_CUSTOM)
  add_subdirectory(
    ${EXECUTORCH_ROOT}/extension/llm/custom_ops
    ${CMAKE_CURRENT_BINARY_DIR}/../../../extension/llm/custom_ops
  )
endif()

# llava_runner library
add_subdirectory(runner)

set(link_libraries gflags torch)
set(_srcs main.cpp)

if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED)
  list(
    APPEND
    link_libraries
    optimized_native_cpu_ops_lib
    optimized_kernels
    portable_kernels
    cpublas
    eigen_blas
  )
  target_link_options_shared_lib(optimized_native_cpu_ops_lib)
else()
  list(APPEND link_libraries portable_ops_lib portable_kernels)
  target_link_options_shared_lib(portable_ops_lib)
endif()

# quantized_ops_lib: Register quantized op kernels into the runtime
target_link_options_shared_lib(quantized_ops_lib)
list(APPEND link_libraries quantized_kernels quantized_ops_lib)

if(EXECUTORCH_BUILD_KERNELS_CUSTOM)
  target_link_options_shared_lib(custom_ops)
  list(APPEND link_libraries custom_ops)
endif()

set(XNNPACK_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack)
# Extra compile option and include dir for pthreadpool
if(EXECUTORCH_BUILD_PTHREADPOOL)
  list(APPEND _common_compile_options -DET_USE_THREADPOOL)
  list(APPEND link_libraries pthreadpool)
  # These 2 source files are included in xnnpack_backend
  if(NOT TARGET xnnpack_backend)
    list(APPEND _srcs ${XNNPACK_ROOT}/threadpool/threadpool.cpp
         ${XNNPACK_ROOT}/threadpool/threadpool_guard.cpp
    )
  endif()
  list(APPEND _common_include_directories
       ${XNNPACK_ROOT}/third-party/pthreadpool/include
  )
endif()

# Extra sources for cpuinfo
if(EXECUTORCH_BUILD_CPUINFO)
  list(APPEND link_libraries cpuinfo)
  list(APPEND _srcs ${XNNPACK_ROOT}/threadpool/cpuinfo_utils.cpp)
  list(APPEND _common_include_directories
       ${XNNPACK_ROOT}/third-party/cpuinfo/include
  )
endif()

# XNNPACK
if(TARGET xnnpack_backend)
  set(xnnpack_backend_libs xnnpack_backend XNNPACK)
  list(APPEND link_libraries ${xnnpack_backend_libs})
  target_link_options_shared_lib(xnnpack_backend)
endif()

# Vulkan backend
if(TARGET vulkan_backend)
  list(APPEND link_libraries vulkan_backend)
  target_link_options_shared_lib(vulkan_backend)
endif()

# Qnn backend
if(TARGET qnn_executorch_backend)
  list(APPEND link_libraries qnn_executorch_backend)
  target_link_options_shared_lib(qnn_executorch_backend)
endif()

# MPS backend
if(TARGET mpsdelegate)
  list(
    APPEND
    link_libraries
    mpsdelegate
    "-framework Foundation"
    "-weak_framework MetalPerformanceShaders"
    "-weak_framework MetalPerformanceShadersGraph"
    "-weak_framework Metal"
  )
  target_link_options_shared_lib(mpsdelegate)
endif()

if(TARGET coremldelegate)
  find_library(SQLITE_LIBRARY sqlite3)
  list(
    APPEND
    link_libraries
    coremldelegate
    sqlite3
    "-framework Foundation"
    "-framework CoreML"
    "-framework Accelerate"
  )
  target_link_options_shared_lib(coremldelegate)
endif()

# This one is needed for cpuinfo where it uses android specific log lib
if(ANDROID)
  list(APPEND link_libraries log)
endif()

add_executable(llava_main ${_srcs})
if(CMAKE_BUILD_TYPE STREQUAL "Release")
  target_link_options(llava_main PRIVATE "LINKER:--gc-sections,-s")
endif()

target_include_directories(llava_main PUBLIC ${_common_include_directories})
target_link_libraries(llava_main PUBLIC llava_runner ${link_libraries})
target_compile_options(llava_main PUBLIC ${_common_compile_options})

if(APPLE)
  target_link_options_shared_lib(executorch)
endif()

# Print all summary
executorch_print_configuration_summary()
```
