[WIP][Llava] Add support to cross compile llava_runner for Android #5108

Merged
merged 1 commit on Sep 6, 2024
144 changes: 108 additions & 36 deletions .ci/scripts/test_llava.sh
@@ -9,48 +9,97 @@ set -exu
# shellcheck source=/dev/null

BUILD_TYPE=${1:-Debug}
TARGET_OS=${2:-Native}
BUILD_DIR=${3:-cmake-out}

echo "Building with BUILD_TYPE: $BUILD_TYPE"
echo "Building with BUILD_TYPE: $BUILD_TYPE, TARGET_OS: $TARGET_OS, BUILD_DIR: $BUILD_DIR"

if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
PYTHON_EXECUTABLE=python3
PYTHON_EXECUTABLE=python3
fi

TARGET_OS_lower="$(echo "${TARGET_OS}" | awk '{print tolower($0)}')"
if [[ "${TARGET_OS_lower}" == "android" ]]; then
  # Use ${ANDROID_NDK:-} so the check itself does not trip `set -u`.
  if [[ -z "${ANDROID_NDK:-}" ]]; then
    echo "Set ANDROID_NDK environment variable to build for Android."
    exit 1
  fi
fi

# Number of processes for a parallel build
NPROC=8
if hash nproc &> /dev/null; then NPROC=$(nproc); fi
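# Portability note: nproc is typically absent on macOS; a possible fallback
# (an assumption, not part of this script) would be:
#   if hash sysctl &> /dev/null; then NPROC=$(sysctl -n hw.ncpu); fi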

EXECUTORCH_COMMON_CMAKE_ARGS=" \
  -DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \
  -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
  -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
  -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
  -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
  -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
  -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
  -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
  -DEXECUTORCH_BUILD_XNNPACK=ON \
  -DEXECUTORCH_DO_NOT_USE_CXX11_ABI=ON \
  -DEXECUTORCH_XNNPACK_SHARED_WORKSPACE=ON"

cmake_install_executorch_libraries() {
  cmake \
    ${EXECUTORCH_COMMON_CMAKE_ARGS} \
    -B${BUILD_DIR} .

  cmake --build ${BUILD_DIR} -j${NPROC} --target install --config ${BUILD_TYPE}
}
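
# The install step stages the ExecuTorCH core static libraries and headers
# under ${BUILD_DIR} (exact install-tree layout assumed here), which the llava
# runner configure step below picks up via the matching CMAKE_INSTALL_PREFIX.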

cmake_install_executorch_libraries_for_android() {
  cmake \
    -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
    -DANDROID_ABI=arm64-v8a \
    -DANDROID_PLATFORM=android-23 \
    ${EXECUTORCH_COMMON_CMAKE_ARGS} \
    -B${BUILD_DIR} .

  cmake --build ${BUILD_DIR} -j${NPROC} --target install --config ${BUILD_TYPE}
}


LLAVA_COMMON_CMAKE_ARGS=" \
  -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
  -DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \
  -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
  -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
  -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
  -DEXECUTORCH_BUILD_XNNPACK=ON"

cmake_build_llava_runner() {
  dir=examples/models/llava
  python_lib=$($PYTHON_EXECUTABLE -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')

  cmake \
    ${LLAVA_COMMON_CMAKE_ARGS} \
    -DCMAKE_PREFIX_PATH="$python_lib" \
    -B${BUILD_DIR}/${dir} \
    ${dir}

  cmake --build ${BUILD_DIR}/${dir} -j${NPROC} --config ${BUILD_TYPE}
}

cmake_build_llava_runner_for_android() {
  dir=examples/models/llava
  python_lib=$($PYTHON_EXECUTABLE -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')

  cmake \
    -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
    -DANDROID_ABI=arm64-v8a \
    -DANDROID_PLATFORM=android-23 \
    ${LLAVA_COMMON_CMAKE_ARGS} \
    -DCMAKE_PREFIX_PATH="$python_lib" \
    -DLLAVA_RUNNER_NO_TORCH_DUMMY_IMAGE=ON \
    -B${BUILD_DIR}/${dir} \
    ${dir}

  cmake --build ${BUILD_DIR}/${dir} -j${NPROC} --config ${BUILD_TYPE}
}
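
# Sanity check (illustrative; exact `file` output varies by toolchain):
#   file ${BUILD_DIR}/examples/models/llava/llava_main
#   # an aarch64 ELF executable indicates the cross-compile took effect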

# only export the one without custom op for now since it's
@@ -81,13 +130,24 @@ run_and_verify() {
echo "tokenizer.bin is missing."
exit 1
fi

RUNTIME_ARGS="--model_path=llava.pte \
--tokenizer_path=tokenizer.bin \
--image_path=image.pt \
--prompt=ASSISTANT: \
--temperature=0 \
--seq_len=650"

if [[ "${TARGET_OS_lower}" == "android" ]]; then
  echo "Transfer the relevant files to the phone via ADB and run llava_main with the following args:"
  echo "$ llava_main ${RUNTIME_ARGS}"
  exit 0;
fi
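
# Illustrative ADB transfer (device paths are assumptions, not part of this CI flow):
#   adb push llava.pte tokenizer.bin image.pt /data/local/tmp/llava/
#   adb push ${BUILD_DIR}/examples/models/llava/llava_main /data/local/tmp/llava/
#   adb shell "cd /data/local/tmp/llava && ./llava_main ${RUNTIME_ARGS}"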

${BUILD_DIR}/examples/models/llava/llava_main ${RUNTIME_ARGS} > result.txt

# verify result.txt
RESULT=$(cat result.txt)
# set the expected prefix to be the same as prompt because there's a bug in sdpa_with_kv_cache that causes <unk> tokens.
@@ -110,8 +170,20 @@
fi
}

cmake_install_executorch_libraries
cmake_build_llava_runner
# Step1. Build stuff
if [[ "${TARGET_OS_lower}" == "android" ]]; then
cmake_install_executorch_libraries_for_android
cmake_build_llava_runner_for_android
elif [[ "${TARGET_OS_lower}" == "native" ]]; then
cmake_install_executorch_libraries
cmake_build_llava_runner
else
echo "Invalid TARGET_OS ($2): ${TARGET_OS}"
fi

# Step2. Generate the PTE
export_llava

# Step3. Run
prepare_image_tensor
run_and_verify
18 changes: 16 additions & 2 deletions examples/models/llava/CMakeLists.txt
@@ -21,6 +21,9 @@ project(llava)
# Duplicating options as root CMakeLists.txt
option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED "Build the optimized kernels" OFF)

# This is a temporary hack to get around the Torch dependency so we can test this on Android
option(LLAVA_RUNNER_NO_TORCH_DUMMY_IMAGE "Hack option to feed a dummy image and remove the torch.load dep" OFF)

include(CMakeDependentOption)
#
# pthreadpool: build pthreadpool library. Disable on unsupported platforms
@@ -70,7 +73,14 @@ set(_common_include_directories ${EXECUTORCH_ROOT}/..)
set(gflags_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../../third-party/gflags)
find_package(gflags REQUIRED)

# Avoid the Torch dependency that comes from torch.load()-ing the image.
# This is a temporary hack.
if(LLAVA_RUNNER_NO_TORCH_DUMMY_IMAGE)
  add_definitions(-DLLAVA_NO_TORCH_DUMMY_IMAGE=1)
  message("Building the runner without Torch, feeding a dummy image!")
else()
  find_package(Torch CONFIG REQUIRED)
endif()
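
# For reference, the CI script above toggles this at configure time, e.g.
#   cmake -DLLAVA_RUNNER_NO_TORCH_DUMMY_IMAGE=ON <common args> -B<build-dir> examples/models/llava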
add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)

#
@@ -95,7 +105,11 @@ endif()
# llava_runner library
add_subdirectory(runner)

set(LINK_LIBS gflags)
if(NOT LLAVA_RUNNER_NO_TORCH_DUMMY_IMAGE)
  list(APPEND LINK_LIBS torch)
endif()
set(link_libraries ${LINK_LIBS})
set(_srcs main.cpp)

if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED)
15 changes: 15 additions & 0 deletions examples/models/llava/main.cpp
@@ -8,7 +8,11 @@

#include <executorch/examples/models/llava/runner/llava_runner.h>
#include <gflags/gflags.h>
#ifndef LLAVA_NO_TORCH_DUMMY_IMAGE
#include <torch/torch.h>
#else
#include <algorithm> // std::fill
#include <array> // std::array
#endif

#if defined(ET_USE_THREADPOOL)
#include <executorch/extension/threadpool/cpuinfo_utils.h>
@@ -80,6 +84,15 @@ int32_t main(int32_t argc, char** argv) {

// read image and resize the longest edge to 336
std::vector<uint8_t> image_data;

#ifdef LLAVA_NO_TORCH_DUMMY_IMAGE
// Work without torch by feeding a fixed dummy image (all zeros, i.e. black).
image_data.resize(3 * 240 * 336);
std::fill(image_data.begin(), image_data.end(), 0); // black
std::array<int32_t, 3> image_shape = {3, 240, 336}; // {C, H, W}
std::vector<torch::executor::Image> images = {
    {.data = image_data, .width = image_shape[2], .height = image_shape[1]}};
#else // LLAVA_NO_TORCH_DUMMY_IMAGE
// cv::Mat image = cv::imread(image_path, cv::IMREAD_COLOR);
// int longest_edge = std::max(image.rows, image.cols);
// float scale_factor = 336.0f / longest_edge;
@@ -102,6 +115,8 @@
{.data = image_data,
.width = static_cast<int32_t>(image_tensor.size(2)),
.height = static_cast<int32_t>(image_tensor.size(1))}};
#endif // LLAVA_NO_TORCH_DUMMY_IMAGE

// generate
runner.generate(std::move(images), prompt, seq_len);
return 0;