
Commit 45e9f6b

[llava][21/N] Add llava runner test binary and build script (#4667)
* [llava][18/N] Move token generation loop to a class

  As titled. This PR moves the token generation loop in the llama2 runner into a new class so it can be reused.

* [llava][19/N] Add multimodal runner base class and build file

* [llava][20/N] Add llava runner using building blocks in e/llm/runner

* [llava][21/N] Add llava runner test binary and build script

Add a `main.cpp` and a CMakeLists.txt for the llava runner. This runner takes in an image in the form of a `.pt` file (a serialized PyTorch module) along with a text prompt, and generates text tokens in a way similar to the llama runner.

Run `build.sh` to build the runner.

To serialize the image into a `.pt` file, run the following script:

```python
import torch
from torch import nn

copy = torch.tensor(resized)  # `resized`: the preprocessed image tensor, prepared beforehand
m = nn.Module()
par = nn.Parameter(copy, requires_grad=False)
m.register_parameter("0", par)
tensors = torch.jit.script(m)
tensors.save("image.pt")
```

To run the runner, use the following command:

```
cmake-out/examples/models/llava/llava_main \
    --tokenizer_path tokenizer.bin \
    --model_path llava_kv_768.pte \
    --prompt "\nWhat are the things I should be cautious about when I visit here?" \
    --image_path image.pt \
    --temperature 0
```

Differential Revision: [D61146432](https://www.internalfb.com/diff/D61146432)
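As a quick sanity check on the serialization recipe above, the saved module can be loaded back and the tensor inspected (a minimal sketch: `torch.jit.load` restores the scripted module, and the image tensor sits under the parameter name `"0"` registered above):

```python
import torch

# Load the scripted module saved as image.pt and pull the image tensor
# back out via its registered parameter name "0".
m = torch.jit.load("image.pt")
image = dict(m.named_parameters())["0"]
print(image.shape, image.dtype)  # should match the original resized tensor
```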
1 parent 622de2d commit 45e9f6b

File tree

13 files changed: +670 -41 lines

.ci/scripts/test_llava.sh

Lines changed: 98 additions & 0 deletions
```bash
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -exu
# shellcheck source=/dev/null

if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
  PYTHON_EXECUTABLE=python3
fi

cmake_install_executorch_libraries() {
  cmake \
    -DCMAKE_INSTALL_PREFIX=cmake-out \
    -DCMAKE_BUILD_TYPE=Debug \
    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
    -DEXECUTORCH_BUILD_XNNPACK=ON \
    -DEXECUTORCH_DO_NOT_USE_CXX11_ABI=ON \
    -Bcmake-out .

  cmake --build cmake-out -j9 --target install --config Debug
}

cmake_build_llava_runner() {
  dir=examples/models/llava
  python_lib=$($PYTHON_EXECUTABLE -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')

  cmake \
    -DCMAKE_INSTALL_PREFIX=cmake-out \
    -DCMAKE_BUILD_TYPE=Debug \
    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
    -DEXECUTORCH_BUILD_XNNPACK=ON \
    -DCMAKE_PREFIX_PATH="$python_lib" \
    -Bcmake-out/${dir} \
    ${dir}

  cmake --build cmake-out/${dir} -j9 --config Debug
}

# only export the one without custom op for now since it's
export_llava() {
  echo "Starting to export Llava. This will take about 6 mins"
  $PYTHON_EXECUTABLE -m executorch.examples.models.llava.export_llava --pte-name llava.pte --with-artifacts
}

run_and_verify() {
  NOW=$(date +"%H:%M:%S")
  echo "Starting to run llava runner at ${NOW}"
  if [[ ! -f "llava.pte" ]]; then
    echo "Export failed. Abort"
    exit 1
  fi
  if [[ ! -f "image.pt" ]]; then
    echo "image.pt is missing."
    exit 1
  fi
  if [[ ! -f "tokenizer.bin" ]]; then
    echo "tokenizer.bin is missing."
    exit 1
  fi
  RUNTIME_ARGS="--model_path=llava.pte \
    --tokenizer_path=tokenizer.bin \
    --image_path=image.pt \
    --prompt=ASSISTANT: \
    --temperature=0 \
    --seq_len=650"
  cmake-out/examples/models/llava/llava_main ${RUNTIME_ARGS} > result.txt
  # verify result.txt
  RESULT=$(cat result.txt)
  # set the expected prefix to be the same as prompt because there's a bug in sdpa_with_kv_cache that causes <unk> tokens.
  EXPECTED_PREFIX="ASSISTANT:"
  if [[ "${RESULT}" == *"${EXPECTED_PREFIX}"* ]]; then
    echo "Expected result prefix: ${EXPECTED_PREFIX}"
    echo "Actual result: ${RESULT}"
    echo "Success"
    exit 0
  else
    echo "Expected result prefix: ${EXPECTED_PREFIX}"
    echo "Actual result: ${RESULT}"
    echo "Failure; results not the same"
    exit 1
  fi
}

cmake_install_executorch_libraries
cmake_build_llava_runner
export_llava
run_and_verify
```
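Note that `run_and_verify` assumes the export step (run with `--with-artifacts`) has left `image.pt` and `tokenizer.bin` in the working directory. If an `image.pt` ever needs to be produced by hand, the recipe from the commit message amounts to something like the following sketch (the `photo.jpg` filename and the 336x336 resize are illustrative assumptions; the real preprocessing must match what the exported model expects):

```python
import torch
from torch import nn
from PIL import Image
from torchvision.transforms.functional import pil_to_tensor

# Illustrative preprocessing only -- the exact resize/normalization must
# match what the exported llava .pte expects.
img = Image.open("photo.jpg").resize((336, 336))
resized = pil_to_tensor(img)

# Same recipe as the commit message: register the tensor as parameter "0",
# script the wrapper module, and save it where the runner expects it.
m = nn.Module()
m.register_parameter("0", nn.Parameter(resized, requires_grad=False))
torch.jit.script(m).save("image.pt")
```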

.github/workflows/pull.yml

Lines changed: 4 additions & 1 deletion
```diff
@@ -187,7 +187,7 @@ jobs:
       # Test selective build
       PYTHON_EXECUTABLE=python bash examples/selective_build/test_selective_build.sh "${BUILD_TOOL}"
 
-  test-export-llava-linux:
+  test-llava-runner-linux:
     name: test-export-llava-linux
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     strategy:
@@ -215,6 +215,9 @@ jobs:
       # run python unittest
       python -m unittest examples.models.llava.test.test_llava
 
+      # run e2e (export, tokenizer and runner)
+      PYTHON_EXECUTABLE=python bash .ci/scripts/test_llava.sh
+
   test-quantized-aot-lib-linux:
     name: test-quantized-aot-lib-linux
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
```

CMakeLists.txt

Lines changed: 6 additions & 0 deletions
```diff
@@ -130,6 +130,12 @@ if(EXECUTORCH_ENABLE_EVENT_TRACER)
   add_definitions(-DET_EVENT_TRACER_ENABLED)
 endif()
 
+option(EXECUTORCH_DO_NOT_USE_CXX11_ABI "Define _GLIBCXX_USE_CXX11_ABI=0 if ON"
+       OFF
+)
+if(EXECUTORCH_DO_NOT_USE_CXX11_ABI)
+  add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
+endif()
 # -ffunction-sections -fdata-sections: breaks function and data into sections so
 # they can be properly gc'd. -s: strip symbol. -fno-exceptions -fno-rtti:
 # disables exceptions and runtime type.
```

examples/models/llava/CMakeLists.txt

Lines changed: 218 additions & 0 deletions
```cmake
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

#
# Simple CMake build system for llava runner.
#
# ### Editing this file ###
#
# This file should be formatted with
# ~~~
# cmake-format -i CMakeLists.txt
# ~~~
# It should also be cmake-lint clean.
#
cmake_minimum_required(VERSION 3.19)
project(llava)

# Duplicating options as root CMakeLists.txt
option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED "Build the optimized kernels" OFF)

include(CMakeDependentOption)
#
# pthreadpool: build pthreadpool library. Disable on unsupported platforms
#
cmake_dependent_option(
  EXECUTORCH_BUILD_PTHREADPOOL "Build pthreadpool library." ON
  "NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF
)
#
# cpuinfo: build cpuinfo library. Disable on unsupported platforms
#
cmake_dependent_option(
  EXECUTORCH_BUILD_CPUINFO "Build cpuinfo library." ON
  "NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF
)

if(NOT PYTHON_EXECUTABLE)
  set(PYTHON_EXECUTABLE python3)
endif()

set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../..)

include(${EXECUTORCH_ROOT}/build/Utils.cmake)

if(NOT PYTHON_EXECUTABLE)
  resolve_python_executable()
endif()

if(NOT CMAKE_CXX_STANDARD)
  set(CMAKE_CXX_STANDARD 17)
  # Can't set to 11 due to executor_runner.cpp make_unique
endif()

if(CMAKE_TOOLCHAIN_FILE MATCHES ".*(iOS|ios\.toolchain)\.cmake$")
  set(CMAKE_TOOLCHAIN_IOS ON)
else()
  set(CMAKE_TOOLCHAIN_IOS OFF)
endif()

set(_common_compile_options -Wno-deprecated-declarations -fPIC)

# Let files say "include <executorch/path/to/header.h>".
set(_common_include_directories ${EXECUTORCH_ROOT}/..)

# For some reason android build is not able to find where gflags is and hence
# cannot find corresponding .cmake file
set(gflags_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../../third-party/gflags)
find_package(gflags REQUIRED)

find_package(Torch CONFIG REQUIRED)
add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)

#
# llava_main: test binary to run llava, with tokenizer and sampler integrated
#

# find `executorch` libraries. Same as for gflags
set(executorch_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../../lib/cmake/ExecuTorch)
find_package(executorch CONFIG REQUIRED)
if(CMAKE_TOOLCHAIN_IOS OR ANDROID)
  target_link_options_shared_lib(executorch)
endif()

# custom ops library
if(EXECUTORCH_BUILD_KERNELS_CUSTOM)
  add_subdirectory(
    ${EXECUTORCH_ROOT}/extension/llm/custom_ops
    ${CMAKE_CURRENT_BINARY_DIR}/../../../extension/llm/custom_ops
  )
endif()

# llava_runner library
add_subdirectory(runner)

set(link_libraries gflags torch)
set(_srcs main.cpp)

if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED)
  list(
    APPEND
    link_libraries
    optimized_native_cpu_ops_lib
    optimized_kernels
    portable_kernels
    cpublas
    eigen_blas
  )
  target_link_options_shared_lib(optimized_native_cpu_ops_lib)
else()
  list(APPEND link_libraries portable_ops_lib portable_kernels)
  target_link_options_shared_lib(portable_ops_lib)
endif()

# quantized_ops_lib: Register quantized op kernels into the runtime
target_link_options_shared_lib(quantized_ops_lib)
list(APPEND link_libraries quantized_kernels quantized_ops_lib)

if(EXECUTORCH_BUILD_KERNELS_CUSTOM)
  target_link_options_shared_lib(custom_ops)
  list(APPEND link_libraries custom_ops)
endif()

set(XNNPACK_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack)
# Extra compile option and include dir for pthreadpool
if(EXECUTORCH_BUILD_PTHREADPOOL)
  list(APPEND _common_compile_options -DET_USE_THREADPOOL)
  list(APPEND link_libraries pthreadpool)
  # These 2 source files are included in xnnpack_backend
  if(NOT TARGET xnnpack_backend)
    list(APPEND _srcs ${XNNPACK_ROOT}/threadpool/threadpool.cpp
         ${XNNPACK_ROOT}/threadpool/threadpool_guard.cpp
    )
  endif()
  list(APPEND _common_include_directories
       ${XNNPACK_ROOT}/third-party/pthreadpool/include
  )
endif()

# Extra sources for cpuinfo
if(EXECUTORCH_BUILD_CPUINFO)
  list(APPEND link_libraries cpuinfo)
  list(APPEND _srcs ${XNNPACK_ROOT}/threadpool/cpuinfo_utils.cpp)
  list(APPEND _common_include_directories
       ${XNNPACK_ROOT}/third-party/cpuinfo/include
  )
endif()

# XNNPACK
if(TARGET xnnpack_backend)
  set(xnnpack_backend_libs xnnpack_backend XNNPACK)
  list(APPEND link_libraries ${xnnpack_backend_libs})
  target_link_options_shared_lib(xnnpack_backend)
endif()

# Vulkan backend
if(TARGET vulkan_backend)
  list(APPEND link_libraries vulkan_backend)
  target_link_options_shared_lib(vulkan_backend)
endif()

# Qnn backend
if(TARGET qnn_executorch_backend)
  list(APPEND link_libraries qnn_executorch_backend)
  target_link_options_shared_lib(qnn_executorch_backend)
endif()

# MPS backend
if(TARGET mpsdelegate)
  list(
    APPEND
    link_libraries
    mpsdelegate
    "-framework Foundation"
    "-weak_framework MetalPerformanceShaders"
    "-weak_framework MetalPerformanceShadersGraph"
    "-weak_framework Metal"
  )
  target_link_options_shared_lib(mpsdelegate)
endif()

if(TARGET coremldelegate)
  find_library(SQLITE_LIBRARY sqlite3)
  list(
    APPEND
    link_libraries
    coremldelegate
    sqlite3
    "-framework Foundation"
    "-framework CoreML"
    "-framework Accelerate"
  )
  target_link_options_shared_lib(coremldelegate)
endif()

# This one is needed for cpuinfo where it uses android specific log lib
if(ANDROID)
  list(APPEND link_libraries log)
endif()

add_executable(llava_main ${_srcs})
if(CMAKE_BUILD_TYPE STREQUAL "Release")
  target_link_options(llava_main PRIVATE "LINKER:--gc-sections,-s")
endif()

target_include_directories(llava_main PUBLIC ${_common_include_directories})
target_link_libraries(llava_main PUBLIC llava_runner ${link_libraries})
target_compile_options(llava_main PUBLIC ${_common_compile_options})

if(APPLE)
  target_link_options_shared_lib(executorch)
endif()

# Print all summary
executorch_print_configuration_summary()
```
