
Commit c2a2b1a (1 parent: 8af6645)

[llava][21/N] Add llava runner test binary and build script

ghstack-source-id: a377c43
Pull Request resolved: #4667

File tree: 3 files changed (+361, −0)

examples/models/llava/CMakeLists.txt

Lines changed: 219 additions & 0 deletions
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

#
# Simple CMake build system for llava runner.
#
# ### Editing this file ###
#
# This file should be formatted with
# ~~~
# cmake-format -i CMakeLists.txt
# ~~~
# It should also be cmake-lint clean.
#
cmake_minimum_required(VERSION 3.19)
project(multimodal)

# Duplicate options from the root CMakeLists.txt
option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED "Build the optimized kernels" OFF)

include(CMakeDependentOption)
#
# pthreadpool: build pthreadpool library. Disable on unsupported platforms
#
cmake_dependent_option(
  EXECUTORCH_BUILD_PTHREADPOOL "Build pthreadpool library." ON
  "NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF
)
#
# cpuinfo: build cpuinfo library. Disable on unsupported platforms
#
cmake_dependent_option(
  EXECUTORCH_BUILD_CPUINFO "Build cpuinfo library." ON
  "NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF
)

if(NOT PYTHON_EXECUTABLE)
  set(PYTHON_EXECUTABLE python3)
endif()

set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../..)
set(TORCH_ROOT ${EXECUTORCH_ROOT}/third-party/pytorch)

include(${EXECUTORCH_ROOT}/build/Utils.cmake)

if(NOT PYTHON_EXECUTABLE)
  resolve_python_executable()
endif()

if(NOT CMAKE_CXX_STANDARD)
  set(CMAKE_CXX_STANDARD 17)
  # Can't set to 11 due to executor_runner.cpp make_unique
endif()

if(CMAKE_TOOLCHAIN_FILE MATCHES ".*(iOS|ios\.toolchain)\.cmake$")
  set(CMAKE_TOOLCHAIN_IOS ON)
else()
  set(CMAKE_TOOLCHAIN_IOS OFF)
endif()

set(_common_compile_options -Wno-deprecated-declarations -fPIC)

# Let files say "include <executorch/path/to/header.h>".
set(_common_include_directories ${EXECUTORCH_ROOT}/..)

# For some reason the Android build is not able to find where gflags is
# installed, and hence cannot find the corresponding .cmake file.
set(gflags_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../../third-party/gflags)
find_package(gflags REQUIRED)

find_package(Torch CONFIG REQUIRED)
add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)

#
# llava_main: test binary to run llava, with tokenizer and sampler integrated
#

# Find `executorch` libraries; same workaround as for gflags above.
set(executorch_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../../lib/cmake/ExecuTorch)
find_package(executorch CONFIG REQUIRED)
if(CMAKE_TOOLCHAIN_IOS OR ANDROID)
  target_link_options_shared_lib(executorch)
endif()

# custom ops library
if(EXECUTORCH_BUILD_KERNELS_CUSTOM)
  add_subdirectory(
    ${EXECUTORCH_ROOT}/extension/llm/custom_ops
    ${CMAKE_CURRENT_BINARY_DIR}/../../../extension/llm/custom_ops
  )
endif()

# llava_runner library
add_subdirectory(runner)

set(link_libraries gflags torch)
set(_srcs main.cpp)

if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED)
  list(
    APPEND
    link_libraries
    optimized_native_cpu_ops_lib
    optimized_kernels
    portable_kernels
    cpublas
    eigen_blas
  )
  target_link_options_shared_lib(optimized_native_cpu_ops_lib)
else()
  list(APPEND link_libraries portable_ops_lib portable_kernels)
  target_link_options_shared_lib(portable_ops_lib)
endif()

# quantized_ops_lib: Register quantized op kernels into the runtime
target_link_options_shared_lib(quantized_ops_lib)
list(APPEND link_libraries quantized_kernels quantized_ops_lib)

if(EXECUTORCH_BUILD_KERNELS_CUSTOM)
  target_link_options_shared_lib(custom_ops)
  list(APPEND link_libraries custom_ops)
endif()

set(XNNPACK_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack)
# Extra compile option and include dir for pthreadpool
if(EXECUTORCH_BUILD_PTHREADPOOL)
  list(APPEND _common_compile_options -DET_USE_THREADPOOL)
  list(APPEND link_libraries pthreadpool)
  # These 2 source files are included in xnnpack_backend
  if(NOT TARGET xnnpack_backend)
    list(APPEND _srcs ${XNNPACK_ROOT}/threadpool/threadpool.cpp
         ${XNNPACK_ROOT}/threadpool/threadpool_guard.cpp
    )
  endif()
  list(APPEND _common_include_directories
       ${XNNPACK_ROOT}/third-party/pthreadpool/include
  )
endif()

# Extra sources for cpuinfo
if(EXECUTORCH_BUILD_CPUINFO)
  list(APPEND link_libraries cpuinfo)
  list(APPEND _srcs ${XNNPACK_ROOT}/threadpool/cpuinfo_utils.cpp)
  list(APPEND _common_include_directories
       ${XNNPACK_ROOT}/third-party/cpuinfo/include
  )
endif()

# XNNPACK
if(TARGET xnnpack_backend)
  set(xnnpack_backend_libs xnnpack_backend XNNPACK)
  list(APPEND link_libraries ${xnnpack_backend_libs})
  target_link_options_shared_lib(xnnpack_backend)
endif()

# Vulkan backend
if(TARGET vulkan_backend)
  list(APPEND link_libraries vulkan_backend)
  target_link_options_shared_lib(vulkan_backend)
endif()

# Qnn backend
if(TARGET qnn_executorch_backend)
  list(APPEND link_libraries qnn_executorch_backend)
  target_link_options_shared_lib(qnn_executorch_backend)
endif()

# MPS backend
if(TARGET mpsdelegate)
  list(
    APPEND
    link_libraries
    mpsdelegate
    "-framework Foundation"
    "-weak_framework MetalPerformanceShaders"
    "-weak_framework MetalPerformanceShadersGraph"
    "-weak_framework Metal"
  )
  target_link_options_shared_lib(mpsdelegate)
endif()

# CoreML backend
if(TARGET coremldelegate)
  find_library(SQLITE_LIBRARY sqlite3)
  list(
    APPEND
    link_libraries
    coremldelegate
    sqlite3
    "-framework Foundation"
    "-framework CoreML"
    "-framework Accelerate"
  )
  target_link_options_shared_lib(coremldelegate)
endif()

# cpuinfo uses the Android-specific log library
if(ANDROID)
  list(APPEND link_libraries log)
endif()

add_executable(llava_main ${_srcs})
if(CMAKE_BUILD_TYPE STREQUAL "Release")
  target_link_options(llava_main PRIVATE "LINKER:--gc-sections,-s")
endif()

target_include_directories(llava_main PUBLIC ${_common_include_directories})
target_link_libraries(llava_main PUBLIC llava_runner ${link_libraries})
target_compile_options(llava_main PUBLIC ${_common_compile_options})

if(APPLE)
  target_link_options_shared_lib(executorch)
endif()

# Print the configuration summary
executorch_print_configuration_summary()
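As the header comment asks, this file is meant to stay cmake-format and cmake-lint clean. A quick local check might look like the following, assuming both tools come from the cmakelang pip package (an assumption; any install that provides them works):

pip install cmakelang          # provides cmake-format and cmake-lint
cmake-format -i CMakeLists.txt # reformat in place, per the header comment
cmake-lint CMakeLists.txt      # verify the file is lint-clean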

examples/models/llava/build.sh

Lines changed: 34 additions & 0 deletions
#!/bin/bash
# Echo each command as it runs.
set -o xtrace

# Configure and install the core ExecuTorch libraries into cmake-out.
cmake \
    -DCMAKE_INSTALL_PREFIX=cmake-out \
    -DCMAKE_BUILD_TYPE=Debug \
    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
    -DEXECUTORCH_BUILD_XNNPACK=ON \
    -DEXECUTORCH_DO_NOT_USE_CXX11_ABI=ON \
    -Bcmake-out .

cmake --build cmake-out -j9 --target install --config Debug

# Configure and build the llava example against the installed libraries.
dir=examples/models/llava
python_lib=$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')

cmake \
    -DCMAKE_INSTALL_PREFIX=cmake-out \
    -DCMAKE_BUILD_TYPE=Debug \
    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
    -DEXECUTORCH_BUILD_XNNPACK=ON \
    -DCMAKE_PREFIX_PATH="$python_lib" \
    -Bcmake-out/${dir} \
    ${dir}

cmake --build cmake-out/${dir} -j9 --config Debug
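After both builds finish, the llava_main binary lands in cmake-out/examples/models/llava. A minimal invocation sketch using the flags defined in main.cpp below; llava.pte and tokenizer.bin are just the flag defaults, and image.pt is a hypothetical placeholder for a serialized image tensor produced separately:

# Hypothetical run: llava.pte, tokenizer.bin, and image.pt must be created
# beforehand (model export, tokenizer conversion, image serialization).
cmake-out/examples/models/llava/llava_main \
    --model_path=llava.pte \
    --tokenizer_path=tokenizer.bin \
    --image_path=image.pt \
    --prompt="What is happening in this image?" \
    --seq_len=1024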

examples/models/llava/main.cpp

Lines changed: 108 additions & 0 deletions
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <executorch/examples/models/llava/runner/llava_runner.h>
#include <gflags/gflags.h>
#include <torch/torch.h>

#include <cinttypes>

#if defined(ET_USE_THREADPOOL)
#include <executorch/backends/xnnpack/threadpool/cpuinfo_utils.h>
#include <executorch/backends/xnnpack/threadpool/threadpool.h>
#endif

DEFINE_string(
    model_path,
    "llava.pte",
    "Model serialized in flatbuffer format.");

DEFINE_string(tokenizer_path, "tokenizer.bin", "Path to the tokenizer file.");

DEFINE_string(prompt, "The answer to the ultimate question is", "Prompt.");

DEFINE_string(
    image_path,
    "",
    "The path to a .pt file: a serialized torch tensor for an image, with the longest edge resized to 336.");

DEFINE_double(
    temperature,
    0.8f,
    "Temperature; default is 0.8f. 0 = greedy argmax sampling (deterministic). Lower temperature = more deterministic.");

DEFINE_int32(
    seq_len,
    1024,
    "Total number of tokens to generate (prompt + output). Defaults to max_seq_len. If the number of input tokens + seq_len > max_seq_len, the output will be truncated to max_seq_len tokens.");

DEFINE_int32(
    cpu_threads,
    -1,
    "Number of CPU threads for inference. Defaults to -1, which means we use a heuristic to derive the number of performant cores for the specific device.");

int main(int argc, char** argv) {
  gflags::ParseCommandLineFlags(&argc, &argv, true);

  // Read the runtime configuration from the parsed flags.
  const char* model_path = FLAGS_model_path.c_str();
  const char* tokenizer_path = FLAGS_tokenizer_path.c_str();
  const char* prompt = FLAGS_prompt.c_str();
  std::string image_path = FLAGS_image_path;
  double temperature = FLAGS_temperature;
  int32_t seq_len = FLAGS_seq_len;
  int32_t cpu_threads = FLAGS_cpu_threads;

#if defined(ET_USE_THREADPOOL)
  // -1 means "derive the number of performant cores from cpuinfo".
  uint32_t num_performant_cores = cpu_threads == -1
      ? torch::executorch::cpuinfo::get_num_performant_cores()
      : static_cast<uint32_t>(cpu_threads);
  ET_LOG(
      Info,
      "Resetting threadpool with num threads = %" PRIu32,
      num_performant_cores);
  if (num_performant_cores > 0) {
    torch::executorch::threadpool::get_threadpool()->_unsafe_reset_threadpool(
        num_performant_cores);
  }
#endif
  // Create the llava runner.
  torch::executor::LlavaRunner runner(model_path, tokenizer_path, temperature);

  // Read the image. The longest edge is expected to already be resized to
  // 336; the commented-out OpenCV code below shows how the resize could be
  // done at runtime instead.
  std::vector<uint8_t> image_data;
  // cv::Mat image = cv::imread(image_path, cv::IMREAD_COLOR);
  // int longest_edge = std::max(image.rows, image.cols);
  // float scale_factor = 336.0f / longest_edge;
  // cv::Size new_size(image.cols * scale_factor, image.rows * scale_factor);
  // cv::Mat resized_image;
  // cv::resize(image, resized_image, new_size);
  // image_data.assign(resized_image.datastart, resized_image.dataend);
  torch::Tensor image_tensor;
  torch::load(image_tensor, image_path); // CHW
  ET_LOG(
      Info,
      "image size(0): %" PRId64 ", size(1): %" PRId64 ", size(2): %" PRId64,
      image_tensor.size(0),
      image_tensor.size(1),
      image_tensor.size(2));
  image_data.assign(
      image_tensor.data_ptr<uint8_t>(),
      image_tensor.data_ptr<uint8_t>() + image_tensor.numel());
  std::vector<torch::executor::Image> images = {
      {.data = image_data,
       .width = static_cast<int32_t>(image_tensor.size(2)),
       .height = static_cast<int32_t>(image_tensor.size(1))}};

  // Generate.
  runner.generate(images, prompt, seq_len);
  return 0;
}
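The flags above also support a deterministic run: temperature 0 selects greedy argmax sampling, and cpu_threads overrides the performant-core heuristic guarded by ET_USE_THREADPOOL. A hedged variant of the earlier invocation (file names again placeholders):

# temperature=0 -> greedy decoding; cpu_threads=4 pins the threadpool size
# instead of letting cpuinfo pick it.
cmake-out/examples/models/llava/llava_main \
    --model_path=llava.pte \
    --tokenizer_path=tokenizer.bin \
    --image_path=image.pt \
    --temperature=0 \
    --cpu_threads=4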
