Skip to content

Commit 9a88484

Browse files
committed
[llava][19/N] Add multimodal runner base class and build file
ghstack-source-id: e768077 Pull Request resolved: #4665
1 parent 6e9dc43 commit 9a88484

File tree

6 files changed

+225
-0
lines changed

6 files changed

+225
-0
lines changed

build/cmake_deps.toml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,19 @@ deps = [
158158
"executorch_no_prim_ops",
159159
]
160160

161+
# Sources for the LLM runner extension (multimodal runner base + helpers).
# buck_targets: the Buck target whose sources are extracted for CMake.
# filters: only compile .cpp files from that target.
# deps: targets whose sources must be excluded (already built elsewhere).
[targets.extension_llm_runner]
buck_targets = [
  "//extension/llm/runner:runner_lib",
]
filters = [
  ".cpp$",
]
deps = [
  "executorch",
  "executorch_no_prim_ops",
  "extension_module",
  "extension_runner_util",
]
161174
# ---------------------------------- extension end ----------------------------------
162175
# ---------------------------------- binary start ----------------------------------
163176

extension/llm/runner/CMakeLists.txt

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
#
8+
# Build llm runner lib.
9+
#
10+
# ### Editing this file ###
11+
#
12+
# This file should be formatted with
13+
# ~~~
14+
# cmake-format -i CMakeLists.txt
15+
# ~~~
16+
# It should also be cmake-lint clean.
17+
#
18+
19+
# Locate the repo root when this directory is built standalone.
if(NOT EXECUTORCH_ROOT)
  set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../..)
endif()

include(${EXECUTORCH_ROOT}/build/Utils.cmake)
include(${EXECUTORCH_ROOT}/build/Codegen.cmake)

#
# The `_<target>_srcs` lists are defined by including ${EXECUTORCH_SRCS_FILE}.
#
set(EXECUTORCH_SRCS_FILE
    "${CMAKE_CURRENT_BINARY_DIR}/../../../executorch_srcs.cmake"
)

extract_sources(${EXECUTORCH_SRCS_FILE})

include(${EXECUTORCH_SRCS_FILE})

# Source paths in the generated list are repo-relative; make them absolute.
list(TRANSFORM _extension_llm_runner__srcs PREPEND "${EXECUTORCH_ROOT}/")

# NOTE(review): this mutates the include dirs of the extension_module target
# defined elsewhere -- confirm this belongs here rather than in module's own
# CMakeLists.
target_include_directories(
  extension_module INTERFACE ${_common_include_directories}
)

# Build the llm runner library.
add_library(extension_llm_runner STATIC ${_extension_llm_runner__srcs})

set(runner_deps executorch extension_module extension_data_loader)

target_link_libraries(extension_llm_runner PUBLIC ${runner_deps})

target_include_directories(
  extension_llm_runner INTERFACE ${_common_include_directories}
                                 ${EXECUTORCH_ROOT}
)

extension/llm/runner/image.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// Plain-data representation of an image input for a multimodal LLM runner.
2+
3+
#pragma once
4+
#include <cstdint>
5+
#include <vector>
6+
7+
namespace torch::executor {
8+
9+
// An image input to a multimodal LLM, stored as raw 8-bit pixel values.
struct Image {
  // Raw pixel data. Assuming NCHW format per the original author's note --
  // expected size is channels * height * width. TODO(review): confirm layout
  // with callers.
  std::vector<uint8_t> data;
  // Dimensions in pixels; zero-initialized so a default-constructed Image has
  // a well-defined empty state instead of indeterminate values.
  int32_t width = 0;
  int32_t height = 0;
  int32_t channels = 0;
};
16+
17+
} // namespace torch::executor
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
// Given an image tensor, prefill the KV cache of a multimodal LLM.
10+
11+
#pragma once
12+
13+
#include <executorch/extension/llm/runner/image.h>
14+
#include <executorch/extension/module/module.h>
15+
16+
namespace torch::executor {
17+
18+
// Assuming kv cache and parallel prefill are enabled.
19+
class ImagePrefiller {
20+
public:
21+
ImagePrefiller(Module* module) : module_(module){};
22+
/**
23+
* Prefill an LLM Module with the given image input.
24+
* @param image The image input to the multimodal LLM.
25+
* @param start_pos The starting position in KV cache of the input in the LLM
26+
* @return The next token of the LLM Module after prefill.
27+
*/
28+
virtual Result<exec_aten::Tensor> prefill(
29+
Image& image,
30+
int64_t start_pos = 0) = 0;
31+
32+
protected:
33+
Module* module_;
34+
};
35+
36+
} // namespace torch::executor
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
// A simple multimodal LLM runner that includes preprocessing and post
10+
// processing logic. The module takes in a string as input and emits a string as
11+
// output.
12+
13+
#pragma once
14+
15+
#include <cstdint>
#include <functional>
#include <memory>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>

#include <executorch/extension/llm/runner/image.h>
#include <executorch/extension/llm/runner/image_prefiller.h>
#include <executorch/extension/llm/runner/stats.h>
#include <executorch/extension/llm/runner/text_decoder_runner.h>
#include <executorch/extension/llm/runner/text_prefiller.h>
#include <executorch/extension/llm/runner/text_token_generator.h>
#include <executorch/extension/llm/sampler/sampler.h>
#include <executorch/extension/llm/tokenizer/tokenizer.h>
#include <executorch/extension/module/module.h>
#include <executorch/extension/runner_util/managed_tensor.h>
32+
33+
namespace torch::executor {
34+
using Stats = ::executorch::llm::Stats;
35+
36+
class MultimodalRunner {
37+
public:
38+
explicit MultimodalRunner(
39+
const std::string& model_path,
40+
const std::string& tokenizer_path,
41+
const float temperature = 0.8f)
42+
: temperature_(temperature),
43+
module_(std::make_unique<Module>(model_path, Module::LoadMode::File)),
44+
tokenizer_path_(tokenizer_path) {
45+
ET_LOG(
46+
Info,
47+
"Creating Multimodal LLM runner: model_path=%s, tokenizer_path=%s",
48+
model_path.c_str(),
49+
tokenizer_path.c_str());
50+
};
51+
52+
virtual bool is_loaded() = 0;
53+
virtual Error load() = 0;
54+
virtual Error generate(
55+
std::vector<Image>& images,
56+
const std::string& prompt,
57+
int32_t seq_len = 1024,
58+
std::function<void(const std::string&)> token_callback = {},
59+
std::function<void(const Stats&)> stats_callback = {}) = 0;
60+
61+
inline void stop() {
62+
text_token_generator_->stop();
63+
}
64+
65+
protected:
66+
// metadata
67+
int32_t vocab_size_;
68+
int32_t bos_id_;
69+
int32_t eos_id_;
70+
int32_t n_bos_;
71+
int32_t n_eos_;
72+
int32_t max_seq_len_;
73+
float temperature_;
74+
75+
// model
76+
std::unordered_set<std::string> model_methods_;
77+
std::unique_ptr<Module> module_;
78+
std::unique_ptr<TextDecoderRunner> text_decoder_runner_;
79+
std::unique_ptr<TextPrefiller> text_prefiller_;
80+
std::unique_ptr<ImagePrefiller> image_prefiller_;
81+
std::unique_ptr<TextTokenGenerator> text_token_generator_;
82+
std::string tokenizer_path_;
83+
std::unique_ptr<Tokenizer> tokenizer_;
84+
85+
// stats
86+
Stats stats_;
87+
};
88+
89+
} // namespace torch::executor

extension/llm/runner/targets.bzl

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ def define_common_targets():
4444
"//executorch/extension/runner_util:managed_tensor" + aten_suffix,
4545
],
4646
)
47+
4748
runtime.cxx_library(
4849
name = "text_token_generator" + aten_suffix,
4950
exported_headers = ["text_token_generator.h"],
@@ -57,3 +58,20 @@ def define_common_targets():
5758
"//executorch/extension/runner_util:managed_tensor" + aten_suffix,
5859
],
5960
)
61+
62+
# Header-only library bundling the multimodal runner base class and its
# supporting headers; re-exports the text runner components it builds on.
runtime.cxx_library(
    name = "runner_lib" + aten_suffix,
    exported_headers = [
        "image.h",
        "image_prefiller.h",
        "multimodal_runner.h",
    ],
    visibility = [
        "@EXECUTORCH_CLIENTS",
    ],
    exported_deps = [
        ":text_decoder_runner" + aten_suffix,
        ":text_prefiller" + aten_suffix,
        ":text_token_generator" + aten_suffix,
    ],
)

0 commit comments

Comments
 (0)