Skip to content

Commit 6efc222

Browse files
authored
[llava][19/N] Add multimodal runner base class and build file
Differential Revision: D61249552 Pull Request resolved: #4665
1 parent 7b27f9b commit 6efc222

File tree

6 files changed

+237
-0
lines changed

6 files changed

+237
-0
lines changed

build/cmake_deps.toml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,19 @@ deps = [
158158
"executorch_no_prim_ops",
159159
]
160160

161+
[targets.extension_llm_runner]
162+
buck_targets = [
163+
"//extension/llm/runner:runner_lib",
164+
]
165+
filters = [
166+
".cpp$",
167+
]
168+
deps = [
169+
"executorch",
170+
"executorch_no_prim_ops",
171+
"extension_module",
172+
"extension_runner_util",
173+
]
161174
# ---------------------------------- extension end ----------------------------------
162175
# ---------------------------------- binary start ----------------------------------
163176

extension/llm/runner/CMakeLists.txt

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
#
8+
# Build llm runner lib.
9+
#
10+
# ### Editing this file ###
11+
#
12+
# This file should be formatted with
13+
# ~~~
14+
# cmake-format -i CMakeLists.txt
15+
# ~~~
16+
# It should also be cmake-lint clean.
17+
#
18+
19+
# Fall back to the directory three levels above this file when the including
# project has not already set EXECUTORCH_ROOT.
if(NOT EXECUTORCH_ROOT)
20+
set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../..)
21+
endif()
22+
23+
# NOTE(review): extract_sources() used below is presumably provided by one of
# these helper files -- confirm.
include(${EXECUTORCH_ROOT}/build/Utils.cmake)
24+
include(${EXECUTORCH_ROOT}/build/Codegen.cmake)
25+
26+
#
27+
# The `_<target>_srcs` lists are defined by including ${EXECUTORCH_SRCS_FILE}.
28+
#
29+
set(EXECUTORCH_SRCS_FILE
30+
"${CMAKE_CURRENT_BINARY_DIR}/../../../executorch_srcs.cmake"
31+
)
32+
33+
extract_sources(${EXECUTORCH_SRCS_FILE})
34+
35+
include(${EXECUTORCH_SRCS_FILE})
36+
37+
# build llm runner library
38+
# Every entry of the source list gets "${EXECUTORCH_ROOT}/" prepended so the
# paths resolve from the repository root.
list(TRANSFORM _extension_llm_runner__srcs PREPEND "${EXECUTORCH_ROOT}/")
39+
40+
# NOTE(review): this adds interface include directories to extension_module,
# a target that is not created in this file -- confirm it is intentional and
# that the target already exists when this file is processed.
target_include_directories(
41+
extension_module INTERFACE ${_common_include_directories}
42+
)
43+
44+
add_library(extension_llm_runner STATIC ${_extension_llm_runner__srcs})
45+
46+
# NOTE(review): build/cmake_deps.toml lists extension_runner_util as a
# dependency of this target, while extension_data_loader is linked here --
# confirm the two lists are meant to differ.
set(runner_deps executorch extension_module extension_data_loader)
47+
48+
# PUBLIC: consumers of extension_llm_runner also link against runner_deps.
target_link_libraries(extension_llm_runner PUBLIC ${runner_deps})
49+
50+
# INTERFACE: these include paths are exposed to targets that link
# extension_llm_runner, not used for compiling the library itself.
target_include_directories(
51+
extension_llm_runner INTERFACE ${_common_include_directories}
52+
${EXECUTORCH_ROOT}
53+
)

extension/llm/runner/image.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
// A simple image struct.
10+
11+
#pragma once
12+
#include <cstdint>
13+
// patternlint-disable-next-line executorch-cpp-nostdinc
14+
#include <vector>
15+
16+
namespace torch::executor {
17+
18+
/**
 * A simple image container: a byte buffer plus its dimensions.
 *
 * Every field has a default (empty buffer, zero dimensions), so a
 * default-initialized Image is a well-defined empty image instead of carrying
 * indeterminate dimension values. The struct remains an aggregate, so
 * brace-initialization in field order keeps working.
 */
struct Image {
  // Raw 8-bit image data. Assuming NCHW format.
  std::vector<uint8_t> data;
  // Image dimensions; presumably expressed in pixels -- TODO confirm.
  int32_t width = 0;
  int32_t height = 0;
  // Number of channels stored in `data`.
  int32_t channels = 0;
};
25+
26+
} // namespace torch::executor
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
// Given an image tensor, prefill the KV cache of a multimodal LLM.
10+
11+
#pragma once
12+
13+
#include <executorch/extension/llm/runner/image.h>
14+
#include <executorch/extension/module/module.h>
15+
16+
namespace torch::executor {
17+
18+
// Assuming kv cache and parallel prefill are enabled.
19+
class ImagePrefiller {
20+
public:
21+
explicit ImagePrefiller(Module* module) : module_(module) {}
22+
/**
23+
* Prefill an LLM Module with the given image input.
24+
* @param image The image input to the multimodal LLM.
25+
* @param start_pos The starting position in KV cache of the input in the LLM
26+
* @return The next token of the LLM Module after prefill.
27+
*/
28+
virtual Result<exec_aten::Tensor> prefill(
29+
Image& image,
30+
int64_t start_pos = 0) = 0;
31+
32+
protected:
33+
Module* module_;
34+
};
35+
36+
} // namespace torch::executor
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
// A simple multimodal LLM runner that includes preprocessing and post
10+
// processing logic. The runner takes images and a text prompt as input and
11+
// reports the generated text through a token callback.
12+
13+
#pragma once
14+
15+
#include <cstdint>
// patternlint-disable-next-line executorch-cpp-nostdinc
#include <functional>
#include <memory>
// patternlint-disable-next-line executorch-cpp-nostdinc
#include <string>
#include <type_traits>
// patternlint-disable-next-line executorch-cpp-nostdinc
#include <unordered_map>
// patternlint-disable-next-line executorch-cpp-nostdinc
#include <unordered_set>
// patternlint-disable-next-line executorch-cpp-nostdinc
#include <vector>
24+
25+
#include <executorch/extension/llm/runner/image.h>
26+
#include <executorch/extension/llm/runner/image_prefiller.h>
27+
#include <executorch/extension/llm/runner/stats.h>
28+
#include <executorch/extension/llm/runner/text_decoder_runner.h>
29+
#include <executorch/extension/llm/runner/text_prefiller.h>
30+
#include <executorch/extension/llm/runner/text_token_generator.h>
31+
#include <executorch/extension/llm/sampler/sampler.h>
32+
#include <executorch/extension/llm/tokenizer/tokenizer.h>
33+
#include <executorch/extension/module/module.h>
34+
#include <executorch/extension/runner_util/managed_tensor.h>
35+
36+
namespace torch::executor {
37+
using Stats = ::executorch::llm::Stats;
38+
39+
class MultimodalRunner {
40+
public:
41+
explicit MultimodalRunner(
42+
const std::string& model_path,
43+
const std::string& tokenizer_path,
44+
const float temperature = 0.8f)
45+
: temperature_(temperature),
46+
module_(std::make_unique<Module>(model_path, Module::LoadMode::File)),
47+
tokenizer_path_(tokenizer_path) {
48+
ET_LOG(
49+
Info,
50+
"Creating Multimodal LLM runner: model_path=%s, tokenizer_path=%s",
51+
model_path.c_str(),
52+
tokenizer_path.c_str());
53+
}
54+
55+
virtual bool is_loaded() = 0;
56+
virtual Error load() = 0;
57+
virtual Error generate(
58+
std::vector<Image>& images,
59+
const std::string& prompt,
60+
int32_t seq_len = 1024,
61+
std::function<void(const std::string&)> token_callback = {},
62+
std::function<void(const Stats&)> stats_callback = {}) = 0;
63+
64+
inline void stop() {
65+
text_token_generator_->stop();
66+
}
67+
68+
protected:
69+
// metadata
70+
int32_t vocab_size_;
71+
int32_t bos_id_;
72+
int32_t eos_id_;
73+
int32_t n_bos_;
74+
int32_t n_eos_;
75+
int32_t max_seq_len_;
76+
float temperature_;
77+
78+
// model
79+
std::unordered_set<std::string> model_methods_;
80+
std::unique_ptr<Module> module_;
81+
std::unique_ptr<TextDecoderRunner> text_decoder_runner_;
82+
std::unique_ptr<TextPrefiller> text_prefiller_;
83+
std::unique_ptr<ImagePrefiller> image_prefiller_;
84+
std::unique_ptr<TextTokenGenerator> text_token_generator_;
85+
std::string tokenizer_path_;
86+
std::unique_ptr<Tokenizer> tokenizer_;
87+
88+
// stats
89+
Stats stats_;
90+
};
91+
92+
} // namespace torch::executor

extension/llm/runner/targets.bzl

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,3 +69,20 @@ def define_common_targets():
6969
"//executorch/extension/module:module" + aten_suffix,
7070
],
7171
)
72+
73+
runtime.cxx_library(
74+
name = "runner_lib" + aten_suffix,
75+
exported_headers = [
76+
"image_prefiller.h",
77+
"image.h",
78+
"multimodal_runner.h",
79+
],
80+
visibility = [
81+
"@EXECUTORCH_CLIENTS",
82+
],
83+
exported_deps = [
84+
":text_decoder_runner" + aten_suffix,
85+
":text_prefiller" + aten_suffix,
86+
":text_token_generator" + aten_suffix,
87+
],
88+
)

0 commit comments

Comments
 (0)